xref: /freebsd/contrib/expat/tests/basic_tests.c (revision 9cc9b8b372842b9a941d235c5e9949a214e5284f)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2024-2026 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Copyright (c) 2026      Francesco Bertolaccini
24    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez@gmail.com>
25    Licensed under the MIT license:
26 
27    Permission is  hereby granted,  free of charge,  to any  person obtaining
28    a  copy  of  this  software   and  associated  documentation  files  (the
29    "Software"),  to  deal in  the  Software  without restriction,  including
30    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
31    distribute, sublicense, and/or sell copies of the Software, and to permit
32    persons  to whom  the Software  is  furnished to  do so,  subject to  the
33    following conditions:
34 
35    The above copyright  notice and this permission notice  shall be included
36    in all copies or substantial portions of the Software.
37 
38    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
39    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
40    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
41    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
42    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
43    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
44    USE OR OTHER DEALINGS IN THE SOFTWARE.
45 */
46 
47 #if defined(NDEBUG)
48 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
49 #endif
50 
51 #include <assert.h>
52 
53 #include <stdio.h>
54 #include <string.h>
55 #include <time.h>
56 
57 #if ! defined(__cplusplus)
58 #  include <stdbool.h>
59 #endif
60 
61 #include "expat_config.h"
62 
63 #include "expat.h"
64 #include "internal.h"
65 #include "minicheck.h"
66 #include "structdata.h"
67 #include "common.h"
68 #include "dummy.h"
69 #include "handlers.h"
70 #include "siphash.h"
71 #include "basic_tests.h"
72 
/* Setup helper for the basic tests: create a parser with a NULL encoding
 * argument, publish it through the shared g_parser variable, and report
 * a failure if creation did not yield a parser.
 */
static void
basic_setup(void) {
  XML_Parser fresh = XML_ParserCreate(NULL);

  g_parser = fresh;
  if (! fresh) {
    fail("Parser not created.");
  }
}
79 
80 /*
81  * Character & encoding tests.
82  */
83 
START_TEST(test_nul_byte) {
  /* A literal NUL byte inside otherwise plain (US-ASCII) document content
   * must be rejected with XML_ERROR_INVALID_TOKEN.  The text is an array
   * rather than a pointer so that sizeof() sees past the embedded NUL.
   */
  char text[] = "<doc>\0</doc>";
  const int text_len = (int)(sizeof(text) - 1); /* drop the terminator */
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, text_len, XML_TRUE);

  if (status == XML_STATUS_OK) {
    fail("Parser did not report error on NUL-byte.");
  }
  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) {
    xml_failure(g_parser);
  }
}
END_TEST
95 
START_TEST(test_u0000_char) {
  /* A numeric character reference to code point zero ("&#0;") must be
   * rejected with XML_ERROR_BAD_CHAR_REF.
   */
  const char *const text = "<doc>&#0;</doc>";

  expect_failure(text, XML_ERROR_BAD_CHAR_REF,
                 "Parser did not report error on NUL-byte.");
}
END_TEST
102 
START_TEST(test_siphash_self) {
  /* Run the self-check exposed by the SipHash code as sip24_valid(). */
  if (sip24_valid()) {
    return;
  }
  fail("SipHash self-test failed");
}
END_TEST
108 
START_TEST(test_siphash_spec) {
  /* Reference vector taken from
   * https://131002.net/siphash/siphash.pdf (page 19, "Test values")
   */
  const char key_bytes[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
                           "\x0a\x0b\x0c\x0d\x0e\x0f";
  const char input[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
                       "\x0a\x0b\x0c\x0d\x0e";
  const size_t input_len = sizeof(input) - 1; /* without the terminator */
  const size_t first_part = 4;
  const uint64_t want = SIP_ULL(0xa129ca61U, 0x49be45e5U);
  struct siphash hasher;
  struct sipkey key;
  uint64_t got;

  sip_tokey(&key, key_bytes);
  sip24_init(&hasher, &key);

  /* Feed the message in two pieces to cover input spread across calls */
  sip24_update(&hasher, input, first_part);
  sip24_update(&hasher, input + first_part, input_len - first_part);

  /* A zero-length update is covered as well */
  sip24_update(&hasher, input, 0);

  got = sip24_final(&hasher);
  if (got != want) {
    fail("sip24_final failed spec test\n");
  }

  /* The one-shot wrapper must agree with the incremental interface */
  got = siphash24(input, input_len, &key);
  if (got != want) {
    fail("siphash24 failed spec test\n");
  }
}
END_TEST
137 
START_TEST(test_bom_utf8) {
  /* Smoke test only: a document that begins with a UTF-8 BOM (written in
   * octal below) must not make the parser fall over.
   */
  const char *const doc = "\357\273\277<e/>";
  const int doc_len = (int)strlen(doc);
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
147 
START_TEST(test_bom_utf16_be) {
  /* BOM bytes FE FF followed by "<e/>" with the zero byte first in each
   * pair.  An array is used so sizeof() counts the embedded NUL bytes.
   */
  char text[] = "\376\377\0<\0e\0/\0>";
  const int text_len = (int)(sizeof(text) - 1);
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, text_len, XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
156 
START_TEST(test_bom_utf16_le) {
  /* BOM bytes FF FE followed by "<e/>" with the zero byte second in each
   * pair.  An array is used so sizeof() counts the embedded NUL bytes.
   */
  char text[] = "\377\376<\0e\0/\0>\0";
  const int text_len = (int)(sizeof(text) - 1);
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, text_len, XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
165 
START_TEST(test_nobom_utf16_le) {
  /* Same byte layout as the little-endian BOM test, but starting with a
   * space character instead of a BOM.
   */
  char text[] = " \0<\0e\0/\0>\0";
  const int text_len = (int)(sizeof(text) - 1);

  /* TODO: with just the first byte, we can't tell the difference between
   * UTF-16-LE and UTF-8, so the check is skipped when g_chunkSize is 1
   * to avoid the failure for now.
   */
  if (g_chunkSize != 1) {
    const enum XML_Status status
        = _XML_Parse_SINGLE_BYTES(g_parser, text, text_len, XML_TRUE);

    if (status == XML_STATUS_ERROR) {
      xml_failure(g_parser);
    }
  }
}
END_TEST
180 
START_TEST(test_hash_collision) {
  /* Full coverage of the lookup routine needs a hash collision, even
   * though one can only be observed through breakpoint debugging or
   * coverage statistics.  The salt below causes a collision on machines
   * with a 64-bit long type; others will have to experiment.  The full
   * coverage tests invoked from qa.sh usually provide a collision, but
   * not always, so this test acts as insurance.
   */
  const unsigned long colliding_salt
      = (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U);
  const char *const doc
      = "<doc>\n"
        "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
        "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
        "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
        "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
        "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
        "<d8>This triggers the table growth and collides with b2</d8>\n"
        "</doc>\n";
  enum XML_Status status;

  /* The salt has to be in place before any input is parsed */
  XML_SetHashSalt(g_parser, colliding_salt);
  status = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
208 
START_TEST(test_hash_salt_setter) {
  /* Exercises the argument and state checks of XML_SetHashSalt16Bytes
   * on a private parser (not g_parser), which is freed at the end.
   */
  const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
                               '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
  XML_Parser parser = XML_ParserCreate(NULL);

  // Without a parser, the checks below would fail (or pass) for the wrong
  // reason; report the real problem instead, as basic_setup() does.
  if (parser == NULL)
    fail("Parser not created.");

  // NULL parser should be rejected
  assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);

  // NULL entropy should be rejected
  assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);

  // Setting should be allowed more than once
  assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
  assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);

  // But not after parsing has started (an empty, non-final chunk is
  // enough to count as "started" for this check)
  assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
              == XML_STATUS_OK);
  assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);

  XML_ParserFree(parser);
}
END_TEST
232 
/* Regression test for SF bug #491986: Danish letters in an ISO-8859-1
 * document are checked against their expected character data.
 */
START_TEST(test_danish_latin1) {
  const char *const doc = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
                          "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
  /* One literal per build flavour: wide code units when XML_UNICODE is
   * defined, UTF-8 bytes otherwise.
   */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
#else
      = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
#endif

  run_character_check(doc, want);
}
END_TEST
247 
/* Regression test for SF bug #514281: French accented letters written as
 * hexadecimal character references in an ISO-8859-1 document.
 */
START_TEST(test_french_charref_hexidecimal) {
  const char *const doc = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
                          "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
  /* Wide code units under XML_UNICODE, UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

  run_character_check(doc, want);
}
END_TEST
261 
START_TEST(test_french_charref_decimal) {
  /* Same letters as the hexadecimal variant, written as decimal
   * character references.
   */
  const char *const doc = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
                          "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
  /* Wide code units under XML_UNICODE, UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

  run_character_check(doc, want);
}
END_TEST
274 
START_TEST(test_french_latin1) {
  /* The same French letters, this time as raw ISO-8859-1 bytes in the
   * document body.
   */
  const char *const doc = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
                          "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
  /* Wide code units under XML_UNICODE, UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

  run_character_check(doc, want);
}
END_TEST
287 
START_TEST(test_french_utf8) {
  /* A single two-byte UTF-8 sequence (C3 A9) in a document that declares
   * utf-8 as its encoding.
   */
  const char *const doc = "<?xml version='1.0' encoding='utf-8'?>\n"
                          "<doc>\xC3\xA9</doc>";
  /* Wide code unit under XML_UNICODE, UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("\x00e9");
#else
      = XCS("\xC3\xA9");
#endif

  run_character_check(doc, want);
}
END_TEST
299 
/* Regression test for SF bug #600479: the three-byte UTF-8 sequence
   EF BA BF must be accepted as character data.

   XXX There should be a test that exercises all legal XML Unicode
   characters as PCDATA and attribute value content, and XML Name
   characters as part of element and attribute names.
*/
START_TEST(test_utf8_false_rejection) {
  const char *const doc = "<doc>\xEF\xBA\xBF</doc>";
  /* Wide code unit under XML_UNICODE, the same UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("\xfebf");
#else
      = XCS("\xEF\xBA\xBF");
#endif

  run_character_check(doc, want);
}
END_TEST
315 
/* Regression test for SF bug #477667.
   Every 8-bit byte value followed by 7-bit characters must be reported
   as an invalid token rather than being mistaken for a valid UTF-8
   sequence.
*/
START_TEST(test_illegal_utf8) {
  char doc[100];
  char message[100];
  int lead;

  for (lead = 0x80; lead < 0x100; lead++) {
    enum XML_Status status;

    /* Build "<e>", the byte under test, then plain "cd</e>" */
    snprintf(doc, sizeof(doc), "<e>%ccd</e>", lead);
    status
        = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
    if (status == XML_STATUS_OK) {
      snprintf(message, sizeof(message),
               "expected token error for '%c' (ordinal %d) in UTF-8 text",
               lead, lead);
      fail(message);
    } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) {
      xml_failure(g_parser);
    }
    /* The same parser serves every iteration, so start it over. */
    XML_ParserReset(g_parser, NULL);
  }
}
END_TEST
340 
/* Examples, not masks: */
#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
#define UTF8_FOLLOW "\xbf" /* 0b10111111 */

START_TEST(test_utf8_auto_align) {
  /* Each case pairs an input string with the distance (in chars, zero or
   * negative) that the end pointer is expected to move when
   * _INTERNAL_trim_to_complete_utf8_characters() is applied to it.
   */
  struct TestCase {
    ptrdiff_t expectedMovementInChars;
    const char *input;
  };

  struct TestCase cases[] = {
      {0, ""},

      {0, UTF8_LEAD_1},

      {-1, UTF8_LEAD_2},
      {0, UTF8_LEAD_2 UTF8_FOLLOW},

      {-1, UTF8_LEAD_3},
      {-2, UTF8_LEAD_3 UTF8_FOLLOW},
      {0, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},

      {-1, UTF8_LEAD_4},
      {-2, UTF8_LEAD_4 UTF8_FOLLOW},
      {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
      {0, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);
  bool all_ok = true;
  size_t idx;

  for (idx = 0; idx < case_count; idx++) {
    const struct TestCase *const tc = &cases[idx];
    const size_t input_len = strlen(tc->input);
    const char *const initial_end = tc->input + input_len;
    const char *end = initial_end;
    ptrdiff_t moved;
    size_t pos;

    _INTERNAL_trim_to_complete_utf8_characters(tc->input, &end);
    moved = end - initial_end;
    if (moved == tc->expectedMovementInChars) {
      continue;
    }

    /* Keep going after a mismatch so every failing case gets printed,
     * including a hex dump of the offending input.
     */
    all_ok = false;
    printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
           ", actually moved by %2d chars: \"",
           (unsigned)(idx + 1), (int)tc->expectedMovementInChars, (int)moved);
    for (pos = 0; pos < input_len; pos++) {
      printf("\\x%02x", (unsigned char)tc->input[pos]);
    }
    printf("\"\n");
  }

  if (! all_ok) {
    fail("UTF-8 auto-alignment is not bullet-proof\n");
  }
}
END_TEST
401 
START_TEST(test_utf16) {
  /* The byte array below spells out, two bytes per character with the
   * zero byte first:
   *
   *   <?xml version="1.0" encoding="UTF-16"?>
   *   <doc a='123'>some {A} text</doc>
   *
   * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
   */
  char text[]
      = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
        "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
        "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
        "\000'\000?\000>\000\n"
        "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
        "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
        "<\000/\000d\000o\000c\000>";
  const int text_len = (int)(sizeof(text) - 1);
  /* Wide code units under XML_UNICODE, UTF-8 bytes otherwise */
  const XML_Char *const want
#ifdef XML_UNICODE
      = XCS("some \xff21 text");
#else
      = XCS("some \357\274\241 text");
#endif
  CharData collected;
  enum XML_Status status;

  /* Gather all character data into `collected` while parsing */
  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);

  status = _XML_Parse_SINGLE_BYTES(g_parser, text, text_len, XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);
}
END_TEST
432 
START_TEST(test_utf16_le_epilog_newline) {
  /* The document is handed over in two pieces: the first
   * first_chunk_bytes bytes as a non-final chunk, the remainder as the
   * final chunk.
   */
  char text[] = "\xFF\xFE"                  /* BOM */
                "<\000e\000/\000>\000"      /* document element */
                "\r\000\n\000\r\000\n\000"; /* epilog */
  const size_t total_bytes = sizeof(text) - 1;
  const unsigned int first_chunk_bytes = 17;
  enum XML_Status status;

  /* The split point has to leave something for the second chunk */
  if (first_chunk_bytes >= total_bytes) {
    fail("bad value of first_chunk_bytes");
  }

  status = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes,
                                   XML_FALSE);
  if (status != XML_STATUS_ERROR) {
    status = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
                                     (int)(total_bytes - first_chunk_bytes),
                                     XML_TRUE);
    if (status == XML_STATUS_ERROR) {
      xml_failure(g_parser);
    }
  } else {
    xml_failure(g_parser);
  }
}
END_TEST
454 
/* A document that declares utf-16 in its XML declaration but is not
 * actually UTF-16 must be faulted with XML_ERROR_INCORRECT_ENCODING.
 */
START_TEST(test_not_utf16) {
  const char *const doc = "<?xml version='1.0' encoding='utf-16'?>"
                          "<doc>Hi</doc>";

  /* Installing an XML declaration handler provokes the appropriate
   * code paths.
   */
  XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
  expect_failure(doc, XML_ERROR_INCORRECT_ENCODING,
                 "UTF-16 declared in UTF-8 not faulted");
}
END_TEST
466 
/* Setting an encoding name that is not known must make the subsequent
 * parse fail with XML_ERROR_UNKNOWN_ENCODING.
 */
START_TEST(test_bad_encoding) {
  const char *const doc = "<doc>Hi</doc>";
  /* Setting the name itself is expected to succeed; the fault is only
   * expected once parsing is attempted.
   */
  const enum XML_Status set_status
      = XML_SetEncoding(g_parser, XCS("unknown-encoding"));

  if (! set_status) {
    fail("XML_SetEncoding failed");
  }
  expect_failure(doc, XML_ERROR_UNKNOWN_ENCODING,
                 "Unknown encoding not faulted");
}
END_TEST
477 
/* Regression test for SF bug #481609, #774028.
 * The same umlaut sequence (raw ISO-8859-1 bytes, decimal references and
 * hexadecimal references) appears in both an attribute value and the
 * element content, and is checked in each place, first without and then
 * with a default handler installed.
 */
START_TEST(test_latin1_umlauts) {
  const char *const doc
      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
        "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
  const XML_Char *const want
#ifdef XML_UNICODE
      /* Expected results in UTF-16 */
      = XCS("\x00e4 \x00f6 \x00fc ")
        XCS("\x00e4 \x00f6 \x00fc ")
        XCS("\x00e4 \x00f6 \x00fc >");
#else
      /* Expected results in UTF-8 */
      = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
        XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
        XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
#endif

  /* First round: no default handler */
  run_character_check(doc, want);

  XML_ParserReset(g_parser, NULL);
  run_attribute_check(doc, want);

  /* Second round: the default handler is installed again after every
   * reset, before the check is run.
   */
  XML_ParserReset(g_parser, NULL);
  XML_SetDefaultHandler(g_parser, dummy_default_handler);
  run_character_check(doc, want);

  XML_ParserReset(g_parser, NULL);
  XML_SetDefaultHandler(g_parser, dummy_default_handler);
  run_attribute_check(doc, want);
}
END_TEST
506 
/* An element name containing a 4-byte UTF-8 character must be rejected
 * with XML_ERROR_INVALID_TOKEN.
 */
START_TEST(test_long_utf8_character) {
  const char *const doc
      = "<?xml version='1.0' encoding='utf-8'?>\n"
        /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
        "<do\xf0\x90\x80\x80/>";

  expect_failure(doc, XML_ERROR_INVALID_TOKEN,
                 "4-byte UTF-8 character in element name not faulted");
}
END_TEST
517 
/* A Latin-1 attribute value that is too long to be converted in one go
 * must still be converted correctly.  The value is 1023 ASCII letters
 * followed by one non-ASCII byte.
 */
START_TEST(test_long_latin1_attribute) {
  const char *const doc
      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc att='"
        /* 64 characters per line */
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
        /* Last character splits across a buffer boundary */
        "\xe4'>\n</doc>";

  /* Same letters as above; only the final character differs between the
   * XML_UNICODE build (wide code unit) and the default build (UTF-8).
   */
  const XML_Char *const want =
      /* 64 characters per line */
      /* clang-format off */
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
  /* clang-format on */
#ifdef XML_UNICODE
        XCS("\x00e4");
#else
        XCS("\xc3\xa4");
#endif

  run_attribute_check(doc, want);
}
END_TEST
574 
/* A US-ASCII attribute value that is too long to be converted in one go
 * must still be converted correctly.  The value is 1024 letters followed
 * by the five digits "01234".
 */
START_TEST(test_long_ascii_attribute) {
  const char *const doc
      = "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<doc att='"
        /* 64 characters per line */
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
        "01234'>\n</doc>";
  /* The attribute value exactly as written in the document */
  const XML_Char *const want =
      /* 64 characters per line */
      /* clang-format off */
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("01234");
  /* clang-format on */

  run_attribute_check(doc, want);
}
END_TEST
625 
/* Regression test #1 for SF bug #653180: after a complete parse of a
 * four-line document, the current line number must be 4.
 */
START_TEST(test_line_number_after_parse) {
  const char *const doc = "<tag>\n"
                          "\n"
                          "\n</tag>";
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
  char message[100];
  XML_Size line;

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }

  line = XML_GetCurrentLineNumber(g_parser);
  if (line == 4) {
    return;
  }
  snprintf(message, sizeof(message),
           "expected 4 lines, saw %" XML_FMT_INT_MOD "u", line);
  fail(message);
}
END_TEST
645 
/* Regression test #2 for SF bug #653180: after a complete parse of an
 * 11-character single-line document, the current column number must
 * be 11.
 */
START_TEST(test_column_number_after_parse) {
  const char *const doc = "<tag></tag>";
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
  char message[100];
  XML_Size column;

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }

  column = XML_GetCurrentColumnNumber(g_parser);
  if (column == 11) {
    return;
  }
  snprintf(message, sizeof(message),
           "expected 11 columns, saw %" XML_FMT_INT_MOD "u", column);
  fail(message);
}
END_TEST
663 
/* Regression test #3 for SF bug #653180: the start and end element
 * handlers record one entry per tag, and the recorded entries are
 * compared against the table below.  The document mixes three different
 * end-of-line conventions.
 */
START_TEST(test_line_and_column_numbers_inside_handlers) {
  const char *const doc = "<a>\n"      /* Unix end-of-line */
                          "  <b>\r\n"  /* Windows end-of-line */
                          "    <c/>\r" /* Mac OS end-of-line */
                          "  </b>\n"
                          "  <d>\n"
                          "    <f/>\n"
                          "  </d>\n"
                          "</a>";
  /* One row per expected handler call, in document order.  The two
   * numbers look like (column, line) -- NOTE(review): confirm against
   * the StructDataEntry definition.
   */
  const StructDataEntry expected[] = {
      {XCS("a"), 0, 1, STRUCT_START_TAG},
      {XCS("b"), 2, 2, STRUCT_START_TAG},
      {XCS("c"), 4, 3, STRUCT_START_TAG},
      {XCS("c"), 8, 3, STRUCT_END_TAG},
      {XCS("b"), 2, 4, STRUCT_END_TAG},
      {XCS("d"), 2, 5, STRUCT_START_TAG},
      {XCS("f"), 4, 6, STRUCT_START_TAG},
      {XCS("f"), 8, 6, STRUCT_END_TAG},
      {XCS("d"), 2, 7, STRUCT_END_TAG},
      {XCS("a"), 0, 8, STRUCT_END_TAG},
  };
  const int expected_count = (int)(sizeof(expected) / sizeof(expected[0]));
  StructData recorded;
  enum XML_Status status;

  StructData_Init(&recorded);
  XML_SetUserData(g_parser, &recorded);
  XML_SetStartElementHandler(g_parser, start_element_event_handler2);
  XML_SetEndElementHandler(g_parser, end_element_event_handler2);

  status = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }

  StructData_CheckItems(&recorded, expected, expected_count);
  StructData_Dispose(&recorded);
}
END_TEST
695 
/* Regression test #4 for SF bug #653180: when parsing stops with an
 * error on the third line, the current line number must be 3.
 */
START_TEST(test_line_number_after_error) {
  const char *const doc = "<a>\n"
                          "  <b>\n"
                          "  </a>"; /* missing </b> */
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, (int)strlen(doc), XML_TRUE);
  char message[100];
  XML_Size line;

  if (status != XML_STATUS_ERROR) {
    fail("Expected a parse error");
  }

  line = XML_GetCurrentLineNumber(g_parser);
  if (line == 3) {
    return;
  }
  snprintf(message, sizeof(message),
           "expected 3 lines, saw %" XML_FMT_INT_MOD "u", line);
  fail(message);
}
END_TEST
715 
716 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)717 START_TEST(test_column_number_after_error) {
718   const char *text = "<a>\n"
719                      "  <b>\n"
720                      "  </a>"; /* missing </b> */
721   XML_Size colno;
722   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
723       != XML_STATUS_ERROR)
724     fail("Expected a parse error");
725 
726   colno = XML_GetCurrentColumnNumber(g_parser);
727   if (colno != 4) {
728     char buffer[100];
729     snprintf(buffer, sizeof(buffer),
730              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
731     fail(buffer);
732   }
733 }
734 END_TEST
735 
/* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines) {
  /* Parse a single input line that is longer than INIT_DATA_BUF_SIZE
     characters (defined to be 1024 in xmlparse.c).  The document is
     assembled at compile time from string literals, so there is no
     buffer-filling code in the test that could itself be buggy.
  */
#define LONG_LINE_CHUNK_64                                                     \
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
#define LONG_LINE_CHUNK_256                                                    \
  LONG_LINE_CHUNK_64 LONG_LINE_CHUNK_64 LONG_LINE_CHUNK_64 LONG_LINE_CHUNK_64
  /* 17 chunks of 64 characters: more than 1024 characters on the line. */
  const char *text = "<e>"                /* opening tag */
      LONG_LINE_CHUNK_256                 /* chunks 1-4 */
      LONG_LINE_CHUNK_256                 /* chunks 5-8 */
      LONG_LINE_CHUNK_256                 /* chunks 9-12 */
      LONG_LINE_CHUNK_256                 /* chunks 13-16 */
      LONG_LINE_CHUNK_64                  /* chunk 17 */
      "</e>";
#undef LONG_LINE_CHUNK_256
#undef LONG_LINE_CHUNK_64

  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
770 
771 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)772 START_TEST(test_really_long_encoded_lines) {
773   /* As above, except that we want to provoke an output buffer
774    * overflow with a non-trivial encoding.  For this we need to pass
775    * the whole cdata in one go, not byte-by-byte.
776    */
777   void *buffer;
778   const char *text
779       = "<?xml version='1.0' encoding='iso-8859-1'?>"
780         "<e>"
781         /* 64 chars */
782         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
783         /* until we have at least 1024 characters on the line: */
784         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
798         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
799         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
800         "</e>";
801   int parse_len = (int)strlen(text);
802 
803   /* Need a cdata handler to provoke the code path we want to test */
804   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
805   buffer = XML_GetBuffer(g_parser, parse_len);
806   if (buffer == NULL)
807     fail("Could not allocate parse buffer");
808   assert(buffer != NULL);
809   memcpy(buffer, text, parse_len);
810   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
811     xml_failure(g_parser);
812 }
813 END_TEST
814 
815 /*
816  * Element event tests.
817  */
818 
/* Check the events delivered to the end element handler: the data that
   end_element_event_handler accumulates in the CharData user data must
   match "/c/b/f/d/a" for the nested document below. */
START_TEST(test_end_element_events) {
  const XML_Char *expected = XCS("/c/b/f/d/a");
  const char *text = "<a><b><c/></b><d><f/></d></a>";
  CharData storage;

  CharData_Init(&storage);
  XML_SetEndElementHandler(g_parser, end_element_event_handler);
  XML_SetUserData(g_parser, &storage);

  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST
833 
834 /*
835  * Attribute tests.
836  */
837 
/* Helper used by the attribute whitespace tests below.

   Return true (1) if the whitespace in s has been normalized according
   to the rules for attribute value normalization, false (0) otherwise.

   A tab, newline or carriage return anywhere in s always counts as
   unnormalized.  When 'is_cdata' is zero, leading spaces, trailing
   spaces and runs of more than one space are rejected as well; CDATA
   attributes are exempt from that, since they don't need to have
   multiple whitespace characters collapsed to a single space.
   (Section 3.3.3 of the recommendation.)
*/
static int
is_whitespace_normalized(const XML_Char *s, int is_cdata) {
  int pending_spaces = 0; /* spaces seen since the last non-space char */
  int seen_non_space = 0; /* becomes 1 after the first non-space char */

  for (; *s; ++s) {
    const XML_Char ch = *s;

    if (ch == XCS('\t') || ch == XCS('\n') || ch == XCS('\r')) {
      return 0;
    }
    if (ch == XCS(' ')) {
      ++pending_spaces;
      continue;
    }

    /* Ordinary character: judge the run of spaces that preceded it.  No
       spaces are allowed at the very start, one between words. */
    if (! is_cdata) {
      const int allowed = seen_non_space ? 1 : 0;
      if (pending_spaces > allowed) {
        return 0;
      }
    }
    seen_non_space = 1;
    pending_spaces = 0;
  }

  /* Anything still pending here is trailing blanks. */
  if (! is_cdata && pending_spaces) {
    return 0;
  }
  return 1;
}
872 
/* Check the attribute whitespace checker itself, so that the tests
   relying on is_whitespace_normalized() can trust its verdicts. */
START_TEST(test_helper_is_whitespace_normalized) {
  /* Single spaces between words are fine for every attribute type. */
  assert(is_whitespace_normalized(XCS("abc"), 0));
  assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
  assert(is_whitespace_normalized(XCS("abc"), 1));
  assert(is_whitespace_normalized(XCS("abc def ghi"), 1));

  /* Non-CDATA: leading, doubled, trailing and lone spaces are rejected. */
  assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
  assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
  assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
  assert(! is_whitespace_normalized(XCS(" "), 0));

  /* CDATA: the very same strings are accepted. */
  assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
  assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
  assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
  assert(is_whitespace_normalized(XCS(" "), 1));

  /* Tab, newline and carriage return are rejected for both kinds. */
  assert(! is_whitespace_normalized(XCS("\t"), 0));
  assert(! is_whitespace_normalized(XCS("\n"), 0));
  assert(! is_whitespace_normalized(XCS("\r"), 0));
  assert(! is_whitespace_normalized(XCS("\t"), 1));
  assert(! is_whitespace_normalized(XCS("\n"), 1));
  assert(! is_whitespace_normalized(XCS("\r"), 1));
  assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
}
END_TEST
896 
/* Start element handler: fail the test if the value of any attribute
   named "attr", "ents" or "refs" is not whitespace-normalized according
   to the non-CDATA rules of is_whitespace_normalized().

   atts is walked as name/value pairs up to the first NULL name;
   userData and name are not used.
*/
static void XMLCALL
check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
                                          const XML_Char **atts) {
  const XML_Char **pair;

  UNUSED_P(userData);
  UNUSED_P(name);

  for (pair = atts; pair[0] != NULL; pair += 2) {
    const XML_Char *attrname = pair[0];
    const XML_Char *value = pair[1];
    const int is_checked_attr = xcstrcmp(XCS("attr"), attrname) == 0
                                || xcstrcmp(XCS("ents"), attrname) == 0
                                || xcstrcmp(XCS("refs"), attrname) == 0;

    /* Other attributes, and values that are already normalized, need no
       further attention. */
    if (! is_checked_attr || is_whitespace_normalized(value, 0)) {
      continue;
    }

    char message[256];
    snprintf(message, sizeof(message),
             "attribute value not normalized: %" XML_FMT_STR
             "='%" XML_FMT_STR "'",
             attrname, value);
    fail(message);
  }
}
920 
/* Parse a document whose NMTOKENS, ENTITIES and IDREFS attribute values
   are padded with spaces, tabs and line breaks; the start element
   handler fails the test if any of them arrives unnormalized. */
START_TEST(test_attr_whitespace_normalization) {
  const char *text
      = "<!DOCTYPE doc [\n"
        "  <!ATTLIST doc\n"
        "            attr NMTOKENS #REQUIRED\n"
        "            ents ENTITIES #REQUIRED\n"
        "            refs IDREFS   #REQUIRED>\n"
        "]>\n"
        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
        "     ents=' ent-1   \t\r\n"
        "            ent-2  ' >\n"
        "  <e id='id-1'/>\n"
        "  <e id='id-2'/>\n"
        "</doc>";
  enum XML_Status status;

  XML_SetStartElementHandler(g_parser,
                             check_attr_contains_normalized_whitespace);
  status = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
943 
944 /*
945  * XML declaration tests.
946  */
947 
/* An XML declaration that follows a leading newline must be reported
   as XML_ERROR_MISPLACED_XML_PI. */
START_TEST(test_xmldecl_misplaced) {
  const char *text = "\n"
                     "<?xml version='1.0'?>\n"
                     "<a/>";

  expect_failure(text, XML_ERROR_MISPLACED_XML_PI,
                 "failed to report misplaced XML declaration");
}
END_TEST
956 
/* An XML declaration with stray bytes (\xc3\xa7) after the version
   must be reported as XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_invalid) {
  const char *text = "<?xml version='1.0' \xc3\xa7?>\n<doc/>";

  expect_failure(text, XML_ERROR_XML_DECL,
                 "Failed to report invalid XML declaration");
}
END_TEST
962 
/* An XML declaration with a value but no attribute name in front of it
   must be reported as XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_missing_attr) {
  const char *text = "<?xml ='1.0'?>\n<doc/>\n";

  expect_failure(text, XML_ERROR_XML_DECL,
                 "Failed to report missing XML declaration attribute");
}
END_TEST
968 
/* An XML declaration whose 'standalone' attribute has no value must be
   reported as XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_missing_value) {
  const char *text = "<?xml version='1.0' encoding='us-ascii' standalone?>\n"
                     "<doc/>";

  expect_failure(text, XML_ERROR_XML_DECL,
                 "Failed to report missing attribute value");
}
END_TEST
976 
/* Regression test for SF bug #584832: a document that declares an
   encoding served by UnknownEncodingHandler and references an internal
   entity in an attribute value must parse without error. */
START_TEST(test_unknown_encoding_internal_entity) {
  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
                     "<test a='&foo;'/>";
  enum XML_Status status;

  XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
  status = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
989 
/* Test unrecognised encoding handler: with UnrecognisedEncodingHandler
   installed, the same document as in the unknown encoding test above
   has to be rejected. */
START_TEST(test_unrecognised_encoding_internal_entity) {
  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
                     "<test a='&foo;'/>";
  enum XML_Status status;

  XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
  status = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  if (status != XML_STATUS_ERROR) {
    fail("Unrecognised encoding not rejected");
  }
}
END_TEST
1002 
/* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding) {
  /* The one character the external entity is expected to deliver
     (U+00E9), spelled for the XML_Char flavour in use. */
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e9");
#else
  const XML_Char *expected = XCS("\xc3\xa9");
#endif
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* The entity text claims an unsupported encoding in its declaration,
     but it is really UTF-8, which we tell Expat using XML_SetEncoding().
  */
  ExtTest test_data
      = {"<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};

  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  run_ext_character_check(text, &test_data, expected);
}
END_TEST
1024 
/* Test external entities with no handler: only a default handler is
   installed, and the character check expects no character data at all
   from the reference to &en;. */
START_TEST(test_ext_entity_no_handler) {
  const XML_Char *expected = XCS("");
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";

  XML_SetDefaultHandler(g_parser, dummy_default_handler);
  run_character_check(text, expected);
}
END_TEST
1036 
/* Test UTF-8 BOM is accepted at the start of an external entity */
START_TEST(test_ext_entity_set_bom) {
  /* The one character the external entity is expected to deliver
     (U+00E9), spelled for the XML_Char flavour in use. */
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e9");
#else
  const XML_Char *expected = XCS("\xc3\xa9");
#endif
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* Entity text: BOM, then a text declaration, then one UTF-8 char. */
  ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
                       "<?xml encoding='iso-8859-3'?>"
                       "\xC3\xA9",
                       XCS("utf-8"), NULL};

  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  run_ext_character_check(text, &test_data, expected);
}
END_TEST
1057 
/* Test that bad encodings are faulted: the fault description handed to
   external_entity_faulter expects XML_ERROR_UNKNOWN_ENCODING, and the
   outer parse must end in XML_ERROR_EXTERNAL_ENTITY_HANDLING. */
START_TEST(test_ext_entity_bad_encoding) {
  ExtFaults fault
      = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
         XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";

  XML_SetUserData(g_parser, &fault);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Bad encoding should not have been accepted");
}
END_TEST
1074 
/* Try handing an invalid encoding to an external entity parser; here
   the external entity is the external DTD subset ('foo'), read because
   parameter entity parsing is switched on. */
START_TEST(test_ext_entity_bad_encoding_2) {
  ExtFaults fault
      = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
         XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
                     "<doc>&entity;</doc>";

  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, &fault);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Bad encoding not faulted in external entity handler");
}
END_TEST
1091 
/* Test that no error is reported for unknown entities if we don't
   read an external subset.  This was fixed in Expat 1.95.5.
   (The document names an external subset, but nothing is set up here
   to load it.)
*/
START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
                     "<doc>&entity;</doc>";
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
1104 
/* Test that an error is reported for unknown entities if we don't
   have an external subset (the document has no DTD at all).
*/
START_TEST(test_wfc_undeclared_entity_no_external_subset) {
  const char *text = "<doc>&entity;</doc>";

  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
                 "Parser did not report undefined entity w/out a DTD.");
}
END_TEST
1113 
/* Test that an error is reported for unknown entities if we don't
   read an external subset, but the document has been declared
   standalone='yes'.
*/
START_TEST(test_wfc_undeclared_entity_standalone) {
  const char *text
      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
  const char *message = "Parser did not report undefined entity (standalone).";

  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, message);
}
END_TEST
1127 
/* Test that an error is reported for unknown entities if we have read
   an external subset, and standalone is true.  The external subset
   supplied through test_data does not declare 'entity'.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  const char *text
      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";

  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  XML_SetUserData(g_parser, &test_data);
  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
                 "Parser did not report undefined entity (external DTD).");
}
END_TEST
1145 
/* Test that external entity handling is not done if the parsing flag
 * is set to UNLESS_STANDALONE: the external subset in test_data would
 * declare 'entity', yet the standalone document must still fail with
 * XML_ERROR_UNDEFINED_ENTITY.
 */
START_TEST(test_entity_with_external_subset_unless_standalone) {
  ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
  const char *text
      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";

  XML_SetParamEntityParsing(g_parser,
                            XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  XML_SetUserData(g_parser, &test_data);
  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
                 "Parser did not report undefined entity");
}
END_TEST
1164 
/* Test that no error is reported for unknown entities if we have read
   an external subset, and standalone is false.  The character check
   expects no character data from the undeclared &entity; reference.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset) {
  const XML_Char *expected = XCS("");
  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
                     "<doc>&entity;</doc>";

  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  run_ext_character_check(text, &test_data, expected);
}
END_TEST
1179 
/* Test that an error is reported if our NotStandalone handler fails.
   The check is made twice: first with external entity handling set up,
   then again on a reset parser with only the NotStandalone handler. */
START_TEST(test_not_standalone_handler_reject) {
  const char *message = "NotStandalone handler failed to reject";
  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
                     "<doc>&entity;</doc>";

  /* Round 1: external entity handling enabled */
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetUserData(g_parser, &test_data);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
  expect_failure(text, XML_ERROR_NOT_STANDALONE, message);

  /* Round 2: try again but without external entity handling */
  XML_ParserReset(g_parser, NULL);
  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
  expect_failure(text, XML_ERROR_NOT_STANDALONE, message);
}
END_TEST
1201 
/* Test that no error is reported if our NotStandalone handler succeeds.
   The check is made twice: first with an external entity handler, then
   again on a reset parser with only the NotStandalone handler. */
START_TEST(test_not_standalone_handler_accept) {
  const XML_Char *expected = XCS("");
  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
                     "<doc>&entity;</doc>";

  /* Round 1: external entity handler installed */
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
  run_ext_character_check(text, &test_data, expected);

  /* Round 2: repeat without the external entity handler */
  XML_ParserReset(g_parser, NULL);
  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
  run_character_check(text, expected);
}
END_TEST
1220 
/* Check that a general entity referenced from an element nested below
   the root element (<t2> inside <t1>) parses successfully.  The test
   uses a parser of its own rather than g_parser. */
START_TEST(test_entity_start_tag_level_greater_than_one) {
  const char *const text = "<!DOCTYPE t1 [\n"
                           "  <!ENTITY e1 'hello'>\n"
                           "]>\n"
                           "<t1>\n"
                           "  <t2>&e1;</t2>\n"
                           "</t1>\n";

  XML_Parser parser = XML_ParserCreate(NULL);
  /* Fail with a clear message if the parser could not be created,
     instead of handing NULL to the parse call below. */
  assert_true(parser != NULL);

  assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                                      /*isFinal*/ XML_TRUE)
              == XML_STATUS_OK);
  XML_ParserFree(parser);
}
END_TEST
1236 
/* An entity whose replacement text refers back to itself (the '&' is
   written as &#38;) must be reported as XML_ERROR_RECURSIVE_ENTITY_REF
   when it is referenced from content. */
START_TEST(test_wfc_no_recursive_entity_refs) {
  const char *message = "Parser did not report recursive entity reference.";
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY entity '&#38;entity;'>\n"
                     "]>\n"
                     "<doc>&entity;</doc>";

  expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, message);
}
END_TEST
1247 
/* Check that entities which refer to each other in a cycle
   (e1 -> e2 -> e1, p1 -> p2 -> p1) are rejected with
   XML_ERROR_RECURSIVE_ENTITY_REF wherever the build supports the kind
   of entity involved, and accepted otherwise.  Each document is parsed
   twice with a fresh parser: once followed by XML_ParserReset before
   XML_ParserFree, and once with XML_ParserFree alone. */
START_TEST(test_no_indirectly_recursive_entity_refs) {
  struct TestCase {
    const char *doc;
    bool usesParameterEntities;
  };

  const struct TestCase cases[] = {
      // general entity + character data
      {"<!DOCTYPE a [\n"
       "  <!ENTITY e1 '&e2;'>\n"
       "  <!ENTITY e2 '&e1;'>\n"
       "]><a>&e2;</a>\n",
       false},

      // general entity + attribute value
      {"<!DOCTYPE a [\n"
       "  <!ENTITY e1 '&e2;'>\n"
       "  <!ENTITY e2 '&e1;'>\n"
       "]><a k1='&e2;' />\n",
       false},

      // parameter entity
      {"<!DOCTYPE doc [\n"
       "  <!ENTITY % p1 '&#37;p2;'>\n"
       "  <!ENTITY % p2 '&#37;p1;'>\n"
       "  <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
       "  %define_g;\n"
       "]>\n"
       "<doc/>\n",
       true},
  };
  const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};

  for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
    for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
         j++) {
      const XML_Bool reset_wanted = reset_or_not[j];
      const char *const doc = cases[i].doc;
      const bool usesParameterEntities = cases[i].usesParameterEntities;

      // Label the subtest with the actual reset setting, not the loop
      // index: reset_or_not[0] is XML_TRUE, so the index reads backwards.
      set_subtest("[%i,reset=%i] %s", (int)i, (int)reset_wanted, doc);

#ifdef XML_DTD // both GE and DTD
      const bool rejection_expected = true;
#elif XML_GE == 1 // GE but not DTD
      const bool rejection_expected = ! usesParameterEntities;
#else             // neither DTD nor GE
      const bool rejection_expected = false;
#endif

      XML_Parser parser = XML_ParserCreate(NULL);
      // Fail with a clear message if the parser could not be created.
      assert_true(parser != NULL);

#ifdef XML_DTD
      if (usesParameterEntities) {
        assert_true(
            XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
            == 1);
      }
#else
      UNUSED_P(usesParameterEntities);
#endif // XML_DTD

      const enum XML_Status status
          = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
                                    /*isFinal*/ XML_TRUE);

      if (rejection_expected) {
        assert_true(status == XML_STATUS_ERROR);
        assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
      } else {
        assert_true(status == XML_STATUS_OK);
      }

      if (reset_wanted) {
        // This covers free'ing of (eventually) all three open entity lists by
        // XML_ParserReset.
        XML_ParserReset(parser, NULL);
      }

      // This covers free'ing of (eventually) all three open entity lists by
      // XML_ParserFree (unless XML_ParserReset has already done that above).
      XML_ParserFree(parser);
    }
  }
}
END_TEST
1334 
/* Feed parameter entity declarations straight to an external entity
   parser and check which of them are rejected.  Every rejection has to
   carry the error code XML_ERROR_RECURSIVE_ENTITY_REF. */
START_TEST(test_recursive_external_parameter_entity_2) {
  struct TestCase {
    const char *doc;
    enum XML_Status expectedStatus;
  };

  struct TestCase cases[] = {
      /* direct self-reference */
      {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
      /* self-reference first, harmless redeclaration second */
      {"<!ENTITY % p1 '%p1;'>"
       "<!ENTITY % p1 'first declaration wins'>",
       XML_STATUS_ERROR},
      /* harmless declaration first, self-reference second */
      {"<!ENTITY % p1 'first declaration wins'>"
       "<!ENTITY % p1 '%p1;'>",
       XML_STATUS_OK},
      /* the '%' is written as a character reference */
      {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0; idx < case_count; idx++) {
    const struct TestCase *const current = &cases[idx];
    const enum XML_Status expectedStatus = current->expectedStatus;
    set_subtest("%s", current->doc);

    /* Each case gets a fresh parent parser and external entity parser. */
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);

    XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
    assert_true(ext_parser != NULL);

    const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
        ext_parser, current->doc, (int)strlen(current->doc), XML_TRUE);

    assert_true(actualStatus == expectedStatus);
    if (actualStatus != XML_STATUS_OK) {
      assert_true(XML_GetErrorCode(ext_parser)
                  == XML_ERROR_RECURSIVE_ENTITY_REF);
    }

    XML_ParserFree(ext_parser);
    XML_ParserFree(parser);
  }
}
END_TEST
1377 
/* Test incomplete external entities are faulted: for each truncated
   entity text the outer parse must end in
   XML_ERROR_EXTERNAL_ENTITY_HANDLING. */
START_TEST(test_ext_entity_invalid_parse) {
  const char *text = "<!DOCTYPE doc [\n"
                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* The table ends with an entry whose parse_text is NULL. */
  const ExtFaults faults[]
      = {{"<", "Incomplete element declaration not faulted", NULL,
          XML_ERROR_UNCLOSED_TOKEN},
         {"<\xe2\x82", /* First two bytes of a three-byte char */
          "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
         {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
          XML_ERROR_PARTIAL_CHAR},
         {NULL, NULL, NULL, XML_ERROR_NONE}};

  for (size_t idx = 0; faults[idx].parse_text != NULL; idx++) {
    const ExtFaults *const fault = &faults[idx];

    set_subtest("\"%s\"", fault->parse_text);
    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
    XML_SetUserData(g_parser, (void *)fault);
    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                   "Parser did not report external entity error");
    /* Start the next round from a clean parser. */
    XML_ParserReset(g_parser, NULL);
  }
}
END_TEST
1405 
/* Regression test for SF bug #483514: with a dedicated handler set for
   each kind of DTD content, the text gathered through the default
   handler is expected to be just the line breaks and the <doc/>
   element. */
START_TEST(test_dtd_default_handling) {
  const XML_Char *expected = XCS("\n\n\n\n\n\n\n<doc/>");
  const char *text = "<!DOCTYPE doc [\n"
                     "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
                     "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
                     "<!ELEMENT doc EMPTY>\n"
                     "<!ATTLIST doc a CDATA #IMPLIED>\n"
                     "<?pi in dtd?>\n"
                     "<!--comment in dtd-->\n"
                     "]><doc/>";

  /* The default handler does the accumulating ... */
  XML_SetDefaultHandler(g_parser, accumulate_characters);

  /* ... while the doctype and its declarations go to dummy handlers ... */
  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
  XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);

  /* ... and so do PIs, comments and CDATA section boundaries. */
  XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
  XML_SetCommentHandler(g_parser, dummy_comment_handler);
  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);

  run_character_check(text, expected);
}
END_TEST
1431 
/* Test handling of attribute declarations: each table entry supplies
   the rest of a document after a common prolog, and is handed to
   verify_attlist_decl_handler as user data for checking. */
START_TEST(test_dtd_attr_handling) {
  const char *prolog = "<!DOCTYPE doc [\n"
                       "<!ELEMENT doc EMPTY>\n";
  /* The table ends with an entry whose definition is NULL. */
  AttTest attr_data[]
      = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
          "]>"
          "<doc a='two'/>",
          XCS("doc"), XCS("a"),
          XCS("(one|two|three)"), /* Extraneous spaces will be removed */
          NULL, XML_TRUE},
         {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
          "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
          "]>"
          "<doc/>",
          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
         {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
          "]>"
          "<doc/>",
          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
         {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
          "]>"
          "<doc/>",
          XCS("doc"), XCS("a"), XCS("CDATA"),
#ifdef XML_UNICODE
          XCS("\x06f2"),
#else
          XCS("\xdb\xb2"),
#endif
          XML_FALSE},
         {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};

  for (size_t idx = 0; attr_data[idx].definition != NULL; idx++) {
    AttTest *const test = &attr_data[idx];
    enum XML_Status status;

    set_subtest("%s", test->definition);
    XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
    XML_SetUserData(g_parser, test);

    /* Common prolog first, not final ... */
    status = _XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
                                     XML_FALSE);
    if (status == XML_STATUS_ERROR) {
      xml_failure(g_parser);
    }

    /* ... then this entry's ATTLIST and document body as the final part. */
    status = _XML_Parse_SINGLE_BYTES(g_parser, test->definition,
                                     (int)strlen(test->definition), XML_TRUE);
    if (status == XML_STATUS_ERROR) {
      xml_failure(g_parser);
    }

    XML_ParserReset(g_parser, NULL);
  }
}
END_TEST
1481 
/* See related SF bug #673791.
   When namespace processing is enabled, binding a prefix to an empty
   namespace URI (xmlns:prefix='') is not allowed; this test ensures
   that it *is* allowed when namespace processing is not enabled.
   (See Namespaces in XML, section 2.)
*/
START_TEST(test_empty_ns_without_namespaces) {
  const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
                     "  <e xmlns:prefix=''/>\n"
                     "</doc>";
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
1498 
/* Regression test for SF bug #824420.
   An ATTLIST declaration supplies a default value for an xmlns:e
   attribute; the document must still parse cleanly, i.e. the
   defaulted xmlns:prefix attribute must not be misinterpreted.
*/
START_TEST(test_ns_in_attribute_default_without_namespaces) {
  const char *const doc = "<!DOCTYPE e:element [\n"
                          "  <!ATTLIST e:element\n"
                          "    xmlns:e CDATA 'http://example.org/'>\n"
                          "      ]>\n"
                          "<e:element/>";
  const int doc_len = (int)strlen(doc);
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);

  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
}
END_TEST
1515 
1516 /* Regression test for SF bug #1515266: missing check of stopped
1517    parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1518 START_TEST(test_stop_parser_between_char_data_calls) {
1519   /* The sample data must be big enough that there are two calls to
1520      the character data handler from within the inner "for" loop of
1521      the XML_TOK_DATA_CHARS case in doContent(), and the character
1522      handler must stop the parser and clear the character data
1523      handler.
1524   */
1525   const char *text = long_character_data_text;
1526 
1527   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1528   g_resumable = XML_FALSE;
1529   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1530       != XML_STATUS_ERROR)
1531     xml_failure(g_parser);
1532   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1533     xml_failure(g_parser);
1534 }
1535 END_TEST
1536 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop.  This variant suspends the parse
   (g_resumable is XML_TRUE) rather than aborting it. */
START_TEST(test_suspend_parser_between_char_data_calls) {
  /* The input has to be long enough that the character data handler
     would be invoked twice from the inner "for" loop of the
     XML_TOK_DATA_CHARS case in doContent(); the handler installed
     below is expected to suspend the parser and unregister itself
     on its first invocation.
  */
  const char *const doc = long_character_data_text;
  const int doc_len = (int)strlen(doc);
  enum XML_Status status;

  g_resumable = XML_TRUE;
  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);

  // Plain XML_Parse on purpose: SINGLE_BYTES would return early on
  // suspension, and we would not know exactly how much input Expat
  // actually received.
  status = XML_Parse(g_parser, doc, doc_len, XML_TRUE);
  if (status != XML_STATUS_SUSPENDED) {
    xml_failure(g_parser);
  }
  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) {
    xml_failure(g_parser);
  }

  /* Feeding input to the still-suspended parser must be refused */
  status = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);
  if (status != XML_STATUS_ERROR) {
    fail("Attempt to continue parse while suspended not faulted");
  }
  if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) {
    fail("Suspended parse not faulted with correct error");
  }
}
END_TEST
1565 
/* Test that repeated calls to XML_StopParser are handled correctly.
   The same input is parsed three times; g_resumable and g_abortable
   are set differently before each run for parser_stop_character_handler
   (defined elsewhere) to act upon. */
START_TEST(test_repeated_stop_parser_between_char_data_calls) {
  const char *const doc = long_character_data_text;
  const int doc_len = (int)strlen(doc);
  enum XML_Status status;

  /* Run 1: not resumable, not abortable -- parse must end in error */
  g_resumable = XML_FALSE;
  g_abortable = XML_FALSE;
  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  status = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);
  if (status != XML_STATUS_ERROR) {
    fail("Failed to double-stop parser");
  }

  /* Run 2: resumable, not abortable -- parse must end up suspended */
  XML_ParserReset(g_parser, NULL);
  g_resumable = XML_TRUE;
  g_abortable = XML_FALSE;
  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  // Plain XML_Parse on purpose: SINGLE_BYTES would return early on
  // suspension, and we would not know exactly how much input Expat
  // actually received.
  status = XML_Parse(g_parser, doc, doc_len, XML_TRUE);
  if (status != XML_STATUS_SUSPENDED) {
    fail("Failed to double-suspend parser");
  }

  /* Run 3: resumable and abortable -- parse must end in error */
  XML_ParserReset(g_parser, NULL);
  g_resumable = XML_TRUE;
  g_abortable = XML_TRUE;
  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  status = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);
  if (status != XML_STATUS_ERROR) {
    fail("Failed to suspend-abort parser");
  }
}
END_TEST
1596 
/* Test that the content of a well-formed ASCII CDATA section reaches
   the character data handler verbatim, markup characters included */
START_TEST(test_good_cdata_ascii) {
  const char *const doc
      = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
  const XML_Char *const want = XCS("<greeting>Hello, world!</greeting>");
  const int doc_len = (int)strlen(doc);
  CharData collected;

  /* First pass: CDATA section start/end handlers installed, which
     also exercises those code paths for coverage */
  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
  if (_XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);

  /* Second pass: fresh parser state, a default handler instead of
     the CDATA section handlers; the collected text must be the same */
  XML_ParserReset(g_parser, NULL);
  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  XML_SetDefaultHandler(g_parser, dummy_default_handler);
  if (_XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);
}
END_TEST
1627 
/* Test a CDATA section in a UTF-16 document whose code units are
   stored high byte first */
START_TEST(test_good_cdata_utf16) {
  /* The document, two bytes per character, is:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a><![CDATA[hello]]></a>
   * The digits "1" and "6" sit in string pieces of their own so that
   * they cannot merge with a preceding "\0" into an octal escape.
   */
  const char doc[]
      = "\0<\0?\0x\0m\0l\0"
        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
        "1\0"
        "6\0'"
        "\0?\0>\0\n"
        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
  /* sizeof counts the implicit terminating NUL, which is not input */
  const int doc_len = (int)sizeof(doc) - 1;
  const XML_Char *const want = XCS("hello");
  CharData collected;

  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);

  if (_XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);
}
END_TEST
1654 
/* Test a CDATA section in a UTF-16 document whose code units are
   stored low byte first */
START_TEST(test_good_cdata_utf16_le) {
  /* The document, two bytes per character, is:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a><![CDATA[hello]]></a>
   * The digits "1" and "6" sit in string pieces of their own so that
   * they cannot merge with a preceding "\0" into an octal escape.
   */
  const char doc[]
      = "<\0?\0x\0m\0l\0"
        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
        "1\0"
        "6\0'"
        "\0?\0>\0\n"
        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
  /* sizeof counts the implicit terminating NUL, which is not input */
  const int doc_len = (int)sizeof(doc) - 1;
  const XML_Char *const want = XCS("hello");
  CharData collected;

  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);

  if (_XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);
}
END_TEST
1681 
/* Test UTF-16 conversion of a long CDATA string */

/* "ABCDEFGHIJKLMNOP" as 16 UTF-16 code units, high byte first:
   handy macro to reduce visual clutter */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"

START_TEST(test_long_cdata_utf16) {
  /* The document, two bytes per character and without any newlines, is:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a><![CDATA[ABCDEFGHIJKLMNOP ... ABCDEFGHIJKLMNOP]]></a>
   * with the 16-letter run repeated 65 times (1040 characters).
   */
  const char text[]
      = "\0<\0?\0x\0m\0l\0 "
        "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
        "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
      /* 64 characters per line */
      /* clang-format off */
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16
        /* clang-format on */
        "\0]\0]\0>\0<\0/\0a\0>";
  const XML_Char *expected =
      /* clang-format off */
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOP");
  /* clang-format on */
  /* sizeof counts the implicit terminating NUL, which is not input */
  const size_t text_bytes = sizeof(text) - 1;
  CharData collected;
  void *parse_buffer;

  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);

  /* Hand the whole document over in one go through Expat's own
     buffer rather than through XML_Parse */
  parse_buffer = XML_GetBuffer(g_parser, (int)text_bytes);
  if (parse_buffer == NULL) {
    fail("Could not allocate parse buffer");
  }
  assert(parse_buffer != NULL);
  memcpy(parse_buffer, text, text_bytes);
  if (XML_ParseBuffer(g_parser, (int)text_bytes, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, expected);
}
END_TEST
1756 
/* Test handling of characters that need two UTF-16 code units */
START_TEST(test_multichar_cdata_utf16) {
  /* The document, stored high byte first, is:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
   *
   * {MINIM} is U+1d15e (a minim or half-note)
   *   UTF-16: 0xd834 0xdd5e
   *   UTF-8:  0xf0 0x9d 0x85 0x9e
   * {CROTCHET} is U+1d15f (a crotchet or quarter-note)
   *   UTF-16: 0xd834 0xdd5f
   *   UTF-8:  0xf0 0x9d 0x85 0x9f
   */
  const char doc[] = "\0<\0?\0x\0m\0l\0"
                     " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
                     " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
                     "1\0"
                     "6\0'"
                     "\0?\0>\0\n"
                     "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
                     "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
                     "\0]\0]\0>\0<\0/\0a\0>";
  /* sizeof counts the implicit terminating NUL, which is not input */
  const int doc_len = (int)sizeof(doc) - 1;
  /* What the handler receives depends on the XML_Char flavour: the
     surrogate pairs as they are, or the UTF-8 byte sequences */
#ifdef XML_UNICODE
  const XML_Char *const want = XCS("\xd834\xdd5e\xd834\xdd5f");
#else
  const XML_Char *const want = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
#endif
  CharData collected;

  CharData_Init(&collected);
  XML_SetUserData(g_parser, &collected);
  XML_SetCharacterDataHandler(g_parser, accumulate_characters);

  if (_XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE)
      == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  CharData_CheckXMLChars(&collected, want);
}
END_TEST
1796 
/* Test that a CDATA section holding a reversed UTF-16 surrogate pair
   is rejected */
START_TEST(test_utf16_bad_surrogate_pair) {
  /* The document, stored high byte first, is:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a><![CDATA[{BADLINB}]]></a>
   *
   * {BADLINB} is U+10000 (the first Linear B character) with the
   * two halves of its surrogate pair swapped, i.e.
   *   0xdc00 0xd800
   */
  const char doc[] = "\0<\0?\0x\0m\0l\0"
                     " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
                     " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
                     "1\0"
                     "6\0'"
                     "\0?\0>\0\n"
                     "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
                     "\xdc\x00\xd8\x00"
                     "\0]\0]\0>\0<\0/\0a\0>";
  /* sizeof counts the implicit terminating NUL, which is not input */
  const int doc_len = (int)sizeof(doc) - 1;
  const enum XML_Status status
      = _XML_Parse_SINGLE_BYTES(g_parser, doc, doc_len, XML_TRUE);

  if (status != XML_STATUS_ERROR) {
    fail("Reversed UTF-16 surrogate pair not faulted");
  }
  /* The rejection has to be reported as an invalid token */
  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) {
    xml_failure(g_parser);
  }
}
END_TEST
1824 
/* Test that truncated or malformed CDATA section openers and unclosed
   CDATA sections are faulted with the expected error code.  Every
   case is parsed as a complete document on a freshly reset parser. */
START_TEST(test_bad_cdata) {
  struct CaseData {
    const char *text;             /* complete (broken) document */
    enum XML_Error expectedError; /* error code the parse must report */
  };

  struct CaseData cases[]
      = {/* Input ends part-way through the "<![CDATA[" opener */
         {"<a><", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
         {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},

         /* Opener complete, input ends before the closing "]]>" */
         {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
         {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
         {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},

         /* Partial opener followed by other markup; the two rows
            marked "?!" yield a different error than their neighbours */
         {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
         {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
         {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
         {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
         {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
         {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
         {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},

         /* Opener complete, markup-like text inside, never closed */
         {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
         {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
         {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};

  size_t i = 0;
  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
    set_subtest("%s", cases[i].text);
    const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
        g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
    const enum XML_Error actualError = XML_GetErrorCode(g_parser);

    /* Checked through the test harness rather than libc assert(), so
       the check is still performed when NDEBUG is defined and a
       failure is reported as a test failure instead of an abort */
    assert_true(actualStatus == XML_STATUS_ERROR);

    if (actualError != cases[i].expectedError) {
      char message[100];
      /* snprintf truncates if needed, so the buffer cannot overflow */
      snprintf(message, sizeof(message),
               "Expected error %d but got error %d for case %u: \"%s\"\n",
               (int)cases[i].expectedError, (int)actualError,
               (unsigned int)i + 1, cases[i].text);
      fail(message);
    }

    /* Start the next case from a clean parser state */
    XML_ParserReset(g_parser, NULL);
  }
}
END_TEST
1879 
/* Test failures in UTF-16 CDATA: a valid UTF-16 prolog plus "<a>" is
   followed by a CDATA section that is cut off after every possible
   byte count, and each truncation must be faulted with the expected
   error code. */
START_TEST(test_bad_cdata_utf16) {
  struct CaseData {
    size_t text_bytes; /* explicit length: the text has embedded NULs */
    const char *text;  /* bytes fed to the parser after the prolog */
    enum XML_Error expected_error; /* error code the parse must report */
  };

  /* Stored high byte first:
   *   <?xml version='1.0' encoding='utf-16'?>
   *   <a>
   */
  const char prolog[] = "\0<\0?\0x\0m\0l\0"
                        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
                        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
                        "1\0"
                        "6\0'"
                        "\0?\0>\0\n"
                        "\0<\0a\0>";
  struct CaseData cases[] = {
      {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
      {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
      {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
      {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
      {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
      {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
      {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
      {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
      {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
      {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
      {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
      {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
      {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
      {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
      {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
      {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
      {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
      {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
      {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
      {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
      /* Now add a four-byte UTF-16 character */
      {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
       XML_ERROR_UNCLOSED_CDATA_SECTION},
      {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
      {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
       XML_ERROR_PARTIAL_CHAR},
      {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
       XML_ERROR_UNCLOSED_CDATA_SECTION}};
  size_t i;

  for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
    set_subtest("case %lu", (long unsigned)(i + 1));
    enum XML_Status actual_status;
    enum XML_Error actual_error;

    /* The prolog is valid and non-final, so it must be accepted */
    if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
                                XML_FALSE)
        == XML_STATUS_ERROR)
      xml_failure(g_parser);
    actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
                                            (int)cases[i].text_bytes, XML_TRUE);
    /* Checked through the test harness rather than libc assert(), so
       the check is still performed when NDEBUG is defined and a
       failure is reported as a test failure instead of an abort */
    assert_true(actual_status == XML_STATUS_ERROR);
    actual_error = XML_GetErrorCode(g_parser);
    if (actual_error != cases[i].expected_error) {
      char message[1024];

      /* snprintf truncates if needed, so the buffer cannot overflow */
      snprintf(message, sizeof(message),
               "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
               ") for case %lu\n",
               (int)cases[i].expected_error,
               XML_ErrorString(cases[i].expected_error), (int)actual_error,
               XML_ErrorString(actual_error), (long unsigned)(i + 1));
      fail(message);
    }
    /* Start the next case from a clean parser state */
    XML_ParserReset(g_parser, NULL);
  }
}
END_TEST
1954 
/* Test that a non-resumable stop requested from the character data
   handler while parsing long_cdata_text aborts the parse */
START_TEST(test_stop_parser_between_cdata_calls) {
  g_resumable = XML_FALSE;
  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);

  /* The parse has to fail, and with exactly XML_ERROR_ABORTED */
  expect_failure(long_cdata_text, XML_ERROR_ABORTED,
                 "Parse not aborted in CDATA handler");
}
END_TEST
1964 
/* Test that a resumable stop requested from the character data
   handler while parsing long_cdata_text suspends the parse */
START_TEST(test_suspend_parser_between_cdata_calls) {
  enum XML_Status status;

  // This test does not use SINGLE_BYTES (because of suspension), so
  // there is nothing to do when a chunk size has been configured.
  if (g_chunkSize != 0)
    return;

  g_resumable = XML_TRUE;
  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);

  // Plain XML_Parse on purpose: SINGLE_BYTES would return early on
  // suspension, and we would not know exactly how much input Expat
  // actually received.
  status = XML_Parse(g_parser, long_cdata_text, (int)strlen(long_cdata_text),
                     XML_TRUE);
  /* A hard error is reported with the parser's own diagnostics first;
     anything else that is not a suspension gets the generic message */
  if (status == XML_STATUS_ERROR) {
    xml_failure(g_parser);
  }
  if (status != XML_STATUS_SUSPENDED) {
    fail("Parse not suspended in CDATA handler");
  }
  /* Being suspended is not an error condition */
  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) {
    xml_failure(g_parser);
  }
}
END_TEST
1989 
/* Test the parser's memory functions: allocate a block, write to it,
   grow it, check that the old content survived and that the grown
   block is writable, then release it */
START_TEST(test_memory_allocation) {
  char *block = (char *)XML_MemMalloc(g_parser, 256);

  if (block != NULL) {
    /* Really write to the memory and read it back; per the original
       note, "some OSes try to cheat" */
    memcpy(block, "TEST", 5);
    if (strcmp(block, "TEST") == 0) {
      char *const grown = (char *)XML_MemRealloc(g_parser, block, 512);

      if (grown != NULL) {
        /* Only adopt the new pointer once reallocation has succeeded */
        block = grown;
        /* Changing one byte checks writability, the remaining "EST"
           checks that the old content was carried over */
        block[0] = 'V';
        if (strcmp(block, "VEST") != 0) {
          fail("Reallocated memory not writable");
        }
      } else {
        fail("Reallocation failed");
      }
    } else {
      fail("Memory not writable");
    }
    XML_MemFree(g_parser, block);
  } else {
    fail("Allocation failed");
  }
}
END_TEST
2023 
2024 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)2025 START_TEST(test_default_current) {
2026   const char *text = "<doc>hell]</doc>";
2027   const char *entity_text = "<!DOCTYPE doc [\n"
2028                             "<!ENTITY entity '&#37;'>\n"
2029                             "]>\n"
2030                             "<doc>&entity;</doc>";
2031 
2032   set_subtest("with defaulting");
2033   {
2034     struct handler_record_list storage;
2035     storage.count = 0;
2036     XML_SetDefaultHandler(g_parser, record_default_handler);
2037     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2038     XML_SetUserData(g_parser, &storage);
2039     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2040         == XML_STATUS_ERROR)
2041       xml_failure(g_parser);
2042     int i = 0;
2043     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2044     // we should have gotten one or more cdata callbacks, totaling 5 chars
2045     int cdata_len_remaining = 5;
2046     while (cdata_len_remaining > 0) {
2047       const struct handler_record_entry *c_entry
2048           = handler_record_get(&storage, i++);
2049       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2050       assert_true(c_entry->arg > 0);
2051       assert_true(c_entry->arg <= cdata_len_remaining);
2052       cdata_len_remaining -= c_entry->arg;
2053       // default handler must follow, with the exact same len argument.
2054       assert_record_handler_called(&storage, i++, "record_default_handler",
2055                                    c_entry->arg);
2056     }
2057     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2058     assert_true(storage.count == i);
2059   }
2060 
2061   /* Again, without the defaulting */
2062   set_subtest("no defaulting");
2063   {
2064     struct handler_record_list storage;
2065     storage.count = 0;
2066     XML_ParserReset(g_parser, NULL);
2067     XML_SetDefaultHandler(g_parser, record_default_handler);
2068     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2069     XML_SetUserData(g_parser, &storage);
2070     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2071         == XML_STATUS_ERROR)
2072       xml_failure(g_parser);
2073     int i = 0;
2074     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2075     // we should have gotten one or more cdata callbacks, totaling 5 chars
2076     int cdata_len_remaining = 5;
2077     while (cdata_len_remaining > 0) {
2078       const struct handler_record_entry *c_entry
2079           = handler_record_get(&storage, i++);
2080       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2081       assert_true(c_entry->arg > 0);
2082       assert_true(c_entry->arg <= cdata_len_remaining);
2083       cdata_len_remaining -= c_entry->arg;
2084     }
2085     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2086     assert_true(storage.count == i);
2087   }
2088 
2089   /* Now with an internal entity to complicate matters */
2090   set_subtest("with internal entity");
2091   {
2092     struct handler_record_list storage;
2093     storage.count = 0;
2094     XML_ParserReset(g_parser, NULL);
2095     XML_SetDefaultHandler(g_parser, record_default_handler);
2096     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2097     XML_SetUserData(g_parser, &storage);
2098     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2099                                 XML_TRUE)
2100         == XML_STATUS_ERROR)
2101       xml_failure(g_parser);
2102     /* The default handler suppresses the entity */
2103     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2104     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2105     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2106     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2107     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2108     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2109     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2110     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2111     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2112     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2113     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2114     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2115     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2116     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2117     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2118     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2119     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2120     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2121     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2122     assert_true(storage.count == 19);
2123   }
2124 
2125   /* Again, with a skip handler */
2126   set_subtest("with skip handler");
2127   {
2128     struct handler_record_list storage;
2129     storage.count = 0;
2130     XML_ParserReset(g_parser, NULL);
2131     XML_SetDefaultHandler(g_parser, record_default_handler);
2132     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2133     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2134     XML_SetUserData(g_parser, &storage);
2135     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2136                                 XML_TRUE)
2137         == XML_STATUS_ERROR)
2138       xml_failure(g_parser);
2139     /* The default handler suppresses the entity */
2140     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2141     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2142     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2143     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2144     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2145     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2146     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2147     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2148     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2149     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2150     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2151     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2152     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2153     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2154     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2155     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2156     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2157     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2158     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2159     assert_true(storage.count == 19);
2160   }
2161 
2162   /* This time, allow the entity through */
2163   set_subtest("allow entity");
2164   {
2165     struct handler_record_list storage;
2166     storage.count = 0;
2167     XML_ParserReset(g_parser, NULL);
2168     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2169     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2170     XML_SetUserData(g_parser, &storage);
2171     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2172                                 XML_TRUE)
2173         == XML_STATUS_ERROR)
2174       xml_failure(g_parser);
2175     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2176     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2177     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2178     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2179     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2180     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2181     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2182     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2183     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2184     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2185     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2186     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2187     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2188     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2189     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2190     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2191     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2192     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2193     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2194     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2195     assert_true(storage.count == 20);
2196   }
2197 
2198   /* Finally, without passing the cdata to the default handler */
2199   set_subtest("not passing cdata");
2200   {
2201     struct handler_record_list storage;
2202     storage.count = 0;
2203     XML_ParserReset(g_parser, NULL);
2204     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2205     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2206     XML_SetUserData(g_parser, &storage);
2207     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2208                                 XML_TRUE)
2209         == XML_STATUS_ERROR)
2210       xml_failure(g_parser);
2211     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2212     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2213     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2214     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2215     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2216     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2217     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2218     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2219     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2220     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2221     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2222     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2223     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2224     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2225     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2226     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2227     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2228     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2229                                  1);
2230     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2231     assert_true(storage.count == 19);
2232   }
2233 }
2234 END_TEST
2235 
2236 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2237 START_TEST(test_dtd_elements) {
2238   const char *text = "<!DOCTYPE doc [\n"
2239                      "<!ELEMENT doc (chapter)>\n"
2240                      "<!ELEMENT chapter (#PCDATA)>\n"
2241                      "]>\n"
2242                      "<doc><chapter>Wombats are go</chapter></doc>";
2243 
2244   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2245   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2246       == XML_STATUS_ERROR)
2247     xml_failure(g_parser);
2248 }
2249 END_TEST
2250 
2251 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2252 element_decl_check_model(void *userData, const XML_Char *name,
2253                          XML_Content *model) {
2254   UNUSED_P(userData);
2255   uint32_t errorFlags = 0;
2256 
2257   /* Expected model array structure is this:
2258    * [0] (type 6, quant 0)
2259    *   [1] (type 5, quant 0)
2260    *     [3] (type 4, quant 0, name "bar")
2261    *     [4] (type 4, quant 0, name "foo")
2262    *     [5] (type 4, quant 3, name "xyz")
2263    *   [2] (type 4, quant 2, name "zebra")
2264    */
2265   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2266   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2267 
2268   if (model != NULL) {
2269     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2270     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2271     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2272     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2273     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2274 
2275     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2276     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2277     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2278     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2279     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2280 
2281     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2282     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2283     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2284     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2285     errorFlags
2286         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2287 
2288     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2289     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2290     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2291     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2292     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2293 
2294     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2295     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2296     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2297     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2298     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2299 
2300     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2301     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2302     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2303     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2304     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2305   }
2306 
2307   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2308   XML_FreeContentModel(g_parser, model);
2309 }
2310 
START_TEST(test_dtd_elements_nesting)2311 START_TEST(test_dtd_elements_nesting) {
2312   // Payload inspired by a test in Perl's XML::Parser
2313   const char *text = "<!DOCTYPE foo [\n"
2314                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2315                      "]>\n"
2316                      "<foo/>";
2317 
2318   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2319 
2320   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2321   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2322       == XML_STATUS_ERROR)
2323     xml_failure(g_parser);
2324 
2325   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2326     fail("Element declaration model regression detected");
2327 }
2328 END_TEST
2329 
2330 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2331 START_TEST(test_set_foreign_dtd) {
2332   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2333   const char *text2 = "<doc>&entity;</doc>";
2334   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2335 
2336   /* Check hash salt is passed through too */
2337   XML_SetHashSalt(g_parser, 0x12345678);
2338   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2339   XML_SetUserData(g_parser, &test_data);
2340   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2341   /* Add a default handler to exercise more code paths */
2342   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2343   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2344     fail("Could not set foreign DTD");
2345   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2346       == XML_STATUS_ERROR)
2347     xml_failure(g_parser);
2348 
2349   /* Ensure that trying to set the DTD after parsing has started
2350    * is faulted, even if it's the same setting.
2351    */
2352   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2353       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2354     fail("Failed to reject late foreign DTD setting");
2355   /* Ditto for the hash salt */
2356   if (XML_SetHashSalt(g_parser, 0x23456789))
2357     fail("Failed to reject late hash salt change");
2358 
2359   /* Now finish the parse */
2360   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2361       == XML_STATUS_ERROR)
2362     xml_failure(g_parser);
2363 }
2364 END_TEST
2365 
2366 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2367 START_TEST(test_foreign_dtd_not_standalone) {
2368   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2369                      "<doc>&entity;</doc>";
2370   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2371 
2372   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2373   XML_SetUserData(g_parser, &test_data);
2374   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2375   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2376   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2377     fail("Could not set foreign DTD");
2378   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2379                  "NotStandalonehandler failed to reject");
2380 }
2381 END_TEST
2382 
2383 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2384 START_TEST(test_invalid_foreign_dtd) {
2385   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2386                      "<doc>&entity;</doc>";
2387   ExtFaults test_data
2388       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2389 
2390   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2391   XML_SetUserData(g_parser, &test_data);
2392   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2393   XML_UseForeignDTD(g_parser, XML_TRUE);
2394   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2395                  "Bad DTD should not have been accepted");
2396 }
2397 END_TEST
2398 
2399 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2400 START_TEST(test_foreign_dtd_with_doctype) {
2401   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2402                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2403   const char *text2 = "<doc>&entity;</doc>";
2404   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2405 
2406   /* Check hash salt is passed through too */
2407   XML_SetHashSalt(g_parser, 0x12345678);
2408   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2409   XML_SetUserData(g_parser, &test_data);
2410   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2411   /* Add a default handler to exercise more code paths */
2412   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2413   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2414     fail("Could not set foreign DTD");
2415   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2416       == XML_STATUS_ERROR)
2417     xml_failure(g_parser);
2418 
2419   /* Ensure that trying to set the DTD after parsing has started
2420    * is faulted, even if it's the same setting.
2421    */
2422   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2423       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2424     fail("Failed to reject late foreign DTD setting");
2425   /* Ditto for the hash salt */
2426   if (XML_SetHashSalt(g_parser, 0x23456789))
2427     fail("Failed to reject late hash salt change");
2428 
2429   /* Now finish the parse */
2430   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2431       == XML_STATUS_ERROR)
2432     xml_failure(g_parser);
2433 }
2434 END_TEST
2435 
2436 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2437 START_TEST(test_foreign_dtd_without_external_subset) {
2438   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2439                      "<doc>&foo;</doc>";
2440 
2441   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442   XML_SetUserData(g_parser, NULL);
2443   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2444   XML_UseForeignDTD(g_parser, XML_TRUE);
2445   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2446       == XML_STATUS_ERROR)
2447     xml_failure(g_parser);
2448 }
2449 END_TEST
2450 
START_TEST(test_empty_foreign_dtd)2451 START_TEST(test_empty_foreign_dtd) {
2452   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2453                      "<doc>&entity;</doc>";
2454 
2455   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2456   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2457   XML_UseForeignDTD(g_parser, XML_TRUE);
2458   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2459                  "Undefined entity not faulted");
2460 }
2461 END_TEST
2462 
2463 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2464 START_TEST(test_set_base) {
2465   const XML_Char *old_base;
2466   const XML_Char *new_base = XCS("/local/file/name.xml");
2467 
2468   old_base = XML_GetBase(g_parser);
2469   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2470     fail("Unable to set base");
2471   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2472     fail("Base setting not correct");
2473   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2474     fail("Unable to NULL base");
2475   if (XML_GetBase(g_parser) != NULL)
2476     fail("Base setting not nulled");
2477   XML_SetBase(g_parser, old_base);
2478 }
2479 END_TEST
2480 
2481 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2482 START_TEST(test_attributes) {
2483   const char *text = "<!DOCTYPE doc [\n"
2484                      "<!ELEMENT doc (tag)>\n"
2485                      "<!ATTLIST doc id ID #REQUIRED>\n"
2486                      "]>"
2487                      "<doc a='1' id='one' b='2'>"
2488                      "<tag c='3'/>"
2489                      "</doc>";
2490   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2491                          {XCS("b"), XCS("2")},
2492                          {XCS("id"), XCS("one")},
2493                          {NULL, NULL}};
2494   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2495   ElementInfo info[] = {{XCS("doc"), 3, 0, XCS("id"), doc_info},
2496                         {XCS("tag"), 1, 0, NULL, tag_info},
2497                         {NULL, 0, 0, NULL, NULL}};
2498 
2499   XML_Parser parser = XML_ParserCreate(NULL);
2500   assert_true(parser != NULL);
2501   ParserAndElementInfo parserAndElementInfos = {
2502       parser,
2503       info,
2504   };
2505 
2506   XML_SetStartElementHandler(parser, counting_start_element_handler);
2507   XML_SetUserData(parser, &parserAndElementInfos);
2508   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2509       == XML_STATUS_ERROR)
2510     xml_failure(parser);
2511 
2512   XML_ParserFree(parser);
2513 }
2514 END_TEST
2515 
START_TEST(test_duplicate_cdata_attribute)2516 START_TEST(test_duplicate_cdata_attribute) {
2517   /*
2518   https://www.w3.org/TR/xml/#attdecls
2519 
2520   Test the following statement from the linked specification:
2521     When more than one definition is provided for the same attribute of a given
2522     element type, the first declaration is binding and later declarations are
2523     ignored.
2524   */
2525 
2526   const char *text
2527       = "<!DOCTYPE doc [\n"
2528         "  <!ATTLIST doc attribute CDATA 'expected' attribute CDATA 'ignored'>\n"
2529         "]>\n"
2530         "<doc/>\n";
2531   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2532   ElementInfo info[]
2533       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2534 
2535   XML_Parser parser = XML_ParserCreate(NULL);
2536   assert_true(parser != NULL);
2537 
2538   ParserAndElementInfo parserAndElementInfos = {
2539       parser,
2540       info,
2541   };
2542 
2543   XML_SetStartElementHandler(parser, counting_start_element_handler);
2544   XML_SetUserData(parser, &parserAndElementInfos);
2545 
2546   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2547       != XML_STATUS_OK)
2548     xml_failure(parser);
2549 
2550   XML_ParserFree(parser);
2551 }
2552 END_TEST
2553 
START_TEST(test_duplicate_id_attribute_1)2554 START_TEST(test_duplicate_id_attribute_1) {
2555   /*
2556   https://www.w3.org/TR/xml/#attdecls
2557 
2558   Test the following statement from the linked specification:
2559     When more than one definition is provided for the same attribute of a given
2560     element type, the first declaration is binding and later declarations are
2561     ignored.
2562   */
2563 
2564   const char *text
2565       = "<!DOCTYPE doc [\n"
2566         "  <!ATTLIST doc identifier CDATA 'expected' identifier ID #REQUIRED>\n"
2567         "]>\n"
2568         "<doc/>\n";
2569   AttrInfo doc_info[] = {{XCS("identifier"), XCS("expected")}, {NULL, NULL}};
2570   ElementInfo info[]
2571       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2572 
2573   XML_Parser parser = XML_ParserCreate(NULL);
2574   assert_true(parser != NULL);
2575 
2576   ParserAndElementInfo parserAndElementInfos = {
2577       parser,
2578       info,
2579   };
2580 
2581   XML_SetStartElementHandler(parser, counting_start_element_handler);
2582   XML_SetUserData(parser, &parserAndElementInfos);
2583 
2584   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2585       != XML_STATUS_OK)
2586     xml_failure(parser);
2587 
2588   XML_ParserFree(parser);
2589 }
2590 END_TEST
2591 
START_TEST(test_duplicate_id_attribute_2)2592 START_TEST(test_duplicate_id_attribute_2) {
2593   /*
2594   https://www.w3.org/TR/xml/#attdecls
2595 
2596   Test the following statement from the linked specification:
2597     When more than one definition is provided for the same attribute of a given
2598     element type, the first declaration is binding and later declarations are
2599     ignored.
2600   */
2601 
2602   const char *text
2603       = "<!DOCTYPE doc [\n"
2604         "  <!ATTLIST doc identifier ID #REQUIRED identifier CDATA 'unexpected'>\n"
2605         "]>\n"
2606         "<doc/>\n";
2607   AttrInfo doc_info[] = {{NULL, NULL}};
2608 
2609   ElementInfo info[]
2610       = {{XCS("doc"), 0, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2611 
2612   XML_Parser parser = XML_ParserCreate(NULL);
2613   assert_true(parser != NULL);
2614 
2615   ParserAndElementInfo parserAndElementInfos = {
2616       parser,
2617       info,
2618   };
2619 
2620   XML_SetStartElementHandler(parser, counting_start_element_handler);
2621   XML_SetUserData(parser, &parserAndElementInfos);
2622 
2623   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2624       != XML_STATUS_OK)
2625     xml_failure(parser);
2626 
2627   XML_ParserFree(parser);
2628 }
2629 END_TEST
2630 
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl)2631 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl) {
2632   /*
2633   https://www.w3.org/TR/xml/#attdecls
2634 
2635   Test the following statement from the linked specification:
2636     When more than one AttlistDecl is provided for a given element type,
2637     the contents of all those provided are merged.
2638   */
2639   const char *text = "<!DOCTYPE doc [\n"
2640                      "  <!ATTLIST doc attribute CDATA 'expected'>\n"
2641                      "  <!ATTLIST doc attribute CDATA 'ignored'>\n"
2642                      "]>\n"
2643                      "<doc/>\n";
2644   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected")}, {NULL, NULL}};
2645   ElementInfo info[]
2646       = {{XCS("doc"), 0, 1, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
2647 
2648   XML_Parser parser = XML_ParserCreate(NULL);
2649   assert_true(parser != NULL);
2650 
2651   ParserAndElementInfo parserAndElementInfos = {
2652       parser,
2653       info,
2654   };
2655 
2656   XML_SetStartElementHandler(parser, counting_start_element_handler);
2657   XML_SetUserData(parser, &parserAndElementInfos);
2658 
2659   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2660       != XML_STATUS_OK)
2661     xml_failure(parser);
2662 
2663   XML_ParserFree(parser);
2664 }
2665 END_TEST
2666 
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2)2667 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_2) {
2668   /*
2669   https://www.w3.org/TR/xml/#attdecls
2670 
2671   Test the following statement from the linked specification:
2672     When more than one AttlistDecl is provided for a given element type,
2673     the contents of all those provided are merged.
2674   */
2675   const char *text = "<!DOCTYPE doc [\n"
2676                      "  <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2677                      "  <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2678                      "  <!ATTLIST doc attribute CDATA 'ignored_doc'>\n"
2679                      "]>\n"
2680                      "<doc><tag></tag></doc>\n";
2681   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")}, {NULL, NULL}};
2682   AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2683   ElementInfo info[] = {{XCS("doc"), 0, 1, NULL, doc_info},
2684                         {XCS("tag"), 0, 1, NULL, tag_info},
2685                         {NULL, 0, 0, NULL, NULL}};
2686 
2687   XML_Parser parser = XML_ParserCreate(NULL);
2688   assert_true(parser != NULL);
2689 
2690   ParserAndElementInfo parserAndElementInfos = {
2691       parser,
2692       info,
2693   };
2694 
2695   XML_SetStartElementHandler(parser, counting_start_element_handler);
2696   XML_SetUserData(parser, &parserAndElementInfos);
2697 
2698   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2699       != XML_STATUS_OK)
2700     xml_failure(parser);
2701 
2702   XML_ParserFree(parser);
2703 }
2704 END_TEST
2705 
START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3)2706 START_TEST(test_duplicate_cdata_attribute_multiple_attlistdecl_3) {
2707   /*
2708   https://www.w3.org/TR/xml/#attdecls
2709 
2710   Test the following statement from the linked specification:
2711     When more than one AttlistDecl is provided for a given element type,
2712     the contents of all those provided are merged.
2713   */
2714   const char *text
2715       = "<!DOCTYPE doc [\n"
2716         "  <!ATTLIST doc attribute CDATA 'expected_doc'>\n"
2717         "  <!ATTLIST tag attribute CDATA 'expected_tag'>\n"
2718         "  <!ATTLIST doc second_attribute CDATA 'second_expected_doc' attribute CDATA 'ignored_doc'>\n"
2719         "]>\n"
2720         "<doc><tag></tag></doc>\n";
2721   AttrInfo doc_info[] = {{XCS("attribute"), XCS("expected_doc")},
2722                          {XCS("second_attribute"), XCS("second_expected_doc")},
2723                          {NULL, NULL}};
2724   AttrInfo tag_info[] = {{XCS("attribute"), XCS("expected_tag")}, {NULL, NULL}};
2725   ElementInfo info[] = {{XCS("doc"), 0, 2, NULL, doc_info},
2726                         {XCS("tag"), 0, 1, NULL, tag_info},
2727                         {NULL, 0, 0, NULL, NULL}};
2728 
2729   XML_Parser parser = XML_ParserCreate(NULL);
2730   assert_true(parser != NULL);
2731 
2732   ParserAndElementInfo parserAndElementInfos = {
2733       parser,
2734       info,
2735   };
2736 
2737   XML_SetStartElementHandler(parser, counting_start_element_handler);
2738   XML_SetUserData(parser, &parserAndElementInfos);
2739 
2740   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2741       != XML_STATUS_OK)
2742     xml_failure(parser);
2743 
2744   XML_ParserFree(parser);
2745 }
2746 END_TEST
2747 
START_TEST(test_duplicate_id_attribute_multiple_attlistdecl)2748 START_TEST(test_duplicate_id_attribute_multiple_attlistdecl) {
2749   /*
2750   https://www.w3.org/TR/xml/#attdecls
2751 
2752   Test the following statement from the linked specification:
2753     When more than one AttlistDecl is provided for a given element type,
2754     the contents of all those provided are merged.
2755   */
2756   const char *text = "<!DOCTYPE doc [\n"
2757                      "  <!ATTLIST doc identifier ID #REQUIRED>\n"
2758                      "  <!ATTLIST tag identifier CDATA 'identifier_tag'>\n"
2759                      "  <!ATTLIST doc identifier CDATA 'ignored'>\n"
2760                      "]>\n"
2761                      "<doc identifier='doc_identity'><tag></tag></doc>\n";
2762   AttrInfo doc_info[]
2763       = {{XCS("identifier"), XCS("doc_identity")}, {NULL, NULL}};
2764   AttrInfo tag_info[]
2765       = {{XCS("identifier"), XCS("identifier_tag")}, {NULL, NULL}};
2766   ElementInfo info[] = {{XCS("doc"), 1, 0, XCS("identifier"), doc_info},
2767                         {XCS("tag"), 0, 1, NULL, tag_info},
2768                         {NULL, 0, 0, NULL, NULL}};
2769 
2770   XML_Parser parser = XML_ParserCreate(NULL);
2771   assert_true(parser != NULL);
2772 
2773   ParserAndElementInfo parserAndElementInfos = {
2774       parser,
2775       info,
2776   };
2777 
2778   XML_SetStartElementHandler(parser, counting_start_element_handler);
2779   XML_SetUserData(parser, &parserAndElementInfos);
2780 
2781   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2782       != XML_STATUS_OK)
2783     xml_failure(parser);
2784 
2785   XML_ParserFree(parser);
2786 }
2787 END_TEST
2788 
2789 /* Test reset works correctly in the middle of processing an internal
2790  * entity.  Exercises some obscure code in XML_ParserReset().
2791  */
START_TEST(test_reset_in_entity)2792 START_TEST(test_reset_in_entity) {
2793   if (g_chunkSize != 0) {
2794     // this test does not use SINGLE_BYTES, because of suspension
2795     return;
2796   }
2797 
2798   const char *text = "<!DOCTYPE doc [\n"
2799                      "<!ENTITY wombat 'wom'>\n"
2800                      "<!ENTITY entity 'hi &wom; there'>\n"
2801                      "]>\n"
2802                      "<doc>&entity;</doc>";
2803   XML_ParsingStatus status;
2804 
2805   g_resumable = XML_TRUE;
2806   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2807   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2808   // we won't know exactly how much input we actually managed to give Expat.
2809   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2810       == XML_STATUS_ERROR)
2811     xml_failure(g_parser);
2812   XML_GetParsingStatus(g_parser, &status);
2813   if (status.parsing != XML_SUSPENDED)
2814     fail("Parsing status not SUSPENDED");
2815   XML_ParserReset(g_parser, NULL);
2816   XML_GetParsingStatus(g_parser, &status);
2817   if (status.parsing != XML_INITIALIZED)
2818     fail("Parsing status doesn't reset to INITIALIZED");
2819 }
2820 END_TEST
2821 
2822 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2823 START_TEST(test_resume_invalid_parse) {
2824   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2825 
2826   g_resumable = XML_TRUE;
2827   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2828   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2829       == XML_STATUS_ERROR)
2830     xml_failure(g_parser);
2831   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2832     fail("Resumed invalid parse not faulted");
2833   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2834     fail("Invalid parse not correctly faulted");
2835 }
2836 END_TEST
2837 
2838 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2839 START_TEST(test_resume_resuspended) {
2840   const char *text = "<doc>Hello<meep/>world</doc>";
2841 
2842   g_resumable = XML_TRUE;
2843   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2844   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2845       == XML_STATUS_ERROR)
2846     xml_failure(g_parser);
2847   g_resumable = XML_TRUE;
2848   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2849   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2850     fail("Resumption not suspended");
2851   /* This one should succeed and finish up */
2852   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2853     xml_failure(g_parser);
2854 }
2855 END_TEST
2856 
2857 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2858 START_TEST(test_cdata_default) {
2859   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2860   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2861   CharData storage;
2862 
2863   CharData_Init(&storage);
2864   XML_SetUserData(g_parser, &storage);
2865   XML_SetDefaultHandler(g_parser, accumulate_characters);
2866 
2867   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2868       == XML_STATUS_ERROR)
2869     xml_failure(g_parser);
2870   CharData_CheckXMLChars(&storage, expected);
2871 }
2872 END_TEST
2873 
2874 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2875 START_TEST(test_subordinate_reset) {
2876   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2877                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2878                      "<doc>&entity;</doc>";
2879 
2880   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2881   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2882   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2883       == XML_STATUS_ERROR)
2884     xml_failure(g_parser);
2885 }
2886 END_TEST
2887 
2888 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2889 START_TEST(test_subordinate_suspend) {
2890   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2891                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2892                      "<doc>&entity;</doc>";
2893 
2894   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2895   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2896   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2897       == XML_STATUS_ERROR)
2898     xml_failure(g_parser);
2899 }
2900 END_TEST
2901 
2902 /* Test suspending a subordinate parser from an XML declaration */
2903 /* Increases code coverage of the tests */
2904 
START_TEST(test_subordinate_xdecl_suspend)2905 START_TEST(test_subordinate_xdecl_suspend) {
2906   const char *text
2907       = "<!DOCTYPE doc [\n"
2908         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2909         "]>\n"
2910         "<doc>&entity;</doc>";
2911 
2912   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2913   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2914   g_resumable = XML_TRUE;
2915   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2916       == XML_STATUS_ERROR)
2917     xml_failure(g_parser);
2918 }
2919 END_TEST
2920 
START_TEST(test_subordinate_xdecl_abort)2921 START_TEST(test_subordinate_xdecl_abort) {
2922   const char *text
2923       = "<!DOCTYPE doc [\n"
2924         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2925         "]>\n"
2926         "<doc>&entity;</doc>";
2927 
2928   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2929   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2930   g_resumable = XML_FALSE;
2931   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2932       == XML_STATUS_ERROR)
2933     xml_failure(g_parser);
2934 }
2935 END_TEST
2936 
2937 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2938 START_TEST(test_ext_entity_invalid_suspended_parse) {
2939   const char *text = "<!DOCTYPE doc [\n"
2940                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2941                      "]>\n"
2942                      "<doc>&en;</doc>";
2943   ExtFaults faults[]
2944       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2945           "Incomplete element declaration not faulted", NULL,
2946           XML_ERROR_UNCLOSED_TOKEN},
2947          {/* First two bytes of a three-byte char */
2948           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2949           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2950          {NULL, NULL, NULL, XML_ERROR_NONE}};
2951   ExtFaults *fault;
2952 
2953   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2954     set_subtest("%s", fault->parse_text);
2955     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2956     XML_SetExternalEntityRefHandler(g_parser,
2957                                     external_entity_suspending_faulter);
2958     XML_SetUserData(g_parser, fault);
2959     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2960                    "Parser did not report external entity error");
2961     XML_ParserReset(g_parser, NULL);
2962   }
2963 }
2964 END_TEST
2965 
2966 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2967 START_TEST(test_explicit_encoding) {
2968   const char *text1 = "<doc>Hello ";
2969   const char *text2 = " World</doc>";
2970 
2971   /* Just check that we can set the encoding to NULL before starting */
2972   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2973     fail("Failed to initialise encoding to NULL");
2974   /* Say we are UTF-8 */
2975   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2976     fail("Failed to set explicit encoding");
2977   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2978       == XML_STATUS_ERROR)
2979     xml_failure(g_parser);
2980   /* Try to switch encodings mid-parse */
2981   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2982     fail("Allowed encoding change");
2983   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2984       == XML_STATUS_ERROR)
2985     xml_failure(g_parser);
2986   /* Try now the parse is over */
2987   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2988     fail("Failed to unset encoding");
2989 }
2990 END_TEST
2991 
2992 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2993 START_TEST(test_trailing_cr) {
2994   const char *text = "<doc>\r";
2995   int found_cr;
2996 
2997   /* Try with a character handler, for code coverage */
2998   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2999   XML_SetUserData(g_parser, &found_cr);
3000   found_cr = 0;
3001   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3002       == XML_STATUS_OK)
3003     fail("Failed to fault unclosed doc");
3004   if (found_cr == 0)
3005     fail("Did not catch the carriage return");
3006   XML_ParserReset(g_parser, NULL);
3007 
3008   /* Now with a default handler instead */
3009   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
3010   XML_SetUserData(g_parser, &found_cr);
3011   found_cr = 0;
3012   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3013       == XML_STATUS_OK)
3014     fail("Failed to fault unclosed doc");
3015   if (found_cr == 0)
3016     fail("Did not catch default carriage return");
3017 }
3018 END_TEST
3019 
3020 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)3021 START_TEST(test_ext_entity_trailing_cr) {
3022   const char *text = "<!DOCTYPE doc [\n"
3023                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3024                      "]>\n"
3025                      "<doc>&en;</doc>";
3026   int found_cr;
3027 
3028   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3029   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
3030   XML_SetUserData(g_parser, &found_cr);
3031   found_cr = 0;
3032   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3033       != XML_STATUS_OK)
3034     xml_failure(g_parser);
3035   if (found_cr == 0)
3036     fail("No carriage return found");
3037   XML_ParserReset(g_parser, NULL);
3038 
3039   /* Try again with a different trailing CR */
3040   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3041   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
3042   XML_SetUserData(g_parser, &found_cr);
3043   found_cr = 0;
3044   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3045       != XML_STATUS_OK)
3046     xml_failure(g_parser);
3047   if (found_cr == 0)
3048     fail("No carriage return found");
3049 }
3050 END_TEST
3051 
3052 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)3053 START_TEST(test_trailing_rsqb) {
3054   const char *text8 = "<doc>]";
3055   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
3056   int found_rsqb;
3057   int text8_len = (int)strlen(text8);
3058 
3059   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3060   XML_SetUserData(g_parser, &found_rsqb);
3061   found_rsqb = 0;
3062   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
3063       == XML_STATUS_OK)
3064     fail("Failed to fault unclosed doc");
3065   if (found_rsqb == 0)
3066     fail("Did not catch the right square bracket");
3067 
3068   /* Try again with a different encoding */
3069   XML_ParserReset(g_parser, NULL);
3070   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
3071   XML_SetUserData(g_parser, &found_rsqb);
3072   found_rsqb = 0;
3073   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3074                               XML_TRUE)
3075       == XML_STATUS_OK)
3076     fail("Failed to fault unclosed doc");
3077   if (found_rsqb == 0)
3078     fail("Did not catch the right square bracket");
3079 
3080   /* And finally with a default handler */
3081   XML_ParserReset(g_parser, NULL);
3082   XML_SetDefaultHandler(g_parser, rsqb_handler);
3083   XML_SetUserData(g_parser, &found_rsqb);
3084   found_rsqb = 0;
3085   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
3086                               XML_TRUE)
3087       == XML_STATUS_OK)
3088     fail("Failed to fault unclosed doc");
3089   if (found_rsqb == 0)
3090     fail("Did not catch the right square bracket");
3091 }
3092 END_TEST
3093 
3094 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)3095 START_TEST(test_ext_entity_trailing_rsqb) {
3096   const char *text = "<!DOCTYPE doc [\n"
3097                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3098                      "]>\n"
3099                      "<doc>&en;</doc>";
3100   int found_rsqb;
3101 
3102   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3103   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
3104   XML_SetUserData(g_parser, &found_rsqb);
3105   found_rsqb = 0;
3106   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3107       != XML_STATUS_OK)
3108     xml_failure(g_parser);
3109   if (found_rsqb == 0)
3110     fail("No right square bracket found");
3111 }
3112 END_TEST
3113 
3114 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)3115 START_TEST(test_ext_entity_good_cdata) {
3116   const char *text = "<!DOCTYPE doc [\n"
3117                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
3118                      "]>\n"
3119                      "<doc>&en;</doc>";
3120 
3121   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3122   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
3123   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3124       != XML_STATUS_OK)
3125     xml_failure(g_parser);
3126 }
3127 END_TEST
3128 
3129 /* Test user parameter settings */
START_TEST(test_user_parameters)3130 START_TEST(test_user_parameters) {
3131   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3132                      "<!-- Primary parse -->\n"
3133                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
3134                      "<doc>&entity;";
3135   const char *epilog = "<!-- Back to primary parser -->\n"
3136                        "</doc>";
3137 
3138   g_comment_count = 0;
3139   g_skip_count = 0;
3140   g_xdecl_count = 0;
3141   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3142   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
3143   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
3144   XML_SetCommentHandler(g_parser, data_check_comment_handler);
3145   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
3146   XML_UseParserAsHandlerArg(g_parser);
3147   XML_SetUserData(g_parser, (void *)1);
3148   g_handler_data = g_parser;
3149   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3150       == XML_STATUS_ERROR)
3151     xml_failure(g_parser);
3152   /* Ensure we can't change policy mid-parse */
3153   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
3154     fail("Changed param entity parsing policy while parsing");
3155   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
3156       == XML_STATUS_ERROR)
3157     xml_failure(g_parser);
3158   if (g_comment_count != 3)
3159     fail("Comment handler not invoked enough times");
3160   if (g_skip_count != 1)
3161     fail("Skip handler not invoked enough times");
3162   if (g_xdecl_count != 1)
3163     fail("XML declaration handler not invoked");
3164 }
3165 END_TEST
3166 
3167 /* Test that an explicit external entity handler argument replaces
3168  * the parser as the first argument.
3169  *
3170  * We do not call the first parameter to the external entity handler
3171  * 'parser' for once, since the first time the handler is called it
3172  * will actually be a text string.  We need to be able to access the
3173  * global 'parser' variable to create our external entity parser from,
3174  * since there are code paths we need to ensure get executed.
3175  */
START_TEST(test_ext_entity_ref_parameter)3176 START_TEST(test_ext_entity_ref_parameter) {
3177   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
3178                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
3179                      "<doc>&entity;</doc>";
3180 
3181   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3182   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3183   /* Set a handler arg that is not NULL and not parser (which is
3184    * what NULL would cause to be passed.
3185    */
3186   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
3187   g_handler_data = text;
3188   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3189       == XML_STATUS_ERROR)
3190     xml_failure(g_parser);
3191 
3192   /* Now try again with unset args */
3193   XML_ParserReset(g_parser, NULL);
3194   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3195   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
3196   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
3197   g_handler_data = g_parser;
3198   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3199       == XML_STATUS_ERROR)
3200     xml_failure(g_parser);
3201 }
3202 END_TEST
3203 
3204 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)3205 START_TEST(test_empty_parse) {
3206   const char *text = "<doc></doc>";
3207   const char *partial = "<doc>";
3208 
3209   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
3210     fail("Parsing empty string faulted");
3211   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3212     fail("Parsing final empty string not faulted");
3213   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
3214     fail("Parsing final empty string faulted for wrong reason");
3215 
3216   /* Now try with valid text before the empty end */
3217   XML_ParserReset(g_parser, NULL);
3218   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3219       == XML_STATUS_ERROR)
3220     xml_failure(g_parser);
3221   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
3222     fail("Parsing final empty string faulted");
3223 
3224   /* Now try with invalid text before the empty end */
3225   XML_ParserReset(g_parser, NULL);
3226   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
3227                               XML_FALSE)
3228       == XML_STATUS_ERROR)
3229     xml_failure(g_parser);
3230   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3231     fail("Parsing final incomplete empty string not faulted");
3232 }
3233 END_TEST
3234 
3235 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)3236 START_TEST(test_negative_len_parse) {
3237   const char *const doc = "<root/>";
3238   for (int isFinal = 0; isFinal < 2; isFinal++) {
3239     set_subtest("isFinal=%d", isFinal);
3240 
3241     XML_Parser parser = XML_ParserCreate(NULL);
3242 
3243     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3244       fail("There was not supposed to be any initial parse error.");
3245 
3246     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
3247 
3248     if (status != XML_STATUS_ERROR)
3249       fail("Negative len was expected to fail the parse but did not.");
3250 
3251     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3252       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3253 
3254     XML_ParserFree(parser);
3255   }
3256 }
3257 END_TEST
3258 
3259 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)3260 START_TEST(test_negative_len_parse_buffer) {
3261   const char *const doc = "<root/>";
3262   for (int isFinal = 0; isFinal < 2; isFinal++) {
3263     set_subtest("isFinal=%d", isFinal);
3264 
3265     XML_Parser parser = XML_ParserCreate(NULL);
3266 
3267     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
3268       fail("There was not supposed to be any initial parse error.");
3269 
3270     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
3271 
3272     if (buffer == NULL)
3273       fail("XML_GetBuffer failed.");
3274 
3275     memcpy(buffer, doc, strlen(doc));
3276 
3277     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3278 
3279     if (status != XML_STATUS_ERROR)
3280       fail("Negative len was expected to fail the parse but did not.");
3281 
3282     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3283       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3284 
3285     XML_ParserFree(parser);
3286   }
3287 }
3288 END_TEST
3289 
3290 /* Test odd corners of the XML_GetBuffer interface */
3291 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)3292 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3293   const XML_Feature *feature = XML_GetFeatureList();
3294 
3295   if (feature == NULL)
3296     return XML_STATUS_ERROR;
3297   for (; feature->feature != XML_FEATURE_END; feature++) {
3298     if (feature->feature == feature_id) {
3299       *presult = feature->value;
3300       return XML_STATUS_OK;
3301     }
3302   }
3303   return XML_STATUS_ERROR;
3304 }
3305 
3306 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3307 START_TEST(test_get_buffer_1) {
3308   const char *text = get_buffer_test_text;
3309   void *buffer;
3310   long context_bytes;
3311 
3312   /* Attempt to allocate a negative length buffer */
3313   if (XML_GetBuffer(g_parser, -12) != NULL)
3314     fail("Negative length buffer not failed");
3315 
3316   /* Now get a small buffer and extend it past valid length */
3317   buffer = XML_GetBuffer(g_parser, 1536);
3318   if (buffer == NULL)
3319     fail("1.5K buffer failed");
3320   assert(buffer != NULL);
3321   memcpy(buffer, text, strlen(text));
3322   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3323       == XML_STATUS_ERROR)
3324     xml_failure(g_parser);
3325   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3326     fail("INT_MAX buffer not failed");
3327 
3328   /* Now try extending it a more reasonable but still too large
3329    * amount.  The allocator in XML_GetBuffer() doubles the buffer
3330    * size until it exceeds the requested amount or INT_MAX.  If it
3331    * exceeds INT_MAX, it rejects the request, so we want a request
3332    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
3333    * with an extra byte just to ensure that the request is off any
3334    * boundary.  The request will be inflated internally by
3335    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3336    * request.
3337    */
3338   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3339     context_bytes = 0;
3340   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3341     fail("INT_MAX- buffer not failed");
3342 
3343   /* Now try extending it a carefully crafted amount */
3344   if (XML_GetBuffer(g_parser, 1000) == NULL)
3345     fail("1000 buffer failed");
3346 }
3347 END_TEST
3348 
3349 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3350 START_TEST(test_get_buffer_2) {
3351   const char *text = get_buffer_test_text;
3352   void *buffer;
3353 
3354   /* Now get a decent buffer */
3355   buffer = XML_GetBuffer(g_parser, 1536);
3356   if (buffer == NULL)
3357     fail("1.5K buffer failed");
3358   assert(buffer != NULL);
3359   memcpy(buffer, text, strlen(text));
3360   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3361       == XML_STATUS_ERROR)
3362     xml_failure(g_parser);
3363 
3364   /* Extend it, to catch a different code path */
3365   if (XML_GetBuffer(g_parser, 1024) == NULL)
3366     fail("1024 buffer failed");
3367 }
3368 END_TEST
3369 
/* Regression test for the signed integer overflow of CVE-2022-23852.
 * Only built when XML_CONTEXT_BYTES is greater than zero.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int text_len = (int)strlen(text);
  const int expectedKeepValue = text_len;

  XML_Parser local_parser = XML_ParserCreate(NULL);
  assert(local_parser != NULL);

  // After this call, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  if (_XML_Parse_SINGLE_BYTES(local_parser, text, text_len,
                              XML_FALSE /* isFinal */)
      == XML_STATUS_ERROR)
    xml_failure(local_parser);

  assert(expectedKeepValue > 0);
  // The request is sized so that adding "keep" to it would exceed
  // INT_MAX; the call must be refused.
  if (XML_GetBuffer(local_parser, INT_MAX - expectedKeepValue + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(local_parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
3394 
START_TEST(test_buffer_can_grow_to_max)3395 START_TEST(test_buffer_can_grow_to_max) {
3396   const char *const prefixes[] = {
3397       "",
3398       "<",
3399       "<x a='",
3400       "<doc><x a='",
3401       "<document><x a='",
3402       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3403       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3404       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3405       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3406       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3407   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3408   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3409 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3410   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3411   // Can we make a big allocation?
3412   for (int i = 1; i <= 2; i++) {
3413     void *const big = malloc(maxbuf);
3414     if (big != NULL) {
3415       free(big);
3416       break;
3417     }
3418     // The big allocation failed. Let's be a little lenient.
3419     maxbuf = maxbuf / 2;
3420     fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
3421   }
3422 #endif
3423 
3424   for (int i = 0; i < num_prefixes; ++i) {
3425     set_subtest("\"%s\"", prefixes[i]);
3426     XML_Parser parser = XML_ParserCreate(NULL);
3427 #if XML_GE == 1
3428     assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3429                 == XML_TRUE); // i.e. deactivate
3430 #endif
3431     const int prefix_len = (int)strlen(prefixes[i]);
3432     const enum XML_Status s
3433         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3434     if (s != XML_STATUS_OK)
3435       xml_failure(parser);
3436 
3437     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3438     // subtracting the whole prefix is easiest, and close enough.
3439     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3440     // The limit should be consistent; no prefix should allow us to
3441     // reach above the max buffer size.
3442     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3443     XML_ParserFree(parser);
3444   }
3445 }
3446 END_TEST
3447 
START_TEST(test_getbuffer_allocates_on_zero_len)3448 START_TEST(test_getbuffer_allocates_on_zero_len) {
3449   for (int first_len = 1; first_len >= 0; first_len--) {
3450     set_subtest("with len=%d first", first_len);
3451     XML_Parser parser = XML_ParserCreate(NULL);
3452     assert_true(parser != NULL);
3453     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3454     assert_true(XML_GetBuffer(parser, 0) != NULL);
3455     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3456       xml_failure(parser);
3457     XML_ParserFree(parser);
3458   }
3459 }
3460 END_TEST
3461 
3462 /* Test position information macros */
START_TEST(test_byte_info_at_end)3463 START_TEST(test_byte_info_at_end) {
3464   const char *text = "<doc></doc>";
3465 
3466   if (XML_GetCurrentByteIndex(g_parser) != -1
3467       || XML_GetCurrentByteCount(g_parser) != 0)
3468     fail("Byte index/count incorrect at start of parse");
3469   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3470       == XML_STATUS_ERROR)
3471     xml_failure(g_parser);
3472   /* At end, the count will be zero and the index the end of string */
3473   if (XML_GetCurrentByteCount(g_parser) != 0)
3474     fail("Terminal byte count incorrect");
3475   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3476     fail("Terminal byte index incorrect");
3477 }
3478 END_TEST
3479 
3480 /* Test position information from errors */
3481 #define PRE_ERROR_STR "<doc></"
3482 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3483 START_TEST(test_byte_info_at_error) {
3484   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3485 
3486   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3487       == XML_STATUS_OK)
3488     fail("Syntax error not faulted");
3489   if (XML_GetCurrentByteCount(g_parser) != 0)
3490     fail("Error byte count incorrect");
3491   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3492     fail("Error byte index incorrect");
3493 }
3494 END_TEST
3495 #undef PRE_ERROR_STR
3496 #undef POST_ERROR_STR
3497 
3498 /* Test position information in handler */
3499 #define START_ELEMENT "<e>"
3500 #define CDATA_TEXT "Hello"
3501 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3502 START_TEST(test_byte_info_at_cdata) {
3503   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3504   int offset, size;
3505   ByteTestData data;
3506 
3507   /* Check initial context is empty */
3508   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3509     fail("Unexpected context at start of parse");
3510 
3511   data.start_element_len = (int)strlen(START_ELEMENT);
3512   data.cdata_len = (int)strlen(CDATA_TEXT);
3513   data.total_string_len = (int)strlen(text);
3514   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3515   XML_SetUserData(g_parser, &data);
3516   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3517     xml_failure(g_parser);
3518 }
3519 END_TEST
3520 #undef START_ELEMENT
3521 #undef CDATA_TEXT
3522 #undef END_ELEMENT
3523 
3524 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3525 START_TEST(test_predefined_entities) {
3526   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3527   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3528   const XML_Char *result = XCS("<>&\"'");
3529   CharData storage;
3530 
3531   XML_SetDefaultHandler(g_parser, accumulate_characters);
3532   /* run_character_check uses XML_SetCharacterDataHandler(), which
3533    * unfortunately heads off a code path that we need to exercise.
3534    */
3535   CharData_Init(&storage);
3536   XML_SetUserData(g_parser, &storage);
3537   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3538       == XML_STATUS_ERROR)
3539     xml_failure(g_parser);
3540   /* The default handler doesn't translate the entities */
3541   CharData_CheckXMLChars(&storage, expected);
3542 
3543   /* Now try again and check the translation */
3544   XML_ParserReset(g_parser, NULL);
3545   run_character_check(text, result);
3546 }
3547 END_TEST
3548 
3549 /* Regression test that an invalid tag in an external parameter
3550  * reference in an external DTD is correctly faulted.
3551  *
3552  * Only a few specific tags are legal in DTDs ignoring comments and
3553  * processing instructions, all of which begin with an exclamation
3554  * mark.  "<el/>" is not one of them, so the parser should raise an
3555  * error on encountering it.
3556  */
START_TEST(test_invalid_tag_in_dtd)3557 START_TEST(test_invalid_tag_in_dtd) {
3558   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3559                      "<doc></doc>\n";
3560 
3561   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3562   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3563   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3564                  "Invalid tag IN DTD external param not rejected");
3565 }
3566 END_TEST
3567 
3568 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3569 START_TEST(test_not_predefined_entities) {
3570   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3571                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3572   int i = 0;
3573 
3574   while (text[i] != NULL) {
3575     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3576                    "Undefined entity not rejected");
3577     XML_ParserReset(g_parser, NULL);
3578     i++;
3579   }
3580 }
3581 END_TEST
3582 
3583 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3584 START_TEST(test_ignore_section) {
3585   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3586                      "<doc><e>&entity;</e></doc>";
3587   const XML_Char *expected
3588       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3589   CharData storage;
3590 
3591   CharData_Init(&storage);
3592   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3593   XML_SetUserData(g_parser, &storage);
3594   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3595   XML_SetDefaultHandler(g_parser, accumulate_characters);
3596   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3597   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3598   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3599   XML_SetStartElementHandler(g_parser, dummy_start_element);
3600   XML_SetEndElementHandler(g_parser, dummy_end_element);
3601   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3602       == XML_STATUS_ERROR)
3603     xml_failure(g_parser);
3604   CharData_CheckXMLChars(&storage, expected);
3605 }
3606 END_TEST
3607 
START_TEST(test_ignore_section_utf16)3608 START_TEST(test_ignore_section_utf16) {
3609   const char text[] =
3610       /* <!DOCTYPE d SYSTEM 's'> */
3611       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3612       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3613       /* <d><e>&en;</e></d> */
3614       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3615   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3616   CharData storage;
3617 
3618   CharData_Init(&storage);
3619   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3620   XML_SetUserData(g_parser, &storage);
3621   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3622   XML_SetDefaultHandler(g_parser, accumulate_characters);
3623   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3624   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3625   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3626   XML_SetStartElementHandler(g_parser, dummy_start_element);
3627   XML_SetEndElementHandler(g_parser, dummy_end_element);
3628   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3629       == XML_STATUS_ERROR)
3630     xml_failure(g_parser);
3631   CharData_CheckXMLChars(&storage, expected);
3632 }
3633 END_TEST
3634 
START_TEST(test_ignore_section_utf16_be)3635 START_TEST(test_ignore_section_utf16_be) {
3636   const char text[] =
3637       /* <!DOCTYPE d SYSTEM 's'> */
3638       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3639       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3640       /* <d><e>&en;</e></d> */
3641       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3642   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3643   CharData storage;
3644 
3645   CharData_Init(&storage);
3646   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3647   XML_SetUserData(g_parser, &storage);
3648   XML_SetExternalEntityRefHandler(g_parser,
3649                                   external_entity_load_ignore_utf16_be);
3650   XML_SetDefaultHandler(g_parser, accumulate_characters);
3651   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3652   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3653   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3654   XML_SetStartElementHandler(g_parser, dummy_start_element);
3655   XML_SetEndElementHandler(g_parser, dummy_end_element);
3656   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3657       == XML_STATUS_ERROR)
3658     xml_failure(g_parser);
3659   CharData_CheckXMLChars(&storage, expected);
3660 }
3661 END_TEST
3662 
3663 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3664 START_TEST(test_bad_ignore_section) {
3665   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3666                      "<doc><e>&entity;</e></doc>";
3667   ExtFaults faults[]
3668       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3669           XML_ERROR_SYNTAX},
3670          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3671           XML_ERROR_INVALID_TOKEN},
3672          {/* FIrst two bytes of a three-byte char */
3673           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3674           XML_ERROR_PARTIAL_CHAR},
3675          {NULL, NULL, NULL, XML_ERROR_NONE}};
3676   ExtFaults *fault;
3677 
3678   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3679     set_subtest("%s", fault->parse_text);
3680     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3681     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3682     XML_SetUserData(g_parser, fault);
3683     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3684                    "Incomplete IGNORE section not failed");
3685     XML_ParserReset(g_parser, NULL);
3686   }
3687 }
3688 END_TEST
3689 
3690 struct bom_testdata {
3691   const char *external;
3692   int split;
3693   XML_Bool nested_callback_happened;
3694 };
3695 
3696 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3697 external_bom_checker(XML_Parser parser, const XML_Char *context,
3698                      const XML_Char *base, const XML_Char *systemId,
3699                      const XML_Char *publicId) {
3700   const char *text;
3701   UNUSED_P(base);
3702   UNUSED_P(systemId);
3703   UNUSED_P(publicId);
3704 
3705   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3706   if (ext_parser == NULL)
3707     fail("Could not create external entity parser");
3708 
3709   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3710     struct bom_testdata *const testdata = XML_GetUserData(parser);
3711     const char *const external = testdata->external;
3712     const int split = testdata->split;
3713     testdata->nested_callback_happened = XML_TRUE;
3714 
3715     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3716         != XML_STATUS_OK) {
3717       xml_failure(ext_parser);
3718     }
3719     text = external + split; // the parse below will continue where we left off.
3720   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3721     text = "<!ELEMENT doc EMPTY>\n"
3722            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3723            "<!ENTITY % e2 '%e1;'>\n";
3724   } else {
3725     fail("unknown systemId");
3726   }
3727 
3728   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3729       != XML_STATUS_OK)
3730     xml_failure(ext_parser);
3731 
3732   XML_ParserFree(ext_parser);
3733   return XML_STATUS_OK;
3734 }
3735 
3736 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3737 START_TEST(test_external_bom_consumed) {
3738   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3739                            "<doc></doc>\n";
3740   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3741   const int len = (int)strlen(external);
3742   for (int split = 0; split <= len; ++split) {
3743     set_subtest("split at byte %d", split);
3744 
3745     struct bom_testdata testdata;
3746     testdata.external = external;
3747     testdata.split = split;
3748     testdata.nested_callback_happened = XML_FALSE;
3749 
3750     XML_Parser parser = XML_ParserCreate(NULL);
3751     if (parser == NULL) {
3752       fail("Couldn't create parser");
3753     }
3754     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3755     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3756     XML_SetUserData(parser, &testdata);
3757     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3758         == XML_STATUS_ERROR)
3759       xml_failure(parser);
3760     if (! testdata.nested_callback_happened) {
3761       fail("ref handler not called");
3762     }
3763     XML_ParserFree(parser);
3764   }
3765 }
3766 END_TEST
3767 
3768 /* Test recursive parsing */
START_TEST(test_external_entity_values)3769 START_TEST(test_external_entity_values) {
3770   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3771                      "<doc></doc>\n";
3772   ExtFaults data_004_2[] = {
3773       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3774       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3775        XML_ERROR_INVALID_TOKEN},
3776       {"'wombat", "Unterminated string not faulted", NULL,
3777        XML_ERROR_UNCLOSED_TOKEN},
3778       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3779        XML_ERROR_PARTIAL_CHAR},
3780       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3781       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3782        XML_ERROR_XML_DECL},
3783       {/* UTF-8 BOM */
3784        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3785        XML_ERROR_NONE},
3786       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3787        "Invalid token after text declaration not faulted", NULL,
3788        XML_ERROR_INVALID_TOKEN},
3789       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3790        "Unterminated string after text decl not faulted", NULL,
3791        XML_ERROR_UNCLOSED_TOKEN},
3792       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3793        "Partial UTF-8 character after text decl not faulted", NULL,
3794        XML_ERROR_PARTIAL_CHAR},
3795       {"%e1;", "Recursive parameter entity not faulted", NULL,
3796        XML_ERROR_RECURSIVE_ENTITY_REF},
3797       {NULL, NULL, NULL, XML_ERROR_NONE}};
3798   int i;
3799 
3800   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3801     set_subtest("%s", data_004_2[i].parse_text);
3802     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3803     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3804     XML_SetUserData(g_parser, &data_004_2[i]);
3805     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806         == XML_STATUS_ERROR)
3807       xml_failure(g_parser);
3808     XML_ParserReset(g_parser, NULL);
3809   }
3810 }
3811 END_TEST
3812 
3813 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3814 START_TEST(test_ext_entity_not_standalone) {
3815   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3816                      "<doc></doc>";
3817 
3818   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3819   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3820   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3821                  "Standalone rejection not caught");
3822 }
3823 END_TEST
3824 
START_TEST(test_ext_entity_value_abort)3825 START_TEST(test_ext_entity_value_abort) {
3826   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3827                      "<doc></doc>\n";
3828 
3829   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3830   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3831   g_resumable = XML_FALSE;
3832   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3833       == XML_STATUS_ERROR)
3834     xml_failure(g_parser);
3835 }
3836 END_TEST
3837 
START_TEST(test_bad_public_doctype)3838 START_TEST(test_bad_public_doctype) {
3839   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3840                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3841                      "<doc></doc>";
3842 
3843   /* Setting a handler provokes a particular code path */
3844   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3845                             dummy_end_doctype_handler);
3846   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3847 }
3848 END_TEST
3849 
3850 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3851 START_TEST(test_attribute_enum_value) {
3852   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3853                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3854                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3855   ExtTest dtd_data
3856       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3857          "<!ELEMENT a EMPTY>\n"
3858          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3859          NULL, NULL};
3860   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3861 
3862   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3863   XML_SetUserData(g_parser, &dtd_data);
3864   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3865   /* An attribute list handler provokes a different code path */
3866   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3867   run_ext_character_check(text, &dtd_data, expected);
3868 }
3869 END_TEST
3870 
3871 /* Slightly bizarrely, the library seems to silently ignore entity
3872  * definitions for predefined entities, even when they are wrong.  The
3873  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3874  * to happen, so this is currently treated as acceptable.
3875  */
START_TEST(test_predefined_entity_redefinition)3876 START_TEST(test_predefined_entity_redefinition) {
3877   const char *text = "<!DOCTYPE doc [\n"
3878                      "<!ENTITY apos 'foo'>\n"
3879                      "]>\n"
3880                      "<doc>&apos;</doc>";
3881   run_character_check(text, XCS("'"));
3882 }
3883 END_TEST
3884 
3885 /* Test that the parser stops processing the DTD after an unresolved
3886  * parameter entity is encountered.
3887  */
START_TEST(test_dtd_stop_processing)3888 START_TEST(test_dtd_stop_processing) {
3889   const char *text = "<!DOCTYPE doc [\n"
3890                      "%foo;\n"
3891                      "<!ENTITY bar 'bas'>\n"
3892                      "]><doc/>";
3893 
3894   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3895   init_dummy_handlers();
3896   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3897       == XML_STATUS_ERROR)
3898     xml_failure(g_parser);
3899   if (get_dummy_handler_flags() != 0)
3900     fail("DTD processing still going after undefined PE");
3901 }
3902 END_TEST
3903 
3904 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3905 START_TEST(test_public_notation_no_sysid) {
3906   const char *text = "<!DOCTYPE doc [\n"
3907                      "<!NOTATION note PUBLIC 'foo'>\n"
3908                      "<!ELEMENT doc EMPTY>\n"
3909                      "]>\n<doc/>";
3910 
3911   init_dummy_handlers();
3912   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3913   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3914       == XML_STATUS_ERROR)
3915     xml_failure(g_parser);
3916   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3917     fail("Notation declaration handler not called");
3918 }
3919 END_TEST
3920 
START_TEST(test_nested_groups)3921 START_TEST(test_nested_groups) {
3922   const char *text
3923       = "<!DOCTYPE doc [\n"
3924         "<!ELEMENT doc "
3925         /* Sixteen elements per line */
3926         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3927         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3928         "))))))))))))))))))))))))))))))))>\n"
3929         "<!ELEMENT e EMPTY>"
3930         "]>\n"
3931         "<doc><e/></doc>";
3932   CharData storage;
3933 
3934   CharData_Init(&storage);
3935   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3936   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3937   XML_SetUserData(g_parser, &storage);
3938   init_dummy_handlers();
3939   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3940       == XML_STATUS_ERROR)
3941     xml_failure(g_parser);
3942   CharData_CheckXMLChars(&storage, XCS("doce"));
3943   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3944     fail("Element handler not fired");
3945 }
3946 END_TEST
3947 
START_TEST(test_group_choice)3948 START_TEST(test_group_choice) {
3949   const char *text = "<!DOCTYPE doc [\n"
3950                      "<!ELEMENT doc (a|b|c)+>\n"
3951                      "<!ELEMENT a EMPTY>\n"
3952                      "<!ELEMENT b (#PCDATA)>\n"
3953                      "<!ELEMENT c ANY>\n"
3954                      "]>\n"
3955                      "<doc>\n"
3956                      "<a/>\n"
3957                      "<b attr='foo'>This is a foo</b>\n"
3958                      "<c></c>\n"
3959                      "</doc>\n";
3960 
3961   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3962   init_dummy_handlers();
3963   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3964       == XML_STATUS_ERROR)
3965     xml_failure(g_parser);
3966   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3967     fail("Element handler flag not raised");
3968 }
3969 END_TEST
3970 
START_TEST(test_standalone_parameter_entity)3971 START_TEST(test_standalone_parameter_entity) {
3972   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3973                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3974                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3975                      "%entity;\n"
3976                      "]>\n"
3977                      "<doc></doc>";
3978   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3979 
3980   XML_SetUserData(g_parser, dtd_data);
3981   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3982   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3983   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3984       == XML_STATUS_ERROR)
3985     xml_failure(g_parser);
3986 }
3987 END_TEST
3988 
3989 /* Test skipping of parameter entity in an external DTD */
3990 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3991 START_TEST(test_skipped_parameter_entity) {
3992   const char *text = "<?xml version='1.0'?>\n"
3993                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3994                      "<!ELEMENT root (#PCDATA|a)* >\n"
3995                      "]>\n"
3996                      "<root></root>";
3997   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3998 
3999   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4000   XML_SetUserData(g_parser, &dtd_data);
4001   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4002   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
4003   init_dummy_handlers();
4004   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4005       == XML_STATUS_ERROR)
4006     xml_failure(g_parser);
4007   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
4008     fail("Skip handler not executed");
4009 }
4010 END_TEST
4011 
4012 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)4013 START_TEST(test_recursive_external_parameter_entity) {
4014   const char *text = "<?xml version='1.0'?>\n"
4015                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
4016                      "<!ELEMENT root (#PCDATA|a)* >\n"
4017                      "]>\n"
4018                      "<root></root>";
4019   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
4020                         "Recursive external parameter entity not faulted", NULL,
4021                         XML_ERROR_RECURSIVE_ENTITY_REF};
4022 
4023   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4024   XML_SetUserData(g_parser, &dtd_data);
4025   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4026   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4027                  "Recursive external parameter not spotted");
4028 }
4029 END_TEST
4030 
4031 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)4032 START_TEST(test_undefined_ext_entity_in_external_dtd) {
4033   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
4034                      "<doc></doc>\n";
4035 
4036   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4037   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4038   XML_SetUserData(g_parser, NULL);
4039   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4040       == XML_STATUS_ERROR)
4041     xml_failure(g_parser);
4042 
4043   /* Now repeat without the external entity ref handler invoking
4044    * another copy of itself.
4045    */
4046   XML_ParserReset(g_parser, NULL);
4047   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4048   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
4049   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
4050   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4051       == XML_STATUS_ERROR)
4052     xml_failure(g_parser);
4053 }
4054 END_TEST
4055 
4056 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)4057 START_TEST(test_suspend_xdecl) {
4058   const char *text = long_character_data_text;
4059 
4060   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
4061   XML_SetUserData(g_parser, g_parser);
4062   g_resumable = XML_TRUE;
4063   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4064   // we won't know exactly how much input we actually managed to give Expat.
4065   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4066       != XML_STATUS_SUSPENDED)
4067     xml_failure(g_parser);
4068   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
4069     xml_failure(g_parser);
4070   /* Attempt to start a new parse while suspended */
4071   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4072       != XML_STATUS_ERROR)
4073     fail("Attempt to parse while suspended not faulted");
4074   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
4075     fail("Suspended parse not faulted with correct error");
4076 }
4077 END_TEST
4078 
4079 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)4080 START_TEST(test_abort_epilog) {
4081   const char *text = "<doc></doc>\n\r\n";
4082   XML_Char trigger_char = XCS('\r');
4083 
4084   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4085   XML_SetUserData(g_parser, &trigger_char);
4086   g_resumable = XML_FALSE;
4087   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4088       != XML_STATUS_ERROR)
4089     fail("Abort not triggered");
4090   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
4091     xml_failure(g_parser);
4092 }
4093 END_TEST
4094 
4095 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)4096 START_TEST(test_abort_epilog_2) {
4097   const char *text = "<doc></doc>\n";
4098   XML_Char trigger_char = XCS('\n');
4099 
4100   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4101   XML_SetUserData(g_parser, &trigger_char);
4102   g_resumable = XML_FALSE;
4103   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
4104 }
4105 END_TEST
4106 
4107 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)4108 START_TEST(test_suspend_epilog) {
4109   const char *text = "<doc></doc>\n";
4110   XML_Char trigger_char = XCS('\n');
4111 
4112   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
4113   XML_SetUserData(g_parser, &trigger_char);
4114   g_resumable = XML_TRUE;
4115   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4116       != XML_STATUS_SUSPENDED)
4117     xml_failure(g_parser);
4118 }
4119 END_TEST
4120 
START_TEST(test_suspend_in_sole_empty_tag)4121 START_TEST(test_suspend_in_sole_empty_tag) {
4122   const char *text = "<doc/>";
4123   enum XML_Status rc;
4124 
4125   XML_SetEndElementHandler(g_parser, suspending_end_handler);
4126   XML_SetUserData(g_parser, g_parser);
4127   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
4128   if (rc == XML_STATUS_ERROR)
4129     xml_failure(g_parser);
4130   else if (rc != XML_STATUS_SUSPENDED)
4131     fail("Suspend not triggered");
4132   rc = XML_ResumeParser(g_parser);
4133   if (rc == XML_STATUS_ERROR)
4134     xml_failure(g_parser);
4135   else if (rc != XML_STATUS_OK)
4136     fail("Resume failed");
4137 }
4138 END_TEST
4139 
START_TEST(test_unfinished_epilog)4140 START_TEST(test_unfinished_epilog) {
4141   const char *text = "<doc></doc><";
4142 
4143   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
4144                  "Incomplete epilog entry not faulted");
4145 }
4146 END_TEST
4147 
START_TEST(test_partial_char_in_epilog)4148 START_TEST(test_partial_char_in_epilog) {
4149   const char *text = "<doc></doc>\xe2\x82";
4150 
4151   /* First check that no fault is raised if the parse is not finished */
4152   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
4153       == XML_STATUS_ERROR)
4154     xml_failure(g_parser);
4155   /* Now check that it is faulted once we finish */
4156   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
4157     fail("Partial character in epilog not faulted");
4158   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
4159     xml_failure(g_parser);
4160 }
4161 END_TEST
4162 
4163 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)4164 START_TEST(test_suspend_resume_internal_entity) {
4165   const char *text
4166       = "<!DOCTYPE doc [\n"
4167         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
4168         "]>\n"
4169         "<doc>&foo;</doc>\n";
4170   const XML_Char *expected1 = XCS("Hi");
4171   const XML_Char *expected2 = XCS("HiHo");
4172   CharData storage;
4173 
4174   CharData_Init(&storage);
4175   XML_SetStartElementHandler(g_parser, start_element_suspender);
4176   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4177   XML_SetUserData(g_parser, &storage);
4178   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4179   // we won't know exactly how much input we actually managed to give Expat.
4180   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4181       != XML_STATUS_SUSPENDED)
4182     xml_failure(g_parser);
4183   CharData_CheckXMLChars(&storage, XCS(""));
4184   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
4185     xml_failure(g_parser);
4186   CharData_CheckXMLChars(&storage, expected1);
4187   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4188     xml_failure(g_parser);
4189   CharData_CheckXMLChars(&storage, expected2);
4190 }
4191 END_TEST
4192 
START_TEST(test_suspend_resume_internal_entity_issue_629)4193 START_TEST(test_suspend_resume_internal_entity_issue_629) {
4194   const char *const text
4195       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
4196         "<"
4197         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4198         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4199         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4200         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4201         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4202         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4203         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4204         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4205         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4206         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4207         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4208         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4209         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4210         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4211         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4212         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4213         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4214         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4215         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4216         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4217         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4218         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4219         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4220         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4221         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4222         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4223         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4224         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4225         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4226         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4227         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4228         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4229         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4230         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4231         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4232         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4233         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4234         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4235         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4236         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
4237         "/>"
4238         "</b></a>";
4239   const size_t firstChunkSizeBytes = 54;
4240 
4241   XML_Parser parser = XML_ParserCreate(NULL);
4242   XML_SetUserData(parser, parser);
4243   XML_SetCommentHandler(parser, suspending_comment_handler);
4244 
4245   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
4246       != XML_STATUS_SUSPENDED)
4247     xml_failure(parser);
4248   if (XML_ResumeParser(parser) != XML_STATUS_OK)
4249     xml_failure(parser);
4250   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
4251                               (int)(strlen(text) - firstChunkSizeBytes),
4252                               XML_TRUE)
4253       != XML_STATUS_OK)
4254     xml_failure(parser);
4255   XML_ParserFree(parser);
4256 }
4257 END_TEST
4258 
4259 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)4260 START_TEST(test_resume_entity_with_syntax_error) {
4261   if (g_chunkSize != 0) {
4262     // this test does not use SINGLE_BYTES, because of suspension
4263     return;
4264   }
4265 
4266   const char *text = "<!DOCTYPE doc [\n"
4267                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
4268                      "]>\n"
4269                      "<doc>&foo;</doc>\n";
4270 
4271   XML_SetStartElementHandler(g_parser, start_element_suspender);
4272   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4273   // we won't know exactly how much input we actually managed to give Expat.
4274   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4275       != XML_STATUS_SUSPENDED)
4276     xml_failure(g_parser);
4277   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4278     fail("Syntax error in entity not faulted");
4279   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4280     xml_failure(g_parser);
4281 }
4282 END_TEST
4283 
4284 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)4285 START_TEST(test_suspend_resume_parameter_entity) {
4286   const char *text = "<!DOCTYPE doc [\n"
4287                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4288                      "%foo;\n"
4289                      "]>\n"
4290                      "<doc>Hello, world</doc>";
4291   const XML_Char *expected = XCS("Hello, world");
4292   CharData storage;
4293 
4294   CharData_Init(&storage);
4295   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4296   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4297   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4298   XML_SetUserData(g_parser, &storage);
4299   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4300       != XML_STATUS_SUSPENDED)
4301     xml_failure(g_parser);
4302   CharData_CheckXMLChars(&storage, XCS(""));
4303   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4304     xml_failure(g_parser);
4305   CharData_CheckXMLChars(&storage, expected);
4306 }
4307 END_TEST
4308 
4309 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4310 START_TEST(test_restart_on_error) {
4311   const char *text = "<$doc><doc></doc>";
4312 
4313   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4314       != XML_STATUS_ERROR)
4315     fail("Invalid tag name not faulted");
4316   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4317     xml_failure(g_parser);
4318   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4319     fail("Restarting invalid parse not faulted");
4320   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4321     xml_failure(g_parser);
4322 }
4323 END_TEST
4324 
4325 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4326 START_TEST(test_reject_lt_in_attribute_value) {
4327   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4328                      "<doc></doc>";
4329 
4330   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4331                  "Bad attribute default not faulted");
4332 }
4333 END_TEST
4334 
START_TEST(test_reject_unfinished_param_in_att_value)4335 START_TEST(test_reject_unfinished_param_in_att_value) {
4336   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4337                      "<doc></doc>";
4338 
4339   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4340                  "Bad attribute default not faulted");
4341 }
4342 END_TEST
4343 
START_TEST(test_trailing_cr_in_att_value)4344 START_TEST(test_trailing_cr_in_att_value) {
4345   const char *text = "<doc a='value\r'/>";
4346 
4347   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4348       == XML_STATUS_ERROR)
4349     xml_failure(g_parser);
4350 }
4351 END_TEST
4352 
4353 /* Try parsing a general entity within a parameter entity in a
4354  * standalone internal DTD.  Covers a corner case in the parser.
4355  */
START_TEST(test_standalone_internal_entity)4356 START_TEST(test_standalone_internal_entity) {
4357   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4358                      "<!DOCTYPE doc [\n"
4359                      "  <!ELEMENT doc (#PCDATA)>\n"
4360                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
4361                      "  <!ENTITY ge 'AttDefaultValue'>\n"
4362                      "  %pe;\n"
4363                      "]>\n"
4364                      "<doc att2='any'/>";
4365 
4366   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4367   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4368       == XML_STATUS_ERROR)
4369     xml_failure(g_parser);
4370 }
4371 END_TEST
4372 
4373 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4374 START_TEST(test_skipped_external_entity) {
4375   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4376                      "<doc></doc>\n";
4377   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4378                        "<!ENTITY % e2 '%e1;'>\n",
4379                        NULL, NULL};
4380 
4381   XML_SetUserData(g_parser, &test_data);
4382   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4383   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4384   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4385       == XML_STATUS_ERROR)
4386     xml_failure(g_parser);
4387 }
4388 END_TEST
4389 
4390 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4391 START_TEST(test_skipped_null_loaded_ext_entity) {
4392   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4393                      "<doc />";
4394   ExtHdlrData test_data
4395       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4396          "<!ENTITY % pe2 '%pe1;'>\n"
4397          "%pe2;\n",
4398          external_entity_null_loader, NULL};
4399 
4400   XML_SetUserData(g_parser, &test_data);
4401   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4402   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4403   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4404       == XML_STATUS_ERROR)
4405     xml_failure(g_parser);
4406 }
4407 END_TEST
4408 
START_TEST(test_skipped_unloaded_ext_entity)4409 START_TEST(test_skipped_unloaded_ext_entity) {
4410   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4411                      "<doc />";
4412   ExtHdlrData test_data
4413       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4414          "<!ENTITY % pe2 '%pe1;'>\n"
4415          "%pe2;\n",
4416          NULL, NULL};
4417 
4418   XML_SetUserData(g_parser, &test_data);
4419   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4420   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4421   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4422       == XML_STATUS_ERROR)
4423     xml_failure(g_parser);
4424 }
4425 END_TEST
4426 
4427 /* Test that a parameter entity value ending with a carriage return
4428  * has it translated internally into a newline.
4429  */
START_TEST(test_param_entity_with_trailing_cr)4430 START_TEST(test_param_entity_with_trailing_cr) {
4431 #define PARAM_ENTITY_NAME "pe"
4432 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4433   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4434                      "<doc/>";
4435   ExtTest test_data
4436       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4437          "%" PARAM_ENTITY_NAME ";\n",
4438          NULL, NULL};
4439 
4440   XML_SetUserData(g_parser, &test_data);
4441   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4442   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4443   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4444   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4445                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4446   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4447       == XML_STATUS_ERROR)
4448     xml_failure(g_parser);
4449   int entity_match_flag = get_param_entity_match_flag();
4450   if (entity_match_flag == ENTITY_MATCH_FAIL)
4451     fail("Parameter entity CR->NEWLINE conversion failed");
4452   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4453     fail("Parameter entity not parsed");
4454 }
4455 #undef PARAM_ENTITY_NAME
4456 #undef PARAM_ENTITY_CORE_VALUE
4457 END_TEST
4458 
START_TEST(test_invalid_character_entity)4459 START_TEST(test_invalid_character_entity) {
4460   const char *text = "<!DOCTYPE doc [\n"
4461                      "  <!ENTITY entity '&#x110000;'>\n"
4462                      "]>\n"
4463                      "<doc>&entity;</doc>";
4464 
4465   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4466                  "Out of range character reference not faulted");
4467 }
4468 END_TEST
4469 
START_TEST(test_invalid_character_entity_2)4470 START_TEST(test_invalid_character_entity_2) {
4471   const char *text = "<!DOCTYPE doc [\n"
4472                      "  <!ENTITY entity '&#xg0;'>\n"
4473                      "]>\n"
4474                      "<doc>&entity;</doc>";
4475 
4476   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4477                  "Out of range character reference not faulted");
4478 }
4479 END_TEST
4480 
START_TEST(test_invalid_character_entity_3)4481 START_TEST(test_invalid_character_entity_3) {
4482   const char text[] =
4483       /* <!DOCTYPE doc [\n */
4484       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4485       /* U+0E04 = KHO KHWAI
4486        * U+0E08 = CHO CHAN */
4487       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4488       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4489       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4490       /* ]>\n */
4491       "\0]\0>\0\n"
4492       /* <doc>&entity;</doc> */
4493       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4494 
4495   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4496       != XML_STATUS_ERROR)
4497     fail("Invalid start of entity name not faulted");
4498   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4499     xml_failure(g_parser);
4500 }
4501 END_TEST
4502 
START_TEST(test_invalid_character_entity_4)4503 START_TEST(test_invalid_character_entity_4) {
4504   const char *text = "<!DOCTYPE doc [\n"
4505                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4506                      "]>\n"
4507                      "<doc>&entity;</doc>";
4508 
4509   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4510                  "Out of range character reference not faulted");
4511 }
4512 END_TEST
4513 
4514 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4515 START_TEST(test_pi_handled_in_default) {
4516   const char *text = "<?test processing instruction?>\n<doc/>";
4517   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4518   CharData storage;
4519 
4520   CharData_Init(&storage);
4521   XML_SetDefaultHandler(g_parser, accumulate_characters);
4522   XML_SetUserData(g_parser, &storage);
4523   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4524       == XML_STATUS_ERROR)
4525     xml_failure(g_parser);
4526   CharData_CheckXMLChars(&storage, expected);
4527 }
4528 END_TEST
4529 
4530 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4531 START_TEST(test_comment_handled_in_default) {
4532   const char *text = "<!-- This is a comment -->\n<doc/>";
4533   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4534   CharData storage;
4535 
4536   CharData_Init(&storage);
4537   XML_SetDefaultHandler(g_parser, accumulate_characters);
4538   XML_SetUserData(g_parser, &storage);
4539   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4540       == XML_STATUS_ERROR)
4541     xml_failure(g_parser);
4542   CharData_CheckXMLChars(&storage, expected);
4543 }
4544 END_TEST
4545 
4546 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4547 START_TEST(test_pi_yml) {
4548   const char *text = "<?yml something like data?><doc/>";
4549   const XML_Char *expected = XCS("yml: something like data\n");
4550   CharData storage;
4551 
4552   CharData_Init(&storage);
4553   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4554   XML_SetUserData(g_parser, &storage);
4555   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4556       == XML_STATUS_ERROR)
4557     xml_failure(g_parser);
4558   CharData_CheckXMLChars(&storage, expected);
4559 }
4560 END_TEST
4561 
START_TEST(test_pi_xnl)4562 START_TEST(test_pi_xnl) {
4563   const char *text = "<?xnl nothing like data?><doc/>";
4564   const XML_Char *expected = XCS("xnl: nothing like data\n");
4565   CharData storage;
4566 
4567   CharData_Init(&storage);
4568   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4569   XML_SetUserData(g_parser, &storage);
4570   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4571       == XML_STATUS_ERROR)
4572     xml_failure(g_parser);
4573   CharData_CheckXMLChars(&storage, expected);
4574 }
4575 END_TEST
4576 
START_TEST(test_pi_xmm)4577 START_TEST(test_pi_xmm) {
4578   const char *text = "<?xmm everything like data?><doc/>";
4579   const XML_Char *expected = XCS("xmm: everything like data\n");
4580   CharData storage;
4581 
4582   CharData_Init(&storage);
4583   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4584   XML_SetUserData(g_parser, &storage);
4585   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4586       == XML_STATUS_ERROR)
4587     xml_failure(g_parser);
4588   CharData_CheckXMLChars(&storage, expected);
4589 }
4590 END_TEST
4591 
START_TEST(test_utf16_pi)4592 START_TEST(test_utf16_pi) {
4593   const char text[] =
4594       /* <?{KHO KHWAI}{CHO CHAN}?>
4595        * where {KHO KHWAI} = U+0E04
4596        * and   {CHO CHAN}  = U+0E08
4597        */
4598       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4599       /* <q/> */
4600       "<\0q\0/\0>\0";
4601 #ifdef XML_UNICODE
4602   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4603 #else
4604   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4605 #endif
4606   CharData storage;
4607 
4608   CharData_Init(&storage);
4609   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4610   XML_SetUserData(g_parser, &storage);
4611   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4612       == XML_STATUS_ERROR)
4613     xml_failure(g_parser);
4614   CharData_CheckXMLChars(&storage, expected);
4615 }
4616 END_TEST
4617 
START_TEST(test_utf16_be_pi)4618 START_TEST(test_utf16_be_pi) {
4619   const char text[] =
4620       /* <?{KHO KHWAI}{CHO CHAN}?>
4621        * where {KHO KHWAI} = U+0E04
4622        * and   {CHO CHAN}  = U+0E08
4623        */
4624       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4625       /* <q/> */
4626       "\0<\0q\0/\0>";
4627 #ifdef XML_UNICODE
4628   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4629 #else
4630   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4631 #endif
4632   CharData storage;
4633 
4634   CharData_Init(&storage);
4635   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4636   XML_SetUserData(g_parser, &storage);
4637   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4638       == XML_STATUS_ERROR)
4639     xml_failure(g_parser);
4640   CharData_CheckXMLChars(&storage, expected);
4641 }
4642 END_TEST
4643 
4644 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4645 START_TEST(test_utf16_be_comment) {
4646   const char text[] =
4647       /* <!-- Comment A --> */
4648       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4649       /* <doc/> */
4650       "\0<\0d\0o\0c\0/\0>";
4651   const XML_Char *expected = XCS(" Comment A ");
4652   CharData storage;
4653 
4654   CharData_Init(&storage);
4655   XML_SetCommentHandler(g_parser, accumulate_comment);
4656   XML_SetUserData(g_parser, &storage);
4657   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4658       == XML_STATUS_ERROR)
4659     xml_failure(g_parser);
4660   CharData_CheckXMLChars(&storage, expected);
4661 }
4662 END_TEST
4663 
START_TEST(test_utf16_le_comment)4664 START_TEST(test_utf16_le_comment) {
4665   const char text[] =
4666       /* <!-- Comment B --> */
4667       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4668       /* <doc/> */
4669       "<\0d\0o\0c\0/\0>\0";
4670   const XML_Char *expected = XCS(" Comment B ");
4671   CharData storage;
4672 
4673   CharData_Init(&storage);
4674   XML_SetCommentHandler(g_parser, accumulate_comment);
4675   XML_SetUserData(g_parser, &storage);
4676   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4677       == XML_STATUS_ERROR)
4678     xml_failure(g_parser);
4679   CharData_CheckXMLChars(&storage, expected);
4680 }
4681 END_TEST
4682 
4683 /* Test that the unknown encoding handler with map entries that expect
4684  * conversion but no conversion function is faulted
4685  */
START_TEST(test_missing_encoding_conversion_fn)4686 START_TEST(test_missing_encoding_conversion_fn) {
4687   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4688                      "<doc>\x81</doc>";
4689 
4690   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4691   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4692    * character introducing a two-byte sequence.  For this, it
4693    * requires a convert function.  The above function call doesn't
4694    * pass one through, so when BadEncodingHandler actually gets
4695    * called it should supply an invalid encoding.
4696    */
4697   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4698                  "Encoding with missing convert() not faulted");
4699 }
4700 END_TEST
4701 
START_TEST(test_failing_encoding_conversion_fn)4702 START_TEST(test_failing_encoding_conversion_fn) {
4703   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4704                      "<doc>\x81</doc>";
4705 
4706   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4707   /* BadEncodingHandler sets up an encoding with every top-bit-set
4708    * character introducing a two-byte sequence.  For this, it
4709    * requires a convert function.  The above function call passes
4710    * one that insists all possible sequences are invalid anyway.
4711    */
4712   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4713                  "Encoding with failing convert() not faulted");
4714 }
4715 END_TEST
4716 
4717 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4718 START_TEST(test_unknown_encoding_success) {
4719   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4720                      /* Equivalent to <eoc>Hello, world</eoc> */
4721                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4722 
4723   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4724   run_character_check(text, XCS("Hello, world"));
4725 }
4726 END_TEST
4727 
4728 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4729 START_TEST(test_unknown_encoding_bad_name) {
4730   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4731                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4732 
4733   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4734   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4735                  "Bad name start in unknown encoding not faulted");
4736 }
4737 END_TEST
4738 
4739 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4740 START_TEST(test_unknown_encoding_bad_name_2) {
4741   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4742                      "<d\xffoc>Hello, world</d\xffoc>";
4743 
4744   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4745   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4746                  "Bad name in unknown encoding not faulted");
4747 }
4748 END_TEST
4749 
4750 /* Test element name that is long enough to fill the conversion buffer
4751  * in an unknown encoding, finishing with an encoded character.
4752  */
START_TEST(test_unknown_encoding_long_name_1)4753 START_TEST(test_unknown_encoding_long_name_1) {
4754   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4755                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4756                      "Hi"
4757                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4758   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4759   CharData storage;
4760 
4761   CharData_Init(&storage);
4762   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4763   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4764   XML_SetUserData(g_parser, &storage);
4765   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4766       == XML_STATUS_ERROR)
4767     xml_failure(g_parser);
4768   CharData_CheckXMLChars(&storage, expected);
4769 }
4770 END_TEST
4771 
4772 /* Test element name that is long enough to fill the conversion buffer
4773  * in an unknown encoding, finishing with an simple character.
4774  */
START_TEST(test_unknown_encoding_long_name_2)4775 START_TEST(test_unknown_encoding_long_name_2) {
4776   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4777                      "<abcdefghabcdefghabcdefghijklmnop>"
4778                      "Hi"
4779                      "</abcdefghabcdefghabcdefghijklmnop>";
4780   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4781   CharData storage;
4782 
4783   CharData_Init(&storage);
4784   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4785   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4786   XML_SetUserData(g_parser, &storage);
4787   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4788       == XML_STATUS_ERROR)
4789     xml_failure(g_parser);
4790   CharData_CheckXMLChars(&storage, expected);
4791 }
4792 END_TEST
4793 
START_TEST(test_invalid_unknown_encoding)4794 START_TEST(test_invalid_unknown_encoding) {
4795   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4796                      "<doc>Hello world</doc>";
4797 
4798   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4799   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4800                  "Invalid unknown encoding not faulted");
4801 }
4802 END_TEST
4803 
START_TEST(test_unknown_ascii_encoding_ok)4804 START_TEST(test_unknown_ascii_encoding_ok) {
4805   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4806                      "<doc>Hello, world</doc>";
4807 
4808   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4809   run_character_check(text, XCS("Hello, world"));
4810 }
4811 END_TEST
4812 
START_TEST(test_unknown_ascii_encoding_fail)4813 START_TEST(test_unknown_ascii_encoding_fail) {
4814   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4815                      "<doc>Hello, \x80 world</doc>";
4816 
4817   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4818   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4819                  "Invalid character not faulted");
4820 }
4821 END_TEST
4822 
START_TEST(test_unknown_encoding_invalid_length)4823 START_TEST(test_unknown_encoding_invalid_length) {
4824   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4825                      "<doc>Hello, world</doc>";
4826 
4827   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4828   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4829                  "Invalid unknown encoding not faulted");
4830 }
4831 END_TEST
4832 
START_TEST(test_unknown_encoding_invalid_topbit)4833 START_TEST(test_unknown_encoding_invalid_topbit) {
4834   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4835                      "<doc>Hello, world</doc>";
4836 
4837   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4838   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4839                  "Invalid unknown encoding not faulted");
4840 }
4841 END_TEST
4842 
START_TEST(test_unknown_encoding_invalid_surrogate)4843 START_TEST(test_unknown_encoding_invalid_surrogate) {
4844   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4845                      "<doc>Hello, \x82 world</doc>";
4846 
4847   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4848   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4849                  "Invalid unknown encoding not faulted");
4850 }
4851 END_TEST
4852 
START_TEST(test_unknown_encoding_invalid_high)4853 START_TEST(test_unknown_encoding_invalid_high) {
4854   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4855                      "<doc>Hello, world</doc>";
4856 
4857   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4858   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4859                  "Invalid unknown encoding not faulted");
4860 }
4861 END_TEST
4862 
START_TEST(test_unknown_encoding_invalid_attr_value)4863 START_TEST(test_unknown_encoding_invalid_attr_value) {
4864   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4865                      "<doc attr='\xff\x30'/>";
4866 
4867   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4868   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4869                  "Invalid attribute valid not faulted");
4870 }
4871 END_TEST
4872 
START_TEST(test_unknown_encoding_user_data_primary)4873 START_TEST(test_unknown_encoding_user_data_primary) {
4874   // This test is based on ideas contributed by Artiphishell Inc.
4875   const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
4876                            "<root />\n";
4877   XML_Parser parser = XML_ParserCreate(NULL);
4878   XML_SetUnknownEncodingHandler(parser,
4879                                 user_data_checking_unknown_encoding_handler,
4880                                 (void *)(intptr_t)0xC0FFEE);
4881 
4882   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4883               == XML_STATUS_OK);
4884 
4885   XML_ParserFree(parser);
4886 }
4887 END_TEST
4888 
START_TEST(test_unknown_encoding_user_data_secondary)4889 START_TEST(test_unknown_encoding_user_data_secondary) {
4890   // This test is based on ideas contributed by Artiphishell Inc.
4891   const char *const text_main = "<!DOCTYPE r [\n"
4892                                 "  <!ENTITY ext SYSTEM 'ext.ent'>\n"
4893                                 "]>\n"
4894                                 "<r>&ext;</r>\n";
4895   const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
4896                                     "<e>data</e>";
4897   ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
4898   XML_Parser parser = XML_ParserCreate(NULL);
4899   XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
4900   XML_SetUnknownEncodingHandler(parser,
4901                                 user_data_checking_unknown_encoding_handler,
4902                                 (void *)(intptr_t)0xC0FFEE);
4903   XML_SetUserData(parser, &test_data);
4904 
4905   assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
4906                                       XML_TRUE)
4907               == XML_STATUS_OK);
4908 
4909   XML_ParserFree(parser);
4910 }
4911 END_TEST
4912 
4913 /* Test an external entity parser set to use latin-1 detects UTF-16
4914  * BOMs correctly.
4915  */
4916 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4917 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4918   const char *text = "<!DOCTYPE doc [\n"
4919                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4920                      "]>\n"
4921                      "<doc>&en;</doc>";
4922   ExtTest2 test_data
4923       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4924          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4925           *   0x4c = L and 0x20 is a space
4926           */
4927          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4928 #ifdef XML_UNICODE
4929   const XML_Char *expected = XCS("\x00ff\x00feL ");
4930 #else
4931   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4932   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4933 #endif
4934   CharData storage;
4935 
4936   CharData_Init(&storage);
4937   test_data.storage = &storage;
4938   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4939   XML_SetUserData(g_parser, &test_data);
4940   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4941   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4942       == XML_STATUS_ERROR)
4943     xml_failure(g_parser);
4944   CharData_CheckXMLChars(&storage, expected);
4945 }
4946 END_TEST
4947 
START_TEST(test_ext_entity_latin1_utf16be_bom)4948 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4949   const char *text = "<!DOCTYPE doc [\n"
4950                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4951                      "]>\n"
4952                      "<doc>&en;</doc>";
4953   ExtTest2 test_data
4954       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4955          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4956           *   0x4c = L and 0x20 is a space
4957           */
4958          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4959 #ifdef XML_UNICODE
4960   const XML_Char *expected = XCS("\x00fe\x00ff L");
4961 #else
4962   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4963   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4964 #endif
4965   CharData storage;
4966 
4967   CharData_Init(&storage);
4968   test_data.storage = &storage;
4969   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4970   XML_SetUserData(g_parser, &test_data);
4971   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4972   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4973       == XML_STATUS_ERROR)
4974     xml_failure(g_parser);
4975   CharData_CheckXMLChars(&storage, expected);
4976 }
4977 END_TEST
4978 
4979 /* Parsing the full buffer rather than a byte at a time makes a
4980  * difference to the encoding scanning code, so repeat the above tests
4981  * without breaking them down by byte.
4982  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4983 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4984   const char *text = "<!DOCTYPE doc [\n"
4985                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4986                      "]>\n"
4987                      "<doc>&en;</doc>";
4988   ExtTest2 test_data
4989       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4990          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4991           *   0x4c = L and 0x20 is a space
4992           */
4993          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4994 #ifdef XML_UNICODE
4995   const XML_Char *expected = XCS("\x00ff\x00feL ");
4996 #else
4997   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4998   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4999 #endif
5000   CharData storage;
5001 
5002   CharData_Init(&storage);
5003   test_data.storage = &storage;
5004   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5005   XML_SetUserData(g_parser, &test_data);
5006   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5007   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5008       == XML_STATUS_ERROR)
5009     xml_failure(g_parser);
5010   CharData_CheckXMLChars(&storage, expected);
5011 }
5012 END_TEST
5013 
START_TEST(test_ext_entity_latin1_utf16be_bom2)5014 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
5015   const char *text = "<!DOCTYPE doc [\n"
5016                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5017                      "]>\n"
5018                      "<doc>&en;</doc>";
5019   ExtTest2 test_data
5020       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
5021          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
5022           *   0x4c = L and 0x20 is a space
5023           */
5024          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
5025 #ifdef XML_UNICODE
5026   const XML_Char *expected = XCS("\x00fe\x00ff L");
5027 #else
5028   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
5029   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
5030 #endif
5031   CharData storage;
5032 
5033   CharData_Init(&storage);
5034   test_data.storage = &storage;
5035   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5036   XML_SetUserData(g_parser, &test_data);
5037   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5038   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5039       == XML_STATUS_ERROR)
5040     xml_failure(g_parser);
5041   CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044 
5045 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)5046 START_TEST(test_ext_entity_utf16_be) {
5047   const char *text = "<!DOCTYPE doc [\n"
5048                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5049                      "]>\n"
5050                      "<doc>&en;</doc>";
5051   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
5052 #ifdef XML_UNICODE
5053   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5054 #else
5055   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
5056                                  "\xe6\x94\x80"   /* U+6500 */
5057                                  "\xe2\xbc\x80"   /* U+2F00 */
5058                                  "\xe3\xb8\x80"); /* U+3E00 */
5059 #endif
5060   CharData storage;
5061 
5062   CharData_Init(&storage);
5063   test_data.storage = &storage;
5064   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5065   XML_SetUserData(g_parser, &test_data);
5066   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5067   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5068       == XML_STATUS_ERROR)
5069     xml_failure(g_parser);
5070   CharData_CheckXMLChars(&storage, expected);
5071 }
5072 END_TEST
5073 
5074 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)5075 START_TEST(test_ext_entity_utf16_le) {
5076   const char *text = "<!DOCTYPE doc [\n"
5077                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5078                      "]>\n"
5079                      "<doc>&en;</doc>";
5080   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
5081 #ifdef XML_UNICODE
5082   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
5083 #else
5084   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
5085                                  "\xe6\x94\x80"   /* U+6500 */
5086                                  "\xe2\xbc\x80"   /* U+2F00 */
5087                                  "\xe3\xb8\x80"); /* U+3E00 */
5088 #endif
5089   CharData storage;
5090 
5091   CharData_Init(&storage);
5092   test_data.storage = &storage;
5093   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5094   XML_SetUserData(g_parser, &test_data);
5095   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5096   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5097       == XML_STATUS_ERROR)
5098     xml_failure(g_parser);
5099   CharData_CheckXMLChars(&storage, expected);
5100 }
5101 END_TEST
5102 
5103 /* Test little-endian UTF-16 given no explicit encoding.
5104  * The existing default encoding (UTF-8) is assumed to hold without a
5105  * BOM to contradict it, so the entity value will in fact provoke an
5106  * error because 0x00 is not a valid XML character.  We parse the
5107  * whole buffer in one go rather than feeding it in byte by byte to
5108  * exercise different code paths in the initial scanning routines.
5109  */
START_TEST(test_ext_entity_utf16_unknown)5110 START_TEST(test_ext_entity_utf16_unknown) {
5111   const char *text = "<!DOCTYPE doc [\n"
5112                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5113                      "]>\n"
5114                      "<doc>&en;</doc>";
5115   ExtFaults2 test_data
5116       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
5117          XML_ERROR_INVALID_TOKEN};
5118 
5119   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
5120   XML_SetUserData(g_parser, &test_data);
5121   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5122                  "Invalid character should not have been accepted");
5123 }
5124 END_TEST
5125 
5126 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)5127 START_TEST(test_ext_entity_utf8_non_bom) {
5128   const char *text = "<!DOCTYPE doc [\n"
5129                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
5130                      "]>\n"
5131                      "<doc>&en;</doc>";
5132   ExtTest2 test_data
5133       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
5134          3, NULL, NULL};
5135 #ifdef XML_UNICODE
5136   const XML_Char *expected = XCS("\xfec0");
5137 #else
5138   const XML_Char *expected = XCS("\xef\xbb\x80");
5139 #endif
5140   CharData storage;
5141 
5142   CharData_Init(&storage);
5143   test_data.storage = &storage;
5144   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5145   XML_SetUserData(g_parser, &test_data);
5146   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5147   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5148       == XML_STATUS_ERROR)
5149     xml_failure(g_parser);
5150   CharData_CheckXMLChars(&storage, expected);
5151 }
5152 END_TEST
5153 
5154 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)5155 START_TEST(test_utf8_in_cdata_section) {
5156   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
5157 #ifdef XML_UNICODE
5158   const XML_Char *expected = XCS("one \x00e9 two");
5159 #else
5160   const XML_Char *expected = XCS("one \xc3\xa9 two");
5161 #endif
5162 
5163   run_character_check(text, expected);
5164 }
5165 END_TEST
5166 
5167 /* Test that little-endian UTF-16 in a CDATA section is handled */
START_TEST(test_utf8_in_cdata_section_2)5168 START_TEST(test_utf8_in_cdata_section_2) {
5169   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
5170 #ifdef XML_UNICODE
5171   const XML_Char *expected = XCS("\x00e9]\x00e9two");
5172 #else
5173   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
5174 #endif
5175 
5176   run_character_check(text, expected);
5177 }
5178 END_TEST
5179 
START_TEST(test_utf8_in_start_tags)5180 START_TEST(test_utf8_in_start_tags) {
5181   struct test_case {
5182     bool goodName;
5183     bool goodNameStart;
5184     const char *tagName;
5185   };
5186 
5187   // The idea with the tests below is this:
5188   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
5189   // go to isNever and are hence not a concern.
5190   //
5191   // We start with a character that is a valid name character
5192   // (or even name-start character, see XML 1.0r4 spec) and then we flip
5193   // single bits at places where (1) the result leaves the UTF-8 encoding space
5194   // and (2) we stay in the same n-byte sequence family.
5195   //
5196   // The flipped bits are highlighted in angle brackets in comments,
5197   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
5198   // the most significant bit to 1 to leave UTF-8 encoding space.
5199   struct test_case cases[] = {
5200       // 1-byte UTF-8: [0xxx xxxx]
5201       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
5202       {false, false, "\xBA"}, // [<1>011 1010]
5203       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
5204       {false, false, "\xB9"}, // [<1>011 1001]
5205 
5206       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
5207       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
5208                                   // Arabic small waw U+06E5
5209       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
5210       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
5211       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
5212       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
5213                                   // combining char U+0301
5214       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
5215       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
5216       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
5217 
5218       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
5219       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
5220                                       // Devanagari Letter A U+0905
5221       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
5222       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
5223       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
5224       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
5225       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
5226       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
5227                                       // combining char U+0901
5228       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
5229       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
5230       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
5231       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
5232       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
5233   };
5234   const bool atNameStart[] = {true, false};
5235 
5236   size_t i = 0;
5237   char doc[1024];
5238   size_t failCount = 0;
5239 
5240   // we need all the bytes to be parsed, but we don't want the errors that can
5241   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
5242   if (g_reparseDeferralEnabledDefault) {
5243     return;
5244   }
5245 
5246   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
5247     size_t j = 0;
5248     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
5249       const bool expectedSuccess
5250           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
5251       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
5252                cases[i].tagName);
5253       XML_Parser parser = XML_ParserCreate(NULL);
5254 
5255       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
5256           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
5257 
5258       bool success = true;
5259       if ((status == XML_STATUS_OK) != expectedSuccess) {
5260         success = false;
5261       }
5262       if ((status == XML_STATUS_ERROR)
5263           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
5264         success = false;
5265       }
5266 
5267       if (! success) {
5268         fprintf(
5269             stderr,
5270             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5271             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
5272             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5273         failCount++;
5274       }
5275 
5276       XML_ParserFree(parser);
5277     }
5278   }
5279 
5280   if (failCount > 0) {
5281     fail("UTF-8 regression detected");
5282   }
5283 }
5284 END_TEST
5285 
5286 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)5287 START_TEST(test_trailing_spaces_in_elements) {
5288   const char *text = "<doc   >Hi</doc >";
5289   const XML_Char *expected = XCS("doc/doc");
5290   CharData storage;
5291 
5292   CharData_Init(&storage);
5293   XML_SetElementHandler(g_parser, record_element_start_handler,
5294                         record_element_end_handler);
5295   XML_SetUserData(g_parser, &storage);
5296   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5297       == XML_STATUS_ERROR)
5298     xml_failure(g_parser);
5299   CharData_CheckXMLChars(&storage, expected);
5300 }
5301 END_TEST
5302 
START_TEST(test_utf16_attribute)5303 START_TEST(test_utf16_attribute) {
5304   const char text[] =
5305       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5306        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5307        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5308        */
5309       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5310   const XML_Char *expected = XCS("a");
5311   CharData storage;
5312 
5313   CharData_Init(&storage);
5314   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5315   XML_SetUserData(g_parser, &storage);
5316   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5317       == XML_STATUS_ERROR)
5318     xml_failure(g_parser);
5319   CharData_CheckXMLChars(&storage, expected);
5320 }
5321 END_TEST
5322 
START_TEST(test_utf16_second_attr)5323 START_TEST(test_utf16_second_attr) {
5324   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5325    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5326    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5327    */
5328   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5329                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5330   const XML_Char *expected = XCS("1");
5331   CharData storage;
5332 
5333   CharData_Init(&storage);
5334   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5335   XML_SetUserData(g_parser, &storage);
5336   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5337       == XML_STATUS_ERROR)
5338     xml_failure(g_parser);
5339   CharData_CheckXMLChars(&storage, expected);
5340 }
5341 END_TEST
5342 
START_TEST(test_attr_after_solidus)5343 START_TEST(test_attr_after_solidus) {
5344   const char *text = "<doc attr1='a' / attr2='b'>";
5345 
5346   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5347 }
5348 END_TEST
5349 
START_TEST(test_utf16_pe)5350 START_TEST(test_utf16_pe) {
5351   /* <!DOCTYPE doc [
5352    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5353    * %{KHO KHWAI}{CHO CHAN};
5354    * ]>
5355    * <doc></doc>
5356    *
5357    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5358    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5359    */
5360   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5361                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5362                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5363                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5364                       "\0%\x0e\x04\x0e\x08\0;\0\n"
5365                       "\0]\0>\0\n"
5366                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5367 #ifdef XML_UNICODE
5368   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5369 #else
5370   const XML_Char *expected
5371       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5372 #endif
5373   CharData storage;
5374 
5375   CharData_Init(&storage);
5376   XML_SetUserData(g_parser, &storage);
5377   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5378   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5379       == XML_STATUS_ERROR)
5380     xml_failure(g_parser);
5381   CharData_CheckXMLChars(&storage, expected);
5382 }
5383 END_TEST
5384 
5385 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5386 START_TEST(test_bad_attr_desc_keyword) {
5387   const char *text = "<!DOCTYPE doc [\n"
5388                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5389                      "]>\n"
5390                      "<doc />";
5391 
5392   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5393                  "Bad keyword !IMPLIED not faulted");
5394 }
5395 END_TEST
5396 
5397 /* Test that an invalid attribute description keyword consisting of
5398  * UTF-16 characters with their top bytes non-zero are correctly
5399  * faulted
5400  */
START_TEST(test_bad_attr_desc_keyword_utf16)5401 START_TEST(test_bad_attr_desc_keyword_utf16) {
5402   /* <!DOCTYPE d [
5403    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5404    * ]><d/>
5405    *
5406    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5407    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5408    */
5409   const char text[]
5410       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5411         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5412         "\0#\x0e\x04\x0e\x08\0>\0\n"
5413         "\0]\0>\0<\0d\0/\0>";
5414 
5415   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5416       != XML_STATUS_ERROR)
5417     fail("Invalid UTF16 attribute keyword not faulted");
5418   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5419     xml_failure(g_parser);
5420 }
5421 END_TEST
5422 
5423 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
5424  * using prefix-encoding (see above) to trigger specific code paths
5425  */
START_TEST(test_bad_doctype)5426 START_TEST(test_bad_doctype) {
5427   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5428                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5429 
5430   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5431   expect_failure(text, XML_ERROR_SYNTAX,
5432                  "Invalid bytes in DOCTYPE not faulted");
5433 }
5434 END_TEST
5435 
START_TEST(test_bad_doctype_utf8)5436 START_TEST(test_bad_doctype_utf8) {
5437   const char *text = "<!DOCTYPE \xDB\x25"
5438                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
5439   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5440                  "Invalid UTF-8 in DOCTYPE not faulted");
5441 }
5442 END_TEST
5443 
START_TEST(test_bad_doctype_utf16)5444 START_TEST(test_bad_doctype_utf16) {
5445   const char text[] =
5446       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5447        *
5448        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5449        * (name character) but not a valid letter (name start character)
5450        */
5451       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5452       "\x06\xf2"
5453       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5454 
5455   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5456       != XML_STATUS_ERROR)
5457     fail("Invalid bytes in DOCTYPE not faulted");
5458   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5459     xml_failure(g_parser);
5460 }
5461 END_TEST
5462 
START_TEST(test_bad_doctype_plus)5463 START_TEST(test_bad_doctype_plus) {
5464   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5465                      "<1+>&foo;</1+>";
5466 
5467   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5468                  "'+' in document name not faulted");
5469 }
5470 END_TEST
5471 
START_TEST(test_bad_doctype_star)5472 START_TEST(test_bad_doctype_star) {
5473   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5474                      "<1*>&foo;</1*>";
5475 
5476   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5477                  "'*' in document name not faulted");
5478 }
5479 END_TEST
5480 
START_TEST(test_bad_doctype_query)5481 START_TEST(test_bad_doctype_query) {
5482   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5483                      "<1?>&foo;</1?>";
5484 
5485   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5486                  "'?' in document name not faulted");
5487 }
5488 END_TEST
5489 
START_TEST(test_unknown_encoding_bad_ignore)5490 START_TEST(test_unknown_encoding_bad_ignore) {
5491   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5492                      "<!DOCTYPE doc SYSTEM 'foo'>"
5493                      "<doc><e>&entity;</e></doc>";
5494   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5495                      "Invalid character not faulted", XCS("prefix-conv"),
5496                      XML_ERROR_INVALID_TOKEN};
5497 
5498   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5499   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5500   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5501   XML_SetUserData(g_parser, &fault);
5502   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5503                  "Bad IGNORE section with unknown encoding not failed");
5504 }
5505 END_TEST
5506 
START_TEST(test_entity_in_utf16_be_attr)5507 START_TEST(test_entity_in_utf16_be_attr) {
5508   const char text[] =
5509       /* <e a='&#228; &#x00E4;'></e> */
5510       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5511       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5512 #ifdef XML_UNICODE
5513   const XML_Char *expected = XCS("\x00e4 \x00e4");
5514 #else
5515   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5516 #endif
5517   CharData storage;
5518 
5519   CharData_Init(&storage);
5520   XML_SetUserData(g_parser, &storage);
5521   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5522   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5523       == XML_STATUS_ERROR)
5524     xml_failure(g_parser);
5525   CharData_CheckXMLChars(&storage, expected);
5526 }
5527 END_TEST
5528 
START_TEST(test_entity_in_utf16_le_attr)5529 START_TEST(test_entity_in_utf16_le_attr) {
5530   const char text[] =
5531       /* <e a='&#228; &#x00E4;'></e> */
5532       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5533       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5534 #ifdef XML_UNICODE
5535   const XML_Char *expected = XCS("\x00e4 \x00e4");
5536 #else
5537   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5538 #endif
5539   CharData storage;
5540 
5541   CharData_Init(&storage);
5542   XML_SetUserData(g_parser, &storage);
5543   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5544   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5545       == XML_STATUS_ERROR)
5546     xml_failure(g_parser);
5547   CharData_CheckXMLChars(&storage, expected);
5548 }
5549 END_TEST
5550 
START_TEST(test_entity_public_utf16_be)5551 START_TEST(test_entity_public_utf16_be) {
5552   const char text[] =
5553       /* <!DOCTYPE d [ */
5554       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5555       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5556       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5557       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5558       /* %e; */
5559       "\0%\0e\0;\0\n"
5560       /* ]> */
5561       "\0]\0>\0\n"
5562       /* <d>&j;</d> */
5563       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5564   ExtTest2 test_data
5565       = {/* <!ENTITY j 'baz'> */
5566          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5567   const XML_Char *expected = XCS("baz");
5568   CharData storage;
5569 
5570   CharData_Init(&storage);
5571   test_data.storage = &storage;
5572   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5573   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5574   XML_SetUserData(g_parser, &test_data);
5575   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5576   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5577       == XML_STATUS_ERROR)
5578     xml_failure(g_parser);
5579   CharData_CheckXMLChars(&storage, expected);
5580 }
5581 END_TEST
5582 
START_TEST(test_entity_public_utf16_le)5583 START_TEST(test_entity_public_utf16_le) {
5584   const char text[] =
5585       /* <!DOCTYPE d [ */
5586       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5587       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5588       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5589       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5590       /* %e; */
5591       "%\0e\0;\0\n\0"
5592       /* ]> */
5593       "]\0>\0\n\0"
5594       /* <d>&j;</d> */
5595       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5596   ExtTest2 test_data
5597       = {/* <!ENTITY j 'baz'> */
5598          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5599   const XML_Char *expected = XCS("baz");
5600   CharData storage;
5601 
5602   CharData_Init(&storage);
5603   test_data.storage = &storage;
5604   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5605   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5606   XML_SetUserData(g_parser, &test_data);
5607   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5608   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5609       == XML_STATUS_ERROR)
5610     xml_failure(g_parser);
5611   CharData_CheckXMLChars(&storage, expected);
5612 }
5613 END_TEST
5614 
5615 /* Test that a doctype with neither an internal nor external subset is
5616  * faulted
5617  */
START_TEST(test_short_doctype)5618 START_TEST(test_short_doctype) {
5619   const char *text = "<!DOCTYPE doc></doc>";
5620   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5621                  "DOCTYPE without subset not rejected");
5622 }
5623 END_TEST
5624 
START_TEST(test_short_doctype_2)5625 START_TEST(test_short_doctype_2) {
5626   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5627   expect_failure(text, XML_ERROR_SYNTAX,
5628                  "DOCTYPE without Public ID not rejected");
5629 }
5630 END_TEST
5631 
START_TEST(test_short_doctype_3)5632 START_TEST(test_short_doctype_3) {
5633   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5634   expect_failure(text, XML_ERROR_SYNTAX,
5635                  "DOCTYPE without System ID not rejected");
5636 }
5637 END_TEST
5638 
START_TEST(test_long_doctype)5639 START_TEST(test_long_doctype) {
5640   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5641   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5642 }
5643 END_TEST
5644 
START_TEST(test_bad_entity)5645 START_TEST(test_bad_entity) {
5646   const char *text = "<!DOCTYPE doc [\n"
5647                      "  <!ENTITY foo PUBLIC>\n"
5648                      "]>\n"
5649                      "<doc/>";
5650   expect_failure(text, XML_ERROR_SYNTAX,
5651                  "ENTITY without Public ID is not rejected");
5652 }
5653 END_TEST
5654 
5655 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5656 START_TEST(test_bad_entity_2) {
5657   const char *text = "<!DOCTYPE doc [\n"
5658                      "  <!ENTITY % foo bar>\n"
5659                      "]>\n"
5660                      "<doc/>";
5661   expect_failure(text, XML_ERROR_SYNTAX,
5662                  "ENTITY without Public ID is not rejected");
5663 }
5664 END_TEST
5665 
START_TEST(test_bad_entity_3)5666 START_TEST(test_bad_entity_3) {
5667   const char *text = "<!DOCTYPE doc [\n"
5668                      "  <!ENTITY % foo PUBLIC>\n"
5669                      "]>\n"
5670                      "<doc/>";
5671   expect_failure(text, XML_ERROR_SYNTAX,
5672                  "Parameter ENTITY without Public ID is not rejected");
5673 }
5674 END_TEST
5675 
START_TEST(test_bad_entity_4)5676 START_TEST(test_bad_entity_4) {
5677   const char *text = "<!DOCTYPE doc [\n"
5678                      "  <!ENTITY % foo SYSTEM>\n"
5679                      "]>\n"
5680                      "<doc/>";
5681   expect_failure(text, XML_ERROR_SYNTAX,
5682                  "Parameter ENTITY without Public ID is not rejected");
5683 }
5684 END_TEST
5685 
START_TEST(test_bad_notation)5686 START_TEST(test_bad_notation) {
5687   const char *text = "<!DOCTYPE doc [\n"
5688                      "  <!NOTATION n SYSTEM>\n"
5689                      "]>\n"
5690                      "<doc/>";
5691   expect_failure(text, XML_ERROR_SYNTAX,
5692                  "Notation without System ID is not rejected");
5693 }
5694 END_TEST
5695 
5696 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5697 START_TEST(test_default_doctype_handler) {
5698   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5699                      "  <!ENTITY foo 'bar'>\n"
5700                      "]>\n"
5701                      "<doc>&foo;</doc>";
5702   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5703                               {XCS("'test.dtd'"), 10, XML_FALSE},
5704                               {NULL, 0, XML_FALSE}};
5705   int i;
5706 
5707   XML_SetUserData(g_parser, &test_data);
5708   XML_SetDefaultHandler(g_parser, checking_default_handler);
5709   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5710   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5711       == XML_STATUS_ERROR)
5712     xml_failure(g_parser);
5713   for (i = 0; test_data[i].expected != NULL; i++)
5714     if (! test_data[i].seen)
5715       fail("Default handler not run for public !DOCTYPE");
5716 }
5717 END_TEST
5718 
START_TEST(test_empty_element_abort)5719 START_TEST(test_empty_element_abort) {
5720   const char *text = "<abort/>";
5721 
5722   XML_SetStartElementHandler(g_parser, start_element_suspender);
5723   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5724       != XML_STATUS_ERROR)
5725     fail("Expected to error on abort");
5726 }
5727 END_TEST
5728 
5729 /* Regression test for GH issue #612: unfinished m_declAttributeType
5730  * allocation in ->m_tempPool can corrupt following allocation.
5731  */
START_TEST(test_pool_integrity_with_unfinished_attr)5732 START_TEST(test_pool_integrity_with_unfinished_attr) {
5733   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5734                      "<!DOCTYPE foo [\n"
5735                      "<!ELEMENT foo ANY>\n"
5736                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5737                      "%entp;\n"
5738                      "]>\n"
5739                      "<a></a>\n";
5740   const XML_Char *expected = XCS("COMMENT");
5741   CharData storage;
5742 
5743   CharData_Init(&storage);
5744   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5745   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5746   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5747   XML_SetCommentHandler(g_parser, accumulate_comment);
5748   XML_SetUserData(g_parser, &storage);
5749   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5750       == XML_STATUS_ERROR)
5751     xml_failure(g_parser);
5752   CharData_CheckXMLChars(&storage, expected);
5753 }
5754 END_TEST
5755 
5756 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5757 START_TEST(test_entity_ref_no_elements) {
5758   const char *const text = "<!DOCTYPE foo [\n"
5759                            "<!ENTITY e1 \"test\">\n"
5760                            "]> <foo>&e1;"; // intentionally missing newline
5761 
5762   XML_Parser parser = XML_ParserCreate(NULL);
5763   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5764               == XML_STATUS_ERROR);
5765   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5766   XML_ParserFree(parser);
5767 }
5768 END_TEST
5769 
5770 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5771 START_TEST(test_deep_nested_entity) {
5772   const size_t N_LINES = 60000;
5773   const size_t SIZE_PER_LINE = 50;
5774 
5775   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5776   if (text == NULL) {
5777     fail("malloc failed");
5778   }
5779 
5780   char *textPtr = text;
5781 
5782   // Create the XML
5783   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5784                       "<!DOCTYPE foo [\n"
5785                       "	<!ENTITY s0 'deepText'>\n");
5786 
5787   for (size_t i = 1; i < N_LINES; ++i) {
5788     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5789                         (long unsigned)i, (long unsigned)(i - 1));
5790   }
5791 
5792   snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5793            (long unsigned)(N_LINES - 1));
5794 
5795   const XML_Char *const expected = XCS("deepText");
5796 
5797   CharData storage;
5798   CharData_Init(&storage);
5799 
5800   XML_Parser parser = XML_ParserCreate(NULL);
5801 
5802   XML_SetCharacterDataHandler(parser, accumulate_characters);
5803   XML_SetUserData(parser, &storage);
5804 
5805   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5806       == XML_STATUS_ERROR)
5807     xml_failure(parser);
5808 
5809   CharData_CheckXMLChars(&storage, expected);
5810   XML_ParserFree(parser);
5811   free(text);
5812 }
5813 END_TEST
5814 
5815 /* Tests if chained entity references in attributes
5816 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5817 START_TEST(test_deep_nested_attribute_entity) {
5818   const size_t N_LINES = 60000;
5819   const size_t SIZE_PER_LINE = 100;
5820 
5821   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5822   if (text == NULL) {
5823     fail("malloc failed");
5824   }
5825 
5826   char *textPtr = text;
5827 
5828   // Create the XML
5829   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5830                       "<!DOCTYPE foo [\n"
5831                       "	<!ENTITY s0 'deepText'>\n");
5832 
5833   for (size_t i = 1; i < N_LINES; ++i) {
5834     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5835                         (long unsigned)i, (long unsigned)(i - 1));
5836   }
5837 
5838   snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5839            (long unsigned)(N_LINES - 1));
5840 
5841   AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5842   ElementInfo info[]
5843       = {{XCS("foo"), 1, 0, NULL, doc_info}, {NULL, 0, 0, NULL, NULL}};
5844 
5845   XML_Parser parser = XML_ParserCreate(NULL);
5846   ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5847 
5848   XML_SetStartElementHandler(parser, counting_start_element_handler);
5849   XML_SetUserData(parser, &parserPlusElemenInfo);
5850 
5851   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5852       == XML_STATUS_ERROR)
5853     xml_failure(parser);
5854 
5855   XML_ParserFree(parser);
5856   free(text);
5857 }
5858 END_TEST
5859 
START_TEST(test_deep_nested_entity_delayed_interpretation)5860 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5861   const size_t N_LINES = 70000;
5862   const size_t SIZE_PER_LINE = 100;
5863 
5864   char *const text = malloc((N_LINES + 4) * SIZE_PER_LINE);
5865   if (text == NULL) {
5866     fail("malloc failed");
5867   }
5868 
5869   char *textPtr = text;
5870 
5871   // Create the XML
5872   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5873                       "<!DOCTYPE foo [\n"
5874                       "	<!ENTITY %% s0 'deepText'>\n");
5875 
5876   for (size_t i = 1; i < N_LINES; ++i) {
5877     textPtr += snprintf(textPtr, SIZE_PER_LINE,
5878                         "  <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
5879                         (long unsigned)(i - 1));
5880   }
5881 
5882   snprintf(textPtr, SIZE_PER_LINE,
5883            "  <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
5884            "  %%define_g;\n"
5885            "]>\n"
5886            "<foo/>\n",
5887            (long unsigned)(N_LINES - 1));
5888 
5889   XML_Parser parser = XML_ParserCreate(NULL);
5890 
5891   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5892   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5893       == XML_STATUS_ERROR)
5894     xml_failure(parser);
5895 
5896   XML_ParserFree(parser);
5897   free(text);
5898 }
5899 END_TEST
5900 
START_TEST(test_nested_entity_suspend)5901 START_TEST(test_nested_entity_suspend) {
5902   const char *const text = "<!DOCTYPE a [\n"
5903                            "  <!ENTITY e1 '<!--e1-->'>\n"
5904                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5905                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5906                            "]>\n"
5907                            "<a><!--start-->&e3;<!--end--></a>";
5908   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5909       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5910   CharData storage;
5911   CharData_Init(&storage);
5912   XML_Parser parser = XML_ParserCreate(NULL);
5913   ParserPlusStorage parserPlusStorage = {parser, &storage};
5914 
5915   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5916   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5917   XML_SetUserData(parser, &parserPlusStorage);
5918 
5919   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5920   while (status == XML_STATUS_SUSPENDED) {
5921     status = XML_ResumeParser(parser);
5922   }
5923   if (status != XML_STATUS_OK)
5924     xml_failure(parser);
5925 
5926   CharData_CheckXMLChars(&storage, expected);
5927   XML_ParserFree(parser);
5928 }
5929 END_TEST
5930 
START_TEST(test_nested_entity_suspend_2)5931 START_TEST(test_nested_entity_suspend_2) {
5932   const char *const text = "<!DOCTYPE doc [\n"
5933                            "  <!ENTITY ge1 'head1Ztail1'>\n"
5934                            "  <!ENTITY ge2 'head2&ge1;tail2'>\n"
5935                            "  <!ENTITY ge3 'head3&ge2;tail3'>\n"
5936                            "]>\n"
5937                            "<doc>&ge3;</doc>";
5938   const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5939       XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5940   CharData storage;
5941   CharData_Init(&storage);
5942   XML_Parser parser = XML_ParserCreate(NULL);
5943   ParserPlusStorage parserPlusStorage = {parser, &storage};
5944 
5945   XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5946   XML_SetUserData(parser, &parserPlusStorage);
5947 
5948   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5949   while (status == XML_STATUS_SUSPENDED) {
5950     status = XML_ResumeParser(parser);
5951   }
5952   if (status != XML_STATUS_OK)
5953     xml_failure(parser);
5954 
5955   CharData_CheckXMLChars(&storage, expected);
5956   XML_ParserFree(parser);
5957 }
5958 END_TEST
5959 
5960 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5961 START_TEST(test_big_tokens_scale_linearly) {
5962   const struct {
5963     const char *pre;
5964     const char *post;
5965   } text[] = {
5966       {"<a>", "</a>"},                      // assumed good, used as baseline
5967       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5968       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5969       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5970       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5971   };
5972   const int num_cases = sizeof(text) / sizeof(text[0]);
5973   char aaaaaa[4096];
5974   const int fillsize = (int)sizeof(aaaaaa);
5975   const int fillcount = 100;
5976   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5977   const unsigned max_factor = 4;
5978   const unsigned max_scanned = max_factor * approx_bytes;
5979 
5980   memset(aaaaaa, 'a', fillsize);
5981 
5982   if (! g_reparseDeferralEnabledDefault) {
5983     return; // heuristic is disabled; we would get O(n^2) and fail.
5984   }
5985 
5986   for (int i = 0; i < num_cases; ++i) {
5987     XML_Parser parser = XML_ParserCreate(NULL);
5988     assert_true(parser != NULL);
5989     enum XML_Status status;
5990     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5991 
5992     // parse the start text
5993     g_bytesScanned = 0;
5994     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5995                                      (int)strlen(text[i].pre), XML_FALSE);
5996     if (status != XML_STATUS_OK) {
5997       xml_failure(parser);
5998     }
5999 
6000     // parse lots of 'a', failing the test early if it takes too long
6001     unsigned past_max_count = 0;
6002     for (int f = 0; f < fillcount; ++f) {
6003       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
6004       if (status != XML_STATUS_OK) {
6005         xml_failure(parser);
6006       }
6007       if (g_bytesScanned > max_scanned) {
6008         // We're not done, and have already passed the limit -- the test will
6009         // definitely fail. This block allows us to save time by failing early.
6010         const unsigned pushed
6011             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
6012         fprintf(
6013             stderr,
6014             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6015             f + 1, fillcount, pushed, g_bytesScanned,
6016             g_bytesScanned / (double)pushed, max_scanned, max_factor);
6017         past_max_count++;
6018         // We are failing, but allow a few log prints first. If we don't reach
6019         // a count of five, the test will fail after the loop instead.
6020         assert_true(past_max_count < 5);
6021       }
6022     }
6023 
6024     // parse the end text
6025     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
6026                                      (int)strlen(text[i].post), XML_TRUE);
6027     if (status != XML_STATUS_OK) {
6028       xml_failure(parser);
6029     }
6030 
6031     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
6032     if (g_bytesScanned > max_scanned) {
6033       fprintf(
6034           stderr,
6035           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
6036           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
6037           max_factor);
6038       fail("scanned too many bytes");
6039     }
6040 
6041     XML_ParserFree(parser);
6042   }
6043 }
6044 END_TEST
6045 
START_TEST(test_set_reparse_deferral)6046 START_TEST(test_set_reparse_deferral) {
6047   const char *const pre = "<d>";
6048   const char *const start = "<x attr='";
6049   const char *const end = "'></x>";
6050   char eeeeee[100];
6051   const int fillsize = (int)sizeof(eeeeee);
6052   memset(eeeeee, 'e', fillsize);
6053 
6054   for (int enabled = 0; enabled <= 1; enabled += 1) {
6055     set_subtest("deferral=%d", enabled);
6056 
6057     XML_Parser parser = XML_ParserCreate(NULL);
6058     assert_true(parser != NULL);
6059     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6060     // pre-grow the buffer to avoid reparsing due to almost-fullness
6061     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6062 
6063     CharData storage;
6064     CharData_Init(&storage);
6065     XML_SetUserData(parser, &storage);
6066     XML_SetStartElementHandler(parser, start_element_event_handler);
6067 
6068     enum XML_Status status;
6069     // parse the start text
6070     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6071     if (status != XML_STATUS_OK) {
6072       xml_failure(parser);
6073     }
6074     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6075 
6076     // ..and the start of the token
6077     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6078     if (status != XML_STATUS_OK) {
6079       xml_failure(parser);
6080     }
6081     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
6082 
6083     // try to parse lots of 'e', but the token isn't finished
6084     for (int c = 0; c < 100; ++c) {
6085       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6086       if (status != XML_STATUS_OK) {
6087         xml_failure(parser);
6088       }
6089     }
6090     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6091 
6092     // end the <x> token.
6093     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6094     if (status != XML_STATUS_OK) {
6095       xml_failure(parser);
6096     }
6097 
6098     if (enabled) {
6099       // In general, we may need to push more data to trigger a reparse attempt,
6100       // but in this test, the data is constructed to always require it.
6101       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
6102       // 2x the token length should suffice; the +1 covers the start and end.
6103       for (int c = 0; c < 101; ++c) {
6104         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6105         if (status != XML_STATUS_OK) {
6106           xml_failure(parser);
6107         }
6108       }
6109     }
6110     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
6111 
6112     XML_ParserFree(parser);
6113   }
6114 }
6115 END_TEST
6116 
6117 struct element_decl_data {
6118   XML_Parser parser;
6119   int count;
6120 };
6121 
6122 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)6123 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
6124   UNUSED_P(name);
6125   struct element_decl_data *testdata = (struct element_decl_data *)userData;
6126   testdata->count += 1;
6127   XML_FreeContentModel(testdata->parser, model);
6128 }
6129 
6130 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)6131 external_inherited_parser(XML_Parser p, const XML_Char *context,
6132                           const XML_Char *base, const XML_Char *systemId,
6133                           const XML_Char *publicId) {
6134   UNUSED_P(base);
6135   UNUSED_P(systemId);
6136   UNUSED_P(publicId);
6137   const char *const pre = "<!ELEMENT document ANY>\n";
6138   const char *const start = "<!ELEMENT ";
6139   const char *const end = " ANY>\n";
6140   const char *const post = "<!ELEMENT xyz ANY>\n";
6141   const int enabled = *(int *)XML_GetUserData(p);
6142   char eeeeee[100];
6143   char spaces[100];
6144   const int fillsize = (int)sizeof(eeeeee);
6145   assert_true(fillsize == (int)sizeof(spaces));
6146   memset(eeeeee, 'e', fillsize);
6147   memset(spaces, ' ', fillsize);
6148 
6149   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
6150   assert_true(parser != NULL);
6151   // pre-grow the buffer to avoid reparsing due to almost-fullness
6152   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
6153 
6154   struct element_decl_data testdata;
6155   testdata.parser = parser;
6156   testdata.count = 0;
6157   XML_SetUserData(parser, &testdata);
6158   XML_SetElementDeclHandler(parser, element_decl_counter);
6159 
6160   enum XML_Status status;
6161   // parse the initial text
6162   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6163   if (status != XML_STATUS_OK) {
6164     xml_failure(parser);
6165   }
6166   assert_true(testdata.count == 1); // first element should be done
6167 
6168   // ..and the start of the big token
6169   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
6170   if (status != XML_STATUS_OK) {
6171     xml_failure(parser);
6172   }
6173   assert_true(testdata.count == 1); // still just the first one
6174 
6175   // try to parse lots of 'e', but the token isn't finished
6176   for (int c = 0; c < 100; ++c) {
6177     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
6178     if (status != XML_STATUS_OK) {
6179       xml_failure(parser);
6180     }
6181   }
6182   assert_true(testdata.count == 1); // *still* just the first one
6183 
6184   // end the big token.
6185   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6186   if (status != XML_STATUS_OK) {
6187     xml_failure(parser);
6188   }
6189 
6190   if (enabled) {
6191     // In general, we may need to push more data to trigger a reparse attempt,
6192     // but in this test, the data is constructed to always require it.
6193     assert_true(testdata.count == 1); // or the test is incorrect
6194     // 2x the token length should suffice; the +1 covers the start and end.
6195     for (int c = 0; c < 101; ++c) {
6196       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
6197       if (status != XML_STATUS_OK) {
6198         xml_failure(parser);
6199       }
6200     }
6201   }
6202   assert_true(testdata.count == 2); // the big token should be done
6203 
6204   // parse the final text
6205   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
6206   if (status != XML_STATUS_OK) {
6207     xml_failure(parser);
6208   }
6209   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
6210 
6211   XML_ParserFree(parser);
6212   return XML_STATUS_OK;
6213 }
6214 
START_TEST(test_reparse_deferral_is_inherited)6215 START_TEST(test_reparse_deferral_is_inherited) {
6216   const char *const text
6217       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
6218   for (int enabled = 0; enabled <= 1; ++enabled) {
6219     set_subtest("deferral=%d", enabled);
6220 
6221     XML_Parser parser = XML_ParserCreate(NULL);
6222     assert_true(parser != NULL);
6223     XML_SetUserData(parser, (void *)&enabled);
6224     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6225     // this handler creates a sub-parser and checks that its deferral behavior
6226     // is what we expected, based on the value of `enabled` (in userdata).
6227     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
6228     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
6229     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
6230       xml_failure(parser);
6231 
6232     XML_ParserFree(parser);
6233   }
6234 }
6235 END_TEST
6236 
START_TEST(test_set_reparse_deferral_on_null_parser)6237 START_TEST(test_set_reparse_deferral_on_null_parser) {
6238   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
6239   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
6240   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
6241   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
6242   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
6243               == XML_FALSE);
6244   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
6245               == XML_FALSE);
6246 }
6247 END_TEST
6248 
START_TEST(test_set_reparse_deferral_on_the_fly)6249 START_TEST(test_set_reparse_deferral_on_the_fly) {
6250   const char *const pre = "<d><x attr='";
6251   const char *const end = "'></x>";
6252   char iiiiii[100];
6253   const int fillsize = (int)sizeof(iiiiii);
6254   memset(iiiiii, 'i', fillsize);
6255 
6256   XML_Parser parser = XML_ParserCreate(NULL);
6257   assert_true(parser != NULL);
6258   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
6259 
6260   CharData storage;
6261   CharData_Init(&storage);
6262   XML_SetUserData(parser, &storage);
6263   XML_SetStartElementHandler(parser, start_element_event_handler);
6264 
6265   enum XML_Status status;
6266   // parse the start text
6267   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
6268   if (status != XML_STATUS_OK) {
6269     xml_failure(parser);
6270   }
6271   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6272 
6273   // try to parse some 'i', but the token isn't finished
6274   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6275   if (status != XML_STATUS_OK) {
6276     xml_failure(parser);
6277   }
6278   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6279 
6280   // end the <x> token.
6281   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6282   if (status != XML_STATUS_OK) {
6283     xml_failure(parser);
6284   }
6285   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6286 
6287   // now change the heuristic setting and add *no* data
6288   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6289   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6290   status = XML_Parse(parser, "", 0, XML_FALSE);
6291   if (status != XML_STATUS_OK) {
6292     xml_failure(parser);
6293   }
6294   CharData_CheckXMLChars(&storage, XCS("dx"));
6295 
6296   XML_ParserFree(parser);
6297 }
6298 END_TEST
6299 
START_TEST(test_set_bad_reparse_option)6300 START_TEST(test_set_bad_reparse_option) {
6301   XML_Parser parser = XML_ParserCreate(NULL);
6302   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
6303   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
6304   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
6305   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
6306   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
6307   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
6308   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
6309   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
6310   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
6311   XML_ParserFree(parser);
6312 }
6313 END_TEST
6314 
/* Bookkeeping for the counting allocator below: the sum of all requested
 * sizes and the largest single request seen so far. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in g_totalAlloc and
 * g_biggestAlloc before forwarding the request.  The counters are updated
 * whether or not the underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}
6326 
/* malloc() counterpart of counting_realloc(): a fresh allocation is
 * expressed as a counting_realloc() of a NULL pointer, so it is recorded by
 * the same counters.
 */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
6331 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)6332 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
6333   if (g_chunkSize != 0) {
6334     // this test does not use SINGLE_BYTES, because it depends on very precise
6335     // buffer fills.
6336     return;
6337   }
6338   if (! g_reparseDeferralEnabledDefault) {
6339     return; // this test is irrelevant when the deferral heuristic is disabled.
6340   }
6341 
6342   const int document_length = 65536;
6343   char *const document = malloc(document_length);
6344   assert_true(document != NULL);
6345 
6346   const XML_Memory_Handling_Suite memfuncs = {
6347       counting_malloc,
6348       counting_realloc,
6349       free,
6350   };
6351 
6352   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6353   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6354   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6355 
6356   for (const int *leading = leading_list; *leading >= 0; leading++) {
6357     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6358       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6359         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6360                     *fillsize);
6361         // start by checking that the test looks reasonably valid
6362         assert_true(*leading + *bigtoken <= document_length);
6363 
6364         // put 'x' everywhere; some will be overwritten by elements.
6365         memset(document, 'x', document_length);
6366         // maybe add an initial tag
6367         if (*leading) {
6368           assert_true(*leading >= 3); // or the test case is invalid
6369           memcpy(document, "<a>", 3);
6370         }
6371         // add the large token
6372         document[*leading + 0] = '<';
6373         document[*leading + 1] = 'b';
6374         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6375         document[*leading + *bigtoken - 1] = '>';
6376 
6377         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6378         const int expected_elem_total = 1 + (*leading ? 1 : 0);
6379 
6380         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6381         assert_true(parser != NULL);
6382 
6383         CharData storage;
6384         CharData_Init(&storage);
6385         XML_SetUserData(parser, &storage);
6386         XML_SetStartElementHandler(parser, start_element_event_handler);
6387 
6388         g_biggestAlloc = 0;
6389         g_totalAlloc = 0;
6390         int offset = 0;
6391         // fill data until the big token is covered (but not necessarily parsed)
6392         while (offset < *leading + *bigtoken) {
6393           assert_true(offset + *fillsize <= document_length);
6394           const enum XML_Status status
6395               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6396           if (status != XML_STATUS_OK) {
6397             xml_failure(parser);
6398           }
6399           offset += *fillsize;
6400         }
6401         // Now, check that we've had a buffer allocation that could fit the
6402         // context bytes and our big token. In order to detect a special case,
6403         // we need to know how many bytes of our big token were included in the
6404         // first push that contained _any_ bytes of the big token:
6405         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6406         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6407           // Special case: we aren't saving any context, and the whole big token
6408           // was covered by a single fill, so Expat may have parsed directly
6409           // from our input pointer, without allocating an internal buffer.
6410         } else if (*leading < XML_CONTEXT_BYTES) {
6411           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6412         } else {
6413           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6414         }
6415         // fill data until the big token is actually parsed
6416         while (storage.count < expected_elem_total) {
6417           const size_t alloc_before = g_totalAlloc;
6418           assert_true(offset + *fillsize <= document_length);
6419           const enum XML_Status status
6420               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6421           if (status != XML_STATUS_OK) {
6422             xml_failure(parser);
6423           }
6424           offset += *fillsize;
6425           // since all the bytes of the big token are already in the buffer,
6426           // the bufsize ceiling should make us finish its parsing without any
6427           // further buffer allocations. We assume that there will be no other
6428           // large allocations in this test.
6429           assert_true(g_totalAlloc - alloc_before < 4096);
6430         }
6431         // test-the-test: was our alloc even called?
6432         assert_true(g_totalAlloc > 0);
6433         // test-the-test: there shouldn't be any extra start elements
6434         assert_true(storage.count == expected_elem_total);
6435 
6436         XML_ParserFree(parser);
6437       }
6438     }
6439   }
6440   free(document);
6441 }
6442 END_TEST
6443 
START_TEST(test_varying_buffer_fills)6444 START_TEST(test_varying_buffer_fills) {
6445   const int KiB = 1024;
6446   const int MiB = 1024 * KiB;
6447   const int document_length = 16 * MiB;
6448   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6449 
6450   if (g_chunkSize != 0) {
6451     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6452   }
6453 
6454   char *const document = malloc(document_length);
6455   assert_true(document != NULL);
6456   memset(document, 'x', document_length);
6457   document[0] = '<';
6458   document[1] = 't';
6459   memset(&document[2], ' ', big - 2); // a very spacy token
6460   document[big - 1] = '>';
6461 
6462   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6463   // When reparse deferral is enabled, the final (negated) value is the expected
6464   // maximum number of bytes scanned in parse attempts.
6465   const int testcases[][30] = {
6466       {8 * MiB, -8 * MiB},
6467       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6468       // zero-size fills shouldn't trigger the bypass
6469       {4 * MiB, 0, 4 * MiB, -12 * MiB},
6470       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6471       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6472       // try to hit the buffer ceiling only once (at the end)
6473       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6474       // try to hit the same buffer ceiling multiple times
6475       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6476 
6477       // try to hit every ceiling, by always landing 1K shy of the buffer size
6478       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6479        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6480 
6481       // try to avoid every ceiling, by always landing 1B past the buffer size
6482       // the normal 2x heuristic threshold still forces parse attempts.
6483       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6484        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6485        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6486        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6487        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6488        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6489        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6490        -(10 * MiB + 682 * KiB + 7)},
6491       // try to avoid every ceiling again, except on our last fill.
6492       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6493        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6494        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6495        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6496        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6497        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6498        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6499        -(10 * MiB + 682 * KiB + 6)},
6500 
6501       // try to hit ceilings on the way multiple times
6502       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6503        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6504        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
6505        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
6506        // we'll make a parse attempt at every parse call
6507        -(45 * MiB + 12)},
6508   };
6509   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6510   for (int test_i = 0; test_i < testcount; test_i++) {
6511     const int *fillsize = testcases[test_i];
6512     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6513                 fillsize[2], fillsize[3]);
6514     XML_Parser parser = XML_ParserCreate(NULL);
6515     assert_true(parser != NULL);
6516 
6517     CharData storage;
6518     CharData_Init(&storage);
6519     XML_SetUserData(parser, &storage);
6520     XML_SetStartElementHandler(parser, start_element_event_handler);
6521 
6522     g_bytesScanned = 0;
6523     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6524     int offset = 0;
6525     while (*fillsize >= 0) {
6526       assert_true(offset + *fillsize <= document_length); // or test is invalid
6527       const enum XML_Status status
6528           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6529       if (status != XML_STATUS_OK) {
6530         xml_failure(parser);
6531       }
6532       offset += *fillsize;
6533       fillsize++;
6534       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6535       worstcase_bytes += offset; // we might've tried to parse all pending bytes
6536     }
6537     assert_true(storage.count == 1); // the big token should've been parsed
6538     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6539     if (g_reparseDeferralEnabledDefault) {
6540       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6541       const unsigned max_bytes_scanned = -*fillsize;
6542       if (g_bytesScanned > max_bytes_scanned) {
6543         fprintf(stderr,
6544                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6545                 g_bytesScanned, max_bytes_scanned);
6546         fail("too many bytes scanned in parse attempts");
6547       }
6548     }
6549     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6550 
6551     XML_ParserFree(parser);
6552   }
6553   free(document);
6554 }
6555 END_TEST
6556 
START_TEST(test_empty_ext_param_entity_in_value)6557 START_TEST(test_empty_ext_param_entity_in_value) {
6558   const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6559   ExtOption options[] = {
6560       {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6561                        "<!ENTITY ge \"%pe;\">"},
6562       {XCS("empty"), ""},
6563       {NULL, NULL},
6564   };
6565 
6566   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6567   XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6568   XML_SetUserData(g_parser, options);
6569   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6570       == XML_STATUS_ERROR)
6571     xml_failure(g_parser);
6572 }
6573 END_TEST
6574 
6575 void
make_basic_test_case(Suite * s)6576 make_basic_test_case(Suite *s) {
6577   TCase *tc_basic = tcase_create("basic tests");
6578 
6579   suite_add_tcase(s, tc_basic);
6580   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6581 
6582   tcase_add_test(tc_basic, test_nul_byte);
6583   tcase_add_test(tc_basic, test_u0000_char);
6584   tcase_add_test(tc_basic, test_siphash_self);
6585   tcase_add_test(tc_basic, test_siphash_spec);
6586   tcase_add_test(tc_basic, test_bom_utf8);
6587   tcase_add_test(tc_basic, test_bom_utf16_be);
6588   tcase_add_test(tc_basic, test_bom_utf16_le);
6589   tcase_add_test(tc_basic, test_nobom_utf16_le);
6590   tcase_add_test(tc_basic, test_hash_collision);
6591   tcase_add_test(tc_basic, test_hash_salt_setter);
6592   tcase_add_test(tc_basic, test_illegal_utf8);
6593   tcase_add_test(tc_basic, test_utf8_auto_align);
6594   tcase_add_test(tc_basic, test_utf16);
6595   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6596   tcase_add_test(tc_basic, test_not_utf16);
6597   tcase_add_test(tc_basic, test_bad_encoding);
6598   tcase_add_test(tc_basic, test_latin1_umlauts);
6599   tcase_add_test(tc_basic, test_long_utf8_character);
6600   tcase_add_test(tc_basic, test_long_latin1_attribute);
6601   tcase_add_test(tc_basic, test_long_ascii_attribute);
6602   /* Regression test for SF bug #491986. */
6603   tcase_add_test(tc_basic, test_danish_latin1);
6604   /* Regression test for SF bug #514281. */
6605   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6606   tcase_add_test(tc_basic, test_french_charref_decimal);
6607   tcase_add_test(tc_basic, test_french_latin1);
6608   tcase_add_test(tc_basic, test_french_utf8);
6609   tcase_add_test(tc_basic, test_utf8_false_rejection);
6610   tcase_add_test(tc_basic, test_line_number_after_parse);
6611   tcase_add_test(tc_basic, test_column_number_after_parse);
6612   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6613   tcase_add_test(tc_basic, test_line_number_after_error);
6614   tcase_add_test(tc_basic, test_column_number_after_error);
6615   tcase_add_test(tc_basic, test_really_long_lines);
6616   tcase_add_test(tc_basic, test_really_long_encoded_lines);
6617   tcase_add_test(tc_basic, test_end_element_events);
6618   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6619   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6620   tcase_add_test(tc_basic, test_xmldecl_misplaced);
6621   tcase_add_test(tc_basic, test_xmldecl_invalid);
6622   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6623   tcase_add_test(tc_basic, test_xmldecl_missing_value);
6624   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6625   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6626   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6627   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6628   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6629   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6630   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6631   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6632   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6633   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6634   tcase_add_test(tc_basic,
6635                  test_wfc_undeclared_entity_with_external_subset_standalone);
6636   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6637   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6638   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6639   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6640   tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6641   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6642   tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6643   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6644   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6645   tcase_add_test(tc_basic, test_dtd_attr_handling);
6646   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6647   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6648   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6649   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6650   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6651   tcase_add_test(tc_basic, test_good_cdata_ascii);
6652   tcase_add_test(tc_basic, test_good_cdata_utf16);
6653   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6654   tcase_add_test(tc_basic, test_long_cdata_utf16);
6655   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6656   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6657   tcase_add_test(tc_basic, test_bad_cdata);
6658   tcase_add_test(tc_basic, test_bad_cdata_utf16);
6659   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6660   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6661   tcase_add_test(tc_basic, test_memory_allocation);
6662   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6663   tcase_add_test(tc_basic, test_dtd_elements);
6664   tcase_add_test(tc_basic, test_dtd_elements_nesting);
6665   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6666   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6667   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6668   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6669   tcase_add_test__ifdef_xml_dtd(tc_basic,
6670                                 test_foreign_dtd_without_external_subset);
6671   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6672   tcase_add_test(tc_basic, test_set_base);
6673   tcase_add_test(tc_basic, test_attributes);
6674   tcase_add_test(tc_basic, test_duplicate_cdata_attribute);
6675   tcase_add_test(tc_basic, test_duplicate_id_attribute_1);
6676   tcase_add_test(tc_basic, test_duplicate_id_attribute_2);
6677   tcase_add_test(tc_basic, test_duplicate_cdata_attribute_multiple_attlistdecl);
6678   tcase_add_test(tc_basic,
6679                  test_duplicate_cdata_attribute_multiple_attlistdecl_2);
6680   tcase_add_test(tc_basic,
6681                  test_duplicate_cdata_attribute_multiple_attlistdecl_3);
6682   tcase_add_test(tc_basic, test_duplicate_id_attribute_multiple_attlistdecl);
6683   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6684   tcase_add_test(tc_basic, test_resume_invalid_parse);
6685   tcase_add_test(tc_basic, test_resume_resuspended);
6686   tcase_add_test(tc_basic, test_cdata_default);
6687   tcase_add_test(tc_basic, test_subordinate_reset);
6688   tcase_add_test(tc_basic, test_subordinate_suspend);
6689   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6690   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6691   tcase_add_test__ifdef_xml_dtd(tc_basic,
6692                                 test_ext_entity_invalid_suspended_parse);
6693   tcase_add_test(tc_basic, test_explicit_encoding);
6694   tcase_add_test(tc_basic, test_trailing_cr);
6695   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6696   tcase_add_test(tc_basic, test_trailing_rsqb);
6697   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6698   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6699   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6700   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6701   tcase_add_test(tc_basic, test_empty_parse);
6702   tcase_add_test(tc_basic, test_negative_len_parse);
6703   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6704   tcase_add_test(tc_basic, test_get_buffer_1);
6705   tcase_add_test(tc_basic, test_get_buffer_2);
6706 #if XML_CONTEXT_BYTES > 0
6707   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6708 #endif
6709   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6710   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6711   tcase_add_test(tc_basic, test_byte_info_at_end);
6712   tcase_add_test(tc_basic, test_byte_info_at_error);
6713   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6714   tcase_add_test(tc_basic, test_predefined_entities);
6715   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6716   tcase_add_test(tc_basic, test_not_predefined_entities);
6717   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6718   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6719   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6720   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6721   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6722   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6723   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6724   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6725   tcase_add_test(tc_basic, test_bad_public_doctype);
6726   tcase_add_test(tc_basic, test_attribute_enum_value);
6727   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6728   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6729   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6730   tcase_add_test(tc_basic, test_nested_groups);
6731   tcase_add_test(tc_basic, test_group_choice);
6732   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6733   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6734   tcase_add_test__ifdef_xml_dtd(tc_basic,
6735                                 test_recursive_external_parameter_entity);
6736   tcase_add_test__ifdef_xml_dtd(tc_basic,
6737                                 test_recursive_external_parameter_entity_2);
6738   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6739   tcase_add_test(tc_basic, test_suspend_xdecl);
6740   tcase_add_test(tc_basic, test_abort_epilog);
6741   tcase_add_test(tc_basic, test_abort_epilog_2);
6742   tcase_add_test(tc_basic, test_suspend_epilog);
6743   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6744   tcase_add_test(tc_basic, test_unfinished_epilog);
6745   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6746   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6747   tcase_add_test__ifdef_xml_dtd(tc_basic,
6748                                 test_suspend_resume_internal_entity_issue_629);
6749   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6750   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6751   tcase_add_test(tc_basic, test_restart_on_error);
6752   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6753   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6754   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6755   tcase_add_test(tc_basic, test_standalone_internal_entity);
6756   tcase_add_test(tc_basic, test_skipped_external_entity);
6757   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6758   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6759   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6760   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6761   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6762   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6763   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6764   tcase_add_test(tc_basic, test_pi_handled_in_default);
6765   tcase_add_test(tc_basic, test_comment_handled_in_default);
6766   tcase_add_test(tc_basic, test_pi_yml);
6767   tcase_add_test(tc_basic, test_pi_xnl);
6768   tcase_add_test(tc_basic, test_pi_xmm);
6769   tcase_add_test(tc_basic, test_utf16_pi);
6770   tcase_add_test(tc_basic, test_utf16_be_pi);
6771   tcase_add_test(tc_basic, test_utf16_be_comment);
6772   tcase_add_test(tc_basic, test_utf16_le_comment);
6773   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6774   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6775   tcase_add_test(tc_basic, test_unknown_encoding_success);
6776   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6777   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6778   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6779   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6780   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6781   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6782   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6783   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6784   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6785   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6786   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6787   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6788   tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
6789   tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
6790   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6791   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6792   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6793   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6794   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6795   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6796   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6797   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6798   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6799   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6800   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6801   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6802   tcase_add_test(tc_basic, test_utf16_attribute);
6803   tcase_add_test(tc_basic, test_utf16_second_attr);
6804   tcase_add_test(tc_basic, test_attr_after_solidus);
6805   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6806   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6807   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6808   tcase_add_test(tc_basic, test_bad_doctype);
6809   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6810   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6811   tcase_add_test(tc_basic, test_bad_doctype_plus);
6812   tcase_add_test(tc_basic, test_bad_doctype_star);
6813   tcase_add_test(tc_basic, test_bad_doctype_query);
6814   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6815   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6816   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6817   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6818   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6819   tcase_add_test(tc_basic, test_short_doctype);
6820   tcase_add_test(tc_basic, test_short_doctype_2);
6821   tcase_add_test(tc_basic, test_short_doctype_3);
6822   tcase_add_test(tc_basic, test_long_doctype);
6823   tcase_add_test(tc_basic, test_bad_entity);
6824   tcase_add_test(tc_basic, test_bad_entity_2);
6825   tcase_add_test(tc_basic, test_bad_entity_3);
6826   tcase_add_test(tc_basic, test_bad_entity_4);
6827   tcase_add_test(tc_basic, test_bad_notation);
6828   tcase_add_test(tc_basic, test_default_doctype_handler);
6829   tcase_add_test(tc_basic, test_empty_element_abort);
6830   tcase_add_test__ifdef_xml_dtd(tc_basic,
6831                                 test_pool_integrity_with_unfinished_attr);
6832   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
6833   tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6834   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6835   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6836   tcase_add_test__if_xml_ge(tc_basic,
6837                             test_deep_nested_entity_delayed_interpretation);
6838   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6839   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6840   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6841   tcase_add_test(tc_basic, test_set_reparse_deferral);
6842   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6843   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6844   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6845   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6846   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6847   tcase_add_test(tc_basic, test_varying_buffer_fills);
6848 }
6849