xref: /freebsd/contrib/expat/tests/basic_tests.c (revision ffd294a1f4c23863c3e515d16dce31d5509bcb01)
/* Tests in the "basic" test case for the Expat test suite
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
43 
44 #if defined(NDEBUG)
45 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47 
48 #include <assert.h>
49 
50 #include <stdio.h>
51 #include <string.h>
52 #include <time.h>
53 
54 #if ! defined(__cplusplus)
55 #  include <stdbool.h>
56 #endif
57 
58 #include "expat_config.h"
59 
60 #include "expat.h"
61 #include "internal.h"
62 #include "minicheck.h"
63 #include "structdata.h"
64 #include "common.h"
65 #include "dummy.h"
66 #include "handlers.h"
67 #include "siphash.h"
68 #include "basic_tests.h"
69 
70 static void
basic_setup(void)71 basic_setup(void) {
72   g_parser = XML_ParserCreate(NULL);
73   if (g_parser == NULL)
74     fail("Parser not created.");
75 }
76 
/*
 * Character & encoding tests.
 */
80 
START_TEST(test_nul_byte)81 START_TEST(test_nul_byte) {
82   char text[] = "<doc>\0</doc>";
83 
84   /* test that a NUL byte (in US-ASCII data) is an error */
85   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86       == XML_STATUS_OK)
87     fail("Parser did not report error on NUL-byte.");
88   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89     xml_failure(g_parser);
90 }
91 END_TEST
92 
START_TEST(test_u0000_char)93 START_TEST(test_u0000_char) {
94   /* test that a NUL byte (in US-ASCII data) is an error */
95   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                  "Parser did not report error on NUL-byte.");
97 }
98 END_TEST
99 
START_TEST(test_siphash_self)100 START_TEST(test_siphash_self) {
101   if (! sip24_valid())
102     fail("SipHash self-test failed");
103 }
104 END_TEST
105 
START_TEST(test_siphash_spec)106 START_TEST(test_siphash_spec) {
107   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                          "\x0a\x0b\x0c\x0d\x0e";
110   const size_t len = sizeof(message) - 1;
111   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112   struct siphash state;
113   struct sipkey key;
114 
115   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117   sip24_init(&state, &key);
118 
119   /* Cover spread across calls */
120   sip24_update(&state, message, 4);
121   sip24_update(&state, message + 4, len - 4);
122 
123   /* Cover null length */
124   sip24_update(&state, message, 0);
125 
126   if (sip24_final(&state) != expected)
127     fail("sip24_final failed spec test\n");
128 
129   /* Cover wrapper */
130   if (siphash24(message, len, &key) != expected)
131     fail("siphash24 failed spec test\n");
132 }
133 END_TEST
134 
START_TEST(test_bom_utf8)135 START_TEST(test_bom_utf8) {
136   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137   const char *text = "\357\273\277<e/>";
138 
139   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140       == XML_STATUS_ERROR)
141     xml_failure(g_parser);
142 }
143 END_TEST
144 
START_TEST(test_bom_utf16_be)145 START_TEST(test_bom_utf16_be) {
146   char text[] = "\376\377\0<\0e\0/\0>";
147 
148   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149       == XML_STATUS_ERROR)
150     xml_failure(g_parser);
151 }
152 END_TEST
153 
START_TEST(test_bom_utf16_le)154 START_TEST(test_bom_utf16_le) {
155   char text[] = "\377\376<\0e\0/\0>\0";
156 
157   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158       == XML_STATUS_ERROR)
159     xml_failure(g_parser);
160 }
161 END_TEST
162 
START_TEST(test_nobom_utf16_le)163 START_TEST(test_nobom_utf16_le) {
164   char text[] = " \0<\0e\0/\0>\0";
165 
166   if (g_chunkSize == 1) {
167     // TODO: with just the first byte, we can't tell the difference between
168     // UTF-16-LE and UTF-8. Avoid the failure for now.
169     return;
170   }
171 
172   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173       == XML_STATUS_ERROR)
174     xml_failure(g_parser);
175 }
176 END_TEST
177 
START_TEST(test_hash_collision)178 START_TEST(test_hash_collision) {
179   /* For full coverage of the lookup routine, we need to ensure a
180    * hash collision even though we can only tell that we have one
181    * through breakpoint debugging or coverage statistics.  The
182    * following will cause a hash collision on machines with a 64-bit
183    * long type; others will have to experiment.  The full coverage
184    * tests invoked from qa.sh usually provide a hash collision, but
185    * not always.  This is an attempt to provide insurance.
186    */
187 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188   const char *text
189       = "<doc>\n"
190         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195         "<d8>This triggers the table growth and collides with b2</d8>\n"
196         "</doc>\n";
197 
198   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200       == XML_STATUS_ERROR)
201     xml_failure(g_parser);
202 }
203 END_TEST
204 #undef COLLIDING_HASH_SALT
205 
206 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)207 START_TEST(test_danish_latin1) {
208   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210 #ifdef XML_UNICODE
211   const XML_Char *expected
212       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213 #else
214   const XML_Char *expected
215       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216 #endif
217   run_character_check(text, expected);
218 }
219 END_TEST
220 
221 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)222 START_TEST(test_french_charref_hexidecimal) {
223   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225 #ifdef XML_UNICODE
226   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227 #else
228   const XML_Char *expected
229       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230 #endif
231   run_character_check(text, expected);
232 }
233 END_TEST
234 
START_TEST(test_french_charref_decimal)235 START_TEST(test_french_charref_decimal) {
236   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238 #ifdef XML_UNICODE
239   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240 #else
241   const XML_Char *expected
242       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243 #endif
244   run_character_check(text, expected);
245 }
246 END_TEST
247 
START_TEST(test_french_latin1)248 START_TEST(test_french_latin1) {
249   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251 #ifdef XML_UNICODE
252   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254   const XML_Char *expected
255       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257   run_character_check(text, expected);
258 }
259 END_TEST
260 
START_TEST(test_french_utf8)261 START_TEST(test_french_utf8) {
262   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                      "<doc>\xC3\xA9</doc>";
264 #ifdef XML_UNICODE
265   const XML_Char *expected = XCS("\x00e9");
266 #else
267   const XML_Char *expected = XCS("\xC3\xA9");
268 #endif
269   run_character_check(text, expected);
270 }
271 END_TEST
272 
273 /* Regression test for SF bug #600479.
274    XXX There should be a test that exercises all legal XML Unicode
275    characters as PCDATA and attribute value content, and XML Name
276    characters as part of element and attribute names.
277 */
START_TEST(test_utf8_false_rejection)278 START_TEST(test_utf8_false_rejection) {
279   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280 #ifdef XML_UNICODE
281   const XML_Char *expected = XCS("\xfebf");
282 #else
283   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284 #endif
285   run_character_check(text, expected);
286 }
287 END_TEST
288 
289 /* Regression test for SF bug #477667.
290    This test assures that any 8-bit character followed by a 7-bit
291    character will not be mistakenly interpreted as a valid UTF-8
292    sequence.
293 */
START_TEST(test_illegal_utf8)294 START_TEST(test_illegal_utf8) {
295   char text[100];
296   int i;
297 
298   for (i = 128; i <= 255; ++i) {
299     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301         == XML_STATUS_OK) {
302       snprintf(text, sizeof(text),
303                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304                i);
305       fail(text);
306     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307       xml_failure(g_parser);
308     /* Reset the parser since we use the same parser repeatedly. */
309     XML_ParserReset(g_parser, NULL);
310   }
311 }
312 END_TEST
313 
/* Sample UTF-8 bytes used to build test inputs: one example lead byte for
 * each sequence length (1 to 4 bytes) plus one continuation byte.
 * Examples, not masks: */
#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320 
START_TEST(test_utf8_auto_align)321 START_TEST(test_utf8_auto_align) {
322   struct TestCase {
323     ptrdiff_t expectedMovementInChars;
324     const char *input;
325   };
326 
327   struct TestCase cases[] = {
328       {00, ""},
329 
330       {00, UTF8_LEAD_1},
331 
332       {-1, UTF8_LEAD_2},
333       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334 
335       {-1, UTF8_LEAD_3},
336       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338 
339       {-1, UTF8_LEAD_4},
340       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343   };
344 
345   size_t i = 0;
346   bool success = true;
347   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348     const char *fromLim = cases[i].input + strlen(cases[i].input);
349     const char *const fromLimInitially = fromLim;
350     ptrdiff_t actualMovementInChars;
351 
352     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353 
354     actualMovementInChars = (fromLim - fromLimInitially);
355     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356       size_t j = 0;
357       success = false;
358       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359              ", actually moved by %2d chars: \"",
360              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361              (int)actualMovementInChars);
362       for (; j < strlen(cases[i].input); j++) {
363         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364       }
365       printf("\"\n");
366     }
367   }
368 
369   if (! success) {
370     fail("UTF-8 auto-alignment is not bullet-proof\n");
371   }
372 }
373 END_TEST
374 
START_TEST(test_utf16)375 START_TEST(test_utf16) {
376   /* <?xml version="1.0" encoding="UTF-16"?>
377    *  <doc a='123'>some {A} text</doc>
378    *
379    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380    */
381   char text[]
382       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385         "\000'\000?\000>\000\n"
386         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388         "<\000/\000d\000o\000c\000>";
389 #ifdef XML_UNICODE
390   const XML_Char *expected = XCS("some \xff21 text");
391 #else
392   const XML_Char *expected = XCS("some \357\274\241 text");
393 #endif
394   CharData storage;
395 
396   CharData_Init(&storage);
397   XML_SetUserData(g_parser, &storage);
398   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400       == XML_STATUS_ERROR)
401     xml_failure(g_parser);
402   CharData_CheckXMLChars(&storage, expected);
403 }
404 END_TEST
405 
START_TEST(test_utf16_le_epilog_newline)406 START_TEST(test_utf16_le_epilog_newline) {
407   unsigned int first_chunk_bytes = 17;
408   char text[] = "\xFF\xFE"                  /* BOM */
409                 "<\000e\000/\000>\000"      /* document element */
410                 "\r\000\n\000\r\000\n\000"; /* epilog */
411 
412   if (first_chunk_bytes >= sizeof(text) - 1)
413     fail("bad value of first_chunk_bytes");
414   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415       == XML_STATUS_ERROR)
416     xml_failure(g_parser);
417   else {
418     enum XML_Status rc;
419     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                  sizeof(text) - first_chunk_bytes - 1,
421                                  XML_TRUE);
422     if (rc == XML_STATUS_ERROR)
423       xml_failure(g_parser);
424   }
425 }
426 END_TEST
427 
428 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)429 START_TEST(test_not_utf16) {
430   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                      "<doc>Hi</doc>";
432 
433   /* Use a handler to provoke the appropriate code paths */
434   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                  "UTF-16 declared in UTF-8 not faulted");
437 }
438 END_TEST
439 
440 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)441 START_TEST(test_bad_encoding) {
442   const char *text = "<doc>Hi</doc>";
443 
444   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445     fail("XML_SetEncoding failed");
446   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                  "Unknown encoding not faulted");
448 }
449 END_TEST
450 
451 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)452 START_TEST(test_latin1_umlauts) {
453   const char *text
454       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457 #ifdef XML_UNICODE
458   /* Expected results in UTF-16 */
459   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461 #else
462   /* Expected results in UTF-8 */
463   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465 #endif
466 
467   run_character_check(text, expected);
468   XML_ParserReset(g_parser, NULL);
469   run_attribute_check(text, expected);
470   /* Repeat with a default handler */
471   XML_ParserReset(g_parser, NULL);
472   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473   run_character_check(text, expected);
474   XML_ParserReset(g_parser, NULL);
475   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476   run_attribute_check(text, expected);
477 }
478 END_TEST
479 
480 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)481 START_TEST(test_long_utf8_character) {
482   const char *text
483       = "<?xml version='1.0' encoding='utf-8'?>\n"
484         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485         "<do\xf0\x90\x80\x80/>";
486   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                  "4-byte UTF-8 character in element name not faulted");
488 }
489 END_TEST
490 
491 /* Test that a long latin-1 attribute (too long to convert in one go)
492  * is correctly converted
493  */
START_TEST(test_long_latin1_attribute)494 START_TEST(test_long_latin1_attribute) {
495   const char *text
496       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497         "<doc att='"
498         /* 64 characters per line */
499         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515         /* Last character splits across a buffer boundary */
516         "\xe4'>\n</doc>";
517 
518   const XML_Char *expected =
519       /* 64 characters per line */
520       /* clang-format off */
521         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537   /* clang-format on */
538 #ifdef XML_UNICODE
539                                                   XCS("\x00e4");
540 #else
541                                                   XCS("\xc3\xa4");
542 #endif
543 
544   run_attribute_check(text, expected);
545 }
546 END_TEST
547 
548 /* Test that a long ASCII attribute (too long to convert in one go)
549  * is correctly converted
550  */
START_TEST(test_long_ascii_attribute)551 START_TEST(test_long_ascii_attribute) {
552   const char *text
553       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554         "<doc att='"
555         /* 64 characters per line */
556         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572         "01234'>\n</doc>";
573   const XML_Char *expected =
574       /* 64 characters per line */
575       /* clang-format off */
576         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592         XCS("01234");
593   /* clang-format on */
594 
595   run_attribute_check(text, expected);
596 }
597 END_TEST
598 
599 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)600 START_TEST(test_line_number_after_parse) {
601   const char *text = "<tag>\n"
602                      "\n"
603                      "\n</tag>";
604   XML_Size lineno;
605 
606   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607       == XML_STATUS_ERROR)
608     xml_failure(g_parser);
609   lineno = XML_GetCurrentLineNumber(g_parser);
610   if (lineno != 4) {
611     char buffer[100];
612     snprintf(buffer, sizeof(buffer),
613              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614     fail(buffer);
615   }
616 }
617 END_TEST
618 
619 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)620 START_TEST(test_column_number_after_parse) {
621   const char *text = "<tag></tag>";
622   XML_Size colno;
623 
624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625       == XML_STATUS_ERROR)
626     xml_failure(g_parser);
627   colno = XML_GetCurrentColumnNumber(g_parser);
628   if (colno != 11) {
629     char buffer[100];
630     snprintf(buffer, sizeof(buffer),
631              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632     fail(buffer);
633   }
634 }
635 END_TEST
636 
637 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)638 START_TEST(test_line_and_column_numbers_inside_handlers) {
639   const char *text = "<a>\n"      /* Unix end-of-line */
640                      "  <b>\r\n"  /* Windows end-of-line */
641                      "    <c/>\r" /* Mac OS end-of-line */
642                      "  </b>\n"
643                      "  <d>\n"
644                      "    <f/>\n"
645                      "  </d>\n"
646                      "</a>";
647   const StructDataEntry expected[]
648       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654   StructData storage;
655 
656   StructData_Init(&storage);
657   XML_SetUserData(g_parser, &storage);
658   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661       == XML_STATUS_ERROR)
662     xml_failure(g_parser);
663 
664   StructData_CheckItems(&storage, expected, expected_count);
665   StructData_Dispose(&storage);
666 }
667 END_TEST
668 
669 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)670 START_TEST(test_line_number_after_error) {
671   const char *text = "<a>\n"
672                      "  <b>\n"
673                      "  </a>"; /* missing </b> */
674   XML_Size lineno;
675   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676       != XML_STATUS_ERROR)
677     fail("Expected a parse error");
678 
679   lineno = XML_GetCurrentLineNumber(g_parser);
680   if (lineno != 3) {
681     char buffer[100];
682     snprintf(buffer, sizeof(buffer),
683              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684     fail(buffer);
685   }
686 }
687 END_TEST
688 
689 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)690 START_TEST(test_column_number_after_error) {
691   const char *text = "<a>\n"
692                      "  <b>\n"
693                      "  </a>"; /* missing </b> */
694   XML_Size colno;
695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696       != XML_STATUS_ERROR)
697     fail("Expected a parse error");
698 
699   colno = XML_GetCurrentColumnNumber(g_parser);
700   if (colno != 4) {
701     char buffer[100];
702     snprintf(buffer, sizeof(buffer),
703              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704     fail(buffer);
705   }
706 }
707 END_TEST
708 
709 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)710 START_TEST(test_really_long_lines) {
711   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712      characters long (defined to be 1024 in xmlparse.c).  We take a
713      really cheesy approach to building the input buffer, because
714      this avoids writing bugs in buffer-filling code.
715   */
716   const char *text
717       = "<e>"
718         /* 64 chars */
719         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720         /* until we have at least 1024 characters on the line: */
721         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737         "</e>";
738   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739       == XML_STATUS_ERROR)
740     xml_failure(g_parser);
741 }
742 END_TEST
743 
744 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)745 START_TEST(test_really_long_encoded_lines) {
746   /* As above, except that we want to provoke an output buffer
747    * overflow with a non-trivial encoding.  For this we need to pass
748    * the whole cdata in one go, not byte-by-byte.
749    */
750   void *buffer;
751   const char *text
752       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753         "<e>"
754         /* 64 chars */
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         /* until we have at least 1024 characters on the line: */
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773         "</e>";
774   int parse_len = (int)strlen(text);
775 
776   /* Need a cdata handler to provoke the code path we want to test */
777   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778   buffer = XML_GetBuffer(g_parser, parse_len);
779   if (buffer == NULL)
780     fail("Could not allocate parse buffer");
781   assert(buffer != NULL);
782   memcpy(buffer, text, parse_len);
783   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784     xml_failure(g_parser);
785 }
786 END_TEST
787 
788 /*
789  * Element event tests.
790  */
791 
START_TEST(test_end_element_events)792 START_TEST(test_end_element_events) {
793   const char *text = "<a><b><c/></b><d><f/></d></a>";
794   const XML_Char *expected = XCS("/c/b/f/d/a");
795   CharData storage;
796 
797   CharData_Init(&storage);
798   XML_SetUserData(g_parser, &storage);
799   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801       == XML_STATUS_ERROR)
802     xml_failure(g_parser);
803   CharData_CheckXMLChars(&storage, expected);
804 }
805 END_TEST
806 
807 /*
808  * Attribute tests.
809  */
810 
/* Helper used by the following tests; this checks any "attr", "ents" and
   "refs" attributes to make sure whitespace has been normalized.
813 
814    Return true if whitespace has been normalized in a string, using
815    the rules for attribute value normalization.  The 'is_cdata' flag
816    is needed since CDATA attributes don't need to have multiple
817    whitespace characters collapsed to a single space, while other
818    attribute data types do.  (Section 3.3.3 of the recommendation.)
819 */
820 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)821 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822   int blanks = 0;
823   int at_start = 1;
824   while (*s) {
825     if (*s == XCS(' '))
826       ++blanks;
827     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828       return 0;
829     else {
830       if (at_start) {
831         at_start = 0;
832         if (blanks && ! is_cdata)
833           /* illegal leading blanks */
834           return 0;
835       } else if (blanks > 1 && ! is_cdata)
836         return 0;
837       blanks = 0;
838     }
839     ++s;
840   }
841   if (blanks && ! is_cdata)
842     return 0;
843   return 1;
844 }
845 
846 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized) {
  /* Self-test of is_whitespace_normalized().  The second argument is the
     is_cdata flag; each input is checked with the flag both clear (0)
     and set (1). */

  /* No blanks, or single interior blanks: accepted either way */
  assert(is_whitespace_normalized(XCS("abc"), 0));
  assert(is_whitespace_normalized(XCS("abc"), 1));
  assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
  assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
  /* Leading blank: rejected unless is_cdata */
  assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
  assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
  /* Doubled interior blank: rejected unless is_cdata */
  assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
  assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
  /* Trailing blank: rejected unless is_cdata */
  assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
  assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
  /* A lone blank counts as a trailing blank */
  assert(! is_whitespace_normalized(XCS(" "), 0));
  assert(is_whitespace_normalized(XCS(" "), 1));
  /* Tab, newline and carriage return are rejected regardless of is_cdata */
  assert(! is_whitespace_normalized(XCS("\t"), 0));
  assert(! is_whitespace_normalized(XCS("\t"), 1));
  assert(! is_whitespace_normalized(XCS("\n"), 0));
  assert(! is_whitespace_normalized(XCS("\n"), 1));
  assert(! is_whitespace_normalized(XCS("\r"), 0));
  assert(! is_whitespace_normalized(XCS("\r"), 1));
  assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
}
END_TEST
869 
870 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)871 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                           const XML_Char **atts) {
873   int i;
874   UNUSED_P(userData);
875   UNUSED_P(name);
876   for (i = 0; atts[i] != NULL; i += 2) {
877     const XML_Char *attrname = atts[i];
878     const XML_Char *value = atts[i + 1];
879     if (xcstrcmp(XCS("attr"), attrname) == 0
880         || xcstrcmp(XCS("ents"), attrname) == 0
881         || xcstrcmp(XCS("refs"), attrname) == 0) {
882       if (! is_whitespace_normalized(value, 0)) {
883         char buffer[256];
884         snprintf(buffer, sizeof(buffer),
885                  "attribute value not normalized: %" XML_FMT_STR
886                  "='%" XML_FMT_STR "'",
887                  attrname, value);
888         fail(buffer);
889       }
890     }
891   }
892 }
893 
START_TEST(test_attr_whitespace_normalization)894 START_TEST(test_attr_whitespace_normalization) {
895   const char *text
896       = "<!DOCTYPE doc [\n"
897         "  <!ATTLIST doc\n"
898         "            attr NMTOKENS #REQUIRED\n"
899         "            ents ENTITIES #REQUIRED\n"
900         "            refs IDREFS   #REQUIRED>\n"
901         "]>\n"
902         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903         "     ents=' ent-1   \t\r\n"
904         "            ent-2  ' >\n"
905         "  <e id='id-1'/>\n"
906         "  <e id='id-2'/>\n"
907         "</doc>";
908 
909   XML_SetStartElementHandler(g_parser,
910                              check_attr_contains_normalized_whitespace);
911   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912       == XML_STATUS_ERROR)
913     xml_failure(g_parser);
914 }
915 END_TEST
916 
917 /*
918  * XML declaration tests.
919  */
920 
START_TEST(test_xmldecl_misplaced)921 START_TEST(test_xmldecl_misplaced) {
922   expect_failure("\n"
923                  "<?xml version='1.0'?>\n"
924                  "<a/>",
925                  XML_ERROR_MISPLACED_XML_PI,
926                  "failed to report misplaced XML declaration");
927 }
928 END_TEST
929 
START_TEST(test_xmldecl_invalid)930 START_TEST(test_xmldecl_invalid) {
931   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                  "Failed to report invalid XML declaration");
933 }
934 END_TEST
935 
START_TEST(test_xmldecl_missing_attr)936 START_TEST(test_xmldecl_missing_attr) {
937   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                  "Failed to report missing XML declaration attribute");
939 }
940 END_TEST
941 
START_TEST(test_xmldecl_missing_value)942 START_TEST(test_xmldecl_missing_value) {
943   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                  "<doc/>",
945                  XML_ERROR_XML_DECL,
946                  "Failed to report missing attribute value");
947 }
948 END_TEST
949 
950 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)951 START_TEST(test_unknown_encoding_internal_entity) {
952   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                      "<test a='&foo;'/>";
955 
956   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958       == XML_STATUS_ERROR)
959     xml_failure(g_parser);
960 }
961 END_TEST
962 
963 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)964 START_TEST(test_unrecognised_encoding_internal_entity) {
965   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                      "<test a='&foo;'/>";
968 
969   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971       != XML_STATUS_ERROR)
972     fail("Unrecognised encoding not rejected");
973 }
974 END_TEST
975 
976 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)977 START_TEST(test_ext_entity_set_encoding) {
978   const char *text = "<!DOCTYPE doc [\n"
979                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                      "]>\n"
981                      "<doc>&en;</doc>";
982   ExtTest test_data
983       = {/* This text says it's an unsupported encoding, but it's really
984             UTF-8, which we tell Expat using XML_SetEncoding().
985          */
986          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987 #ifdef XML_UNICODE
988   const XML_Char *expected = XCS("\x00e9");
989 #else
990   const XML_Char *expected = XCS("\xc3\xa9");
991 #endif
992 
993   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994   run_ext_character_check(text, &test_data, expected);
995 }
996 END_TEST
997 
998 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)999 START_TEST(test_ext_entity_no_handler) {
1000   const char *text = "<!DOCTYPE doc [\n"
1001                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                      "]>\n"
1003                      "<doc>&en;</doc>";
1004 
1005   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006   run_character_check(text, XCS(""));
1007 }
1008 END_TEST
1009 
1010 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1011 START_TEST(test_ext_entity_set_bom) {
1012   const char *text = "<!DOCTYPE doc [\n"
1013                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                      "]>\n"
1015                      "<doc>&en;</doc>";
1016   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                        "<?xml encoding='iso-8859-3'?>"
1018                        "\xC3\xA9",
1019                        XCS("utf-8"), NULL};
1020 #ifdef XML_UNICODE
1021   const XML_Char *expected = XCS("\x00e9");
1022 #else
1023   const XML_Char *expected = XCS("\xc3\xa9");
1024 #endif
1025 
1026   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027   run_ext_character_check(text, &test_data, expected);
1028 }
1029 END_TEST
1030 
1031 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1032 START_TEST(test_ext_entity_bad_encoding) {
1033   const char *text = "<!DOCTYPE doc [\n"
1034                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                      "]>\n"
1036                      "<doc>&en;</doc>";
1037   ExtFaults fault
1038       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040 
1041   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042   XML_SetUserData(g_parser, &fault);
1043   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                  "Bad encoding should not have been accepted");
1045 }
1046 END_TEST
1047 
1048 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1049 START_TEST(test_ext_entity_bad_encoding_2) {
1050   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                      "<doc>&entity;</doc>";
1053   ExtFaults fault
1054       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056 
1057   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059   XML_SetUserData(g_parser, &fault);
1060   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                  "Bad encoding not faulted in external entity handler");
1062 }
1063 END_TEST
1064 
1065 /* Test that no error is reported for unknown entities if we don't
1066    read an external subset.  This was fixed in Expat 1.95.5.
1067 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1068 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                      "<doc>&entity;</doc>";
1071 
1072   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073       == XML_STATUS_ERROR)
1074     xml_failure(g_parser);
1075 }
1076 END_TEST
1077 
1078 /* Test that an error is reported for unknown entities if we don't
1079    have an external subset.
1080 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1081 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                  "Parser did not report undefined entity w/out a DTD.");
1084 }
1085 END_TEST
1086 
1087 /* Test that an error is reported for unknown entities if we don't
1088    read an external subset, but have been declared standalone.
1089 */
START_TEST(test_wfc_undeclared_entity_standalone)1090 START_TEST(test_wfc_undeclared_entity_standalone) {
1091   const char *text
1092       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094         "<doc>&entity;</doc>";
1095 
1096   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                  "Parser did not report undefined entity (standalone).");
1098 }
1099 END_TEST
1100 
1101 /* Test that an error is reported for unknown entities if we have read
1102    an external subset, and standalone is true.
1103 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1104 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105   const char *text
1106       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108         "<doc>&entity;</doc>";
1109   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110 
1111   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112   XML_SetUserData(g_parser, &test_data);
1113   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                  "Parser did not report undefined entity (external DTD).");
1116 }
1117 END_TEST
1118 
1119 /* Test that external entity handling is not done if the parsing flag
1120  * is set to UNLESS_STANDALONE
1121  */
START_TEST(test_entity_with_external_subset_unless_standalone)1122 START_TEST(test_entity_with_external_subset_unless_standalone) {
1123   const char *text
1124       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126         "<doc>&entity;</doc>";
1127   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128 
1129   XML_SetParamEntityParsing(g_parser,
1130                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131   XML_SetUserData(g_parser, &test_data);
1132   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                  "Parser did not report undefined entity");
1135 }
1136 END_TEST
1137 
1138 /* Test that no error is reported for unknown entities if we have read
1139    an external subset, and standalone is false.
1140 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1141 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                      "<doc>&entity;</doc>";
1145   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146 
1147   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149   run_ext_character_check(text, &test_data, XCS(""));
1150 }
1151 END_TEST
1152 
1153 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1154 START_TEST(test_not_standalone_handler_reject) {
1155   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                      "<doc>&entity;</doc>";
1158   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159 
1160   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161   XML_SetUserData(g_parser, &test_data);
1162   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                  "NotStandalone handler failed to reject");
1166 
1167   /* Try again but without external entity handling */
1168   XML_ParserReset(g_parser, NULL);
1169   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                  "NotStandalone handler failed to reject");
1172 }
1173 END_TEST
1174 
1175 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1176 START_TEST(test_not_standalone_handler_accept) {
1177   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                      "<doc>&entity;</doc>";
1180   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181 
1182   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185   run_ext_character_check(text, &test_data, XCS(""));
1186 
1187   /* Repeat without the external entity handler */
1188   XML_ParserReset(g_parser, NULL);
1189   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190   run_character_check(text, XCS(""));
1191 }
1192 END_TEST
1193 
START_TEST(test_wfc_no_recursive_entity_refs)1194 START_TEST(test_wfc_no_recursive_entity_refs) {
1195   const char *text = "<!DOCTYPE doc [\n"
1196                      "  <!ENTITY entity '&#38;entity;'>\n"
1197                      "]>\n"
1198                      "<doc>&entity;</doc>";
1199 
1200   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                  "Parser did not report recursive entity reference.");
1202 }
1203 END_TEST
1204 
START_TEST(test_recursive_external_parameter_entity_2)1205 START_TEST(test_recursive_external_parameter_entity_2) {
1206   struct TestCase {
1207     const char *doc;
1208     enum XML_Status expectedStatus;
1209   };
1210 
1211   struct TestCase cases[] = {
1212       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213       {"<!ENTITY % p1 '%p1;'>"
1214        "<!ENTITY % p1 'first declaration wins'>",
1215        XML_STATUS_ERROR},
1216       {"<!ENTITY % p1 'first declaration wins'>"
1217        "<!ENTITY % p1 '%p1;'>",
1218        XML_STATUS_OK},
1219       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220   };
1221 
1222   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223     const char *const doc = cases[i].doc;
1224     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225     set_subtest("%s", doc);
1226 
1227     XML_Parser parser = XML_ParserCreate(NULL);
1228     assert_true(parser != NULL);
1229 
1230     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231     assert_true(ext_parser != NULL);
1232 
1233     const enum XML_Status actualStatus
1234         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235 
1236     assert_true(actualStatus == expectedStatus);
1237     if (actualStatus != XML_STATUS_OK) {
1238       assert_true(XML_GetErrorCode(ext_parser)
1239                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1240     }
1241 
1242     XML_ParserFree(ext_parser);
1243     XML_ParserFree(parser);
1244   }
1245 }
1246 END_TEST
1247 
1248 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1249 START_TEST(test_ext_entity_invalid_parse) {
1250   const char *text = "<!DOCTYPE doc [\n"
1251                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252                      "]>\n"
1253                      "<doc>&en;</doc>";
1254   const ExtFaults faults[]
1255       = {{"<", "Incomplete element declaration not faulted", NULL,
1256           XML_ERROR_UNCLOSED_TOKEN},
1257          {"<\xe2\x82", /* First two bytes of a three-byte char */
1258           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260           XML_ERROR_PARTIAL_CHAR},
1261          {NULL, NULL, NULL, XML_ERROR_NONE}};
1262   const ExtFaults *fault = faults;
1263 
1264   for (; fault->parse_text != NULL; fault++) {
1265     set_subtest("\"%s\"", fault->parse_text);
1266     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268     XML_SetUserData(g_parser, (void *)fault);
1269     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270                    "Parser did not report external entity error");
1271     XML_ParserReset(g_parser, NULL);
1272   }
1273 }
1274 END_TEST
1275 
1276 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1277 START_TEST(test_dtd_default_handling) {
1278   const char *text = "<!DOCTYPE doc [\n"
1279                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281                      "<!ELEMENT doc EMPTY>\n"
1282                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283                      "<?pi in dtd?>\n"
1284                      "<!--comment in dtd-->\n"
1285                      "]><doc/>";
1286 
1287   XML_SetDefaultHandler(g_parser, accumulate_characters);
1288   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299 }
1300 END_TEST
1301 
1302 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1303 START_TEST(test_dtd_attr_handling) {
1304   const char *prolog = "<!DOCTYPE doc [\n"
1305                        "<!ELEMENT doc EMPTY>\n";
1306   AttTest attr_data[]
1307       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308           "]>"
1309           "<doc a='two'/>",
1310           XCS("doc"), XCS("a"),
1311           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312           NULL, XML_TRUE},
1313          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315           "]>"
1316           "<doc/>",
1317           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319           "]>"
1320           "<doc/>",
1321           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323           "]>"
1324           "<doc/>",
1325           XCS("doc"), XCS("a"), XCS("CDATA"),
1326 #ifdef XML_UNICODE
1327           XCS("\x06f2"),
1328 #else
1329           XCS("\xdb\xb2"),
1330 #endif
1331           XML_FALSE},
1332          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333   AttTest *test;
1334 
1335   for (test = attr_data; test->definition != NULL; test++) {
1336     set_subtest("%s", test->definition);
1337     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338     XML_SetUserData(g_parser, test);
1339     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340                                 XML_FALSE)
1341         == XML_STATUS_ERROR)
1342       xml_failure(g_parser);
1343     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344                                 (int)strlen(test->definition), XML_TRUE)
1345         == XML_STATUS_ERROR)
1346       xml_failure(g_parser);
1347     XML_ParserReset(g_parser, NULL);
1348   }
1349 }
1350 END_TEST
1351 
1352 /* See related SF bug #673791.
1353    When namespace processing is enabled, setting the namespace URI for
1354    a prefix is not allowed; this test ensures that it *is* allowed
1355    when namespace processing is not enabled.
1356    (See Namespaces in XML, section 2.)
1357 */
START_TEST(test_empty_ns_without_namespaces)1358 START_TEST(test_empty_ns_without_namespaces) {
1359   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360                      "  <e xmlns:prefix=''/>\n"
1361                      "</doc>";
1362 
1363   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364       == XML_STATUS_ERROR)
1365     xml_failure(g_parser);
1366 }
1367 END_TEST
1368 
1369 /* Regression test for SF bug #824420.
1370    Checks that an xmlns:prefix attribute set in an attribute's default
1371    value isn't misinterpreted.
1372 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1373 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374   const char *text = "<!DOCTYPE e:element [\n"
1375                      "  <!ATTLIST e:element\n"
1376                      "    xmlns:e CDATA 'http://example.org/'>\n"
1377                      "      ]>\n"
1378                      "<e:element/>";
1379 
1380   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381       == XML_STATUS_ERROR)
1382     xml_failure(g_parser);
1383 }
1384 END_TEST
1385 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1388 START_TEST(test_stop_parser_between_char_data_calls) {
1389   /* The sample data must be big enough that there are two calls to
1390      the character data handler from within the inner "for" loop of
1391      the XML_TOK_DATA_CHARS case in doContent(), and the character
1392      handler must stop the parser and clear the character data
1393      handler.
1394   */
1395   const char *text = long_character_data_text;
1396 
1397   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398   g_resumable = XML_FALSE;
1399   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400       != XML_STATUS_ERROR)
1401     xml_failure(g_parser);
1402   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403     xml_failure(g_parser);
1404 }
1405 END_TEST
1406 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1409 START_TEST(test_suspend_parser_between_char_data_calls) {
1410   /* The sample data must be big enough that there are two calls to
1411      the character data handler from within the inner "for" loop of
1412      the XML_TOK_DATA_CHARS case in doContent(), and the character
1413      handler must stop the parser and clear the character data
1414      handler.
1415   */
1416   const char *text = long_character_data_text;
1417 
1418   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419   g_resumable = XML_TRUE;
1420   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421       != XML_STATUS_SUSPENDED)
1422     xml_failure(g_parser);
1423   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424     xml_failure(g_parser);
1425   /* Try parsing directly */
1426   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427       != XML_STATUS_ERROR)
1428     fail("Attempt to continue parse while suspended not faulted");
1429   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430     fail("Suspended parse not faulted with correct error");
1431 }
1432 END_TEST
1433 
1434 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1435 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436   const char *text = long_character_data_text;
1437 
1438   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439   g_resumable = XML_FALSE;
1440   g_abortable = XML_FALSE;
1441   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442       != XML_STATUS_ERROR)
1443     fail("Failed to double-stop parser");
1444 
1445   XML_ParserReset(g_parser, NULL);
1446   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447   g_resumable = XML_TRUE;
1448   g_abortable = XML_FALSE;
1449   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450       != XML_STATUS_SUSPENDED)
1451     fail("Failed to double-suspend parser");
1452 
1453   XML_ParserReset(g_parser, NULL);
1454   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455   g_resumable = XML_TRUE;
1456   g_abortable = XML_TRUE;
1457   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458       != XML_STATUS_ERROR)
1459     fail("Failed to suspend-abort parser");
1460 }
1461 END_TEST
1462 
START_TEST(test_good_cdata_ascii)1463 START_TEST(test_good_cdata_ascii) {
1464   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466 
1467   CharData storage;
1468   CharData_Init(&storage);
1469   XML_SetUserData(g_parser, &storage);
1470   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471   /* Add start and end handlers for coverage */
1472   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474 
1475   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476       == XML_STATUS_ERROR)
1477     xml_failure(g_parser);
1478   CharData_CheckXMLChars(&storage, expected);
1479 
1480   /* Try again, this time with a default handler */
1481   XML_ParserReset(g_parser, NULL);
1482   CharData_Init(&storage);
1483   XML_SetUserData(g_parser, &storage);
1484   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486 
1487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488       == XML_STATUS_ERROR)
1489     xml_failure(g_parser);
1490   CharData_CheckXMLChars(&storage, expected);
1491 }
1492 END_TEST
1493 
START_TEST(test_good_cdata_utf16)1494 START_TEST(test_good_cdata_utf16) {
1495   /* Test data is:
1496    *   <?xml version='1.0' encoding='utf-16'?>
1497    *   <a><![CDATA[hello]]></a>
1498    */
1499   const char text[]
1500       = "\0<\0?\0x\0m\0l\0"
1501         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503         "1\0"
1504         "6\0'"
1505         "\0?\0>\0\n"
1506         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507   const XML_Char *expected = XCS("hello");
1508 
1509   CharData storage;
1510   CharData_Init(&storage);
1511   XML_SetUserData(g_parser, &storage);
1512   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513 
1514   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515       == XML_STATUS_ERROR)
1516     xml_failure(g_parser);
1517   CharData_CheckXMLChars(&storage, expected);
1518 }
1519 END_TEST
1520 
START_TEST(test_good_cdata_utf16_le)1521 START_TEST(test_good_cdata_utf16_le) {
1522   /* Test data is:
1523    *   <?xml version='1.0' encoding='utf-16'?>
1524    *   <a><![CDATA[hello]]></a>
1525    */
1526   const char text[]
1527       = "<\0?\0x\0m\0l\0"
1528         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530         "1\0"
1531         "6\0'"
1532         "\0?\0>\0\n"
1533         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534   const XML_Char *expected = XCS("hello");
1535 
1536   CharData storage;
1537   CharData_Init(&storage);
1538   XML_SetUserData(g_parser, &storage);
1539   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540 
1541   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542       == XML_STATUS_ERROR)
1543     xml_failure(g_parser);
1544   CharData_CheckXMLChars(&storage, expected);
1545 }
1546 END_TEST
1547 
1548 /* Test UTF16 conversion of a long cdata string */
1549 
1550 /* 16 characters: handy macro to reduce visual clutter */
1551 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552 
START_TEST(test_long_cdata_utf16)1553 START_TEST(test_long_cdata_utf16) {
1554   /* Test data is:
1555    * <?xlm version='1.0' encoding='utf-16'?>
1556    * <a><![CDATA[
1557    * ABCDEFGHIJKLMNOP
1558    * ]]></a>
1559    */
1560   const char text[]
1561       = "\0<\0?\0x\0m\0l\0 "
1562         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565       /* 64 characters per line */
1566       /* clang-format off */
1567         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1568         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583         A_TO_P_IN_UTF16
1584         /* clang-format on */
1585         "\0]\0]\0>\0<\0/\0a\0>";
1586   const XML_Char *expected =
1587       /* clang-format off */
1588         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604         XCS("ABCDEFGHIJKLMNOP");
1605   /* clang-format on */
1606   CharData storage;
1607   void *buffer;
1608 
1609   CharData_Init(&storage);
1610   XML_SetUserData(g_parser, &storage);
1611   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613   if (buffer == NULL)
1614     fail("Could not allocate parse buffer");
1615   assert(buffer != NULL);
1616   memcpy(buffer, text, sizeof(text) - 1);
1617   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618     xml_failure(g_parser);
1619   CharData_CheckXMLChars(&storage, expected);
1620 }
1621 END_TEST
1622 
1623 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1624 START_TEST(test_multichar_cdata_utf16) {
1625   /* Test data is:
1626    *   <?xml version='1.0' encoding='utf-16'?>
1627    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628    *
1629    * where {MINIM} is U+1d15e (a minim or half-note)
1630    *   UTF-16: 0xd834 0xdd5e
1631    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1632    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633    *   UTF-16: 0xd834 0xdd5f
1634    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1635    */
1636   const char text[] = "\0<\0?\0x\0m\0l\0"
1637                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639                       "1\0"
1640                       "6\0'"
1641                       "\0?\0>\0\n"
1642                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644                       "\0]\0]\0>\0<\0/\0a\0>";
1645 #ifdef XML_UNICODE
1646   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647 #else
1648   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649 #endif
1650   CharData storage;
1651 
1652   CharData_Init(&storage);
1653   XML_SetUserData(g_parser, &storage);
1654   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655 
1656   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657       == XML_STATUS_ERROR)
1658     xml_failure(g_parser);
1659   CharData_CheckXMLChars(&storage, expected);
1660 }
1661 END_TEST
1662 
/* Test that a UTF-16 surrogate pair in the wrong order (low surrogate
 * first) inside a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1664 START_TEST(test_utf16_bad_surrogate_pair) {
1665   /* Test data is:
1666    *   <?xml version='1.0' encoding='utf-16'?>
1667    *   <a><![CDATA[{BADLINB}]]></a>
1668    *
1669    * where {BADLINB} is U+10000 (the first Linear B character)
1670    * with the UTF-16 surrogate pair in the wrong order, i.e.
1671    *   0xdc00 0xd800
1672    */
1673   const char text[] = "\0<\0?\0x\0m\0l\0"
1674                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676                       "1\0"
1677                       "6\0'"
1678                       "\0?\0>\0\n"
1679                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680                       "\xdc\x00\xd8\x00"
1681                       "\0]\0]\0>\0<\0/\0a\0>";
1682 
1683   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684       != XML_STATUS_ERROR)
1685     fail("Reversed UTF-16 surrogate pair not faulted");
1686   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687     xml_failure(g_parser);
1688 }
1689 END_TEST
1690 
START_TEST(test_bad_cdata)1691 START_TEST(test_bad_cdata) {
1692   struct CaseData {
1693     const char *text;
1694     enum XML_Error expectedError;
1695   };
1696 
1697   struct CaseData cases[]
1698       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706 
1707          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710 
1711          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1713          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718 
1719          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722 
1723   size_t i = 0;
1724   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725     set_subtest("%s", cases[i].text);
1726     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729 
1730     assert(actualStatus == XML_STATUS_ERROR);
1731 
1732     if (actualError != cases[i].expectedError) {
1733       char message[100];
1734       snprintf(message, sizeof(message),
1735                "Expected error %d but got error %d for case %u: \"%s\"\n",
1736                cases[i].expectedError, actualError, (unsigned int)i + 1,
1737                cases[i].text);
1738       fail(message);
1739     }
1740 
1741     XML_ParserReset(g_parser, NULL);
1742   }
1743 }
1744 END_TEST
1745 
1746 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1747 START_TEST(test_bad_cdata_utf16) {
1748   struct CaseData {
1749     size_t text_bytes;
1750     const char *text;
1751     enum XML_Error expected_error;
1752   };
1753 
1754   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757                         "1\0"
1758                         "6\0'"
1759                         "\0?\0>\0\n"
1760                         "\0<\0a\0>";
1761   struct CaseData cases[] = {
1762       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782       /* Now add a four-byte UTF-16 character */
1783       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784        XML_ERROR_UNCLOSED_CDATA_SECTION},
1785       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787        XML_ERROR_PARTIAL_CHAR},
1788       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790   size_t i;
1791 
1792   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793     set_subtest("case %lu", (long unsigned)(i + 1));
1794     enum XML_Status actual_status;
1795     enum XML_Error actual_error;
1796 
1797     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798                                 XML_FALSE)
1799         == XML_STATUS_ERROR)
1800       xml_failure(g_parser);
1801     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802                                             (int)cases[i].text_bytes, XML_TRUE);
1803     assert(actual_status == XML_STATUS_ERROR);
1804     actual_error = XML_GetErrorCode(g_parser);
1805     if (actual_error != cases[i].expected_error) {
1806       char message[1024];
1807 
1808       snprintf(message, sizeof(message),
1809                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810                ") for case %lu\n",
1811                cases[i].expected_error,
1812                XML_ErrorString(cases[i].expected_error), actual_error,
1813                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814       fail(message);
1815     }
1816     XML_ParserReset(g_parser, NULL);
1817   }
1818 }
1819 END_TEST
1820 
1821 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1822 START_TEST(test_stop_parser_between_cdata_calls) {
1823   const char *text = long_cdata_text;
1824 
1825   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826   g_resumable = XML_FALSE;
1827   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828 }
1829 END_TEST
1830 
1831 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1832 START_TEST(test_suspend_parser_between_cdata_calls) {
1833   const char *text = long_cdata_text;
1834   enum XML_Status result;
1835 
1836   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837   g_resumable = XML_TRUE;
1838   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839   if (result != XML_STATUS_SUSPENDED) {
1840     if (result == XML_STATUS_ERROR)
1841       xml_failure(g_parser);
1842     fail("Parse not suspended in CDATA handler");
1843   }
1844   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845     xml_failure(g_parser);
1846 }
1847 END_TEST
1848 
1849 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1850 START_TEST(test_memory_allocation) {
1851   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852   char *p;
1853 
1854   if (buffer == NULL) {
1855     fail("Allocation failed");
1856   } else {
1857     /* Try writing to memory; some OSes try to cheat! */
1858     buffer[0] = 'T';
1859     buffer[1] = 'E';
1860     buffer[2] = 'S';
1861     buffer[3] = 'T';
1862     buffer[4] = '\0';
1863     if (strcmp(buffer, "TEST") != 0) {
1864       fail("Memory not writable");
1865     } else {
1866       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867       if (p == NULL) {
1868         fail("Reallocation failed");
1869       } else {
1870         /* Write again, just to be sure */
1871         buffer = p;
1872         buffer[0] = 'V';
1873         if (strcmp(buffer, "VEST") != 0) {
1874           fail("Reallocated memory not writable");
1875         }
1876       }
1877     }
1878     XML_MemFree(g_parser, buffer);
1879   }
1880 }
1881 END_TEST
1882 
1883 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)1884 START_TEST(test_default_current) {
1885   const char *text = "<doc>hell]</doc>";
1886   const char *entity_text = "<!DOCTYPE doc [\n"
1887                             "<!ENTITY entity '&#37;'>\n"
1888                             "]>\n"
1889                             "<doc>&entity;</doc>";
1890 
1891   set_subtest("with defaulting");
1892   {
1893     struct handler_record_list storage;
1894     storage.count = 0;
1895     XML_SetDefaultHandler(g_parser, record_default_handler);
1896     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897     XML_SetUserData(g_parser, &storage);
1898     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899         == XML_STATUS_ERROR)
1900       xml_failure(g_parser);
1901     int i = 0;
1902     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903     // we should have gotten one or more cdata callbacks, totaling 5 chars
1904     int cdata_len_remaining = 5;
1905     while (cdata_len_remaining > 0) {
1906       const struct handler_record_entry *c_entry
1907           = handler_record_get(&storage, i++);
1908       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909       assert_true(c_entry->arg > 0);
1910       assert_true(c_entry->arg <= cdata_len_remaining);
1911       cdata_len_remaining -= c_entry->arg;
1912       // default handler must follow, with the exact same len argument.
1913       assert_record_handler_called(&storage, i++, "record_default_handler",
1914                                    c_entry->arg);
1915     }
1916     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917     assert_true(storage.count == i);
1918   }
1919 
1920   /* Again, without the defaulting */
1921   set_subtest("no defaulting");
1922   {
1923     struct handler_record_list storage;
1924     storage.count = 0;
1925     XML_ParserReset(g_parser, NULL);
1926     XML_SetDefaultHandler(g_parser, record_default_handler);
1927     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928     XML_SetUserData(g_parser, &storage);
1929     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930         == XML_STATUS_ERROR)
1931       xml_failure(g_parser);
1932     int i = 0;
1933     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934     // we should have gotten one or more cdata callbacks, totaling 5 chars
1935     int cdata_len_remaining = 5;
1936     while (cdata_len_remaining > 0) {
1937       const struct handler_record_entry *c_entry
1938           = handler_record_get(&storage, i++);
1939       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940       assert_true(c_entry->arg > 0);
1941       assert_true(c_entry->arg <= cdata_len_remaining);
1942       cdata_len_remaining -= c_entry->arg;
1943     }
1944     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945     assert_true(storage.count == i);
1946   }
1947 
1948   /* Now with an internal entity to complicate matters */
1949   set_subtest("with internal entity");
1950   {
1951     struct handler_record_list storage;
1952     storage.count = 0;
1953     XML_ParserReset(g_parser, NULL);
1954     XML_SetDefaultHandler(g_parser, record_default_handler);
1955     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956     XML_SetUserData(g_parser, &storage);
1957     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958                                 XML_TRUE)
1959         == XML_STATUS_ERROR)
1960       xml_failure(g_parser);
1961     /* The default handler suppresses the entity */
1962     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981     assert_true(storage.count == 19);
1982   }
1983 
1984   /* Again, with a skip handler */
1985   set_subtest("with skip handler");
1986   {
1987     struct handler_record_list storage;
1988     storage.count = 0;
1989     XML_ParserReset(g_parser, NULL);
1990     XML_SetDefaultHandler(g_parser, record_default_handler);
1991     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993     XML_SetUserData(g_parser, &storage);
1994     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995                                 XML_TRUE)
1996         == XML_STATUS_ERROR)
1997       xml_failure(g_parser);
1998     /* The default handler suppresses the entity */
1999     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018     assert_true(storage.count == 19);
2019   }
2020 
2021   /* This time, allow the entity through */
2022   set_subtest("allow entity");
2023   {
2024     struct handler_record_list storage;
2025     storage.count = 0;
2026     XML_ParserReset(g_parser, NULL);
2027     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029     XML_SetUserData(g_parser, &storage);
2030     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031                                 XML_TRUE)
2032         == XML_STATUS_ERROR)
2033       xml_failure(g_parser);
2034     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054     assert_true(storage.count == 20);
2055   }
2056 
2057   /* Finally, without passing the cdata to the default handler */
2058   set_subtest("not passing cdata");
2059   {
2060     struct handler_record_list storage;
2061     storage.count = 0;
2062     XML_ParserReset(g_parser, NULL);
2063     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065     XML_SetUserData(g_parser, &storage);
2066     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067                                 XML_TRUE)
2068         == XML_STATUS_ERROR)
2069       xml_failure(g_parser);
2070     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088                                  1);
2089     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090     assert_true(storage.count == 19);
2091   }
2092 }
2093 END_TEST
2094 
2095 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2096 START_TEST(test_dtd_elements) {
2097   const char *text = "<!DOCTYPE doc [\n"
2098                      "<!ELEMENT doc (chapter)>\n"
2099                      "<!ELEMENT chapter (#PCDATA)>\n"
2100                      "]>\n"
2101                      "<doc><chapter>Wombats are go</chapter></doc>";
2102 
2103   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105       == XML_STATUS_ERROR)
2106     xml_failure(g_parser);
2107 }
2108 END_TEST
2109 
2110 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2111 element_decl_check_model(void *userData, const XML_Char *name,
2112                          XML_Content *model) {
2113   UNUSED_P(userData);
2114   uint32_t errorFlags = 0;
2115 
2116   /* Expected model array structure is this:
2117    * [0] (type 6, quant 0)
2118    *   [1] (type 5, quant 0)
2119    *     [3] (type 4, quant 0, name "bar")
2120    *     [4] (type 4, quant 0, name "foo")
2121    *     [5] (type 4, quant 3, name "xyz")
2122    *   [2] (type 4, quant 2, name "zebra")
2123    */
2124   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126 
2127   if (model != NULL) {
2128     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133 
2134     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139 
2140     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144     errorFlags
2145         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146 
2147     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152 
2153     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158 
2159     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164   }
2165 
2166   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167   XML_FreeContentModel(g_parser, model);
2168 }
2169 
START_TEST(test_dtd_elements_nesting)2170 START_TEST(test_dtd_elements_nesting) {
2171   // Payload inspired by a test in Perl's XML::Parser
2172   const char *text = "<!DOCTYPE foo [\n"
2173                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174                      "]>\n"
2175                      "<foo/>";
2176 
2177   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178 
2179   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181       == XML_STATUS_ERROR)
2182     xml_failure(g_parser);
2183 
2184   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185     fail("Element declaration model regression detected");
2186 }
2187 END_TEST
2188 
2189 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2190 START_TEST(test_set_foreign_dtd) {
2191   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192   const char *text2 = "<doc>&entity;</doc>";
2193   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194 
2195   /* Check hash salt is passed through too */
2196   XML_SetHashSalt(g_parser, 0x12345678);
2197   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198   XML_SetUserData(g_parser, &test_data);
2199   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200   /* Add a default handler to exercise more code paths */
2201   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203     fail("Could not set foreign DTD");
2204   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205       == XML_STATUS_ERROR)
2206     xml_failure(g_parser);
2207 
2208   /* Ensure that trying to set the DTD after parsing has started
2209    * is faulted, even if it's the same setting.
2210    */
2211   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213     fail("Failed to reject late foreign DTD setting");
2214   /* Ditto for the hash salt */
2215   if (XML_SetHashSalt(g_parser, 0x23456789))
2216     fail("Failed to reject late hash salt change");
2217 
2218   /* Now finish the parse */
2219   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220       == XML_STATUS_ERROR)
2221     xml_failure(g_parser);
2222 }
2223 END_TEST
2224 
2225 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2226 START_TEST(test_foreign_dtd_not_standalone) {
2227   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228                      "<doc>&entity;</doc>";
2229   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230 
2231   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232   XML_SetUserData(g_parser, &test_data);
2233   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236     fail("Could not set foreign DTD");
2237   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238                  "NotStandalonehandler failed to reject");
2239 }
2240 END_TEST
2241 
2242 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2243 START_TEST(test_invalid_foreign_dtd) {
2244   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245                      "<doc>&entity;</doc>";
2246   ExtFaults test_data
2247       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248 
2249   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250   XML_SetUserData(g_parser, &test_data);
2251   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252   XML_UseForeignDTD(g_parser, XML_TRUE);
2253   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254                  "Bad DTD should not have been accepted");
2255 }
2256 END_TEST
2257 
2258 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2259 START_TEST(test_foreign_dtd_with_doctype) {
2260   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262   const char *text2 = "<doc>&entity;</doc>";
2263   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264 
2265   /* Check hash salt is passed through too */
2266   XML_SetHashSalt(g_parser, 0x12345678);
2267   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268   XML_SetUserData(g_parser, &test_data);
2269   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270   /* Add a default handler to exercise more code paths */
2271   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273     fail("Could not set foreign DTD");
2274   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275       == XML_STATUS_ERROR)
2276     xml_failure(g_parser);
2277 
2278   /* Ensure that trying to set the DTD after parsing has started
2279    * is faulted, even if it's the same setting.
2280    */
2281   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283     fail("Failed to reject late foreign DTD setting");
2284   /* Ditto for the hash salt */
2285   if (XML_SetHashSalt(g_parser, 0x23456789))
2286     fail("Failed to reject late hash salt change");
2287 
2288   /* Now finish the parse */
2289   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290       == XML_STATUS_ERROR)
2291     xml_failure(g_parser);
2292 }
2293 END_TEST
2294 
2295 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2296 START_TEST(test_foreign_dtd_without_external_subset) {
2297   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298                      "<doc>&foo;</doc>";
2299 
2300   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301   XML_SetUserData(g_parser, NULL);
2302   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303   XML_UseForeignDTD(g_parser, XML_TRUE);
2304   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305       == XML_STATUS_ERROR)
2306     xml_failure(g_parser);
2307 }
2308 END_TEST
2309 
START_TEST(test_empty_foreign_dtd)2310 START_TEST(test_empty_foreign_dtd) {
2311   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312                      "<doc>&entity;</doc>";
2313 
2314   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316   XML_UseForeignDTD(g_parser, XML_TRUE);
2317   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318                  "Undefined entity not faulted");
2319 }
2320 END_TEST
2321 
2322 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2323 START_TEST(test_set_base) {
2324   const XML_Char *old_base;
2325   const XML_Char *new_base = XCS("/local/file/name.xml");
2326 
2327   old_base = XML_GetBase(g_parser);
2328   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329     fail("Unable to set base");
2330   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331     fail("Base setting not correct");
2332   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333     fail("Unable to NULL base");
2334   if (XML_GetBase(g_parser) != NULL)
2335     fail("Base setting not nulled");
2336   XML_SetBase(g_parser, old_base);
2337 }
2338 END_TEST
2339 
2340 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2341 START_TEST(test_attributes) {
2342   const char *text = "<!DOCTYPE doc [\n"
2343                      "<!ELEMENT doc (tag)>\n"
2344                      "<!ATTLIST doc id ID #REQUIRED>\n"
2345                      "]>"
2346                      "<doc a='1' id='one' b='2'>"
2347                      "<tag c='3'/>"
2348                      "</doc>";
2349   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350                          {XCS("b"), XCS("2")},
2351                          {XCS("id"), XCS("one")},
2352                          {NULL, NULL}};
2353   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355                         {XCS("tag"), 1, NULL, NULL},
2356                         {NULL, 0, NULL, NULL}};
2357   info[0].attributes = doc_info;
2358   info[1].attributes = tag_info;
2359 
2360   XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2361   XML_SetUserData(g_parser, info);
2362   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2363       == XML_STATUS_ERROR)
2364     xml_failure(g_parser);
2365 }
2366 END_TEST
2367 
2368 /* Test reset works correctly in the middle of processing an internal
2369  * entity.  Exercises some obscure code in XML_ParserReset().
2370  */
START_TEST(test_reset_in_entity)2371 START_TEST(test_reset_in_entity) {
2372   const char *text = "<!DOCTYPE doc [\n"
2373                      "<!ENTITY wombat 'wom'>\n"
2374                      "<!ENTITY entity 'hi &wom; there'>\n"
2375                      "]>\n"
2376                      "<doc>&entity;</doc>";
2377   XML_ParsingStatus status;
2378 
2379   g_resumable = XML_TRUE;
2380   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2381   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2382       == XML_STATUS_ERROR)
2383     xml_failure(g_parser);
2384   XML_GetParsingStatus(g_parser, &status);
2385   if (status.parsing != XML_SUSPENDED)
2386     fail("Parsing status not SUSPENDED");
2387   XML_ParserReset(g_parser, NULL);
2388   XML_GetParsingStatus(g_parser, &status);
2389   if (status.parsing != XML_INITIALIZED)
2390     fail("Parsing status doesn't reset to INITIALIZED");
2391 }
2392 END_TEST
2393 
2394 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2395 START_TEST(test_resume_invalid_parse) {
2396   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2397 
2398   g_resumable = XML_TRUE;
2399   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2400   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2401       == XML_STATUS_ERROR)
2402     xml_failure(g_parser);
2403   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2404     fail("Resumed invalid parse not faulted");
2405   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2406     fail("Invalid parse not correctly faulted");
2407 }
2408 END_TEST
2409 
2410 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2411 START_TEST(test_resume_resuspended) {
2412   const char *text = "<doc>Hello<meep/>world</doc>";
2413 
2414   g_resumable = XML_TRUE;
2415   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2416   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2417       == XML_STATUS_ERROR)
2418     xml_failure(g_parser);
2419   g_resumable = XML_TRUE;
2420   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2421   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2422     fail("Resumption not suspended");
2423   /* This one should succeed and finish up */
2424   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2425     xml_failure(g_parser);
2426 }
2427 END_TEST
2428 
2429 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2430 START_TEST(test_cdata_default) {
2431   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2432   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2433   CharData storage;
2434 
2435   CharData_Init(&storage);
2436   XML_SetUserData(g_parser, &storage);
2437   XML_SetDefaultHandler(g_parser, accumulate_characters);
2438 
2439   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2440       == XML_STATUS_ERROR)
2441     xml_failure(g_parser);
2442   CharData_CheckXMLChars(&storage, expected);
2443 }
2444 END_TEST
2445 
2446 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2447 START_TEST(test_subordinate_reset) {
2448   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2449                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2450                      "<doc>&entity;</doc>";
2451 
2452   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2454   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2455       == XML_STATUS_ERROR)
2456     xml_failure(g_parser);
2457 }
2458 END_TEST
2459 
2460 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2461 START_TEST(test_subordinate_suspend) {
2462   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2463                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2464                      "<doc>&entity;</doc>";
2465 
2466   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2467   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2468   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2469       == XML_STATUS_ERROR)
2470     xml_failure(g_parser);
2471 }
2472 END_TEST
2473 
2474 /* Test suspending a subordinate parser from an XML declaration */
2475 /* Increases code coverage of the tests */
2476 
START_TEST(test_subordinate_xdecl_suspend)2477 START_TEST(test_subordinate_xdecl_suspend) {
2478   const char *text
2479       = "<!DOCTYPE doc [\n"
2480         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2481         "]>\n"
2482         "<doc>&entity;</doc>";
2483 
2484   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2486   g_resumable = XML_TRUE;
2487   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2488       == XML_STATUS_ERROR)
2489     xml_failure(g_parser);
2490 }
2491 END_TEST
2492 
START_TEST(test_subordinate_xdecl_abort)2493 START_TEST(test_subordinate_xdecl_abort) {
2494   const char *text
2495       = "<!DOCTYPE doc [\n"
2496         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2497         "]>\n"
2498         "<doc>&entity;</doc>";
2499 
2500   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2501   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2502   g_resumable = XML_FALSE;
2503   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2504       == XML_STATUS_ERROR)
2505     xml_failure(g_parser);
2506 }
2507 END_TEST
2508 
2509 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2510 START_TEST(test_ext_entity_invalid_suspended_parse) {
2511   const char *text = "<!DOCTYPE doc [\n"
2512                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2513                      "]>\n"
2514                      "<doc>&en;</doc>";
2515   ExtFaults faults[]
2516       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2517           "Incomplete element declaration not faulted", NULL,
2518           XML_ERROR_UNCLOSED_TOKEN},
2519          {/* First two bytes of a three-byte char */
2520           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2521           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2522          {NULL, NULL, NULL, XML_ERROR_NONE}};
2523   ExtFaults *fault;
2524 
2525   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2526     set_subtest("%s", fault->parse_text);
2527     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2528     XML_SetExternalEntityRefHandler(g_parser,
2529                                     external_entity_suspending_faulter);
2530     XML_SetUserData(g_parser, fault);
2531     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2532                    "Parser did not report external entity error");
2533     XML_ParserReset(g_parser, NULL);
2534   }
2535 }
2536 END_TEST
2537 
2538 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2539 START_TEST(test_explicit_encoding) {
2540   const char *text1 = "<doc>Hello ";
2541   const char *text2 = " World</doc>";
2542 
2543   /* Just check that we can set the encoding to NULL before starting */
2544   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2545     fail("Failed to initialise encoding to NULL");
2546   /* Say we are UTF-8 */
2547   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2548     fail("Failed to set explicit encoding");
2549   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2550       == XML_STATUS_ERROR)
2551     xml_failure(g_parser);
2552   /* Try to switch encodings mid-parse */
2553   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2554     fail("Allowed encoding change");
2555   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2556       == XML_STATUS_ERROR)
2557     xml_failure(g_parser);
2558   /* Try now the parse is over */
2559   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2560     fail("Failed to unset encoding");
2561 }
2562 END_TEST
2563 
2564 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2565 START_TEST(test_trailing_cr) {
2566   const char *text = "<doc>\r";
2567   int found_cr;
2568 
2569   /* Try with a character handler, for code coverage */
2570   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2571   XML_SetUserData(g_parser, &found_cr);
2572   found_cr = 0;
2573   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574       == XML_STATUS_OK)
2575     fail("Failed to fault unclosed doc");
2576   if (found_cr == 0)
2577     fail("Did not catch the carriage return");
2578   XML_ParserReset(g_parser, NULL);
2579 
2580   /* Now with a default handler instead */
2581   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2582   XML_SetUserData(g_parser, &found_cr);
2583   found_cr = 0;
2584   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2585       == XML_STATUS_OK)
2586     fail("Failed to fault unclosed doc");
2587   if (found_cr == 0)
2588     fail("Did not catch default carriage return");
2589 }
2590 END_TEST
2591 
2592 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2593 START_TEST(test_ext_entity_trailing_cr) {
2594   const char *text = "<!DOCTYPE doc [\n"
2595                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2596                      "]>\n"
2597                      "<doc>&en;</doc>";
2598   int found_cr;
2599 
2600   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2601   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2602   XML_SetUserData(g_parser, &found_cr);
2603   found_cr = 0;
2604   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2605       != XML_STATUS_OK)
2606     xml_failure(g_parser);
2607   if (found_cr == 0)
2608     fail("No carriage return found");
2609   XML_ParserReset(g_parser, NULL);
2610 
2611   /* Try again with a different trailing CR */
2612   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2613   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2614   XML_SetUserData(g_parser, &found_cr);
2615   found_cr = 0;
2616   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2617       != XML_STATUS_OK)
2618     xml_failure(g_parser);
2619   if (found_cr == 0)
2620     fail("No carriage return found");
2621 }
2622 END_TEST
2623 
2624 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2625 START_TEST(test_trailing_rsqb) {
2626   const char *text8 = "<doc>]";
2627   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2628   int found_rsqb;
2629   int text8_len = (int)strlen(text8);
2630 
2631   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2632   XML_SetUserData(g_parser, &found_rsqb);
2633   found_rsqb = 0;
2634   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2635       == XML_STATUS_OK)
2636     fail("Failed to fault unclosed doc");
2637   if (found_rsqb == 0)
2638     fail("Did not catch the right square bracket");
2639 
2640   /* Try again with a different encoding */
2641   XML_ParserReset(g_parser, NULL);
2642   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2643   XML_SetUserData(g_parser, &found_rsqb);
2644   found_rsqb = 0;
2645   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2646                               XML_TRUE)
2647       == XML_STATUS_OK)
2648     fail("Failed to fault unclosed doc");
2649   if (found_rsqb == 0)
2650     fail("Did not catch the right square bracket");
2651 
2652   /* And finally with a default handler */
2653   XML_ParserReset(g_parser, NULL);
2654   XML_SetDefaultHandler(g_parser, rsqb_handler);
2655   XML_SetUserData(g_parser, &found_rsqb);
2656   found_rsqb = 0;
2657   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2658                               XML_TRUE)
2659       == XML_STATUS_OK)
2660     fail("Failed to fault unclosed doc");
2661   if (found_rsqb == 0)
2662     fail("Did not catch the right square bracket");
2663 }
2664 END_TEST
2665 
2666 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2667 START_TEST(test_ext_entity_trailing_rsqb) {
2668   const char *text = "<!DOCTYPE doc [\n"
2669                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2670                      "]>\n"
2671                      "<doc>&en;</doc>";
2672   int found_rsqb;
2673 
2674   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2675   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2676   XML_SetUserData(g_parser, &found_rsqb);
2677   found_rsqb = 0;
2678   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2679       != XML_STATUS_OK)
2680     xml_failure(g_parser);
2681   if (found_rsqb == 0)
2682     fail("No right square bracket found");
2683 }
2684 END_TEST
2685 
2686 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2687 START_TEST(test_ext_entity_good_cdata) {
2688   const char *text = "<!DOCTYPE doc [\n"
2689                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2690                      "]>\n"
2691                      "<doc>&en;</doc>";
2692 
2693   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2694   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2695   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2696       != XML_STATUS_OK)
2697     xml_failure(g_parser);
2698 }
2699 END_TEST
2700 
2701 /* Test user parameter settings */
START_TEST(test_user_parameters)2702 START_TEST(test_user_parameters) {
2703   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2704                      "<!-- Primary parse -->\n"
2705                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2706                      "<doc>&entity;";
2707   const char *epilog = "<!-- Back to primary parser -->\n"
2708                        "</doc>";
2709 
2710   g_comment_count = 0;
2711   g_skip_count = 0;
2712   g_xdecl_count = 0;
2713   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2714   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2715   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2716   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2717   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2718   XML_UseParserAsHandlerArg(g_parser);
2719   XML_SetUserData(g_parser, (void *)1);
2720   g_handler_data = g_parser;
2721   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2722       == XML_STATUS_ERROR)
2723     xml_failure(g_parser);
2724   /* Ensure we can't change policy mid-parse */
2725   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2726     fail("Changed param entity parsing policy while parsing");
2727   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2728       == XML_STATUS_ERROR)
2729     xml_failure(g_parser);
2730   if (g_comment_count != 3)
2731     fail("Comment handler not invoked enough times");
2732   if (g_skip_count != 1)
2733     fail("Skip handler not invoked enough times");
2734   if (g_xdecl_count != 1)
2735     fail("XML declaration handler not invoked");
2736 }
2737 END_TEST
2738 
2739 /* Test that an explicit external entity handler argument replaces
2740  * the parser as the first argument.
2741  *
2742  * We do not call the first parameter to the external entity handler
2743  * 'parser' for once, since the first time the handler is called it
2744  * will actually be a text string.  We need to be able to access the
2745  * global 'parser' variable to create our external entity parser from,
2746  * since there are code paths we need to ensure get executed.
2747  */
START_TEST(test_ext_entity_ref_parameter)2748 START_TEST(test_ext_entity_ref_parameter) {
2749   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2750                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2751                      "<doc>&entity;</doc>";
2752 
2753   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2754   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2755   /* Set a handler arg that is not NULL and not parser (which is
2756    * what NULL would cause to be passed.
2757    */
2758   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2759   g_handler_data = text;
2760   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761       == XML_STATUS_ERROR)
2762     xml_failure(g_parser);
2763 
2764   /* Now try again with unset args */
2765   XML_ParserReset(g_parser, NULL);
2766   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2767   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2768   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2769   g_handler_data = g_parser;
2770   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2771       == XML_STATUS_ERROR)
2772     xml_failure(g_parser);
2773 }
2774 END_TEST
2775 
2776 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2777 START_TEST(test_empty_parse) {
2778   const char *text = "<doc></doc>";
2779   const char *partial = "<doc>";
2780 
2781   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2782     fail("Parsing empty string faulted");
2783   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2784     fail("Parsing final empty string not faulted");
2785   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2786     fail("Parsing final empty string faulted for wrong reason");
2787 
2788   /* Now try with valid text before the empty end */
2789   XML_ParserReset(g_parser, NULL);
2790   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2791       == XML_STATUS_ERROR)
2792     xml_failure(g_parser);
2793   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2794     fail("Parsing final empty string faulted");
2795 
2796   /* Now try with invalid text before the empty end */
2797   XML_ParserReset(g_parser, NULL);
2798   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2799                               XML_FALSE)
2800       == XML_STATUS_ERROR)
2801     xml_failure(g_parser);
2802   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2803     fail("Parsing final incomplete empty string not faulted");
2804 }
2805 END_TEST
2806 
2807 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2808 START_TEST(test_negative_len_parse) {
2809   const char *const doc = "<root/>";
2810   for (int isFinal = 0; isFinal < 2; isFinal++) {
2811     set_subtest("isFinal=%d", isFinal);
2812 
2813     XML_Parser parser = XML_ParserCreate(NULL);
2814 
2815     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2816       fail("There was not supposed to be any initial parse error.");
2817 
2818     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2819 
2820     if (status != XML_STATUS_ERROR)
2821       fail("Negative len was expected to fail the parse but did not.");
2822 
2823     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2824       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2825 
2826     XML_ParserFree(parser);
2827   }
2828 }
2829 END_TEST
2830 
2831 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2832 START_TEST(test_negative_len_parse_buffer) {
2833   const char *const doc = "<root/>";
2834   for (int isFinal = 0; isFinal < 2; isFinal++) {
2835     set_subtest("isFinal=%d", isFinal);
2836 
2837     XML_Parser parser = XML_ParserCreate(NULL);
2838 
2839     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2840       fail("There was not supposed to be any initial parse error.");
2841 
2842     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2843 
2844     if (buffer == NULL)
2845       fail("XML_GetBuffer failed.");
2846 
2847     memcpy(buffer, doc, strlen(doc));
2848 
2849     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2850 
2851     if (status != XML_STATUS_ERROR)
2852       fail("Negative len was expected to fail the parse but did not.");
2853 
2854     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2855       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2856 
2857     XML_ParserFree(parser);
2858   }
2859 }
2860 END_TEST
2861 
2862 /* Test odd corners of the XML_GetBuffer interface */
2863 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)2864 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2865   const XML_Feature *feature = XML_GetFeatureList();
2866 
2867   if (feature == NULL)
2868     return XML_STATUS_ERROR;
2869   for (; feature->feature != XML_FEATURE_END; feature++) {
2870     if (feature->feature == feature_id) {
2871       *presult = feature->value;
2872       return XML_STATUS_OK;
2873     }
2874   }
2875   return XML_STATUS_ERROR;
2876 }
2877 
2878 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)2879 START_TEST(test_get_buffer_1) {
2880   const char *text = get_buffer_test_text;
2881   void *buffer;
2882   long context_bytes;
2883 
2884   /* Attempt to allocate a negative length buffer */
2885   if (XML_GetBuffer(g_parser, -12) != NULL)
2886     fail("Negative length buffer not failed");
2887 
2888   /* Now get a small buffer and extend it past valid length */
2889   buffer = XML_GetBuffer(g_parser, 1536);
2890   if (buffer == NULL)
2891     fail("1.5K buffer failed");
2892   assert(buffer != NULL);
2893   memcpy(buffer, text, strlen(text));
2894   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2895       == XML_STATUS_ERROR)
2896     xml_failure(g_parser);
2897   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2898     fail("INT_MAX buffer not failed");
2899 
2900   /* Now try extending it a more reasonable but still too large
2901    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2902    * size until it exceeds the requested amount or INT_MAX.  If it
2903    * exceeds INT_MAX, it rejects the request, so we want a request
2904    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2905    * with an extra byte just to ensure that the request is off any
2906    * boundary.  The request will be inflated internally by
2907    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2908    * request.
2909    */
2910   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2911     context_bytes = 0;
2912   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2913     fail("INT_MAX- buffer not failed");
2914 
2915   /* Now try extending it a carefully crafted amount */
2916   if (XML_GetBuffer(g_parser, 1000) == NULL)
2917     fail("1000 buffer failed");
2918 }
2919 END_TEST
2920 
2921 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)2922 START_TEST(test_get_buffer_2) {
2923   const char *text = get_buffer_test_text;
2924   void *buffer;
2925 
2926   /* Now get a decent buffer */
2927   buffer = XML_GetBuffer(g_parser, 1536);
2928   if (buffer == NULL)
2929     fail("1.5K buffer failed");
2930   assert(buffer != NULL);
2931   memcpy(buffer, text, strlen(text));
2932   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2933       == XML_STATUS_ERROR)
2934     xml_failure(g_parser);
2935 
2936   /* Extend it, to catch a different code path */
2937   if (XML_GetBuffer(g_parser, 1024) == NULL)
2938     fail("1024 buffer failed");
2939 }
2940 END_TEST
2941 
/* Regression test for the signed integer overflow of CVE-2022-23852 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int expectedKeepValue = (int)strlen(text);
  XML_Parser parser = XML_ParserCreate(NULL);

  assert(parser != NULL);

  // Feeding the text makes variable "keep" in XML_GetBuffer take
  // the value expectedKeepValue on the next call
  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                              XML_FALSE /* isFinal */)
      == XML_STATUS_ERROR)
    xml_failure(parser);

  // Request just enough that adding expectedKeepValue would pass INT_MAX
  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2966 
START_TEST(test_buffer_can_grow_to_max)2967 START_TEST(test_buffer_can_grow_to_max) {
2968   const char *const prefixes[] = {
2969       "",
2970       "<",
2971       "<x a='",
2972       "<doc><x a='",
2973       "<document><x a='",
2974       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2975       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2976       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2977       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2978       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2979   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2980   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2981 #if defined(__MINGW32__) && ! defined(__MINGW64__)
2982   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2983   // Can we make a big allocation?
2984   void *big = malloc(maxbuf);
2985   if (! big) {
2986     // The big allocation failed. Let's be a little lenient.
2987     maxbuf = maxbuf / 2;
2988   }
2989   free(big);
2990 #endif
2991 
2992   for (int i = 0; i < num_prefixes; ++i) {
2993     set_subtest("\"%s\"", prefixes[i]);
2994     XML_Parser parser = XML_ParserCreate(NULL);
2995     const int prefix_len = (int)strlen(prefixes[i]);
2996     const enum XML_Status s
2997         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2998     if (s != XML_STATUS_OK)
2999       xml_failure(parser);
3000 
3001     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3002     // subtracting the whole prefix is easiest, and close enough.
3003     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3004     // The limit should be consistent; no prefix should allow us to
3005     // reach above the max buffer size.
3006     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3007     XML_ParserFree(parser);
3008   }
3009 }
3010 END_TEST
3011 
START_TEST(test_getbuffer_allocates_on_zero_len)3012 START_TEST(test_getbuffer_allocates_on_zero_len) {
3013   for (int first_len = 1; first_len >= 0; first_len--) {
3014     set_subtest("with len=%d first", first_len);
3015     XML_Parser parser = XML_ParserCreate(NULL);
3016     assert_true(parser != NULL);
3017     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3018     assert_true(XML_GetBuffer(parser, 0) != NULL);
3019     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3020       xml_failure(parser);
3021     XML_ParserFree(parser);
3022   }
3023 }
3024 END_TEST
3025 
3026 /* Test position information macros */
START_TEST(test_byte_info_at_end)3027 START_TEST(test_byte_info_at_end) {
3028   const char *text = "<doc></doc>";
3029 
3030   if (XML_GetCurrentByteIndex(g_parser) != -1
3031       || XML_GetCurrentByteCount(g_parser) != 0)
3032     fail("Byte index/count incorrect at start of parse");
3033   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3034       == XML_STATUS_ERROR)
3035     xml_failure(g_parser);
3036   /* At end, the count will be zero and the index the end of string */
3037   if (XML_GetCurrentByteCount(g_parser) != 0)
3038     fail("Terminal byte count incorrect");
3039   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3040     fail("Terminal byte index incorrect");
3041 }
3042 END_TEST
3043 
3044 /* Test position information from errors */
3045 #define PRE_ERROR_STR "<doc></"
3046 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3047 START_TEST(test_byte_info_at_error) {
3048   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3049 
3050   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3051       == XML_STATUS_OK)
3052     fail("Syntax error not faulted");
3053   if (XML_GetCurrentByteCount(g_parser) != 0)
3054     fail("Error byte count incorrect");
3055   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3056     fail("Error byte index incorrect");
3057 }
3058 END_TEST
3059 #undef PRE_ERROR_STR
3060 #undef POST_ERROR_STR
3061 
3062 /* Test position information in handler */
3063 #define START_ELEMENT "<e>"
3064 #define CDATA_TEXT "Hello"
3065 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3066 START_TEST(test_byte_info_at_cdata) {
3067   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3068   int offset, size;
3069   ByteTestData data;
3070 
3071   /* Check initial context is empty */
3072   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3073     fail("Unexpected context at start of parse");
3074 
3075   data.start_element_len = (int)strlen(START_ELEMENT);
3076   data.cdata_len = (int)strlen(CDATA_TEXT);
3077   data.total_string_len = (int)strlen(text);
3078   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3079   XML_SetUserData(g_parser, &data);
3080   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3081     xml_failure(g_parser);
3082 }
3083 END_TEST
3084 #undef START_ELEMENT
3085 #undef CDATA_TEXT
3086 #undef END_ELEMENT
3087 
3088 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3089 START_TEST(test_predefined_entities) {
3090   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3091   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3092   const XML_Char *result = XCS("<>&\"'");
3093   CharData storage;
3094 
3095   XML_SetDefaultHandler(g_parser, accumulate_characters);
3096   /* run_character_check uses XML_SetCharacterDataHandler(), which
3097    * unfortunately heads off a code path that we need to exercise.
3098    */
3099   CharData_Init(&storage);
3100   XML_SetUserData(g_parser, &storage);
3101   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3102       == XML_STATUS_ERROR)
3103     xml_failure(g_parser);
3104   /* The default handler doesn't translate the entities */
3105   CharData_CheckXMLChars(&storage, expected);
3106 
3107   /* Now try again and check the translation */
3108   XML_ParserReset(g_parser, NULL);
3109   run_character_check(text, result);
3110 }
3111 END_TEST
3112 
3113 /* Regression test that an invalid tag in an external parameter
3114  * reference in an external DTD is correctly faulted.
3115  *
3116  * Only a few specific tags are legal in DTDs ignoring comments and
3117  * processing instructions, all of which begin with an exclamation
3118  * mark.  "<el/>" is not one of them, so the parser should raise an
3119  * error on encountering it.
3120  */
START_TEST(test_invalid_tag_in_dtd)3121 START_TEST(test_invalid_tag_in_dtd) {
3122   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3123                      "<doc></doc>\n";
3124 
3125   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3126   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3127   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3128                  "Invalid tag IN DTD external param not rejected");
3129 }
3130 END_TEST
3131 
3132 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3133 START_TEST(test_not_predefined_entities) {
3134   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3135                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3136   int i = 0;
3137 
3138   while (text[i] != NULL) {
3139     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3140                    "Undefined entity not rejected");
3141     XML_ParserReset(g_parser, NULL);
3142     i++;
3143   }
3144 }
3145 END_TEST
3146 
3147 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3148 START_TEST(test_ignore_section) {
3149   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3150                      "<doc><e>&entity;</e></doc>";
3151   const XML_Char *expected
3152       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3153   CharData storage;
3154 
3155   CharData_Init(&storage);
3156   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3157   XML_SetUserData(g_parser, &storage);
3158   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3159   XML_SetDefaultHandler(g_parser, accumulate_characters);
3160   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3161   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3162   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3163   XML_SetStartElementHandler(g_parser, dummy_start_element);
3164   XML_SetEndElementHandler(g_parser, dummy_end_element);
3165   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3166       == XML_STATUS_ERROR)
3167     xml_failure(g_parser);
3168   CharData_CheckXMLChars(&storage, expected);
3169 }
3170 END_TEST
3171 
START_TEST(test_ignore_section_utf16)3172 START_TEST(test_ignore_section_utf16) {
3173   const char text[] =
3174       /* <!DOCTYPE d SYSTEM 's'> */
3175       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3176       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3177       /* <d><e>&en;</e></d> */
3178       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3179   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3180   CharData storage;
3181 
3182   CharData_Init(&storage);
3183   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3184   XML_SetUserData(g_parser, &storage);
3185   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3186   XML_SetDefaultHandler(g_parser, accumulate_characters);
3187   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3188   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3189   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3190   XML_SetStartElementHandler(g_parser, dummy_start_element);
3191   XML_SetEndElementHandler(g_parser, dummy_end_element);
3192   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3193       == XML_STATUS_ERROR)
3194     xml_failure(g_parser);
3195   CharData_CheckXMLChars(&storage, expected);
3196 }
3197 END_TEST
3198 
START_TEST(test_ignore_section_utf16_be)3199 START_TEST(test_ignore_section_utf16_be) {
3200   const char text[] =
3201       /* <!DOCTYPE d SYSTEM 's'> */
3202       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3203       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3204       /* <d><e>&en;</e></d> */
3205       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3206   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3207   CharData storage;
3208 
3209   CharData_Init(&storage);
3210   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3211   XML_SetUserData(g_parser, &storage);
3212   XML_SetExternalEntityRefHandler(g_parser,
3213                                   external_entity_load_ignore_utf16_be);
3214   XML_SetDefaultHandler(g_parser, accumulate_characters);
3215   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3216   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3217   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3218   XML_SetStartElementHandler(g_parser, dummy_start_element);
3219   XML_SetEndElementHandler(g_parser, dummy_end_element);
3220   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3221       == XML_STATUS_ERROR)
3222     xml_failure(g_parser);
3223   CharData_CheckXMLChars(&storage, expected);
3224 }
3225 END_TEST
3226 
3227 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3228 START_TEST(test_bad_ignore_section) {
3229   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3230                      "<doc><e>&entity;</e></doc>";
3231   ExtFaults faults[]
3232       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3233           XML_ERROR_SYNTAX},
3234          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3235           XML_ERROR_INVALID_TOKEN},
3236          {/* FIrst two bytes of a three-byte char */
3237           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3238           XML_ERROR_PARTIAL_CHAR},
3239          {NULL, NULL, NULL, XML_ERROR_NONE}};
3240   ExtFaults *fault;
3241 
3242   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3243     set_subtest("%s", fault->parse_text);
3244     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3245     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3246     XML_SetUserData(g_parser, fault);
3247     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3248                    "Incomplete IGNORE section not failed");
3249     XML_ParserReset(g_parser, NULL);
3250   }
3251 }
3252 END_TEST
3253 
3254 struct bom_testdata {
3255   const char *external;
3256   int split;
3257   XML_Bool nested_callback_happened;
3258 };
3259 
3260 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3261 external_bom_checker(XML_Parser parser, const XML_Char *context,
3262                      const XML_Char *base, const XML_Char *systemId,
3263                      const XML_Char *publicId) {
3264   const char *text;
3265   UNUSED_P(base);
3266   UNUSED_P(systemId);
3267   UNUSED_P(publicId);
3268 
3269   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3270   if (ext_parser == NULL)
3271     fail("Could not create external entity parser");
3272 
3273   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3274     struct bom_testdata *const testdata
3275         = (struct bom_testdata *)XML_GetUserData(parser);
3276     const char *const external = testdata->external;
3277     const int split = testdata->split;
3278     testdata->nested_callback_happened = XML_TRUE;
3279 
3280     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3281         != XML_STATUS_OK) {
3282       xml_failure(ext_parser);
3283     }
3284     text = external + split; // the parse below will continue where we left off.
3285   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3286     text = "<!ELEMENT doc EMPTY>\n"
3287            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3288            "<!ENTITY % e2 '%e1;'>\n";
3289   } else {
3290     fail("unknown systemId");
3291   }
3292 
3293   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3294       != XML_STATUS_OK)
3295     xml_failure(ext_parser);
3296 
3297   XML_ParserFree(ext_parser);
3298   return XML_STATUS_OK;
3299 }
3300 
3301 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3302 START_TEST(test_external_bom_consumed) {
3303   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3304                            "<doc></doc>\n";
3305   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3306   const int len = (int)strlen(external);
3307   for (int split = 0; split <= len; ++split) {
3308     set_subtest("split at byte %d", split);
3309 
3310     struct bom_testdata testdata;
3311     testdata.external = external;
3312     testdata.split = split;
3313     testdata.nested_callback_happened = XML_FALSE;
3314 
3315     XML_Parser parser = XML_ParserCreate(NULL);
3316     if (parser == NULL) {
3317       fail("Couldn't create parser");
3318     }
3319     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3320     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3321     XML_SetUserData(parser, &testdata);
3322     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3323         == XML_STATUS_ERROR)
3324       xml_failure(parser);
3325     if (! testdata.nested_callback_happened) {
3326       fail("ref handler not called");
3327     }
3328     XML_ParserFree(parser);
3329   }
3330 }
3331 END_TEST
3332 
3333 /* Test recursive parsing */
START_TEST(test_external_entity_values)3334 START_TEST(test_external_entity_values) {
3335   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3336                      "<doc></doc>\n";
3337   ExtFaults data_004_2[] = {
3338       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3339       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3340        XML_ERROR_INVALID_TOKEN},
3341       {"'wombat", "Unterminated string not faulted", NULL,
3342        XML_ERROR_UNCLOSED_TOKEN},
3343       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3344        XML_ERROR_PARTIAL_CHAR},
3345       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3346       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3347        XML_ERROR_XML_DECL},
3348       {/* UTF-8 BOM */
3349        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3350        XML_ERROR_NONE},
3351       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3352        "Invalid token after text declaration not faulted", NULL,
3353        XML_ERROR_INVALID_TOKEN},
3354       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3355        "Unterminated string after text decl not faulted", NULL,
3356        XML_ERROR_UNCLOSED_TOKEN},
3357       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3358        "Partial UTF-8 character after text decl not faulted", NULL,
3359        XML_ERROR_PARTIAL_CHAR},
3360       {"%e1;", "Recursive parameter entity not faulted", NULL,
3361        XML_ERROR_RECURSIVE_ENTITY_REF},
3362       {NULL, NULL, NULL, XML_ERROR_NONE}};
3363   int i;
3364 
3365   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3366     set_subtest("%s", data_004_2[i].parse_text);
3367     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3368     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3369     XML_SetUserData(g_parser, &data_004_2[i]);
3370     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3371         == XML_STATUS_ERROR)
3372       xml_failure(g_parser);
3373     XML_ParserReset(g_parser, NULL);
3374   }
3375 }
3376 END_TEST
3377 
3378 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3379 START_TEST(test_ext_entity_not_standalone) {
3380   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3381                      "<doc></doc>";
3382 
3383   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3384   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3385   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3386                  "Standalone rejection not caught");
3387 }
3388 END_TEST
3389 
START_TEST(test_ext_entity_value_abort)3390 START_TEST(test_ext_entity_value_abort) {
3391   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3392                      "<doc></doc>\n";
3393 
3394   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3395   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3396   g_resumable = XML_FALSE;
3397   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3398       == XML_STATUS_ERROR)
3399     xml_failure(g_parser);
3400 }
3401 END_TEST
3402 
START_TEST(test_bad_public_doctype)3403 START_TEST(test_bad_public_doctype) {
3404   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3405                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3406                      "<doc></doc>";
3407 
3408   /* Setting a handler provokes a particular code path */
3409   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3410                             dummy_end_doctype_handler);
3411   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3412 }
3413 END_TEST
3414 
3415 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3416 START_TEST(test_attribute_enum_value) {
3417   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3418                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3419                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3420   ExtTest dtd_data
3421       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3422          "<!ELEMENT a EMPTY>\n"
3423          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3424          NULL, NULL};
3425   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3426 
3427   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3428   XML_SetUserData(g_parser, &dtd_data);
3429   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3430   /* An attribute list handler provokes a different code path */
3431   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3432   run_ext_character_check(text, &dtd_data, expected);
3433 }
3434 END_TEST
3435 
3436 /* Slightly bizarrely, the library seems to silently ignore entity
3437  * definitions for predefined entities, even when they are wrong.  The
3438  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3439  * to happen, so this is currently treated as acceptable.
3440  */
START_TEST(test_predefined_entity_redefinition)3441 START_TEST(test_predefined_entity_redefinition) {
3442   const char *text = "<!DOCTYPE doc [\n"
3443                      "<!ENTITY apos 'foo'>\n"
3444                      "]>\n"
3445                      "<doc>&apos;</doc>";
3446   run_character_check(text, XCS("'"));
3447 }
3448 END_TEST
3449 
3450 /* Test that the parser stops processing the DTD after an unresolved
3451  * parameter entity is encountered.
3452  */
START_TEST(test_dtd_stop_processing)3453 START_TEST(test_dtd_stop_processing) {
3454   const char *text = "<!DOCTYPE doc [\n"
3455                      "%foo;\n"
3456                      "<!ENTITY bar 'bas'>\n"
3457                      "]><doc/>";
3458 
3459   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3460   init_dummy_handlers();
3461   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3462       == XML_STATUS_ERROR)
3463     xml_failure(g_parser);
3464   if (get_dummy_handler_flags() != 0)
3465     fail("DTD processing still going after undefined PE");
3466 }
3467 END_TEST
3468 
3469 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3470 START_TEST(test_public_notation_no_sysid) {
3471   const char *text = "<!DOCTYPE doc [\n"
3472                      "<!NOTATION note PUBLIC 'foo'>\n"
3473                      "<!ELEMENT doc EMPTY>\n"
3474                      "]>\n<doc/>";
3475 
3476   init_dummy_handlers();
3477   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3478   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3479       == XML_STATUS_ERROR)
3480     xml_failure(g_parser);
3481   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3482     fail("Notation declaration handler not called");
3483 }
3484 END_TEST
3485 
START_TEST(test_nested_groups)3486 START_TEST(test_nested_groups) {
3487   const char *text
3488       = "<!DOCTYPE doc [\n"
3489         "<!ELEMENT doc "
3490         /* Sixteen elements per line */
3491         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3492         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3493         "))))))))))))))))))))))))))))))))>\n"
3494         "<!ELEMENT e EMPTY>"
3495         "]>\n"
3496         "<doc><e/></doc>";
3497   CharData storage;
3498 
3499   CharData_Init(&storage);
3500   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3501   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3502   XML_SetUserData(g_parser, &storage);
3503   init_dummy_handlers();
3504   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3505       == XML_STATUS_ERROR)
3506     xml_failure(g_parser);
3507   CharData_CheckXMLChars(&storage, XCS("doce"));
3508   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3509     fail("Element handler not fired");
3510 }
3511 END_TEST
3512 
START_TEST(test_group_choice)3513 START_TEST(test_group_choice) {
3514   const char *text = "<!DOCTYPE doc [\n"
3515                      "<!ELEMENT doc (a|b|c)+>\n"
3516                      "<!ELEMENT a EMPTY>\n"
3517                      "<!ELEMENT b (#PCDATA)>\n"
3518                      "<!ELEMENT c ANY>\n"
3519                      "]>\n"
3520                      "<doc>\n"
3521                      "<a/>\n"
3522                      "<b attr='foo'>This is a foo</b>\n"
3523                      "<c></c>\n"
3524                      "</doc>\n";
3525 
3526   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3527   init_dummy_handlers();
3528   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3529       == XML_STATUS_ERROR)
3530     xml_failure(g_parser);
3531   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3532     fail("Element handler flag not raised");
3533 }
3534 END_TEST
3535 
START_TEST(test_standalone_parameter_entity)3536 START_TEST(test_standalone_parameter_entity) {
3537   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3538                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3539                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3540                      "%entity;\n"
3541                      "]>\n"
3542                      "<doc></doc>";
3543   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3544 
3545   XML_SetUserData(g_parser, dtd_data);
3546   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3547   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3548   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3549       == XML_STATUS_ERROR)
3550     xml_failure(g_parser);
3551 }
3552 END_TEST
3553 
3554 /* Test skipping of parameter entity in an external DTD */
3555 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3556 START_TEST(test_skipped_parameter_entity) {
3557   const char *text = "<?xml version='1.0'?>\n"
3558                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3559                      "<!ELEMENT root (#PCDATA|a)* >\n"
3560                      "]>\n"
3561                      "<root></root>";
3562   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3563 
3564   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3565   XML_SetUserData(g_parser, &dtd_data);
3566   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3567   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3568   init_dummy_handlers();
3569   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3570       == XML_STATUS_ERROR)
3571     xml_failure(g_parser);
3572   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3573     fail("Skip handler not executed");
3574 }
3575 END_TEST
3576 
3577 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3578 START_TEST(test_recursive_external_parameter_entity) {
3579   const char *text = "<?xml version='1.0'?>\n"
3580                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3581                      "<!ELEMENT root (#PCDATA|a)* >\n"
3582                      "]>\n"
3583                      "<root></root>";
3584   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3585                         "Recursive external parameter entity not faulted", NULL,
3586                         XML_ERROR_RECURSIVE_ENTITY_REF};
3587 
3588   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3589   XML_SetUserData(g_parser, &dtd_data);
3590   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3591   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3592                  "Recursive external parameter not spotted");
3593 }
3594 END_TEST
3595 
3596 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3597 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3598   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3599                      "<doc></doc>\n";
3600 
3601   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3602   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3603   XML_SetUserData(g_parser, NULL);
3604   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3605       == XML_STATUS_ERROR)
3606     xml_failure(g_parser);
3607 
3608   /* Now repeat without the external entity ref handler invoking
3609    * another copy of itself.
3610    */
3611   XML_ParserReset(g_parser, NULL);
3612   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3613   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3614   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3615   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3616       == XML_STATUS_ERROR)
3617     xml_failure(g_parser);
3618 }
3619 END_TEST
3620 
3621 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3622 START_TEST(test_suspend_xdecl) {
3623   const char *text = long_character_data_text;
3624 
3625   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3626   XML_SetUserData(g_parser, g_parser);
3627   g_resumable = XML_TRUE;
3628   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3629       != XML_STATUS_SUSPENDED)
3630     xml_failure(g_parser);
3631   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3632     xml_failure(g_parser);
3633   /* Attempt to start a new parse while suspended */
3634   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3635       != XML_STATUS_ERROR)
3636     fail("Attempt to parse while suspended not faulted");
3637   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3638     fail("Suspended parse not faulted with correct error");
3639 }
3640 END_TEST
3641 
3642 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3643 START_TEST(test_abort_epilog) {
3644   const char *text = "<doc></doc>\n\r\n";
3645   XML_Char trigger_char = XCS('\r');
3646 
3647   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3648   XML_SetUserData(g_parser, &trigger_char);
3649   g_resumable = XML_FALSE;
3650   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3651       != XML_STATUS_ERROR)
3652     fail("Abort not triggered");
3653   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3654     xml_failure(g_parser);
3655 }
3656 END_TEST
3657 
3658 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3659 START_TEST(test_abort_epilog_2) {
3660   const char *text = "<doc></doc>\n";
3661   XML_Char trigger_char = XCS('\n');
3662 
3663   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3664   XML_SetUserData(g_parser, &trigger_char);
3665   g_resumable = XML_FALSE;
3666   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3667 }
3668 END_TEST
3669 
3670 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3671 START_TEST(test_suspend_epilog) {
3672   const char *text = "<doc></doc>\n";
3673   XML_Char trigger_char = XCS('\n');
3674 
3675   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3676   XML_SetUserData(g_parser, &trigger_char);
3677   g_resumable = XML_TRUE;
3678   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3679       != XML_STATUS_SUSPENDED)
3680     xml_failure(g_parser);
3681 }
3682 END_TEST
3683 
START_TEST(test_suspend_in_sole_empty_tag)3684 START_TEST(test_suspend_in_sole_empty_tag) {
3685   const char *text = "<doc/>";
3686   enum XML_Status rc;
3687 
3688   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3689   XML_SetUserData(g_parser, g_parser);
3690   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3691   if (rc == XML_STATUS_ERROR)
3692     xml_failure(g_parser);
3693   else if (rc != XML_STATUS_SUSPENDED)
3694     fail("Suspend not triggered");
3695   rc = XML_ResumeParser(g_parser);
3696   if (rc == XML_STATUS_ERROR)
3697     xml_failure(g_parser);
3698   else if (rc != XML_STATUS_OK)
3699     fail("Resume failed");
3700 }
3701 END_TEST
3702 
START_TEST(test_unfinished_epilog)3703 START_TEST(test_unfinished_epilog) {
3704   const char *text = "<doc></doc><";
3705 
3706   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3707                  "Incomplete epilog entry not faulted");
3708 }
3709 END_TEST
3710 
START_TEST(test_partial_char_in_epilog)3711 START_TEST(test_partial_char_in_epilog) {
3712   const char *text = "<doc></doc>\xe2\x82";
3713 
3714   /* First check that no fault is raised if the parse is not finished */
3715   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3716       == XML_STATUS_ERROR)
3717     xml_failure(g_parser);
3718   /* Now check that it is faulted once we finish */
3719   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3720     fail("Partial character in epilog not faulted");
3721   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3722     xml_failure(g_parser);
3723 }
3724 END_TEST
3725 
3726 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3727 START_TEST(test_suspend_resume_internal_entity) {
3728   const char *text
3729       = "<!DOCTYPE doc [\n"
3730         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3731         "]>\n"
3732         "<doc>&foo;</doc>\n";
3733   const XML_Char *expected1 = XCS("Hi");
3734   const XML_Char *expected2 = XCS("HiHo");
3735   CharData storage;
3736 
3737   CharData_Init(&storage);
3738   XML_SetStartElementHandler(g_parser, start_element_suspender);
3739   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3740   XML_SetUserData(g_parser, &storage);
3741   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3742   // we won't know exactly how much input we actually managed to give Expat.
3743   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3744       != XML_STATUS_SUSPENDED)
3745     xml_failure(g_parser);
3746   CharData_CheckXMLChars(&storage, XCS(""));
3747   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3748     xml_failure(g_parser);
3749   CharData_CheckXMLChars(&storage, expected1);
3750   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3751     xml_failure(g_parser);
3752   CharData_CheckXMLChars(&storage, expected2);
3753 }
3754 END_TEST
3755 
START_TEST(test_suspend_resume_internal_entity_issue_629)3756 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3757   const char *const text
3758       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3759         "<"
3760         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3761         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3762         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3763         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3764         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3765         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3766         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3767         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3768         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3769         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800         "/>"
3801         "</b></a>";
3802   const size_t firstChunkSizeBytes = 54;
3803 
3804   XML_Parser parser = XML_ParserCreate(NULL);
3805   XML_SetUserData(parser, parser);
3806   XML_SetCommentHandler(parser, suspending_comment_handler);
3807 
3808   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3809       != XML_STATUS_SUSPENDED)
3810     xml_failure(parser);
3811   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3812     xml_failure(parser);
3813   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3814                               (int)(strlen(text) - firstChunkSizeBytes),
3815                               XML_TRUE)
3816       != XML_STATUS_OK)
3817     xml_failure(parser);
3818   XML_ParserFree(parser);
3819 }
3820 END_TEST
3821 
3822 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3823 START_TEST(test_resume_entity_with_syntax_error) {
3824   const char *text = "<!DOCTYPE doc [\n"
3825                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3826                      "]>\n"
3827                      "<doc>&foo;</doc>\n";
3828 
3829   XML_SetStartElementHandler(g_parser, start_element_suspender);
3830   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3831       != XML_STATUS_SUSPENDED)
3832     xml_failure(g_parser);
3833   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3834     fail("Syntax error in entity not faulted");
3835   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3836     xml_failure(g_parser);
3837 }
3838 END_TEST
3839 
3840 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)3841 START_TEST(test_suspend_resume_parameter_entity) {
3842   const char *text = "<!DOCTYPE doc [\n"
3843                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3844                      "%foo;\n"
3845                      "]>\n"
3846                      "<doc>Hello, world</doc>";
3847   const XML_Char *expected = XCS("Hello, world");
3848   CharData storage;
3849 
3850   CharData_Init(&storage);
3851   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3852   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3853   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3854   XML_SetUserData(g_parser, &storage);
3855   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3856       != XML_STATUS_SUSPENDED)
3857     xml_failure(g_parser);
3858   CharData_CheckXMLChars(&storage, XCS(""));
3859   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3860     xml_failure(g_parser);
3861   CharData_CheckXMLChars(&storage, expected);
3862 }
3863 END_TEST
3864 
3865 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)3866 START_TEST(test_restart_on_error) {
3867   const char *text = "<$doc><doc></doc>";
3868 
3869   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3870       != XML_STATUS_ERROR)
3871     fail("Invalid tag name not faulted");
3872   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3873     xml_failure(g_parser);
3874   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3875     fail("Restarting invalid parse not faulted");
3876   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3877     xml_failure(g_parser);
3878 }
3879 END_TEST
3880 
3881 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)3882 START_TEST(test_reject_lt_in_attribute_value) {
3883   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3884                      "<doc></doc>";
3885 
3886   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3887                  "Bad attribute default not faulted");
3888 }
3889 END_TEST
3890 
START_TEST(test_reject_unfinished_param_in_att_value)3891 START_TEST(test_reject_unfinished_param_in_att_value) {
3892   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3893                      "<doc></doc>";
3894 
3895   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3896                  "Bad attribute default not faulted");
3897 }
3898 END_TEST
3899 
START_TEST(test_trailing_cr_in_att_value)3900 START_TEST(test_trailing_cr_in_att_value) {
3901   const char *text = "<doc a='value\r'/>";
3902 
3903   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3904       == XML_STATUS_ERROR)
3905     xml_failure(g_parser);
3906 }
3907 END_TEST
3908 
3909 /* Try parsing a general entity within a parameter entity in a
3910  * standalone internal DTD.  Covers a corner case in the parser.
3911  */
START_TEST(test_standalone_internal_entity)3912 START_TEST(test_standalone_internal_entity) {
3913   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3914                      "<!DOCTYPE doc [\n"
3915                      "  <!ELEMENT doc (#PCDATA)>\n"
3916                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3917                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3918                      "  %pe;\n"
3919                      "]>\n"
3920                      "<doc att2='any'/>";
3921 
3922   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3923   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3924       == XML_STATUS_ERROR)
3925     xml_failure(g_parser);
3926 }
3927 END_TEST
3928 
3929 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)3930 START_TEST(test_skipped_external_entity) {
3931   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3932                      "<doc></doc>\n";
3933   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3934                        "<!ENTITY % e2 '%e1;'>\n",
3935                        NULL, NULL};
3936 
3937   XML_SetUserData(g_parser, &test_data);
3938   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3939   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3940   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3941       == XML_STATUS_ERROR)
3942     xml_failure(g_parser);
3943 }
3944 END_TEST
3945 
3946 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)3947 START_TEST(test_skipped_null_loaded_ext_entity) {
3948   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3949                      "<doc />";
3950   ExtHdlrData test_data
3951       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3952          "<!ENTITY % pe2 '%pe1;'>\n"
3953          "%pe2;\n",
3954          external_entity_null_loader};
3955 
3956   XML_SetUserData(g_parser, &test_data);
3957   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3958   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3959   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3960       == XML_STATUS_ERROR)
3961     xml_failure(g_parser);
3962 }
3963 END_TEST
3964 
START_TEST(test_skipped_unloaded_ext_entity)3965 START_TEST(test_skipped_unloaded_ext_entity) {
3966   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3967                      "<doc />";
3968   ExtHdlrData test_data
3969       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3970          "<!ENTITY % pe2 '%pe1;'>\n"
3971          "%pe2;\n",
3972          NULL};
3973 
3974   XML_SetUserData(g_parser, &test_data);
3975   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3976   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3977   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3978       == XML_STATUS_ERROR)
3979     xml_failure(g_parser);
3980 }
3981 END_TEST
3982 
3983 /* Test that a parameter entity value ending with a carriage return
3984  * has it translated internally into a newline.
3985  */
START_TEST(test_param_entity_with_trailing_cr)3986 START_TEST(test_param_entity_with_trailing_cr) {
3987 #define PARAM_ENTITY_NAME "pe"
3988 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3989   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3990                      "<doc/>";
3991   ExtTest test_data
3992       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3993          "%" PARAM_ENTITY_NAME ";\n",
3994          NULL, NULL};
3995 
3996   XML_SetUserData(g_parser, &test_data);
3997   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3998   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3999   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4000   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4001                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4002   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4003       == XML_STATUS_ERROR)
4004     xml_failure(g_parser);
4005   int entity_match_flag = get_param_entity_match_flag();
4006   if (entity_match_flag == ENTITY_MATCH_FAIL)
4007     fail("Parameter entity CR->NEWLINE conversion failed");
4008   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4009     fail("Parameter entity not parsed");
4010 }
4011 #undef PARAM_ENTITY_NAME
4012 #undef PARAM_ENTITY_CORE_VALUE
4013 END_TEST
4014 
START_TEST(test_invalid_character_entity)4015 START_TEST(test_invalid_character_entity) {
4016   const char *text = "<!DOCTYPE doc [\n"
4017                      "  <!ENTITY entity '&#x110000;'>\n"
4018                      "]>\n"
4019                      "<doc>&entity;</doc>";
4020 
4021   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4022                  "Out of range character reference not faulted");
4023 }
4024 END_TEST
4025 
START_TEST(test_invalid_character_entity_2)4026 START_TEST(test_invalid_character_entity_2) {
4027   const char *text = "<!DOCTYPE doc [\n"
4028                      "  <!ENTITY entity '&#xg0;'>\n"
4029                      "]>\n"
4030                      "<doc>&entity;</doc>";
4031 
4032   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4033                  "Out of range character reference not faulted");
4034 }
4035 END_TEST
4036 
START_TEST(test_invalid_character_entity_3)4037 START_TEST(test_invalid_character_entity_3) {
4038   const char text[] =
4039       /* <!DOCTYPE doc [\n */
4040       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4041       /* U+0E04 = KHO KHWAI
4042        * U+0E08 = CHO CHAN */
4043       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4044       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4045       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4046       /* ]>\n */
4047       "\0]\0>\0\n"
4048       /* <doc>&entity;</doc> */
4049       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4050 
4051   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4052       != XML_STATUS_ERROR)
4053     fail("Invalid start of entity name not faulted");
4054   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4055     xml_failure(g_parser);
4056 }
4057 END_TEST
4058 
START_TEST(test_invalid_character_entity_4)4059 START_TEST(test_invalid_character_entity_4) {
4060   const char *text = "<!DOCTYPE doc [\n"
4061                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4062                      "]>\n"
4063                      "<doc>&entity;</doc>";
4064 
4065   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4066                  "Out of range character reference not faulted");
4067 }
4068 END_TEST
4069 
4070 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4071 START_TEST(test_pi_handled_in_default) {
4072   const char *text = "<?test processing instruction?>\n<doc/>";
4073   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4074   CharData storage;
4075 
4076   CharData_Init(&storage);
4077   XML_SetDefaultHandler(g_parser, accumulate_characters);
4078   XML_SetUserData(g_parser, &storage);
4079   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4080       == XML_STATUS_ERROR)
4081     xml_failure(g_parser);
4082   CharData_CheckXMLChars(&storage, expected);
4083 }
4084 END_TEST
4085 
4086 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4087 START_TEST(test_comment_handled_in_default) {
4088   const char *text = "<!-- This is a comment -->\n<doc/>";
4089   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4090   CharData storage;
4091 
4092   CharData_Init(&storage);
4093   XML_SetDefaultHandler(g_parser, accumulate_characters);
4094   XML_SetUserData(g_parser, &storage);
4095   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4096       == XML_STATUS_ERROR)
4097     xml_failure(g_parser);
4098   CharData_CheckXMLChars(&storage, expected);
4099 }
4100 END_TEST
4101 
4102 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4103 START_TEST(test_pi_yml) {
4104   const char *text = "<?yml something like data?><doc/>";
4105   const XML_Char *expected = XCS("yml: something like data\n");
4106   CharData storage;
4107 
4108   CharData_Init(&storage);
4109   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4110   XML_SetUserData(g_parser, &storage);
4111   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4112       == XML_STATUS_ERROR)
4113     xml_failure(g_parser);
4114   CharData_CheckXMLChars(&storage, expected);
4115 }
4116 END_TEST
4117 
START_TEST(test_pi_xnl)4118 START_TEST(test_pi_xnl) {
4119   const char *text = "<?xnl nothing like data?><doc/>";
4120   const XML_Char *expected = XCS("xnl: nothing like data\n");
4121   CharData storage;
4122 
4123   CharData_Init(&storage);
4124   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4125   XML_SetUserData(g_parser, &storage);
4126   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4127       == XML_STATUS_ERROR)
4128     xml_failure(g_parser);
4129   CharData_CheckXMLChars(&storage, expected);
4130 }
4131 END_TEST
4132 
START_TEST(test_pi_xmm)4133 START_TEST(test_pi_xmm) {
4134   const char *text = "<?xmm everything like data?><doc/>";
4135   const XML_Char *expected = XCS("xmm: everything like data\n");
4136   CharData storage;
4137 
4138   CharData_Init(&storage);
4139   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4140   XML_SetUserData(g_parser, &storage);
4141   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4142       == XML_STATUS_ERROR)
4143     xml_failure(g_parser);
4144   CharData_CheckXMLChars(&storage, expected);
4145 }
4146 END_TEST
4147 
START_TEST(test_utf16_pi)4148 START_TEST(test_utf16_pi) {
4149   const char text[] =
4150       /* <?{KHO KHWAI}{CHO CHAN}?>
4151        * where {KHO KHWAI} = U+0E04
4152        * and   {CHO CHAN}  = U+0E08
4153        */
4154       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4155       /* <q/> */
4156       "<\0q\0/\0>\0";
4157 #ifdef XML_UNICODE
4158   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4159 #else
4160   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4161 #endif
4162   CharData storage;
4163 
4164   CharData_Init(&storage);
4165   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4166   XML_SetUserData(g_parser, &storage);
4167   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4168       == XML_STATUS_ERROR)
4169     xml_failure(g_parser);
4170   CharData_CheckXMLChars(&storage, expected);
4171 }
4172 END_TEST
4173 
START_TEST(test_utf16_be_pi)4174 START_TEST(test_utf16_be_pi) {
4175   const char text[] =
4176       /* <?{KHO KHWAI}{CHO CHAN}?>
4177        * where {KHO KHWAI} = U+0E04
4178        * and   {CHO CHAN}  = U+0E08
4179        */
4180       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4181       /* <q/> */
4182       "\0<\0q\0/\0>";
4183 #ifdef XML_UNICODE
4184   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4185 #else
4186   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4187 #endif
4188   CharData storage;
4189 
4190   CharData_Init(&storage);
4191   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4192   XML_SetUserData(g_parser, &storage);
4193   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4194       == XML_STATUS_ERROR)
4195     xml_failure(g_parser);
4196   CharData_CheckXMLChars(&storage, expected);
4197 }
4198 END_TEST
4199 
4200 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4201 START_TEST(test_utf16_be_comment) {
4202   const char text[] =
4203       /* <!-- Comment A --> */
4204       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4205       /* <doc/> */
4206       "\0<\0d\0o\0c\0/\0>";
4207   const XML_Char *expected = XCS(" Comment A ");
4208   CharData storage;
4209 
4210   CharData_Init(&storage);
4211   XML_SetCommentHandler(g_parser, accumulate_comment);
4212   XML_SetUserData(g_parser, &storage);
4213   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4214       == XML_STATUS_ERROR)
4215     xml_failure(g_parser);
4216   CharData_CheckXMLChars(&storage, expected);
4217 }
4218 END_TEST
4219 
START_TEST(test_utf16_le_comment)4220 START_TEST(test_utf16_le_comment) {
4221   const char text[] =
4222       /* <!-- Comment B --> */
4223       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4224       /* <doc/> */
4225       "<\0d\0o\0c\0/\0>\0";
4226   const XML_Char *expected = XCS(" Comment B ");
4227   CharData storage;
4228 
4229   CharData_Init(&storage);
4230   XML_SetCommentHandler(g_parser, accumulate_comment);
4231   XML_SetUserData(g_parser, &storage);
4232   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4233       == XML_STATUS_ERROR)
4234     xml_failure(g_parser);
4235   CharData_CheckXMLChars(&storage, expected);
4236 }
4237 END_TEST
4238 
4239 /* Test that the unknown encoding handler with map entries that expect
4240  * conversion but no conversion function is faulted
4241  */
START_TEST(test_missing_encoding_conversion_fn)4242 START_TEST(test_missing_encoding_conversion_fn) {
4243   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4244                      "<doc>\x81</doc>";
4245 
4246   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4247   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4248    * character introducing a two-byte sequence.  For this, it
4249    * requires a convert function.  The above function call doesn't
4250    * pass one through, so when BadEncodingHandler actually gets
4251    * called it should supply an invalid encoding.
4252    */
4253   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4254                  "Encoding with missing convert() not faulted");
4255 }
4256 END_TEST
4257 
START_TEST(test_failing_encoding_conversion_fn)4258 START_TEST(test_failing_encoding_conversion_fn) {
4259   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4260                      "<doc>\x81</doc>";
4261 
4262   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4263   /* BadEncodingHandler sets up an encoding with every top-bit-set
4264    * character introducing a two-byte sequence.  For this, it
4265    * requires a convert function.  The above function call passes
4266    * one that insists all possible sequences are invalid anyway.
4267    */
4268   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4269                  "Encoding with failing convert() not faulted");
4270 }
4271 END_TEST
4272 
4273 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4274 START_TEST(test_unknown_encoding_success) {
4275   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4276                      /* Equivalent to <eoc>Hello, world</eoc> */
4277                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4278 
4279   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4280   run_character_check(text, XCS("Hello, world"));
4281 }
4282 END_TEST
4283 
4284 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4285 START_TEST(test_unknown_encoding_bad_name) {
4286   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4287                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4288 
4289   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4290   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4291                  "Bad name start in unknown encoding not faulted");
4292 }
4293 END_TEST
4294 
4295 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4296 START_TEST(test_unknown_encoding_bad_name_2) {
4297   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4298                      "<d\xffoc>Hello, world</d\xffoc>";
4299 
4300   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4301   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4302                  "Bad name in unknown encoding not faulted");
4303 }
4304 END_TEST
4305 
4306 /* Test element name that is long enough to fill the conversion buffer
4307  * in an unknown encoding, finishing with an encoded character.
4308  */
START_TEST(test_unknown_encoding_long_name_1)4309 START_TEST(test_unknown_encoding_long_name_1) {
4310   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4311                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4312                      "Hi"
4313                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4314   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4315   CharData storage;
4316 
4317   CharData_Init(&storage);
4318   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4319   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4320   XML_SetUserData(g_parser, &storage);
4321   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4322       == XML_STATUS_ERROR)
4323     xml_failure(g_parser);
4324   CharData_CheckXMLChars(&storage, expected);
4325 }
4326 END_TEST
4327 
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4331 START_TEST(test_unknown_encoding_long_name_2) {
4332   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4333                      "<abcdefghabcdefghabcdefghijklmnop>"
4334                      "Hi"
4335                      "</abcdefghabcdefghabcdefghijklmnop>";
4336   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4337   CharData storage;
4338 
4339   CharData_Init(&storage);
4340   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4341   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4342   XML_SetUserData(g_parser, &storage);
4343   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4344       == XML_STATUS_ERROR)
4345     xml_failure(g_parser);
4346   CharData_CheckXMLChars(&storage, expected);
4347 }
4348 END_TEST
4349 
START_TEST(test_invalid_unknown_encoding)4350 START_TEST(test_invalid_unknown_encoding) {
4351   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4352                      "<doc>Hello world</doc>";
4353 
4354   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4355   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4356                  "Invalid unknown encoding not faulted");
4357 }
4358 END_TEST
4359 
START_TEST(test_unknown_ascii_encoding_ok)4360 START_TEST(test_unknown_ascii_encoding_ok) {
4361   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4362                      "<doc>Hello, world</doc>";
4363 
4364   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4365   run_character_check(text, XCS("Hello, world"));
4366 }
4367 END_TEST
4368 
START_TEST(test_unknown_ascii_encoding_fail)4369 START_TEST(test_unknown_ascii_encoding_fail) {
4370   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371                      "<doc>Hello, \x80 world</doc>";
4372 
4373   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4374   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4375                  "Invalid character not faulted");
4376 }
4377 END_TEST
4378 
START_TEST(test_unknown_encoding_invalid_length)4379 START_TEST(test_unknown_encoding_invalid_length) {
4380   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4381                      "<doc>Hello, world</doc>";
4382 
4383   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4384   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4385                  "Invalid unknown encoding not faulted");
4386 }
4387 END_TEST
4388 
START_TEST(test_unknown_encoding_invalid_topbit)4389 START_TEST(test_unknown_encoding_invalid_topbit) {
4390   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4391                      "<doc>Hello, world</doc>";
4392 
4393   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4394   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4395                  "Invalid unknown encoding not faulted");
4396 }
4397 END_TEST
4398 
START_TEST(test_unknown_encoding_invalid_surrogate)4399 START_TEST(test_unknown_encoding_invalid_surrogate) {
4400   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4401                      "<doc>Hello, \x82 world</doc>";
4402 
4403   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4404   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4405                  "Invalid unknown encoding not faulted");
4406 }
4407 END_TEST
4408 
START_TEST(test_unknown_encoding_invalid_high)4409 START_TEST(test_unknown_encoding_invalid_high) {
4410   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4411                      "<doc>Hello, world</doc>";
4412 
4413   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4414   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4415                  "Invalid unknown encoding not faulted");
4416 }
4417 END_TEST
4418 
START_TEST(test_unknown_encoding_invalid_attr_value)4419 START_TEST(test_unknown_encoding_invalid_attr_value) {
4420   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4421                      "<doc attr='\xff\x30'/>";
4422 
4423   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4424   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4425                  "Invalid attribute valid not faulted");
4426 }
4427 END_TEST
4428 
4429 /* Test an external entity parser set to use latin-1 detects UTF-16
4430  * BOMs correctly.
4431  */
4432 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4433 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4434   const char *text = "<!DOCTYPE doc [\n"
4435                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4436                      "]>\n"
4437                      "<doc>&en;</doc>";
4438   ExtTest2 test_data
4439       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4440          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4441           *   0x4c = L and 0x20 is a space
4442           */
4443          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4444 #ifdef XML_UNICODE
4445   const XML_Char *expected = XCS("\x00ff\x00feL ");
4446 #else
4447   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4448   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4449 #endif
4450   CharData storage;
4451 
4452   CharData_Init(&storage);
4453   test_data.storage = &storage;
4454   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4455   XML_SetUserData(g_parser, &test_data);
4456   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4457   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4458       == XML_STATUS_ERROR)
4459     xml_failure(g_parser);
4460   CharData_CheckXMLChars(&storage, expected);
4461 }
4462 END_TEST
4463 
START_TEST(test_ext_entity_latin1_utf16be_bom)4464 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4465   const char *text = "<!DOCTYPE doc [\n"
4466                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467                      "]>\n"
4468                      "<doc>&en;</doc>";
4469   ExtTest2 test_data
4470       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4471          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4472           *   0x4c = L and 0x20 is a space
4473           */
4474          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4475 #ifdef XML_UNICODE
4476   const XML_Char *expected = XCS("\x00fe\x00ff L");
4477 #else
4478   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4479   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4480 #endif
4481   CharData storage;
4482 
4483   CharData_Init(&storage);
4484   test_data.storage = &storage;
4485   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4486   XML_SetUserData(g_parser, &test_data);
4487   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4488   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4489       == XML_STATUS_ERROR)
4490     xml_failure(g_parser);
4491   CharData_CheckXMLChars(&storage, expected);
4492 }
4493 END_TEST
4494 
4495 /* Parsing the full buffer rather than a byte at a time makes a
4496  * difference to the encoding scanning code, so repeat the above tests
4497  * without breaking them down by byte.
4498  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4499 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4500   const char *text = "<!DOCTYPE doc [\n"
4501                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4502                      "]>\n"
4503                      "<doc>&en;</doc>";
4504   ExtTest2 test_data
4505       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4506          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4507           *   0x4c = L and 0x20 is a space
4508           */
4509          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4510 #ifdef XML_UNICODE
4511   const XML_Char *expected = XCS("\x00ff\x00feL ");
4512 #else
4513   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4514   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4515 #endif
4516   CharData storage;
4517 
4518   CharData_Init(&storage);
4519   test_data.storage = &storage;
4520   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4521   XML_SetUserData(g_parser, &test_data);
4522   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4523   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4524       == XML_STATUS_ERROR)
4525     xml_failure(g_parser);
4526   CharData_CheckXMLChars(&storage, expected);
4527 }
4528 END_TEST
4529 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4530 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4531   const char *text = "<!DOCTYPE doc [\n"
4532                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4533                      "]>\n"
4534                      "<doc>&en;</doc>";
4535   ExtTest2 test_data
4536       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4537          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4538           *   0x4c = L and 0x20 is a space
4539           */
4540          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4541 #ifdef XML_UNICODE
4542   const XML_Char *expected = XCS("\x00fe\x00ff L");
4543 #else
4544   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4545   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4546 #endif
4547   CharData storage;
4548 
4549   CharData_Init(&storage);
4550   test_data.storage = &storage;
4551   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4552   XML_SetUserData(g_parser, &test_data);
4553   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4554   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4555       == XML_STATUS_ERROR)
4556     xml_failure(g_parser);
4557   CharData_CheckXMLChars(&storage, expected);
4558 }
4559 END_TEST
4560 
4561 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4562 START_TEST(test_ext_entity_utf16_be) {
4563   const char *text = "<!DOCTYPE doc [\n"
4564                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4565                      "]>\n"
4566                      "<doc>&en;</doc>";
4567   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4568 #ifdef XML_UNICODE
4569   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4570 #else
4571   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4572                                  "\xe6\x94\x80"   /* U+6500 */
4573                                  "\xe2\xbc\x80"   /* U+2F00 */
4574                                  "\xe3\xb8\x80"); /* U+3E00 */
4575 #endif
4576   CharData storage;
4577 
4578   CharData_Init(&storage);
4579   test_data.storage = &storage;
4580   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4581   XML_SetUserData(g_parser, &test_data);
4582   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4583   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4584       == XML_STATUS_ERROR)
4585     xml_failure(g_parser);
4586   CharData_CheckXMLChars(&storage, expected);
4587 }
4588 END_TEST
4589 
4590 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4591 START_TEST(test_ext_entity_utf16_le) {
4592   const char *text = "<!DOCTYPE doc [\n"
4593                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4594                      "]>\n"
4595                      "<doc>&en;</doc>";
4596   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4597 #ifdef XML_UNICODE
4598   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4599 #else
4600   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4601                                  "\xe6\x94\x80"   /* U+6500 */
4602                                  "\xe2\xbc\x80"   /* U+2F00 */
4603                                  "\xe3\xb8\x80"); /* U+3E00 */
4604 #endif
4605   CharData storage;
4606 
4607   CharData_Init(&storage);
4608   test_data.storage = &storage;
4609   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4610   XML_SetUserData(g_parser, &test_data);
4611   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4612   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4613       == XML_STATUS_ERROR)
4614     xml_failure(g_parser);
4615   CharData_CheckXMLChars(&storage, expected);
4616 }
4617 END_TEST
4618 
4619 /* Test little-endian UTF-16 given no explicit encoding.
4620  * The existing default encoding (UTF-8) is assumed to hold without a
4621  * BOM to contradict it, so the entity value will in fact provoke an
4622  * error because 0x00 is not a valid XML character.  We parse the
4623  * whole buffer in one go rather than feeding it in byte by byte to
4624  * exercise different code paths in the initial scanning routines.
4625  */
START_TEST(test_ext_entity_utf16_unknown)4626 START_TEST(test_ext_entity_utf16_unknown) {
4627   const char *text = "<!DOCTYPE doc [\n"
4628                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4629                      "]>\n"
4630                      "<doc>&en;</doc>";
4631   ExtFaults2 test_data
4632       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4633          XML_ERROR_INVALID_TOKEN};
4634 
4635   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4636   XML_SetUserData(g_parser, &test_data);
4637   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4638                  "Invalid character should not have been accepted");
4639 }
4640 END_TEST
4641 
4642 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4643 START_TEST(test_ext_entity_utf8_non_bom) {
4644   const char *text = "<!DOCTYPE doc [\n"
4645                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4646                      "]>\n"
4647                      "<doc>&en;</doc>";
4648   ExtTest2 test_data
4649       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4650          3, NULL, NULL};
4651 #ifdef XML_UNICODE
4652   const XML_Char *expected = XCS("\xfec0");
4653 #else
4654   const XML_Char *expected = XCS("\xef\xbb\x80");
4655 #endif
4656   CharData storage;
4657 
4658   CharData_Init(&storage);
4659   test_data.storage = &storage;
4660   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4661   XML_SetUserData(g_parser, &test_data);
4662   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4663   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4664       == XML_STATUS_ERROR)
4665     xml_failure(g_parser);
4666   CharData_CheckXMLChars(&storage, expected);
4667 }
4668 END_TEST
4669 
4670 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4671 START_TEST(test_utf8_in_cdata_section) {
4672   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4673 #ifdef XML_UNICODE
4674   const XML_Char *expected = XCS("one \x00e9 two");
4675 #else
4676   const XML_Char *expected = XCS("one \xc3\xa9 two");
4677 #endif
4678 
4679   run_character_check(text, expected);
4680 }
4681 END_TEST
4682 
/* Test that UTF-8 in a CDATA section is passed through correctly when
 * a lone ']' sits between two multi-byte characters
 */
START_TEST(test_utf8_in_cdata_section_2)4684 START_TEST(test_utf8_in_cdata_section_2) {
4685   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4686 #ifdef XML_UNICODE
4687   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4688 #else
4689   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4690 #endif
4691 
4692   run_character_check(text, expected);
4693 }
4694 END_TEST
4695 
START_TEST(test_utf8_in_start_tags)4696 START_TEST(test_utf8_in_start_tags) {
4697   struct test_case {
4698     bool goodName;
4699     bool goodNameStart;
4700     const char *tagName;
4701   };
4702 
4703   // The idea with the tests below is this:
4704   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4705   // go to isNever and are hence not a concern.
4706   //
4707   // We start with a character that is a valid name character
4708   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4709   // single bits at places where (1) the result leaves the UTF-8 encoding space
4710   // and (2) we stay in the same n-byte sequence family.
4711   //
4712   // The flipped bits are highlighted in angle brackets in comments,
4713   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4714   // the most significant bit to 1 to leave UTF-8 encoding space.
4715   struct test_case cases[] = {
4716       // 1-byte UTF-8: [0xxx xxxx]
4717       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4718       {false, false, "\xBA"}, // [<1>011 1010]
4719       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4720       {false, false, "\xB9"}, // [<1>011 1001]
4721 
4722       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4723       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4724                                   // Arabic small waw U+06E5
4725       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4726       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4727       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4728       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4729                                   // combining char U+0301
4730       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4731       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4732       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4733 
4734       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4735       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4736                                       // Devanagari Letter A U+0905
4737       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4738       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4739       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4740       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4741       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4742       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4743                                       // combining char U+0901
4744       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4745       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4746       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4747       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4748       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4749   };
4750   const bool atNameStart[] = {true, false};
4751 
4752   size_t i = 0;
4753   char doc[1024];
4754   size_t failCount = 0;
4755 
4756   // we need all the bytes to be parsed, but we don't want the errors that can
4757   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4758   if (g_reparseDeferralEnabledDefault) {
4759     return;
4760   }
4761 
4762   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4763     size_t j = 0;
4764     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4765       const bool expectedSuccess
4766           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4767       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4768                cases[i].tagName);
4769       XML_Parser parser = XML_ParserCreate(NULL);
4770 
4771       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4772           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4773 
4774       bool success = true;
4775       if ((status == XML_STATUS_OK) != expectedSuccess) {
4776         success = false;
4777       }
4778       if ((status == XML_STATUS_ERROR)
4779           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4780         success = false;
4781       }
4782 
4783       if (! success) {
4784         fprintf(
4785             stderr,
4786             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4787             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4788             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4789         failCount++;
4790       }
4791 
4792       XML_ParserFree(parser);
4793     }
4794   }
4795 
4796   if (failCount > 0) {
4797     fail("UTF-8 regression detected");
4798   }
4799 }
4800 END_TEST
4801 
4802 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)4803 START_TEST(test_trailing_spaces_in_elements) {
4804   const char *text = "<doc   >Hi</doc >";
4805   const XML_Char *expected = XCS("doc/doc");
4806   CharData storage;
4807 
4808   CharData_Init(&storage);
4809   XML_SetElementHandler(g_parser, record_element_start_handler,
4810                         record_element_end_handler);
4811   XML_SetUserData(g_parser, &storage);
4812   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4813       == XML_STATUS_ERROR)
4814     xml_failure(g_parser);
4815   CharData_CheckXMLChars(&storage, expected);
4816 }
4817 END_TEST
4818 
START_TEST(test_utf16_attribute)4819 START_TEST(test_utf16_attribute) {
4820   const char text[] =
4821       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4822        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4823        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4824        */
4825       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4826   const XML_Char *expected = XCS("a");
4827   CharData storage;
4828 
4829   CharData_Init(&storage);
4830   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4831   XML_SetUserData(g_parser, &storage);
4832   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4833       == XML_STATUS_ERROR)
4834     xml_failure(g_parser);
4835   CharData_CheckXMLChars(&storage, expected);
4836 }
4837 END_TEST
4838 
START_TEST(test_utf16_second_attr)4839 START_TEST(test_utf16_second_attr) {
4840   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4841    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4842    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4843    */
4844   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4845                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4846   const XML_Char *expected = XCS("1");
4847   CharData storage;
4848 
4849   CharData_Init(&storage);
4850   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4851   XML_SetUserData(g_parser, &storage);
4852   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4853       == XML_STATUS_ERROR)
4854     xml_failure(g_parser);
4855   CharData_CheckXMLChars(&storage, expected);
4856 }
4857 END_TEST
4858 
START_TEST(test_attr_after_solidus)4859 START_TEST(test_attr_after_solidus) {
4860   const char *text = "<doc attr1='a' / attr2='b'>";
4861 
4862   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4863 }
4864 END_TEST
4865 
START_TEST(test_utf16_pe)4866 START_TEST(test_utf16_pe) {
4867   /* <!DOCTYPE doc [
4868    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4869    * %{KHO KHWAI}{CHO CHAN};
4870    * ]>
4871    * <doc></doc>
4872    *
4873    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4874    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4875    */
4876   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4877                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4878                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4879                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4880                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4881                       "\0]\0>\0\n"
4882                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4883 #ifdef XML_UNICODE
4884   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4885 #else
4886   const XML_Char *expected
4887       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4888 #endif
4889   CharData storage;
4890 
4891   CharData_Init(&storage);
4892   XML_SetUserData(g_parser, &storage);
4893   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4894   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4895       == XML_STATUS_ERROR)
4896     xml_failure(g_parser);
4897   CharData_CheckXMLChars(&storage, expected);
4898 }
4899 END_TEST
4900 
4901 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)4902 START_TEST(test_bad_attr_desc_keyword) {
4903   const char *text = "<!DOCTYPE doc [\n"
4904                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4905                      "]>\n"
4906                      "<doc />";
4907 
4908   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4909                  "Bad keyword !IMPLIED not faulted");
4910 }
4911 END_TEST
4912 
4913 /* Test that an invalid attribute description keyword consisting of
4914  * UTF-16 characters with their top bytes non-zero are correctly
4915  * faulted
4916  */
START_TEST(test_bad_attr_desc_keyword_utf16)4917 START_TEST(test_bad_attr_desc_keyword_utf16) {
4918   /* <!DOCTYPE d [
4919    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4920    * ]><d/>
4921    *
4922    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4923    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4924    */
4925   const char text[]
4926       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4927         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4928         "\0#\x0e\x04\x0e\x08\0>\0\n"
4929         "\0]\0>\0<\0d\0/\0>";
4930 
4931   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4932       != XML_STATUS_ERROR)
4933     fail("Invalid UTF16 attribute keyword not faulted");
4934   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4935     xml_failure(g_parser);
4936 }
4937 END_TEST
4938 
4939 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4940  * using prefix-encoding (see above) to trigger specific code paths
4941  */
START_TEST(test_bad_doctype)4942 START_TEST(test_bad_doctype) {
4943   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4944                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4945 
4946   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4947   expect_failure(text, XML_ERROR_SYNTAX,
4948                  "Invalid bytes in DOCTYPE not faulted");
4949 }
4950 END_TEST
4951 
START_TEST(test_bad_doctype_utf8)4952 START_TEST(test_bad_doctype_utf8) {
4953   const char *text = "<!DOCTYPE \xDB\x25"
4954                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4955   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4956                  "Invalid UTF-8 in DOCTYPE not faulted");
4957 }
4958 END_TEST
4959 
START_TEST(test_bad_doctype_utf16)4960 START_TEST(test_bad_doctype_utf16) {
4961   const char text[] =
4962       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4963        *
4964        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4965        * (name character) but not a valid letter (name start character)
4966        */
4967       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4968       "\x06\xf2"
4969       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4970 
4971   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4972       != XML_STATUS_ERROR)
4973     fail("Invalid bytes in DOCTYPE not faulted");
4974   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4975     xml_failure(g_parser);
4976 }
4977 END_TEST
4978 
START_TEST(test_bad_doctype_plus)4979 START_TEST(test_bad_doctype_plus) {
4980   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4981                      "<1+>&foo;</1+>";
4982 
4983   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4984                  "'+' in document name not faulted");
4985 }
4986 END_TEST
4987 
START_TEST(test_bad_doctype_star)4988 START_TEST(test_bad_doctype_star) {
4989   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4990                      "<1*>&foo;</1*>";
4991 
4992   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4993                  "'*' in document name not faulted");
4994 }
4995 END_TEST
4996 
START_TEST(test_bad_doctype_query)4997 START_TEST(test_bad_doctype_query) {
4998   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4999                      "<1?>&foo;</1?>";
5000 
5001   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5002                  "'?' in document name not faulted");
5003 }
5004 END_TEST
5005 
START_TEST(test_unknown_encoding_bad_ignore)5006 START_TEST(test_unknown_encoding_bad_ignore) {
5007   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5008                      "<!DOCTYPE doc SYSTEM 'foo'>"
5009                      "<doc><e>&entity;</e></doc>";
5010   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5011                      "Invalid character not faulted", XCS("prefix-conv"),
5012                      XML_ERROR_INVALID_TOKEN};
5013 
5014   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5015   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5016   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5017   XML_SetUserData(g_parser, &fault);
5018   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5019                  "Bad IGNORE section with unknown encoding not failed");
5020 }
5021 END_TEST
5022 
START_TEST(test_entity_in_utf16_be_attr)5023 START_TEST(test_entity_in_utf16_be_attr) {
5024   const char text[] =
5025       /* <e a='&#228; &#x00E4;'></e> */
5026       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5027       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5028 #ifdef XML_UNICODE
5029   const XML_Char *expected = XCS("\x00e4 \x00e4");
5030 #else
5031   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5032 #endif
5033   CharData storage;
5034 
5035   CharData_Init(&storage);
5036   XML_SetUserData(g_parser, &storage);
5037   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5038   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5039       == XML_STATUS_ERROR)
5040     xml_failure(g_parser);
5041   CharData_CheckXMLChars(&storage, expected);
5042 }
5043 END_TEST
5044 
START_TEST(test_entity_in_utf16_le_attr)5045 START_TEST(test_entity_in_utf16_le_attr) {
5046   const char text[] =
5047       /* <e a='&#228; &#x00E4;'></e> */
5048       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5049       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5050 #ifdef XML_UNICODE
5051   const XML_Char *expected = XCS("\x00e4 \x00e4");
5052 #else
5053   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5054 #endif
5055   CharData storage;
5056 
5057   CharData_Init(&storage);
5058   XML_SetUserData(g_parser, &storage);
5059   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5060   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5061       == XML_STATUS_ERROR)
5062     xml_failure(g_parser);
5063   CharData_CheckXMLChars(&storage, expected);
5064 }
5065 END_TEST
5066 
START_TEST(test_entity_public_utf16_be)5067 START_TEST(test_entity_public_utf16_be) {
5068   const char text[] =
5069       /* <!DOCTYPE d [ */
5070       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5071       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5072       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5073       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5074       /* %e; */
5075       "\0%\0e\0;\0\n"
5076       /* ]> */
5077       "\0]\0>\0\n"
5078       /* <d>&j;</d> */
5079       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5080   ExtTest2 test_data
5081       = {/* <!ENTITY j 'baz'> */
5082          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5083   const XML_Char *expected = XCS("baz");
5084   CharData storage;
5085 
5086   CharData_Init(&storage);
5087   test_data.storage = &storage;
5088   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5089   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5090   XML_SetUserData(g_parser, &test_data);
5091   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5092   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5093       == XML_STATUS_ERROR)
5094     xml_failure(g_parser);
5095   CharData_CheckXMLChars(&storage, expected);
5096 }
5097 END_TEST
5098 
START_TEST(test_entity_public_utf16_le)5099 START_TEST(test_entity_public_utf16_le) {
5100   const char text[] =
5101       /* <!DOCTYPE d [ */
5102       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5103       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5104       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5105       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5106       /* %e; */
5107       "%\0e\0;\0\n\0"
5108       /* ]> */
5109       "]\0>\0\n\0"
5110       /* <d>&j;</d> */
5111       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5112   ExtTest2 test_data
5113       = {/* <!ENTITY j 'baz'> */
5114          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5115   const XML_Char *expected = XCS("baz");
5116   CharData storage;
5117 
5118   CharData_Init(&storage);
5119   test_data.storage = &storage;
5120   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5121   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5122   XML_SetUserData(g_parser, &test_data);
5123   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5124   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5125       == XML_STATUS_ERROR)
5126     xml_failure(g_parser);
5127   CharData_CheckXMLChars(&storage, expected);
5128 }
5129 END_TEST
5130 
5131 /* Test that a doctype with neither an internal nor external subset is
5132  * faulted
5133  */
START_TEST(test_short_doctype)5134 START_TEST(test_short_doctype) {
5135   const char *text = "<!DOCTYPE doc></doc>";
5136   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5137                  "DOCTYPE without subset not rejected");
5138 }
5139 END_TEST
5140 
START_TEST(test_short_doctype_2)5141 START_TEST(test_short_doctype_2) {
5142   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5143   expect_failure(text, XML_ERROR_SYNTAX,
5144                  "DOCTYPE without Public ID not rejected");
5145 }
5146 END_TEST
5147 
START_TEST(test_short_doctype_3)5148 START_TEST(test_short_doctype_3) {
5149   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5150   expect_failure(text, XML_ERROR_SYNTAX,
5151                  "DOCTYPE without System ID not rejected");
5152 }
5153 END_TEST
5154 
START_TEST(test_long_doctype)5155 START_TEST(test_long_doctype) {
5156   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5157   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5158 }
5159 END_TEST
5160 
START_TEST(test_bad_entity)5161 START_TEST(test_bad_entity) {
5162   const char *text = "<!DOCTYPE doc [\n"
5163                      "  <!ENTITY foo PUBLIC>\n"
5164                      "]>\n"
5165                      "<doc/>";
5166   expect_failure(text, XML_ERROR_SYNTAX,
5167                  "ENTITY without Public ID is not rejected");
5168 }
5169 END_TEST
5170 
5171 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5172 START_TEST(test_bad_entity_2) {
5173   const char *text = "<!DOCTYPE doc [\n"
5174                      "  <!ENTITY % foo bar>\n"
5175                      "]>\n"
5176                      "<doc/>";
5177   expect_failure(text, XML_ERROR_SYNTAX,
5178                  "ENTITY without Public ID is not rejected");
5179 }
5180 END_TEST
5181 
START_TEST(test_bad_entity_3)5182 START_TEST(test_bad_entity_3) {
5183   const char *text = "<!DOCTYPE doc [\n"
5184                      "  <!ENTITY % foo PUBLIC>\n"
5185                      "]>\n"
5186                      "<doc/>";
5187   expect_failure(text, XML_ERROR_SYNTAX,
5188                  "Parameter ENTITY without Public ID is not rejected");
5189 }
5190 END_TEST
5191 
START_TEST(test_bad_entity_4)5192 START_TEST(test_bad_entity_4) {
5193   const char *text = "<!DOCTYPE doc [\n"
5194                      "  <!ENTITY % foo SYSTEM>\n"
5195                      "]>\n"
5196                      "<doc/>";
5197   expect_failure(text, XML_ERROR_SYNTAX,
5198                  "Parameter ENTITY without Public ID is not rejected");
5199 }
5200 END_TEST
5201 
START_TEST(test_bad_notation)5202 START_TEST(test_bad_notation) {
5203   const char *text = "<!DOCTYPE doc [\n"
5204                      "  <!NOTATION n SYSTEM>\n"
5205                      "]>\n"
5206                      "<doc/>";
5207   expect_failure(text, XML_ERROR_SYNTAX,
5208                  "Notation without System ID is not rejected");
5209 }
5210 END_TEST
5211 
5212 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5213 START_TEST(test_default_doctype_handler) {
5214   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5215                      "  <!ENTITY foo 'bar'>\n"
5216                      "]>\n"
5217                      "<doc>&foo;</doc>";
5218   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5219                               {XCS("'test.dtd'"), 10, XML_FALSE},
5220                               {NULL, 0, XML_FALSE}};
5221   int i;
5222 
5223   XML_SetUserData(g_parser, &test_data);
5224   XML_SetDefaultHandler(g_parser, checking_default_handler);
5225   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5226   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5227       == XML_STATUS_ERROR)
5228     xml_failure(g_parser);
5229   for (i = 0; test_data[i].expected != NULL; i++)
5230     if (! test_data[i].seen)
5231       fail("Default handler not run for public !DOCTYPE");
5232 }
5233 END_TEST
5234 
START_TEST(test_empty_element_abort)5235 START_TEST(test_empty_element_abort) {
5236   const char *text = "<abort/>";
5237 
5238   XML_SetStartElementHandler(g_parser, start_element_suspender);
5239   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5240       != XML_STATUS_ERROR)
5241     fail("Expected to error on abort");
5242 }
5243 END_TEST
5244 
5245 /* Regression test for GH issue #612: unfinished m_declAttributeType
5246  * allocation in ->m_tempPool can corrupt following allocation.
5247  */
START_TEST(test_pool_integrity_with_unfinished_attr)5248 START_TEST(test_pool_integrity_with_unfinished_attr) {
5249   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5250                      "<!DOCTYPE foo [\n"
5251                      "<!ELEMENT foo ANY>\n"
5252                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5253                      "%entp;\n"
5254                      "]>\n"
5255                      "<a></a>\n";
5256   const XML_Char *expected = XCS("COMMENT");
5257   CharData storage;
5258 
5259   CharData_Init(&storage);
5260   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5261   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5262   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5263   XML_SetCommentHandler(g_parser, accumulate_comment);
5264   XML_SetUserData(g_parser, &storage);
5265   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5266       == XML_STATUS_ERROR)
5267     xml_failure(g_parser);
5268   CharData_CheckXMLChars(&storage, expected);
5269 }
5270 END_TEST
5271 
START_TEST(test_nested_entity_suspend)5272 START_TEST(test_nested_entity_suspend) {
5273   const char *const text = "<!DOCTYPE a [\n"
5274                            "  <!ENTITY e1 '<!--e1-->'>\n"
5275                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5276                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5277                            "]>\n"
5278                            "<a><!--start-->&e3;<!--end--></a>";
5279   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5280       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5281   CharData storage;
5282   CharData_Init(&storage);
5283   XML_Parser parser = XML_ParserCreate(NULL);
5284   ParserPlusStorage parserPlusStorage = {parser, &storage};
5285 
5286   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5287   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5288   XML_SetUserData(parser, &parserPlusStorage);
5289 
5290   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5291   while (status == XML_STATUS_SUSPENDED) {
5292     status = XML_ResumeParser(parser);
5293   }
5294   if (status != XML_STATUS_OK)
5295     xml_failure(parser);
5296 
5297   CharData_CheckXMLChars(&storage, expected);
5298   XML_ParserFree(parser);
5299 }
5300 END_TEST
5301 
5302 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5303 START_TEST(test_big_tokens_scale_linearly) {
5304   const struct {
5305     const char *pre;
5306     const char *post;
5307   } text[] = {
5308       {"<a>", "</a>"},                      // assumed good, used as baseline
5309       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5310       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5311       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5312       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5313   };
5314   const int num_cases = sizeof(text) / sizeof(text[0]);
5315   char aaaaaa[4096];
5316   const int fillsize = (int)sizeof(aaaaaa);
5317   const int fillcount = 100;
5318   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5319   const unsigned max_factor = 4;
5320   const unsigned max_scanned = max_factor * approx_bytes;
5321 
5322   memset(aaaaaa, 'a', fillsize);
5323 
5324   if (! g_reparseDeferralEnabledDefault) {
5325     return; // heuristic is disabled; we would get O(n^2) and fail.
5326   }
5327 
5328   for (int i = 0; i < num_cases; ++i) {
5329     XML_Parser parser = XML_ParserCreate(NULL);
5330     assert_true(parser != NULL);
5331     enum XML_Status status;
5332     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5333 
5334     // parse the start text
5335     g_bytesScanned = 0;
5336     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5337                                      (int)strlen(text[i].pre), XML_FALSE);
5338     if (status != XML_STATUS_OK) {
5339       xml_failure(parser);
5340     }
5341 
5342     // parse lots of 'a', failing the test early if it takes too long
5343     unsigned past_max_count = 0;
5344     for (int f = 0; f < fillcount; ++f) {
5345       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5346       if (status != XML_STATUS_OK) {
5347         xml_failure(parser);
5348       }
5349       if (g_bytesScanned > max_scanned) {
5350         // We're not done, and have already passed the limit -- the test will
5351         // definitely fail. This block allows us to save time by failing early.
5352         const unsigned pushed
5353             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5354         fprintf(
5355             stderr,
5356             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5357             f + 1, fillcount, pushed, g_bytesScanned,
5358             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5359         past_max_count++;
5360         // We are failing, but allow a few log prints first. If we don't reach
5361         // a count of five, the test will fail after the loop instead.
5362         assert_true(past_max_count < 5);
5363       }
5364     }
5365 
5366     // parse the end text
5367     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5368                                      (int)strlen(text[i].post), XML_TRUE);
5369     if (status != XML_STATUS_OK) {
5370       xml_failure(parser);
5371     }
5372 
5373     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5374     if (g_bytesScanned > max_scanned) {
5375       fprintf(
5376           stderr,
5377           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5378           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5379           max_factor);
5380       fail("scanned too many bytes");
5381     }
5382 
5383     XML_ParserFree(parser);
5384   }
5385 }
5386 END_TEST
5387 
START_TEST(test_set_reparse_deferral)5388 START_TEST(test_set_reparse_deferral) {
5389   const char *const pre = "<d>";
5390   const char *const start = "<x attr='";
5391   const char *const end = "'></x>";
5392   char eeeeee[100];
5393   const int fillsize = (int)sizeof(eeeeee);
5394   memset(eeeeee, 'e', fillsize);
5395 
5396   for (int enabled = 0; enabled <= 1; enabled += 1) {
5397     set_subtest("deferral=%d", enabled);
5398 
5399     XML_Parser parser = XML_ParserCreate(NULL);
5400     assert_true(parser != NULL);
5401     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5402     // pre-grow the buffer to avoid reparsing due to almost-fullness
5403     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5404 
5405     CharData storage;
5406     CharData_Init(&storage);
5407     XML_SetUserData(parser, &storage);
5408     XML_SetStartElementHandler(parser, start_element_event_handler);
5409 
5410     enum XML_Status status;
5411     // parse the start text
5412     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5413     if (status != XML_STATUS_OK) {
5414       xml_failure(parser);
5415     }
5416     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5417 
5418     // ..and the start of the token
5419     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5420     if (status != XML_STATUS_OK) {
5421       xml_failure(parser);
5422     }
5423     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5424 
5425     // try to parse lots of 'e', but the token isn't finished
5426     for (int c = 0; c < 100; ++c) {
5427       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5428       if (status != XML_STATUS_OK) {
5429         xml_failure(parser);
5430       }
5431     }
5432     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5433 
5434     // end the <x> token.
5435     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5436     if (status != XML_STATUS_OK) {
5437       xml_failure(parser);
5438     }
5439 
5440     if (enabled) {
5441       // In general, we may need to push more data to trigger a reparse attempt,
5442       // but in this test, the data is constructed to always require it.
5443       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5444       // 2x the token length should suffice; the +1 covers the start and end.
5445       for (int c = 0; c < 101; ++c) {
5446         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5447         if (status != XML_STATUS_OK) {
5448           xml_failure(parser);
5449         }
5450       }
5451     }
5452     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5453 
5454     XML_ParserFree(parser);
5455   }
5456 }
5457 END_TEST
5458 
5459 struct element_decl_data {
5460   XML_Parser parser;
5461   int count;
5462 };
5463 
5464 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5465 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5466   UNUSED_P(name);
5467   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5468   testdata->count += 1;
5469   XML_FreeContentModel(testdata->parser, model);
5470 }
5471 
5472 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5473 external_inherited_parser(XML_Parser p, const XML_Char *context,
5474                           const XML_Char *base, const XML_Char *systemId,
5475                           const XML_Char *publicId) {
5476   UNUSED_P(base);
5477   UNUSED_P(systemId);
5478   UNUSED_P(publicId);
5479   const char *const pre = "<!ELEMENT document ANY>\n";
5480   const char *const start = "<!ELEMENT ";
5481   const char *const end = " ANY>\n";
5482   const char *const post = "<!ELEMENT xyz ANY>\n";
5483   const int enabled = *(int *)XML_GetUserData(p);
5484   char eeeeee[100];
5485   char spaces[100];
5486   const int fillsize = (int)sizeof(eeeeee);
5487   assert_true(fillsize == (int)sizeof(spaces));
5488   memset(eeeeee, 'e', fillsize);
5489   memset(spaces, ' ', fillsize);
5490 
5491   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5492   assert_true(parser != NULL);
5493   // pre-grow the buffer to avoid reparsing due to almost-fullness
5494   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5495 
5496   struct element_decl_data testdata;
5497   testdata.parser = parser;
5498   testdata.count = 0;
5499   XML_SetUserData(parser, &testdata);
5500   XML_SetElementDeclHandler(parser, element_decl_counter);
5501 
5502   enum XML_Status status;
5503   // parse the initial text
5504   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5505   if (status != XML_STATUS_OK) {
5506     xml_failure(parser);
5507   }
5508   assert_true(testdata.count == 1); // first element should be done
5509 
5510   // ..and the start of the big token
5511   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5512   if (status != XML_STATUS_OK) {
5513     xml_failure(parser);
5514   }
5515   assert_true(testdata.count == 1); // still just the first one
5516 
5517   // try to parse lots of 'e', but the token isn't finished
5518   for (int c = 0; c < 100; ++c) {
5519     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5520     if (status != XML_STATUS_OK) {
5521       xml_failure(parser);
5522     }
5523   }
5524   assert_true(testdata.count == 1); // *still* just the first one
5525 
5526   // end the big token.
5527   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5528   if (status != XML_STATUS_OK) {
5529     xml_failure(parser);
5530   }
5531 
5532   if (enabled) {
5533     // In general, we may need to push more data to trigger a reparse attempt,
5534     // but in this test, the data is constructed to always require it.
5535     assert_true(testdata.count == 1); // or the test is incorrect
5536     // 2x the token length should suffice; the +1 covers the start and end.
5537     for (int c = 0; c < 101; ++c) {
5538       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5539       if (status != XML_STATUS_OK) {
5540         xml_failure(parser);
5541       }
5542     }
5543   }
5544   assert_true(testdata.count == 2); // the big token should be done
5545 
5546   // parse the final text
5547   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5548   if (status != XML_STATUS_OK) {
5549     xml_failure(parser);
5550   }
5551   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5552 
5553   XML_ParserFree(parser);
5554   return XML_STATUS_OK;
5555 }
5556 
START_TEST(test_reparse_deferral_is_inherited)5557 START_TEST(test_reparse_deferral_is_inherited) {
5558   const char *const text
5559       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5560   for (int enabled = 0; enabled <= 1; ++enabled) {
5561     set_subtest("deferral=%d", enabled);
5562 
5563     XML_Parser parser = XML_ParserCreate(NULL);
5564     assert_true(parser != NULL);
5565     XML_SetUserData(parser, (void *)&enabled);
5566     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5567     // this handler creates a sub-parser and checks that its deferral behavior
5568     // is what we expected, based on the value of `enabled` (in userdata).
5569     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5570     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5571     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5572       xml_failure(parser);
5573 
5574     XML_ParserFree(parser);
5575   }
5576 }
5577 END_TEST
5578 
START_TEST(test_set_reparse_deferral_on_null_parser)5579 START_TEST(test_set_reparse_deferral_on_null_parser) {
5580   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5581   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5582   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5583   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5584   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5585               == XML_FALSE);
5586   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5587               == XML_FALSE);
5588 }
5589 END_TEST
5590 
START_TEST(test_set_reparse_deferral_on_the_fly)5591 START_TEST(test_set_reparse_deferral_on_the_fly) {
5592   const char *const pre = "<d><x attr='";
5593   const char *const end = "'></x>";
5594   char iiiiii[100];
5595   const int fillsize = (int)sizeof(iiiiii);
5596   memset(iiiiii, 'i', fillsize);
5597 
5598   XML_Parser parser = XML_ParserCreate(NULL);
5599   assert_true(parser != NULL);
5600   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5601 
5602   CharData storage;
5603   CharData_Init(&storage);
5604   XML_SetUserData(parser, &storage);
5605   XML_SetStartElementHandler(parser, start_element_event_handler);
5606 
5607   enum XML_Status status;
5608   // parse the start text
5609   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5610   if (status != XML_STATUS_OK) {
5611     xml_failure(parser);
5612   }
5613   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5614 
5615   // try to parse some 'i', but the token isn't finished
5616   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5617   if (status != XML_STATUS_OK) {
5618     xml_failure(parser);
5619   }
5620   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5621 
5622   // end the <x> token.
5623   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5624   if (status != XML_STATUS_OK) {
5625     xml_failure(parser);
5626   }
5627   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5628 
5629   // now change the heuristic setting and add *no* data
5630   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5631   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5632   status = XML_Parse(parser, "", 0, XML_FALSE);
5633   if (status != XML_STATUS_OK) {
5634     xml_failure(parser);
5635   }
5636   CharData_CheckXMLChars(&storage, XCS("dx"));
5637 
5638   XML_ParserFree(parser);
5639 }
5640 END_TEST
5641 
START_TEST(test_set_bad_reparse_option)5642 START_TEST(test_set_bad_reparse_option) {
5643   XML_Parser parser = XML_ParserCreate(NULL);
5644   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5645   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5646   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5647   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5648   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5649   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5650   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5651   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5652   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5653   XML_ParserFree(parser);
5654 }
5655 END_TEST
5656 
/* Allocation statistics gathered by counting_realloc()/counting_malloc():
 * the sum of all requested sizes and the largest single request.
 */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics
 * above before forwarding the request.  The size is recorded whether or
 * not the underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart: a fresh allocation is a realloc of NULL, so the
 * request is counted by counting_realloc().
 */
static void *
counting_malloc(size_t size) {
  void *const fresh = counting_realloc(NULL, size);
  return fresh;
}
5673 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)5674 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5675   if (g_chunkSize != 0) {
5676     // this test does not use SINGLE_BYTES, because it depends on very precise
5677     // buffer fills.
5678     return;
5679   }
5680   if (! g_reparseDeferralEnabledDefault) {
5681     return; // this test is irrelevant when the deferral heuristic is disabled.
5682   }
5683 
5684   const int document_length = 65536;
5685   char *const document = (char *)malloc(document_length);
5686 
5687   const XML_Memory_Handling_Suite memfuncs = {
5688       counting_malloc,
5689       counting_realloc,
5690       free,
5691   };
5692 
5693   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5694   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5695   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5696 
5697   for (const int *leading = leading_list; *leading >= 0; leading++) {
5698     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5699       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5700         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5701                     *fillsize);
5702         // start by checking that the test looks reasonably valid
5703         assert_true(*leading + *bigtoken <= document_length);
5704 
5705         // put 'x' everywhere; some will be overwritten by elements.
5706         memset(document, 'x', document_length);
5707         // maybe add an initial tag
5708         if (*leading) {
5709           assert_true(*leading >= 3); // or the test case is invalid
5710           memcpy(document, "<a>", 3);
5711         }
5712         // add the large token
5713         document[*leading + 0] = '<';
5714         document[*leading + 1] = 'b';
5715         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5716         document[*leading + *bigtoken - 1] = '>';
5717 
5718         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5719         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5720 
5721         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5722         assert_true(parser != NULL);
5723 
5724         CharData storage;
5725         CharData_Init(&storage);
5726         XML_SetUserData(parser, &storage);
5727         XML_SetStartElementHandler(parser, start_element_event_handler);
5728 
5729         g_biggestAlloc = 0;
5730         g_totalAlloc = 0;
5731         int offset = 0;
5732         // fill data until the big token is covered (but not necessarily parsed)
5733         while (offset < *leading + *bigtoken) {
5734           assert_true(offset + *fillsize <= document_length);
5735           const enum XML_Status status
5736               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5737           if (status != XML_STATUS_OK) {
5738             xml_failure(parser);
5739           }
5740           offset += *fillsize;
5741         }
5742         // Now, check that we've had a buffer allocation that could fit the
5743         // context bytes and our big token. In order to detect a special case,
5744         // we need to know how many bytes of our big token were included in the
5745         // first push that contained _any_ bytes of the big token:
5746         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5747         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5748           // Special case: we aren't saving any context, and the whole big token
5749           // was covered by a single fill, so Expat may have parsed directly
5750           // from our input pointer, without allocating an internal buffer.
5751         } else if (*leading < XML_CONTEXT_BYTES) {
5752           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5753         } else {
5754           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5755         }
5756         // fill data until the big token is actually parsed
5757         while (storage.count < expected_elem_total) {
5758           const size_t alloc_before = g_totalAlloc;
5759           assert_true(offset + *fillsize <= document_length);
5760           const enum XML_Status status
5761               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5762           if (status != XML_STATUS_OK) {
5763             xml_failure(parser);
5764           }
5765           offset += *fillsize;
5766           // since all the bytes of the big token are already in the buffer,
5767           // the bufsize ceiling should make us finish its parsing without any
5768           // further buffer allocations. We assume that there will be no other
5769           // large allocations in this test.
5770           assert_true(g_totalAlloc - alloc_before < 4096);
5771         }
5772         // test-the-test: was our alloc even called?
5773         assert_true(g_totalAlloc > 0);
5774         // test-the-test: there shouldn't be any extra start elements
5775         assert_true(storage.count == expected_elem_total);
5776 
5777         XML_ParserFree(parser);
5778       }
5779     }
5780   }
5781   free(document);
5782 }
5783 END_TEST
5784 
START_TEST(test_varying_buffer_fills)5785 START_TEST(test_varying_buffer_fills) {
5786   const int KiB = 1024;
5787   const int MiB = 1024 * KiB;
5788   const int document_length = 16 * MiB;
5789   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5790 
5791   if (g_chunkSize != 0) {
5792     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5793   }
5794 
5795   char *const document = (char *)malloc(document_length);
5796   assert_true(document != NULL);
5797   memset(document, 'x', document_length);
5798   document[0] = '<';
5799   document[1] = 't';
5800   memset(&document[2], ' ', big - 2); // a very spacy token
5801   document[big - 1] = '>';
5802 
5803   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5804   // When reparse deferral is enabled, the final (negated) value is the expected
5805   // maximum number of bytes scanned in parse attempts.
5806   const int testcases[][30] = {
5807       {8 * MiB, -8 * MiB},
5808       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5809       // zero-size fills shouldn't trigger the bypass
5810       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5811       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5812       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5813       // try to hit the buffer ceiling only once (at the end)
5814       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5815       // try to hit the same buffer ceiling multiple times
5816       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5817 
5818       // try to hit every ceiling, by always landing 1K shy of the buffer size
5819       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5820        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5821 
5822       // try to avoid every ceiling, by always landing 1B past the buffer size
5823       // the normal 2x heuristic threshold still forces parse attempts.
5824       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5825        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5826        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5827        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5828        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5829        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5830        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5831        -(10 * MiB + 682 * KiB + 7)},
5832       // try to avoid every ceiling again, except on our last fill.
5833       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5834        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5835        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5836        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5837        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5838        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5839        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5840        -(10 * MiB + 682 * KiB + 6)},
5841 
5842       // try to hit ceilings on the way multiple times
5843       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5844        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5845        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5846        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5847        // we'll make a parse attempt at every parse call
5848        -(45 * MiB + 12)},
5849   };
5850   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5851   for (int test_i = 0; test_i < testcount; test_i++) {
5852     const int *fillsize = testcases[test_i];
5853     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5854                 fillsize[2], fillsize[3]);
5855     XML_Parser parser = XML_ParserCreate(NULL);
5856     assert_true(parser != NULL);
5857 
5858     CharData storage;
5859     CharData_Init(&storage);
5860     XML_SetUserData(parser, &storage);
5861     XML_SetStartElementHandler(parser, start_element_event_handler);
5862 
5863     g_bytesScanned = 0;
5864     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5865     int offset = 0;
5866     while (*fillsize >= 0) {
5867       assert_true(offset + *fillsize <= document_length); // or test is invalid
5868       const enum XML_Status status
5869           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5870       if (status != XML_STATUS_OK) {
5871         xml_failure(parser);
5872       }
5873       offset += *fillsize;
5874       fillsize++;
5875       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5876       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5877     }
5878     assert_true(storage.count == 1); // the big token should've been parsed
5879     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5880     if (g_reparseDeferralEnabledDefault) {
5881       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5882       const unsigned max_bytes_scanned = -*fillsize;
5883       if (g_bytesScanned > max_bytes_scanned) {
5884         fprintf(stderr,
5885                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5886                 g_bytesScanned, max_bytes_scanned);
5887         fail("too many bytes scanned in parse attempts");
5888       }
5889     }
5890     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
5891 
5892     XML_ParserFree(parser);
5893   }
5894   free(document);
5895 }
5896 END_TEST
5897 
5898 void
make_basic_test_case(Suite * s)5899 make_basic_test_case(Suite *s) {
5900   TCase *tc_basic = tcase_create("basic tests");
5901 
5902   suite_add_tcase(s, tc_basic);
5903   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5904 
5905   tcase_add_test(tc_basic, test_nul_byte);
5906   tcase_add_test(tc_basic, test_u0000_char);
5907   tcase_add_test(tc_basic, test_siphash_self);
5908   tcase_add_test(tc_basic, test_siphash_spec);
5909   tcase_add_test(tc_basic, test_bom_utf8);
5910   tcase_add_test(tc_basic, test_bom_utf16_be);
5911   tcase_add_test(tc_basic, test_bom_utf16_le);
5912   tcase_add_test(tc_basic, test_nobom_utf16_le);
5913   tcase_add_test(tc_basic, test_hash_collision);
5914   tcase_add_test(tc_basic, test_illegal_utf8);
5915   tcase_add_test(tc_basic, test_utf8_auto_align);
5916   tcase_add_test(tc_basic, test_utf16);
5917   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5918   tcase_add_test(tc_basic, test_not_utf16);
5919   tcase_add_test(tc_basic, test_bad_encoding);
5920   tcase_add_test(tc_basic, test_latin1_umlauts);
5921   tcase_add_test(tc_basic, test_long_utf8_character);
5922   tcase_add_test(tc_basic, test_long_latin1_attribute);
5923   tcase_add_test(tc_basic, test_long_ascii_attribute);
5924   /* Regression test for SF bug #491986. */
5925   tcase_add_test(tc_basic, test_danish_latin1);
5926   /* Regression test for SF bug #514281. */
5927   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5928   tcase_add_test(tc_basic, test_french_charref_decimal);
5929   tcase_add_test(tc_basic, test_french_latin1);
5930   tcase_add_test(tc_basic, test_french_utf8);
5931   tcase_add_test(tc_basic, test_utf8_false_rejection);
5932   tcase_add_test(tc_basic, test_line_number_after_parse);
5933   tcase_add_test(tc_basic, test_column_number_after_parse);
5934   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5935   tcase_add_test(tc_basic, test_line_number_after_error);
5936   tcase_add_test(tc_basic, test_column_number_after_error);
5937   tcase_add_test(tc_basic, test_really_long_lines);
5938   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5939   tcase_add_test(tc_basic, test_end_element_events);
5940   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5941   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5942   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5943   tcase_add_test(tc_basic, test_xmldecl_invalid);
5944   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5945   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5946   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5947   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5948   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5949   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5950   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5951   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5952   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5953   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5954   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5955   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5956   tcase_add_test(tc_basic,
5957                  test_wfc_undeclared_entity_with_external_subset_standalone);
5958   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5959   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5960   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5961   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5962   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5963   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5964   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5965   tcase_add_test(tc_basic, test_dtd_attr_handling);
5966   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5967   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5968   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5969   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5970   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5971   tcase_add_test(tc_basic, test_good_cdata_ascii);
5972   tcase_add_test(tc_basic, test_good_cdata_utf16);
5973   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5974   tcase_add_test(tc_basic, test_long_cdata_utf16);
5975   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5976   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5977   tcase_add_test(tc_basic, test_bad_cdata);
5978   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5979   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5980   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5981   tcase_add_test(tc_basic, test_memory_allocation);
5982   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5983   tcase_add_test(tc_basic, test_dtd_elements);
5984   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5985   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5986   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5987   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5988   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5989   tcase_add_test__ifdef_xml_dtd(tc_basic,
5990                                 test_foreign_dtd_without_external_subset);
5991   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5992   tcase_add_test(tc_basic, test_set_base);
5993   tcase_add_test(tc_basic, test_attributes);
5994   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5995   tcase_add_test(tc_basic, test_resume_invalid_parse);
5996   tcase_add_test(tc_basic, test_resume_resuspended);
5997   tcase_add_test(tc_basic, test_cdata_default);
5998   tcase_add_test(tc_basic, test_subordinate_reset);
5999   tcase_add_test(tc_basic, test_subordinate_suspend);
6000   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6001   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6002   tcase_add_test__ifdef_xml_dtd(tc_basic,
6003                                 test_ext_entity_invalid_suspended_parse);
6004   tcase_add_test(tc_basic, test_explicit_encoding);
6005   tcase_add_test(tc_basic, test_trailing_cr);
6006   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6007   tcase_add_test(tc_basic, test_trailing_rsqb);
6008   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6009   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6010   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6011   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6012   tcase_add_test(tc_basic, test_empty_parse);
6013   tcase_add_test(tc_basic, test_negative_len_parse);
6014   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6015   tcase_add_test(tc_basic, test_get_buffer_1);
6016   tcase_add_test(tc_basic, test_get_buffer_2);
6017 #if XML_CONTEXT_BYTES > 0
6018   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6019 #endif
6020   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6021   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6022   tcase_add_test(tc_basic, test_byte_info_at_end);
6023   tcase_add_test(tc_basic, test_byte_info_at_error);
6024   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6025   tcase_add_test(tc_basic, test_predefined_entities);
6026   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6027   tcase_add_test(tc_basic, test_not_predefined_entities);
6028   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6029   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6030   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6031   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6032   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6033   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6034   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6035   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6036   tcase_add_test(tc_basic, test_bad_public_doctype);
6037   tcase_add_test(tc_basic, test_attribute_enum_value);
6038   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6039   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6040   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6041   tcase_add_test(tc_basic, test_nested_groups);
6042   tcase_add_test(tc_basic, test_group_choice);
6043   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6044   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6045   tcase_add_test__ifdef_xml_dtd(tc_basic,
6046                                 test_recursive_external_parameter_entity);
6047   tcase_add_test__ifdef_xml_dtd(tc_basic,
6048                                 test_recursive_external_parameter_entity_2);
6049   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6050   tcase_add_test(tc_basic, test_suspend_xdecl);
6051   tcase_add_test(tc_basic, test_abort_epilog);
6052   tcase_add_test(tc_basic, test_abort_epilog_2);
6053   tcase_add_test(tc_basic, test_suspend_epilog);
6054   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6055   tcase_add_test(tc_basic, test_unfinished_epilog);
6056   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6057   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6058   tcase_add_test__ifdef_xml_dtd(tc_basic,
6059                                 test_suspend_resume_internal_entity_issue_629);
6060   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6061   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6062   tcase_add_test(tc_basic, test_restart_on_error);
6063   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6064   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6065   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6066   tcase_add_test(tc_basic, test_standalone_internal_entity);
6067   tcase_add_test(tc_basic, test_skipped_external_entity);
6068   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6069   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6070   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6071   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6072   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6073   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6074   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6075   tcase_add_test(tc_basic, test_pi_handled_in_default);
6076   tcase_add_test(tc_basic, test_comment_handled_in_default);
6077   tcase_add_test(tc_basic, test_pi_yml);
6078   tcase_add_test(tc_basic, test_pi_xnl);
6079   tcase_add_test(tc_basic, test_pi_xmm);
6080   tcase_add_test(tc_basic, test_utf16_pi);
6081   tcase_add_test(tc_basic, test_utf16_be_pi);
6082   tcase_add_test(tc_basic, test_utf16_be_comment);
6083   tcase_add_test(tc_basic, test_utf16_le_comment);
6084   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6085   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6086   tcase_add_test(tc_basic, test_unknown_encoding_success);
6087   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6088   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6089   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6090   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6091   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6092   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6093   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6094   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6095   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6096   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6097   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6098   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6099   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6100   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6101   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6102   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6103   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6104   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6105   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6106   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6107   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6108   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6109   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6110   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6111   tcase_add_test(tc_basic, test_utf16_attribute);
6112   tcase_add_test(tc_basic, test_utf16_second_attr);
6113   tcase_add_test(tc_basic, test_attr_after_solidus);
6114   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6115   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6116   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6117   tcase_add_test(tc_basic, test_bad_doctype);
6118   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6119   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6120   tcase_add_test(tc_basic, test_bad_doctype_plus);
6121   tcase_add_test(tc_basic, test_bad_doctype_star);
6122   tcase_add_test(tc_basic, test_bad_doctype_query);
6123   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6124   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6125   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6126   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6127   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6128   tcase_add_test(tc_basic, test_short_doctype);
6129   tcase_add_test(tc_basic, test_short_doctype_2);
6130   tcase_add_test(tc_basic, test_short_doctype_3);
6131   tcase_add_test(tc_basic, test_long_doctype);
6132   tcase_add_test(tc_basic, test_bad_entity);
6133   tcase_add_test(tc_basic, test_bad_entity_2);
6134   tcase_add_test(tc_basic, test_bad_entity_3);
6135   tcase_add_test(tc_basic, test_bad_entity_4);
6136   tcase_add_test(tc_basic, test_bad_notation);
6137   tcase_add_test(tc_basic, test_default_doctype_handler);
6138   tcase_add_test(tc_basic, test_empty_element_abort);
6139   tcase_add_test__ifdef_xml_dtd(tc_basic,
6140                                 test_pool_integrity_with_unfinished_attr);
6141   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6142   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6143   tcase_add_test(tc_basic, test_set_reparse_deferral);
6144   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6145   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6146   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6147   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6148   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6149   tcase_add_test(tc_basic, test_varying_buffer_fills);
6150 }
6151