xref: /freebsd/contrib/expat/tests/basic_tests.c (revision e3935639d8d8b6556cad18e1c90e419a65f26b40)
1 /* Tests in the "basic" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22    Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
23    Copyright (c) 2026      Francesco Bertolaccini
24    Licensed under the MIT license:
25 
26    Permission is  hereby granted,  free of charge,  to any  person obtaining
27    a  copy  of  this  software   and  associated  documentation  files  (the
28    "Software"),  to  deal in  the  Software  without restriction,  including
29    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
30    distribute, sublicense, and/or sell copies of the Software, and to permit
31    persons  to whom  the Software  is  furnished to  do so,  subject to  the
32    following conditions:
33 
34    The above copyright  notice and this permission notice  shall be included
35    in all copies or substantial portions of the Software.
36 
37    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
38    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
40    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
41    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
42    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
43    USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45 
46 #if defined(NDEBUG)
47 #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
48 #endif
49 
50 #include <assert.h>
51 
52 #include <stdio.h>
53 #include <string.h>
54 #include <time.h>
55 
56 #if ! defined(__cplusplus)
57 #  include <stdbool.h>
58 #endif
59 
60 #include "expat_config.h"
61 
62 #include "expat.h"
63 #include "internal.h"
64 #include "minicheck.h"
65 #include "structdata.h"
66 #include "common.h"
67 #include "dummy.h"
68 #include "handlers.h"
69 #include "siphash.h"
70 #include "basic_tests.h"
71 
72 static void
basic_setup(void)73 basic_setup(void) {
74   g_parser = XML_ParserCreate(NULL);
75   if (g_parser == NULL)
76     fail("Parser not created.");
77 }
78 
79 /*
80  * Character & encoding tests.
81  */
82 
START_TEST(test_nul_byte)83 START_TEST(test_nul_byte) {
84   char text[] = "<doc>\0</doc>";
85 
86   /* test that a NUL byte (in US-ASCII data) is an error */
87   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
88       == XML_STATUS_OK)
89     fail("Parser did not report error on NUL-byte.");
90   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
91     xml_failure(g_parser);
92 }
93 END_TEST
94 
START_TEST(test_u0000_char)95 START_TEST(test_u0000_char) {
96   /* test that a NUL byte (in US-ASCII data) is an error */
97   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
98                  "Parser did not report error on NUL-byte.");
99 }
100 END_TEST
101 
START_TEST(test_siphash_self)102 START_TEST(test_siphash_self) {
103   if (! sip24_valid())
104     fail("SipHash self-test failed");
105 }
106 END_TEST
107 
START_TEST(test_siphash_spec)108 START_TEST(test_siphash_spec) {
109   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
110   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
111                          "\x0a\x0b\x0c\x0d\x0e";
112   const size_t len = sizeof(message) - 1;
113   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
114   struct siphash state;
115   struct sipkey key;
116 
117   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
118                   "\x0a\x0b\x0c\x0d\x0e\x0f");
119   sip24_init(&state, &key);
120 
121   /* Cover spread across calls */
122   sip24_update(&state, message, 4);
123   sip24_update(&state, message + 4, len - 4);
124 
125   /* Cover null length */
126   sip24_update(&state, message, 0);
127 
128   if (sip24_final(&state) != expected)
129     fail("sip24_final failed spec test\n");
130 
131   /* Cover wrapper */
132   if (siphash24(message, len, &key) != expected)
133     fail("siphash24 failed spec test\n");
134 }
135 END_TEST
136 
START_TEST(test_bom_utf8)137 START_TEST(test_bom_utf8) {
138   /* This test is really just making sure we don't core on a UTF-8 BOM. */
139   const char *text = "\357\273\277<e/>";
140 
141   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
142       == XML_STATUS_ERROR)
143     xml_failure(g_parser);
144 }
145 END_TEST
146 
START_TEST(test_bom_utf16_be)147 START_TEST(test_bom_utf16_be) {
148   char text[] = "\376\377\0<\0e\0/\0>";
149 
150   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
151       == XML_STATUS_ERROR)
152     xml_failure(g_parser);
153 }
154 END_TEST
155 
START_TEST(test_bom_utf16_le)156 START_TEST(test_bom_utf16_le) {
157   char text[] = "\377\376<\0e\0/\0>\0";
158 
159   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
160       == XML_STATUS_ERROR)
161     xml_failure(g_parser);
162 }
163 END_TEST
164 
START_TEST(test_nobom_utf16_le)165 START_TEST(test_nobom_utf16_le) {
166   char text[] = " \0<\0e\0/\0>\0";
167 
168   if (g_chunkSize == 1) {
169     // TODO: with just the first byte, we can't tell the difference between
170     // UTF-16-LE and UTF-8. Avoid the failure for now.
171     return;
172   }
173 
174   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
175       == XML_STATUS_ERROR)
176     xml_failure(g_parser);
177 }
178 END_TEST
179 
START_TEST(test_hash_collision)180 START_TEST(test_hash_collision) {
181   /* For full coverage of the lookup routine, we need to ensure a
182    * hash collision even though we can only tell that we have one
183    * through breakpoint debugging or coverage statistics.  The
184    * following will cause a hash collision on machines with a 64-bit
185    * long type; others will have to experiment.  The full coverage
186    * tests invoked from qa.sh usually provide a hash collision, but
187    * not always.  This is an attempt to provide insurance.
188    */
189 #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
190   const char *text
191       = "<doc>\n"
192         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
193         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
194         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
195         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
196         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
197         "<d8>This triggers the table growth and collides with b2</d8>\n"
198         "</doc>\n";
199 
200   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
201   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
202       == XML_STATUS_ERROR)
203     xml_failure(g_parser);
204 }
205 END_TEST
206 #undef COLLIDING_HASH_SALT
207 
START_TEST(test_hash_salt_setter)208 START_TEST(test_hash_salt_setter) {
209   const uint8_t entropy[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
210                                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
211   XML_Parser parser = XML_ParserCreate(NULL);
212 
213   // NULL parser should be rejected
214   assert_true(XML_SetHashSalt16Bytes(NULL, entropy) == XML_FALSE);
215 
216   // NULL entropy should be rejected
217   assert_true(XML_SetHashSalt16Bytes(parser, NULL) == XML_FALSE);
218 
219   // Setting should be allowed more than once
220   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
221   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_TRUE);
222 
223   // But not after parsing has started
224   assert_true(XML_Parse(parser, "", 0, XML_FALSE /* isFinal */)
225               == XML_STATUS_OK);
226   assert_true(XML_SetHashSalt16Bytes(parser, entropy) == XML_FALSE);
227 
228   XML_ParserFree(parser);
229 }
230 END_TEST
231 
232 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)233 START_TEST(test_danish_latin1) {
234   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
235                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
236 #ifdef XML_UNICODE
237   const XML_Char *expected
238       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
239 #else
240   const XML_Char *expected
241       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
242 #endif
243   run_character_check(text, expected);
244 }
245 END_TEST
246 
247 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)248 START_TEST(test_french_charref_hexidecimal) {
249   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
251 #ifdef XML_UNICODE
252   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253 #else
254   const XML_Char *expected
255       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256 #endif
257   run_character_check(text, expected);
258 }
259 END_TEST
260 
START_TEST(test_french_charref_decimal)261 START_TEST(test_french_charref_decimal) {
262   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
263                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
264 #ifdef XML_UNICODE
265   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
266 #else
267   const XML_Char *expected
268       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
269 #endif
270   run_character_check(text, expected);
271 }
272 END_TEST
273 
START_TEST(test_french_latin1)274 START_TEST(test_french_latin1) {
275   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
276                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
277 #ifdef XML_UNICODE
278   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
279 #else
280   const XML_Char *expected
281       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
282 #endif
283   run_character_check(text, expected);
284 }
285 END_TEST
286 
START_TEST(test_french_utf8)287 START_TEST(test_french_utf8) {
288   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
289                      "<doc>\xC3\xA9</doc>";
290 #ifdef XML_UNICODE
291   const XML_Char *expected = XCS("\x00e9");
292 #else
293   const XML_Char *expected = XCS("\xC3\xA9");
294 #endif
295   run_character_check(text, expected);
296 }
297 END_TEST
298 
299 /* Regression test for SF bug #600479.
300    XXX There should be a test that exercises all legal XML Unicode
301    characters as PCDATA and attribute value content, and XML Name
302    characters as part of element and attribute names.
303 */
START_TEST(test_utf8_false_rejection)304 START_TEST(test_utf8_false_rejection) {
305   const char *text = "<doc>\xEF\xBA\xBF</doc>";
306 #ifdef XML_UNICODE
307   const XML_Char *expected = XCS("\xfebf");
308 #else
309   const XML_Char *expected = XCS("\xEF\xBA\xBF");
310 #endif
311   run_character_check(text, expected);
312 }
313 END_TEST
314 
315 /* Regression test for SF bug #477667.
316    This test assures that any 8-bit character followed by a 7-bit
317    character will not be mistakenly interpreted as a valid UTF-8
318    sequence.
319 */
START_TEST(test_illegal_utf8)320 START_TEST(test_illegal_utf8) {
321   char text[100];
322   int i;
323 
324   for (i = 128; i <= 255; ++i) {
325     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
326     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
327         == XML_STATUS_OK) {
328       snprintf(text, sizeof(text),
329                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
330                i);
331       fail(text);
332     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
333       xml_failure(g_parser);
334     /* Reset the parser since we use the same parser repeatedly. */
335     XML_ParserReset(g_parser, NULL);
336   }
337 }
338 END_TEST
339 
340 /* Examples, not masks: */
341 #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
342 #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
343 #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
344 #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
345 #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
346 
START_TEST(test_utf8_auto_align)347 START_TEST(test_utf8_auto_align) {
348   struct TestCase {
349     ptrdiff_t expectedMovementInChars;
350     const char *input;
351   };
352 
353   struct TestCase cases[] = {
354       {00, ""},
355 
356       {00, UTF8_LEAD_1},
357 
358       {-1, UTF8_LEAD_2},
359       {00, UTF8_LEAD_2 UTF8_FOLLOW},
360 
361       {-1, UTF8_LEAD_3},
362       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
363       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
364 
365       {-1, UTF8_LEAD_4},
366       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
367       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
368       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
369   };
370 
371   size_t i = 0;
372   bool success = true;
373   for (; i < sizeof(cases) / sizeof(*cases); i++) {
374     const char *fromLim = cases[i].input + strlen(cases[i].input);
375     const char *const fromLimInitially = fromLim;
376     ptrdiff_t actualMovementInChars;
377 
378     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
379 
380     actualMovementInChars = (fromLim - fromLimInitially);
381     if (actualMovementInChars != cases[i].expectedMovementInChars) {
382       size_t j = 0;
383       success = false;
384       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
385              ", actually moved by %2d chars: \"",
386              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
387              (int)actualMovementInChars);
388       for (; j < strlen(cases[i].input); j++) {
389         printf("\\x%02x", (unsigned char)cases[i].input[j]);
390       }
391       printf("\"\n");
392     }
393   }
394 
395   if (! success) {
396     fail("UTF-8 auto-alignment is not bullet-proof\n");
397   }
398 }
399 END_TEST
400 
START_TEST(test_utf16)401 START_TEST(test_utf16) {
402   /* <?xml version="1.0" encoding="UTF-16"?>
403    *  <doc a='123'>some {A} text</doc>
404    *
405    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
406    */
407   char text[]
408       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
409         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
410         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
411         "\000'\000?\000>\000\n"
412         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
413         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
414         "<\000/\000d\000o\000c\000>";
415 #ifdef XML_UNICODE
416   const XML_Char *expected = XCS("some \xff21 text");
417 #else
418   const XML_Char *expected = XCS("some \357\274\241 text");
419 #endif
420   CharData storage;
421 
422   CharData_Init(&storage);
423   XML_SetUserData(g_parser, &storage);
424   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
425   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
426       == XML_STATUS_ERROR)
427     xml_failure(g_parser);
428   CharData_CheckXMLChars(&storage, expected);
429 }
430 END_TEST
431 
START_TEST(test_utf16_le_epilog_newline)432 START_TEST(test_utf16_le_epilog_newline) {
433   unsigned int first_chunk_bytes = 17;
434   char text[] = "\xFF\xFE"                  /* BOM */
435                 "<\000e\000/\000>\000"      /* document element */
436                 "\r\000\n\000\r\000\n\000"; /* epilog */
437 
438   if (first_chunk_bytes >= sizeof(text) - 1)
439     fail("bad value of first_chunk_bytes");
440   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
441       == XML_STATUS_ERROR)
442     xml_failure(g_parser);
443   else {
444     enum XML_Status rc;
445     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
446                                  (int)(sizeof(text) - first_chunk_bytes - 1),
447                                  XML_TRUE);
448     if (rc == XML_STATUS_ERROR)
449       xml_failure(g_parser);
450   }
451 }
452 END_TEST
453 
454 /* Test that an outright lie in the encoding is faulted */
START_TEST(test_not_utf16)455 START_TEST(test_not_utf16) {
456   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
457                      "<doc>Hi</doc>";
458 
459   /* Use a handler to provoke the appropriate code paths */
460   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
461   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
462                  "UTF-16 declared in UTF-8 not faulted");
463 }
464 END_TEST
465 
466 /* Test that an unknown encoding is rejected */
START_TEST(test_bad_encoding)467 START_TEST(test_bad_encoding) {
468   const char *text = "<doc>Hi</doc>";
469 
470   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
471     fail("XML_SetEncoding failed");
472   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
473                  "Unknown encoding not faulted");
474 }
475 END_TEST
476 
477 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)478 START_TEST(test_latin1_umlauts) {
479   const char *text
480       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
481         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
482         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
483 #ifdef XML_UNICODE
484   /* Expected results in UTF-16 */
485   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
486       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
487 #else
488   /* Expected results in UTF-8 */
489   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
490       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
491 #endif
492 
493   run_character_check(text, expected);
494   XML_ParserReset(g_parser, NULL);
495   run_attribute_check(text, expected);
496   /* Repeat with a default handler */
497   XML_ParserReset(g_parser, NULL);
498   XML_SetDefaultHandler(g_parser, dummy_default_handler);
499   run_character_check(text, expected);
500   XML_ParserReset(g_parser, NULL);
501   XML_SetDefaultHandler(g_parser, dummy_default_handler);
502   run_attribute_check(text, expected);
503 }
504 END_TEST
505 
506 /* Test that an element name with a 4-byte UTF-8 character is rejected */
START_TEST(test_long_utf8_character)507 START_TEST(test_long_utf8_character) {
508   const char *text
509       = "<?xml version='1.0' encoding='utf-8'?>\n"
510         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
511         "<do\xf0\x90\x80\x80/>";
512   expect_failure(text, XML_ERROR_INVALID_TOKEN,
513                  "4-byte UTF-8 character in element name not faulted");
514 }
515 END_TEST
516 
517 /* Test that a long latin-1 attribute (too long to convert in one go)
518  * is correctly converted
519  */
START_TEST(test_long_latin1_attribute)520 START_TEST(test_long_latin1_attribute) {
521   const char *text
522       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
523         "<doc att='"
524         /* 64 characters per line */
525         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
526         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
527         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
528         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
529         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
530         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
531         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
532         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
533         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
534         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
535         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
536         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
537         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
538         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
539         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
540         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
541         /* Last character splits across a buffer boundary */
542         "\xe4'>\n</doc>";
543 
544   const XML_Char *expected =
545       /* 64 characters per line */
546       /* clang-format off */
547         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
548         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
549         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
550         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
551         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
552         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
553         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
554         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
555         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
556         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
557         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
558         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
559         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
560         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
561         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
562         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
563   /* clang-format on */
564 #ifdef XML_UNICODE
565                                                   XCS("\x00e4");
566 #else
567                                                   XCS("\xc3\xa4");
568 #endif
569 
570   run_attribute_check(text, expected);
571 }
572 END_TEST
573 
574 /* Test that a long ASCII attribute (too long to convert in one go)
575  * is correctly converted
576  */
START_TEST(test_long_ascii_attribute)577 START_TEST(test_long_ascii_attribute) {
578   const char *text
579       = "<?xml version='1.0' encoding='us-ascii'?>\n"
580         "<doc att='"
581         /* 64 characters per line */
582         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
583         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
584         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
585         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
586         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
587         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
588         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
589         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
590         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
591         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
592         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
593         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
594         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
595         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
596         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
597         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
598         "01234'>\n</doc>";
599   const XML_Char *expected =
600       /* 64 characters per line */
601       /* clang-format off */
602         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
603         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
604         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
605         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
606         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
607         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
608         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
609         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
610         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
611         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
612         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
613         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
614         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
615         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
616         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
617         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
618         XCS("01234");
619   /* clang-format on */
620 
621   run_attribute_check(text, expected);
622 }
623 END_TEST
624 
625 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)626 START_TEST(test_line_number_after_parse) {
627   const char *text = "<tag>\n"
628                      "\n"
629                      "\n</tag>";
630   XML_Size lineno;
631 
632   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
633       == XML_STATUS_ERROR)
634     xml_failure(g_parser);
635   lineno = XML_GetCurrentLineNumber(g_parser);
636   if (lineno != 4) {
637     char buffer[100];
638     snprintf(buffer, sizeof(buffer),
639              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
640     fail(buffer);
641   }
642 }
643 END_TEST
644 
645 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)646 START_TEST(test_column_number_after_parse) {
647   const char *text = "<tag></tag>";
648   XML_Size colno;
649 
650   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
651       == XML_STATUS_ERROR)
652     xml_failure(g_parser);
653   colno = XML_GetCurrentColumnNumber(g_parser);
654   if (colno != 11) {
655     char buffer[100];
656     snprintf(buffer, sizeof(buffer),
657              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
658     fail(buffer);
659   }
660 }
661 END_TEST
662 
663 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)664 START_TEST(test_line_and_column_numbers_inside_handlers) {
665   const char *text = "<a>\n"      /* Unix end-of-line */
666                      "  <b>\r\n"  /* Windows end-of-line */
667                      "    <c/>\r" /* Mac OS end-of-line */
668                      "  </b>\n"
669                      "  <d>\n"
670                      "    <f/>\n"
671                      "  </d>\n"
672                      "</a>";
673   const StructDataEntry expected[]
674       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
675          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
676          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
677          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
678          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
679   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
680   StructData storage;
681 
682   StructData_Init(&storage);
683   XML_SetUserData(g_parser, &storage);
684   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
685   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
686   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
687       == XML_STATUS_ERROR)
688     xml_failure(g_parser);
689 
690   StructData_CheckItems(&storage, expected, expected_count);
691   StructData_Dispose(&storage);
692 }
693 END_TEST
694 
695 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)696 START_TEST(test_line_number_after_error) {
697   const char *text = "<a>\n"
698                      "  <b>\n"
699                      "  </a>"; /* missing </b> */
700   XML_Size lineno;
701   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
702       != XML_STATUS_ERROR)
703     fail("Expected a parse error");
704 
705   lineno = XML_GetCurrentLineNumber(g_parser);
706   if (lineno != 3) {
707     char buffer[100];
708     snprintf(buffer, sizeof(buffer),
709              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
710     fail(buffer);
711   }
712 }
713 END_TEST
714 
715 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)716 START_TEST(test_column_number_after_error) {
717   const char *text = "<a>\n"
718                      "  <b>\n"
719                      "  </a>"; /* missing </b> */
720   XML_Size colno;
721   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
722       != XML_STATUS_ERROR)
723     fail("Expected a parse error");
724 
725   colno = XML_GetCurrentColumnNumber(g_parser);
726   if (colno != 4) {
727     char buffer[100];
728     snprintf(buffer, sizeof(buffer),
729              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
730     fail(buffer);
731   }
732 }
733 END_TEST
734 
735 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)736 START_TEST(test_really_long_lines) {
737   /* This parses an input line longer than INIT_DATA_BUF_SIZE
738      characters long (defined to be 1024 in xmlparse.c).  We take a
739      really cheesy approach to building the input buffer, because
740      this avoids writing bugs in buffer-filling code.
741   */
742   const char *text
743       = "<e>"
744         /* 64 chars */
745         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
746         /* until we have at least 1024 characters on the line: */
747         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
748         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
749         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
750         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
751         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
752         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
753         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
754         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
755         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
757         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763         "</e>";
764   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
765       == XML_STATUS_ERROR)
766     xml_failure(g_parser);
767 }
768 END_TEST
769 
770 /* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)771 START_TEST(test_really_long_encoded_lines) {
772   /* As above, except that we want to provoke an output buffer
773    * overflow with a non-trivial encoding.  For this we need to pass
774    * the whole cdata in one go, not byte-by-byte.
775    */
776   void *buffer;
777   const char *text
778       = "<?xml version='1.0' encoding='iso-8859-1'?>"
779         "<e>"
780         /* 64 chars */
781         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
782         /* until we have at least 1024 characters on the line: */
783         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
784         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
785         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
786         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
787         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
788         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
789         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
790         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
791         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
792         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
793         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
794         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
795         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
796         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
797         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
798         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
799         "</e>";
800   int parse_len = (int)strlen(text);
801 
802   /* Need a cdata handler to provoke the code path we want to test */
803   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
804   buffer = XML_GetBuffer(g_parser, parse_len);
805   if (buffer == NULL)
806     fail("Could not allocate parse buffer");
807   assert(buffer != NULL);
808   memcpy(buffer, text, parse_len);
809   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
810     xml_failure(g_parser);
811 }
812 END_TEST
813 
814 /*
815  * Element event tests.
816  */
817 
START_TEST(test_end_element_events)818 START_TEST(test_end_element_events) {
819   const char *text = "<a><b><c/></b><d><f/></d></a>";
820   const XML_Char *expected = XCS("/c/b/f/d/a");
821   CharData storage;
822 
823   CharData_Init(&storage);
824   XML_SetUserData(g_parser, &storage);
825   XML_SetEndElementHandler(g_parser, end_element_event_handler);
826   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
827       == XML_STATUS_ERROR)
828     xml_failure(g_parser);
829   CharData_CheckXMLChars(&storage, expected);
830 }
831 END_TEST
832 
833 /*
834  * Attribute tests.
835  */
836 
837 /* Helper used by the following tests; this checks any "attr" and "refs"
838    attributes to make sure whitespace has been normalized.
839 
840    Return true if whitespace has been normalized in a string, using
841    the rules for attribute value normalization.  The 'is_cdata' flag
842    is needed since CDATA attributes don't need to have multiple
843    whitespace characters collapsed to a single space, while other
844    attribute data types do.  (Section 3.3.3 of the recommendation.)
845 */
846 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)847 is_whitespace_normalized(const XML_Char *s, int is_cdata) {
848   int blanks = 0;
849   int at_start = 1;
850   while (*s) {
851     if (*s == XCS(' '))
852       ++blanks;
853     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
854       return 0;
855     else {
856       if (at_start) {
857         at_start = 0;
858         if (blanks && ! is_cdata)
859           /* illegal leading blanks */
860           return 0;
861       } else if (blanks > 1 && ! is_cdata)
862         return 0;
863       blanks = 0;
864     }
865     ++s;
866   }
867   if (blanks && ! is_cdata)
868     return 0;
869   return 1;
870 }
871 
872 /* Check the attribute whitespace checker: */
START_TEST(test_helper_is_whitespace_normalized)873 START_TEST(test_helper_is_whitespace_normalized) {
874   assert(is_whitespace_normalized(XCS("abc"), 0));
875   assert(is_whitespace_normalized(XCS("abc"), 1));
876   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
877   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
878   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
879   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
880   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
881   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
882   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
883   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
884   assert(! is_whitespace_normalized(XCS(" "), 0));
885   assert(is_whitespace_normalized(XCS(" "), 1));
886   assert(! is_whitespace_normalized(XCS("\t"), 0));
887   assert(! is_whitespace_normalized(XCS("\t"), 1));
888   assert(! is_whitespace_normalized(XCS("\n"), 0));
889   assert(! is_whitespace_normalized(XCS("\n"), 1));
890   assert(! is_whitespace_normalized(XCS("\r"), 0));
891   assert(! is_whitespace_normalized(XCS("\r"), 1));
892   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
893 }
894 END_TEST
895 
896 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)897 check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
898                                           const XML_Char **atts) {
899   int i;
900   UNUSED_P(userData);
901   UNUSED_P(name);
902   for (i = 0; atts[i] != NULL; i += 2) {
903     const XML_Char *attrname = atts[i];
904     const XML_Char *value = atts[i + 1];
905     if (xcstrcmp(XCS("attr"), attrname) == 0
906         || xcstrcmp(XCS("ents"), attrname) == 0
907         || xcstrcmp(XCS("refs"), attrname) == 0) {
908       if (! is_whitespace_normalized(value, 0)) {
909         char buffer[256];
910         snprintf(buffer, sizeof(buffer),
911                  "attribute value not normalized: %" XML_FMT_STR
912                  "='%" XML_FMT_STR "'",
913                  attrname, value);
914         fail(buffer);
915       }
916     }
917   }
918 }
919 
START_TEST(test_attr_whitespace_normalization)920 START_TEST(test_attr_whitespace_normalization) {
921   const char *text
922       = "<!DOCTYPE doc [\n"
923         "  <!ATTLIST doc\n"
924         "            attr NMTOKENS #REQUIRED\n"
925         "            ents ENTITIES #REQUIRED\n"
926         "            refs IDREFS   #REQUIRED>\n"
927         "]>\n"
928         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
929         "     ents=' ent-1   \t\r\n"
930         "            ent-2  ' >\n"
931         "  <e id='id-1'/>\n"
932         "  <e id='id-2'/>\n"
933         "</doc>";
934 
935   XML_SetStartElementHandler(g_parser,
936                              check_attr_contains_normalized_whitespace);
937   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
938       == XML_STATUS_ERROR)
939     xml_failure(g_parser);
940 }
941 END_TEST
942 
943 /*
944  * XML declaration tests.
945  */
946 
START_TEST(test_xmldecl_misplaced)947 START_TEST(test_xmldecl_misplaced) {
948   expect_failure("\n"
949                  "<?xml version='1.0'?>\n"
950                  "<a/>",
951                  XML_ERROR_MISPLACED_XML_PI,
952                  "failed to report misplaced XML declaration");
953 }
954 END_TEST
955 
START_TEST(test_xmldecl_invalid)956 START_TEST(test_xmldecl_invalid) {
957   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
958                  "Failed to report invalid XML declaration");
959 }
960 END_TEST
961 
START_TEST(test_xmldecl_missing_attr)962 START_TEST(test_xmldecl_missing_attr) {
963   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
964                  "Failed to report missing XML declaration attribute");
965 }
966 END_TEST
967 
START_TEST(test_xmldecl_missing_value)968 START_TEST(test_xmldecl_missing_value) {
969   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
970                  "<doc/>",
971                  XML_ERROR_XML_DECL,
972                  "Failed to report missing attribute value");
973 }
974 END_TEST
975 
976 /* Regression test for SF bug #584832. */
START_TEST(test_unknown_encoding_internal_entity)977 START_TEST(test_unknown_encoding_internal_entity) {
978   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
979                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
980                      "<test a='&foo;'/>";
981 
982   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
983   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
984       == XML_STATUS_ERROR)
985     xml_failure(g_parser);
986 }
987 END_TEST
988 
989 /* Test unrecognised encoding handler */
START_TEST(test_unrecognised_encoding_internal_entity)990 START_TEST(test_unrecognised_encoding_internal_entity) {
991   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
992                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
993                      "<test a='&foo;'/>";
994 
995   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
996   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
997       != XML_STATUS_ERROR)
998     fail("Unrecognised encoding not rejected");
999 }
1000 END_TEST
1001 
1002 /* Regression test for SF bug #620106. */
START_TEST(test_ext_entity_set_encoding)1003 START_TEST(test_ext_entity_set_encoding) {
1004   const char *text = "<!DOCTYPE doc [\n"
1005                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1006                      "]>\n"
1007                      "<doc>&en;</doc>";
1008   ExtTest test_data
1009       = {/* This text says it's an unsupported encoding, but it's really
1010             UTF-8, which we tell Expat using XML_SetEncoding().
1011          */
1012          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
1013 #ifdef XML_UNICODE
1014   const XML_Char *expected = XCS("\x00e9");
1015 #else
1016   const XML_Char *expected = XCS("\xc3\xa9");
1017 #endif
1018 
1019   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1020   run_ext_character_check(text, &test_data, expected);
1021 }
1022 END_TEST
1023 
1024 /* Test external entities with no handler */
START_TEST(test_ext_entity_no_handler)1025 START_TEST(test_ext_entity_no_handler) {
1026   const char *text = "<!DOCTYPE doc [\n"
1027                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1028                      "]>\n"
1029                      "<doc>&en;</doc>";
1030 
1031   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1032   run_character_check(text, XCS(""));
1033 }
1034 END_TEST
1035 
1036 /* Test UTF-8 BOM is accepted */
START_TEST(test_ext_entity_set_bom)1037 START_TEST(test_ext_entity_set_bom) {
1038   const char *text = "<!DOCTYPE doc [\n"
1039                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1040                      "]>\n"
1041                      "<doc>&en;</doc>";
1042   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1043                        "<?xml encoding='iso-8859-3'?>"
1044                        "\xC3\xA9",
1045                        XCS("utf-8"), NULL};
1046 #ifdef XML_UNICODE
1047   const XML_Char *expected = XCS("\x00e9");
1048 #else
1049   const XML_Char *expected = XCS("\xc3\xa9");
1050 #endif
1051 
1052   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1053   run_ext_character_check(text, &test_data, expected);
1054 }
1055 END_TEST
1056 
1057 /* Test that bad encodings are faulted */
START_TEST(test_ext_entity_bad_encoding)1058 START_TEST(test_ext_entity_bad_encoding) {
1059   const char *text = "<!DOCTYPE doc [\n"
1060                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1061                      "]>\n"
1062                      "<doc>&en;</doc>";
1063   ExtFaults fault
1064       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1065          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1066 
1067   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1068   XML_SetUserData(g_parser, &fault);
1069   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1070                  "Bad encoding should not have been accepted");
1071 }
1072 END_TEST
1073 
1074 /* Try handing an invalid encoding to an external entity parser */
START_TEST(test_ext_entity_bad_encoding_2)1075 START_TEST(test_ext_entity_bad_encoding_2) {
1076   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1077                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1078                      "<doc>&entity;</doc>";
1079   ExtFaults fault
1080       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1081          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1082 
1083   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1084   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1085   XML_SetUserData(g_parser, &fault);
1086   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1087                  "Bad encoding not faulted in external entity handler");
1088 }
1089 END_TEST
1090 
1091 /* Test that no error is reported for unknown entities if we don't
1092    read an external subset.  This was fixed in Expat 1.95.5.
1093 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)1094 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1095   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1096                      "<doc>&entity;</doc>";
1097 
1098   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1099       == XML_STATUS_ERROR)
1100     xml_failure(g_parser);
1101 }
1102 END_TEST
1103 
1104 /* Test that an error is reported for unknown entities if we don't
1105    have an external subset.
1106 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)1107 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1108   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1109                  "Parser did not report undefined entity w/out a DTD.");
1110 }
1111 END_TEST
1112 
1113 /* Test that an error is reported for unknown entities if we don't
1114    read an external subset, but have been declared standalone.
1115 */
START_TEST(test_wfc_undeclared_entity_standalone)1116 START_TEST(test_wfc_undeclared_entity_standalone) {
1117   const char *text
1118       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1119         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1120         "<doc>&entity;</doc>";
1121 
1122   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1123                  "Parser did not report undefined entity (standalone).");
1124 }
1125 END_TEST
1126 
1127 /* Test that an error is reported for unknown entities if we have read
1128    an external subset, and standalone is true.
1129 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)1130 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1131   const char *text
1132       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1133         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1134         "<doc>&entity;</doc>";
1135   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1136 
1137   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1138   XML_SetUserData(g_parser, &test_data);
1139   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1140   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1141                  "Parser did not report undefined entity (external DTD).");
1142 }
1143 END_TEST
1144 
1145 /* Test that external entity handling is not done if the parsing flag
1146  * is set to UNLESS_STANDALONE
1147  */
START_TEST(test_entity_with_external_subset_unless_standalone)1148 START_TEST(test_entity_with_external_subset_unless_standalone) {
1149   const char *text
1150       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1151         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1152         "<doc>&entity;</doc>";
1153   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1154 
1155   XML_SetParamEntityParsing(g_parser,
1156                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1157   XML_SetUserData(g_parser, &test_data);
1158   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1159   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1160                  "Parser did not report undefined entity");
1161 }
1162 END_TEST
1163 
1164 /* Test that no error is reported for unknown entities if we have read
1165    an external subset, and standalone is false.
1166 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)1167 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1168   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1169                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1170                      "<doc>&entity;</doc>";
1171   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1172 
1173   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1174   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1175   run_ext_character_check(text, &test_data, XCS(""));
1176 }
1177 END_TEST
1178 
1179 /* Test that an error is reported if our NotStandalone handler fails */
START_TEST(test_not_standalone_handler_reject)1180 START_TEST(test_not_standalone_handler_reject) {
1181   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1182                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1183                      "<doc>&entity;</doc>";
1184   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1185 
1186   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1187   XML_SetUserData(g_parser, &test_data);
1188   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1189   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1190   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1191                  "NotStandalone handler failed to reject");
1192 
1193   /* Try again but without external entity handling */
1194   XML_ParserReset(g_parser, NULL);
1195   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1196   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1197                  "NotStandalone handler failed to reject");
1198 }
1199 END_TEST
1200 
1201 /* Test that no error is reported if our NotStandalone handler succeeds */
START_TEST(test_not_standalone_handler_accept)1202 START_TEST(test_not_standalone_handler_accept) {
1203   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1204                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1205                      "<doc>&entity;</doc>";
1206   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1207 
1208   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1209   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1210   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1211   run_ext_character_check(text, &test_data, XCS(""));
1212 
1213   /* Repeat without the external entity handler */
1214   XML_ParserReset(g_parser, NULL);
1215   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1216   run_character_check(text, XCS(""));
1217 }
1218 END_TEST
1219 
START_TEST(test_entity_start_tag_level_greater_than_one)1220 START_TEST(test_entity_start_tag_level_greater_than_one) {
1221   const char *const text = "<!DOCTYPE t1 [\n"
1222                            "  <!ENTITY e1 'hello'>\n"
1223                            "]>\n"
1224                            "<t1>\n"
1225                            "  <t2>&e1;</t2>\n"
1226                            "</t1>\n";
1227 
1228   XML_Parser parser = XML_ParserCreate(NULL);
1229   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
1230                                       /*isFinal*/ XML_TRUE)
1231               == XML_STATUS_OK);
1232   XML_ParserFree(parser);
1233 }
1234 END_TEST
1235 
START_TEST(test_wfc_no_recursive_entity_refs)1236 START_TEST(test_wfc_no_recursive_entity_refs) {
1237   const char *text = "<!DOCTYPE doc [\n"
1238                      "  <!ENTITY entity '&#38;entity;'>\n"
1239                      "]>\n"
1240                      "<doc>&entity;</doc>";
1241 
1242   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1243                  "Parser did not report recursive entity reference.");
1244 }
1245 END_TEST
1246 
START_TEST(test_no_indirectly_recursive_entity_refs)1247 START_TEST(test_no_indirectly_recursive_entity_refs) {
1248   struct TestCase {
1249     const char *doc;
1250     bool usesParameterEntities;
1251   };
1252 
1253   const struct TestCase cases[] = {
1254       // general entity + character data
1255       {"<!DOCTYPE a [\n"
1256        "  <!ENTITY e1 '&e2;'>\n"
1257        "  <!ENTITY e2 '&e1;'>\n"
1258        "]><a>&e2;</a>\n",
1259        false},
1260 
1261       // general entity + attribute value
1262       {"<!DOCTYPE a [\n"
1263        "  <!ENTITY e1 '&e2;'>\n"
1264        "  <!ENTITY e2 '&e1;'>\n"
1265        "]><a k1='&e2;' />\n",
1266        false},
1267 
1268       // parameter entity
1269       {"<!DOCTYPE doc [\n"
1270        "  <!ENTITY % p1 '&#37;p2;'>\n"
1271        "  <!ENTITY % p2 '&#37;p1;'>\n"
1272        "  <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
1273        "  %define_g;\n"
1274        "]>\n"
1275        "<doc/>\n",
1276        true},
1277   };
1278   const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
1279 
1280   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1281     for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
1282          j++) {
1283       const XML_Bool reset_wanted = reset_or_not[j];
1284       const char *const doc = cases[i].doc;
1285       const bool usesParameterEntities = cases[i].usesParameterEntities;
1286 
1287       set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
1288 
1289 #ifdef XML_DTD // both GE and DTD
1290       const bool rejection_expected = true;
1291 #elif XML_GE == 1 // GE but not DTD
1292       const bool rejection_expected = ! usesParameterEntities;
1293 #else             // neither DTD nor GE
1294       const bool rejection_expected = false;
1295 #endif
1296 
1297       XML_Parser parser = XML_ParserCreate(NULL);
1298 
1299 #ifdef XML_DTD
1300       if (usesParameterEntities) {
1301         assert_true(
1302             XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
1303             == 1);
1304       }
1305 #else
1306       UNUSED_P(usesParameterEntities);
1307 #endif // XML_DTD
1308 
1309       const enum XML_Status status
1310           = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
1311                                     /*isFinal*/ XML_TRUE);
1312 
1313       if (rejection_expected) {
1314         assert_true(status == XML_STATUS_ERROR);
1315         assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
1316       } else {
1317         assert_true(status == XML_STATUS_OK);
1318       }
1319 
1320       if (reset_wanted) {
1321         // This covers free'ing of (eventually) all three open entity lists by
1322         // XML_ParserReset.
1323         XML_ParserReset(parser, NULL);
1324       }
1325 
1326       // This covers free'ing of (eventually) all three open entity lists by
1327       // XML_ParserFree (unless XML_ParserReset has already done that above).
1328       XML_ParserFree(parser);
1329     }
1330   }
1331 }
1332 END_TEST
1333 
START_TEST(test_recursive_external_parameter_entity_2)1334 START_TEST(test_recursive_external_parameter_entity_2) {
1335   struct TestCase {
1336     const char *doc;
1337     enum XML_Status expectedStatus;
1338   };
1339 
1340   struct TestCase cases[] = {
1341       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1342       {"<!ENTITY % p1 '%p1;'>"
1343        "<!ENTITY % p1 'first declaration wins'>",
1344        XML_STATUS_ERROR},
1345       {"<!ENTITY % p1 'first declaration wins'>"
1346        "<!ENTITY % p1 '%p1;'>",
1347        XML_STATUS_OK},
1348       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1349   };
1350 
1351   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1352     const char *const doc = cases[i].doc;
1353     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1354     set_subtest("%s", doc);
1355 
1356     XML_Parser parser = XML_ParserCreate(NULL);
1357     assert_true(parser != NULL);
1358 
1359     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1360     assert_true(ext_parser != NULL);
1361 
1362     const enum XML_Status actualStatus
1363         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1364 
1365     assert_true(actualStatus == expectedStatus);
1366     if (actualStatus != XML_STATUS_OK) {
1367       assert_true(XML_GetErrorCode(ext_parser)
1368                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1369     }
1370 
1371     XML_ParserFree(ext_parser);
1372     XML_ParserFree(parser);
1373   }
1374 }
1375 END_TEST
1376 
1377 /* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)1378 START_TEST(test_ext_entity_invalid_parse) {
1379   const char *text = "<!DOCTYPE doc [\n"
1380                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1381                      "]>\n"
1382                      "<doc>&en;</doc>";
1383   const ExtFaults faults[]
1384       = {{"<", "Incomplete element declaration not faulted", NULL,
1385           XML_ERROR_UNCLOSED_TOKEN},
1386          {"<\xe2\x82", /* First two bytes of a three-byte char */
1387           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1388          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1389           XML_ERROR_PARTIAL_CHAR},
1390          {NULL, NULL, NULL, XML_ERROR_NONE}};
1391   const ExtFaults *fault = faults;
1392 
1393   for (; fault->parse_text != NULL; fault++) {
1394     set_subtest("\"%s\"", fault->parse_text);
1395     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1396     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1397     XML_SetUserData(g_parser, (void *)fault);
1398     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1399                    "Parser did not report external entity error");
1400     XML_ParserReset(g_parser, NULL);
1401   }
1402 }
1403 END_TEST
1404 
1405 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)1406 START_TEST(test_dtd_default_handling) {
1407   const char *text = "<!DOCTYPE doc [\n"
1408                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1409                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1410                      "<!ELEMENT doc EMPTY>\n"
1411                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1412                      "<?pi in dtd?>\n"
1413                      "<!--comment in dtd-->\n"
1414                      "]><doc/>";
1415 
1416   XML_SetDefaultHandler(g_parser, accumulate_characters);
1417   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1418   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1419   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1420   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1421   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1422   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1423   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1424   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1425   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1426   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1427   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1428 }
1429 END_TEST
1430 
1431 /* Test handling of attribute declarations */
START_TEST(test_dtd_attr_handling)1432 START_TEST(test_dtd_attr_handling) {
1433   const char *prolog = "<!DOCTYPE doc [\n"
1434                        "<!ELEMENT doc EMPTY>\n";
1435   AttTest attr_data[]
1436       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1437           "]>"
1438           "<doc a='two'/>",
1439           XCS("doc"), XCS("a"),
1440           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1441           NULL, XML_TRUE},
1442          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1443           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1444           "]>"
1445           "<doc/>",
1446           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1447          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1448           "]>"
1449           "<doc/>",
1450           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1451          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1452           "]>"
1453           "<doc/>",
1454           XCS("doc"), XCS("a"), XCS("CDATA"),
1455 #ifdef XML_UNICODE
1456           XCS("\x06f2"),
1457 #else
1458           XCS("\xdb\xb2"),
1459 #endif
1460           XML_FALSE},
1461          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1462   AttTest *test;
1463 
1464   for (test = attr_data; test->definition != NULL; test++) {
1465     set_subtest("%s", test->definition);
1466     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1467     XML_SetUserData(g_parser, test);
1468     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1469                                 XML_FALSE)
1470         == XML_STATUS_ERROR)
1471       xml_failure(g_parser);
1472     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1473                                 (int)strlen(test->definition), XML_TRUE)
1474         == XML_STATUS_ERROR)
1475       xml_failure(g_parser);
1476     XML_ParserReset(g_parser, NULL);
1477   }
1478 }
1479 END_TEST
1480 
/* See related SF bug #673791.
   When namespace processing is enabled, binding a prefix to an empty
   namespace URI (xmlns:prefix='') is not allowed; this test ensures
   that it *is* allowed when namespace processing is not enabled.
   (See Namespaces in XML, section 2.)
*/
START_TEST(test_empty_ns_without_namespaces)1487 START_TEST(test_empty_ns_without_namespaces) {
1488   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1489                      "  <e xmlns:prefix=''/>\n"
1490                      "</doc>";
1491 
1492   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1493       == XML_STATUS_ERROR)
1494     xml_failure(g_parser);
1495 }
1496 END_TEST
1497 
1498 /* Regression test for SF bug #824420.
1499    Checks that an xmlns:prefix attribute set in an attribute's default
1500    value isn't misinterpreted.
1501 */
START_TEST(test_ns_in_attribute_default_without_namespaces)1502 START_TEST(test_ns_in_attribute_default_without_namespaces) {
1503   const char *text = "<!DOCTYPE e:element [\n"
1504                      "  <!ATTLIST e:element\n"
1505                      "    xmlns:e CDATA 'http://example.org/'>\n"
1506                      "      ]>\n"
1507                      "<e:element/>";
1508 
1509   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1510       == XML_STATUS_ERROR)
1511     xml_failure(g_parser);
1512 }
1513 END_TEST
1514 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1517 START_TEST(test_stop_parser_between_char_data_calls) {
1518   /* The sample data must be big enough that there are two calls to
1519      the character data handler from within the inner "for" loop of
1520      the XML_TOK_DATA_CHARS case in doContent(), and the character
1521      handler must stop the parser and clear the character data
1522      handler.
1523   */
1524   const char *text = long_character_data_text;
1525 
1526   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1527   g_resumable = XML_FALSE;
1528   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1529       != XML_STATUS_ERROR)
1530     xml_failure(g_parser);
1531   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1532     xml_failure(g_parser);
1533 }
1534 END_TEST
1535 
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1538 START_TEST(test_suspend_parser_between_char_data_calls) {
1539   /* The sample data must be big enough that there are two calls to
1540      the character data handler from within the inner "for" loop of
1541      the XML_TOK_DATA_CHARS case in doContent(), and the character
1542      handler must stop the parser and clear the character data
1543      handler.
1544   */
1545   const char *text = long_character_data_text;
1546 
1547   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1548   g_resumable = XML_TRUE;
1549   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1550   // we won't know exactly how much input we actually managed to give Expat.
1551   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1552       != XML_STATUS_SUSPENDED)
1553     xml_failure(g_parser);
1554   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1555     xml_failure(g_parser);
1556   /* Try parsing directly */
1557   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1558       != XML_STATUS_ERROR)
1559     fail("Attempt to continue parse while suspended not faulted");
1560   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1561     fail("Suspended parse not faulted with correct error");
1562 }
1563 END_TEST
1564 
1565 /* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)1566 START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1567   const char *text = long_character_data_text;
1568 
1569   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1570   g_resumable = XML_FALSE;
1571   g_abortable = XML_FALSE;
1572   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1573       != XML_STATUS_ERROR)
1574     fail("Failed to double-stop parser");
1575 
1576   XML_ParserReset(g_parser, NULL);
1577   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1578   g_resumable = XML_TRUE;
1579   g_abortable = XML_FALSE;
1580   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1581   // we won't know exactly how much input we actually managed to give Expat.
1582   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
1583       != XML_STATUS_SUSPENDED)
1584     fail("Failed to double-suspend parser");
1585 
1586   XML_ParserReset(g_parser, NULL);
1587   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1588   g_resumable = XML_TRUE;
1589   g_abortable = XML_TRUE;
1590   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1591       != XML_STATUS_ERROR)
1592     fail("Failed to suspend-abort parser");
1593 }
1594 END_TEST
1595 
START_TEST(test_good_cdata_ascii)1596 START_TEST(test_good_cdata_ascii) {
1597   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1598   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1599 
1600   CharData storage;
1601   CharData_Init(&storage);
1602   XML_SetUserData(g_parser, &storage);
1603   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1604   /* Add start and end handlers for coverage */
1605   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1606   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1607 
1608   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1609       == XML_STATUS_ERROR)
1610     xml_failure(g_parser);
1611   CharData_CheckXMLChars(&storage, expected);
1612 
1613   /* Try again, this time with a default handler */
1614   XML_ParserReset(g_parser, NULL);
1615   CharData_Init(&storage);
1616   XML_SetUserData(g_parser, &storage);
1617   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1618   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1619 
1620   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1621       == XML_STATUS_ERROR)
1622     xml_failure(g_parser);
1623   CharData_CheckXMLChars(&storage, expected);
1624 }
1625 END_TEST
1626 
START_TEST(test_good_cdata_utf16)1627 START_TEST(test_good_cdata_utf16) {
1628   /* Test data is:
1629    *   <?xml version='1.0' encoding='utf-16'?>
1630    *   <a><![CDATA[hello]]></a>
1631    */
1632   const char text[]
1633       = "\0<\0?\0x\0m\0l\0"
1634         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1635         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1636         "1\0"
1637         "6\0'"
1638         "\0?\0>\0\n"
1639         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1640   const XML_Char *expected = XCS("hello");
1641 
1642   CharData storage;
1643   CharData_Init(&storage);
1644   XML_SetUserData(g_parser, &storage);
1645   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1646 
1647   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1648       == XML_STATUS_ERROR)
1649     xml_failure(g_parser);
1650   CharData_CheckXMLChars(&storage, expected);
1651 }
1652 END_TEST
1653 
START_TEST(test_good_cdata_utf16_le)1654 START_TEST(test_good_cdata_utf16_le) {
1655   /* Test data is:
1656    *   <?xml version='1.0' encoding='utf-16'?>
1657    *   <a><![CDATA[hello]]></a>
1658    */
1659   const char text[]
1660       = "<\0?\0x\0m\0l\0"
1661         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1662         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1663         "1\0"
1664         "6\0'"
1665         "\0?\0>\0\n"
1666         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1667   const XML_Char *expected = XCS("hello");
1668 
1669   CharData storage;
1670   CharData_Init(&storage);
1671   XML_SetUserData(g_parser, &storage);
1672   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1673 
1674   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1675       == XML_STATUS_ERROR)
1676     xml_failure(g_parser);
1677   CharData_CheckXMLChars(&storage, expected);
1678 }
1679 END_TEST
1680 
1681 /* Test UTF16 conversion of a long cdata string */
1682 
1683 /* 16 characters: handy macro to reduce visual clutter */
1684 #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1685 
START_TEST(test_long_cdata_utf16)1686 START_TEST(test_long_cdata_utf16) {
1687   /* Test data is:
1688    * <?xlm version='1.0' encoding='utf-16'?>
1689    * <a><![CDATA[
1690    * ABCDEFGHIJKLMNOP
1691    * ]]></a>
1692    */
1693   const char text[]
1694       = "\0<\0?\0x\0m\0l\0 "
1695         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1696         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1697         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1698       /* 64 characters per line */
1699       /* clang-format off */
1700         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1701         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1702         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1703         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1704         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1705         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1706         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1707         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1708         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1709         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1710         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1711         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1712         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1713         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1714         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1715         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1716         A_TO_P_IN_UTF16
1717         /* clang-format on */
1718         "\0]\0]\0>\0<\0/\0a\0>";
1719   const XML_Char *expected =
1720       /* clang-format off */
1721         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1722         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1723         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1724         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1725         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1726         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1727         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1728         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1729         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1730         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1731         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1732         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1733         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1734         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1735         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1736         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1737         XCS("ABCDEFGHIJKLMNOP");
1738   /* clang-format on */
1739   CharData storage;
1740   void *buffer;
1741 
1742   CharData_Init(&storage);
1743   XML_SetUserData(g_parser, &storage);
1744   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1745   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1746   if (buffer == NULL)
1747     fail("Could not allocate parse buffer");
1748   assert(buffer != NULL);
1749   memcpy(buffer, text, sizeof(text) - 1);
1750   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1751     xml_failure(g_parser);
1752   CharData_CheckXMLChars(&storage, expected);
1753 }
1754 END_TEST
1755 
1756 /* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)1757 START_TEST(test_multichar_cdata_utf16) {
1758   /* Test data is:
1759    *   <?xml version='1.0' encoding='utf-16'?>
1760    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1761    *
1762    * where {MINIM} is U+1d15e (a minim or half-note)
1763    *   UTF-16: 0xd834 0xdd5e
1764    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1765    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1766    *   UTF-16: 0xd834 0xdd5f
1767    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1768    */
1769   const char text[] = "\0<\0?\0x\0m\0l\0"
1770                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1771                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1772                       "1\0"
1773                       "6\0'"
1774                       "\0?\0>\0\n"
1775                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1776                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1777                       "\0]\0]\0>\0<\0/\0a\0>";
1778 #ifdef XML_UNICODE
1779   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1780 #else
1781   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1782 #endif
1783   CharData storage;
1784 
1785   CharData_Init(&storage);
1786   XML_SetUserData(g_parser, &storage);
1787   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1788 
1789   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1790       == XML_STATUS_ERROR)
1791     xml_failure(g_parser);
1792   CharData_CheckXMLChars(&storage, expected);
1793 }
1794 END_TEST
1795 
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
START_TEST(test_utf16_bad_surrogate_pair)1797 START_TEST(test_utf16_bad_surrogate_pair) {
1798   /* Test data is:
1799    *   <?xml version='1.0' encoding='utf-16'?>
1800    *   <a><![CDATA[{BADLINB}]]></a>
1801    *
1802    * where {BADLINB} is U+10000 (the first Linear B character)
1803    * with the UTF-16 surrogate pair in the wrong order, i.e.
1804    *   0xdc00 0xd800
1805    */
1806   const char text[] = "\0<\0?\0x\0m\0l\0"
1807                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1808                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1809                       "1\0"
1810                       "6\0'"
1811                       "\0?\0>\0\n"
1812                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1813                       "\xdc\x00\xd8\x00"
1814                       "\0]\0]\0>\0<\0/\0a\0>";
1815 
1816   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1817       != XML_STATUS_ERROR)
1818     fail("Reversed UTF-16 surrogate pair not faulted");
1819   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1820     xml_failure(g_parser);
1821 }
1822 END_TEST
1823 
START_TEST(test_bad_cdata)1824 START_TEST(test_bad_cdata) {
1825   struct CaseData {
1826     const char *text;
1827     enum XML_Error expectedError;
1828   };
1829 
1830   struct CaseData cases[]
1831       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1832          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1833          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1834          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1835          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1836          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1837          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1838          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1839 
1840          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1841          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1842          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1843 
1844          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1845          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1846          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1847          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1848          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1849          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1850          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1851 
1852          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1853          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1854          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1855 
1856   size_t i = 0;
1857   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1858     set_subtest("%s", cases[i].text);
1859     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1860         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1861     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1862 
1863     assert(actualStatus == XML_STATUS_ERROR);
1864 
1865     if (actualError != cases[i].expectedError) {
1866       char message[100];
1867       snprintf(message, sizeof(message),
1868                "Expected error %d but got error %d for case %u: \"%s\"\n",
1869                cases[i].expectedError, actualError, (unsigned int)i + 1,
1870                cases[i].text);
1871       fail(message);
1872     }
1873 
1874     XML_ParserReset(g_parser, NULL);
1875   }
1876 }
1877 END_TEST
1878 
1879 /* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)1880 START_TEST(test_bad_cdata_utf16) {
1881   struct CaseData {
1882     size_t text_bytes;
1883     const char *text;
1884     enum XML_Error expected_error;
1885   };
1886 
1887   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1888                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1889                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1890                         "1\0"
1891                         "6\0'"
1892                         "\0?\0>\0\n"
1893                         "\0<\0a\0>";
1894   struct CaseData cases[] = {
1895       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1896       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1897       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1898       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1899       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1900       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1901       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1902       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1903       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1904       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1905       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1906       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1907       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1908       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1909       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1910       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1911       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1912       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1913       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1914       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1915       /* Now add a four-byte UTF-16 character */
1916       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1917        XML_ERROR_UNCLOSED_CDATA_SECTION},
1918       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1919       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1920        XML_ERROR_PARTIAL_CHAR},
1921       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1922        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1923   size_t i;
1924 
1925   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1926     set_subtest("case %lu", (long unsigned)(i + 1));
1927     enum XML_Status actual_status;
1928     enum XML_Error actual_error;
1929 
1930     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1931                                 XML_FALSE)
1932         == XML_STATUS_ERROR)
1933       xml_failure(g_parser);
1934     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1935                                             (int)cases[i].text_bytes, XML_TRUE);
1936     assert(actual_status == XML_STATUS_ERROR);
1937     actual_error = XML_GetErrorCode(g_parser);
1938     if (actual_error != cases[i].expected_error) {
1939       char message[1024];
1940 
1941       snprintf(message, sizeof(message),
1942                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1943                ") for case %lu\n",
1944                cases[i].expected_error,
1945                XML_ErrorString(cases[i].expected_error), actual_error,
1946                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1947       fail(message);
1948     }
1949     XML_ParserReset(g_parser, NULL);
1950   }
1951 }
1952 END_TEST
1953 
1954 /* Test stopping the parser in cdata handler */
START_TEST(test_stop_parser_between_cdata_calls)1955 START_TEST(test_stop_parser_between_cdata_calls) {
1956   const char *text = long_cdata_text;
1957 
1958   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1959   g_resumable = XML_FALSE;
1960   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1961 }
1962 END_TEST
1963 
1964 /* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)1965 START_TEST(test_suspend_parser_between_cdata_calls) {
1966   if (g_chunkSize != 0) {
1967     // this test does not use SINGLE_BYTES, because of suspension
1968     return;
1969   }
1970 
1971   const char *text = long_cdata_text;
1972   enum XML_Status result;
1973 
1974   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1975   g_resumable = XML_TRUE;
1976   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
1977   // we won't know exactly how much input we actually managed to give Expat.
1978   result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
1979   if (result != XML_STATUS_SUSPENDED) {
1980     if (result == XML_STATUS_ERROR)
1981       xml_failure(g_parser);
1982     fail("Parse not suspended in CDATA handler");
1983   }
1984   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1985     xml_failure(g_parser);
1986 }
1987 END_TEST
1988 
1989 /* Test memory allocation functions */
START_TEST(test_memory_allocation)1990 START_TEST(test_memory_allocation) {
1991   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1992   char *p;
1993 
1994   if (buffer == NULL) {
1995     fail("Allocation failed");
1996   } else {
1997     /* Try writing to memory; some OSes try to cheat! */
1998     buffer[0] = 'T';
1999     buffer[1] = 'E';
2000     buffer[2] = 'S';
2001     buffer[3] = 'T';
2002     buffer[4] = '\0';
2003     if (strcmp(buffer, "TEST") != 0) {
2004       fail("Memory not writable");
2005     } else {
2006       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
2007       if (p == NULL) {
2008         fail("Reallocation failed");
2009       } else {
2010         /* Write again, just to be sure */
2011         buffer = p;
2012         buffer[0] = 'V';
2013         if (strcmp(buffer, "VEST") != 0) {
2014           fail("Reallocated memory not writable");
2015         }
2016       }
2017     }
2018     XML_MemFree(g_parser, buffer);
2019   }
2020 }
2021 END_TEST
2022 
2023 /* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)2024 START_TEST(test_default_current) {
2025   const char *text = "<doc>hell]</doc>";
2026   const char *entity_text = "<!DOCTYPE doc [\n"
2027                             "<!ENTITY entity '&#37;'>\n"
2028                             "]>\n"
2029                             "<doc>&entity;</doc>";
2030 
2031   set_subtest("with defaulting");
2032   {
2033     struct handler_record_list storage;
2034     storage.count = 0;
2035     XML_SetDefaultHandler(g_parser, record_default_handler);
2036     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2037     XML_SetUserData(g_parser, &storage);
2038     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2039         == XML_STATUS_ERROR)
2040       xml_failure(g_parser);
2041     int i = 0;
2042     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2043     // we should have gotten one or more cdata callbacks, totaling 5 chars
2044     int cdata_len_remaining = 5;
2045     while (cdata_len_remaining > 0) {
2046       const struct handler_record_entry *c_entry
2047           = handler_record_get(&storage, i++);
2048       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
2049       assert_true(c_entry->arg > 0);
2050       assert_true(c_entry->arg <= cdata_len_remaining);
2051       cdata_len_remaining -= c_entry->arg;
2052       // default handler must follow, with the exact same len argument.
2053       assert_record_handler_called(&storage, i++, "record_default_handler",
2054                                    c_entry->arg);
2055     }
2056     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2057     assert_true(storage.count == i);
2058   }
2059 
2060   /* Again, without the defaulting */
2061   set_subtest("no defaulting");
2062   {
2063     struct handler_record_list storage;
2064     storage.count = 0;
2065     XML_ParserReset(g_parser, NULL);
2066     XML_SetDefaultHandler(g_parser, record_default_handler);
2067     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2068     XML_SetUserData(g_parser, &storage);
2069     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2070         == XML_STATUS_ERROR)
2071       xml_failure(g_parser);
2072     int i = 0;
2073     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
2074     // we should have gotten one or more cdata callbacks, totaling 5 chars
2075     int cdata_len_remaining = 5;
2076     while (cdata_len_remaining > 0) {
2077       const struct handler_record_entry *c_entry
2078           = handler_record_get(&storage, i++);
2079       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
2080       assert_true(c_entry->arg > 0);
2081       assert_true(c_entry->arg <= cdata_len_remaining);
2082       cdata_len_remaining -= c_entry->arg;
2083     }
2084     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
2085     assert_true(storage.count == i);
2086   }
2087 
2088   /* Now with an internal entity to complicate matters */
2089   set_subtest("with internal entity");
2090   {
2091     struct handler_record_list storage;
2092     storage.count = 0;
2093     XML_ParserReset(g_parser, NULL);
2094     XML_SetDefaultHandler(g_parser, record_default_handler);
2095     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2096     XML_SetUserData(g_parser, &storage);
2097     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2098                                 XML_TRUE)
2099         == XML_STATUS_ERROR)
2100       xml_failure(g_parser);
2101     /* The default handler suppresses the entity */
2102     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2103     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2104     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2105     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2106     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2107     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2108     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2109     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2110     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2111     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2112     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2113     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2114     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2115     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2116     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2117     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2118     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2119     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
2120     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2121     assert_true(storage.count == 19);
2122   }
2123 
2124   /* Again, with a skip handler */
2125   set_subtest("with skip handler");
2126   {
2127     struct handler_record_list storage;
2128     storage.count = 0;
2129     XML_ParserReset(g_parser, NULL);
2130     XML_SetDefaultHandler(g_parser, record_default_handler);
2131     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2132     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
2133     XML_SetUserData(g_parser, &storage);
2134     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2135                                 XML_TRUE)
2136         == XML_STATUS_ERROR)
2137       xml_failure(g_parser);
2138     /* The default handler suppresses the entity */
2139     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2140     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2141     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2142     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2143     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2144     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2145     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2146     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2147     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2148     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2149     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2150     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2151     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2152     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2153     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2154     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2155     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2156     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2157     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2158     assert_true(storage.count == 19);
2159   }
2160 
2161   /* This time, allow the entity through */
2162   set_subtest("allow entity");
2163   {
2164     struct handler_record_list storage;
2165     storage.count = 0;
2166     XML_ParserReset(g_parser, NULL);
2167     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2168     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2169     XML_SetUserData(g_parser, &storage);
2170     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2171                                 XML_TRUE)
2172         == XML_STATUS_ERROR)
2173       xml_failure(g_parser);
2174     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2175     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2176     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2177     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2178     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2179     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2180     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2181     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2182     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2183     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2184     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2185     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2186     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2187     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2188     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2189     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2190     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2191     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2192     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2193     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2194     assert_true(storage.count == 20);
2195   }
2196 
2197   /* Finally, without passing the cdata to the default handler */
2198   set_subtest("not passing cdata");
2199   {
2200     struct handler_record_list storage;
2201     storage.count = 0;
2202     XML_ParserReset(g_parser, NULL);
2203     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2204     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2205     XML_SetUserData(g_parser, &storage);
2206     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2207                                 XML_TRUE)
2208         == XML_STATUS_ERROR)
2209       xml_failure(g_parser);
2210     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2211     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2212     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2213     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2214     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2215     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2216     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2217     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2218     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2219     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2220     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2221     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2222     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2223     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2224     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2225     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2226     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2227     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2228                                  1);
2229     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2230     assert_true(storage.count == 19);
2231   }
2232 }
2233 END_TEST
2234 
2235 /* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)2236 START_TEST(test_dtd_elements) {
2237   const char *text = "<!DOCTYPE doc [\n"
2238                      "<!ELEMENT doc (chapter)>\n"
2239                      "<!ELEMENT chapter (#PCDATA)>\n"
2240                      "]>\n"
2241                      "<doc><chapter>Wombats are go</chapter></doc>";
2242 
2243   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2244   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2245       == XML_STATUS_ERROR)
2246     xml_failure(g_parser);
2247 }
2248 END_TEST
2249 
2250 static void XMLCALL
element_decl_check_model(void * userData,const XML_Char * name,XML_Content * model)2251 element_decl_check_model(void *userData, const XML_Char *name,
2252                          XML_Content *model) {
2253   UNUSED_P(userData);
2254   uint32_t errorFlags = 0;
2255 
2256   /* Expected model array structure is this:
2257    * [0] (type 6, quant 0)
2258    *   [1] (type 5, quant 0)
2259    *     [3] (type 4, quant 0, name "bar")
2260    *     [4] (type 4, quant 0, name "foo")
2261    *     [5] (type 4, quant 3, name "xyz")
2262    *   [2] (type 4, quant 2, name "zebra")
2263    */
2264   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2265   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2266 
2267   if (model != NULL) {
2268     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2269     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2270     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2271     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2272     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2273 
2274     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2275     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2276     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2277     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2278     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2279 
2280     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2281     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2282     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2283     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2284     errorFlags
2285         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2286 
2287     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2288     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2289     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2290     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2291     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2292 
2293     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2294     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2295     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2296     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2297     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2298 
2299     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2300     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2301     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2302     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2303     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2304   }
2305 
2306   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2307   XML_FreeContentModel(g_parser, model);
2308 }
2309 
START_TEST(test_dtd_elements_nesting)2310 START_TEST(test_dtd_elements_nesting) {
2311   // Payload inspired by a test in Perl's XML::Parser
2312   const char *text = "<!DOCTYPE foo [\n"
2313                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2314                      "]>\n"
2315                      "<foo/>";
2316 
2317   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2318 
2319   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2320   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2321       == XML_STATUS_ERROR)
2322     xml_failure(g_parser);
2323 
2324   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2325     fail("Element declaration model regression detected");
2326 }
2327 END_TEST
2328 
2329 /* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)2330 START_TEST(test_set_foreign_dtd) {
2331   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2332   const char *text2 = "<doc>&entity;</doc>";
2333   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2334 
2335   /* Check hash salt is passed through too */
2336   XML_SetHashSalt(g_parser, 0x12345678);
2337   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2338   XML_SetUserData(g_parser, &test_data);
2339   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2340   /* Add a default handler to exercise more code paths */
2341   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2342   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2343     fail("Could not set foreign DTD");
2344   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2345       == XML_STATUS_ERROR)
2346     xml_failure(g_parser);
2347 
2348   /* Ensure that trying to set the DTD after parsing has started
2349    * is faulted, even if it's the same setting.
2350    */
2351   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2352       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2353     fail("Failed to reject late foreign DTD setting");
2354   /* Ditto for the hash salt */
2355   if (XML_SetHashSalt(g_parser, 0x23456789))
2356     fail("Failed to reject late hash salt change");
2357 
2358   /* Now finish the parse */
2359   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2360       == XML_STATUS_ERROR)
2361     xml_failure(g_parser);
2362 }
2363 END_TEST
2364 
2365 /* Test foreign DTD handling with a failing NotStandalone handler */
START_TEST(test_foreign_dtd_not_standalone)2366 START_TEST(test_foreign_dtd_not_standalone) {
2367   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2368                      "<doc>&entity;</doc>";
2369   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2370 
2371   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2372   XML_SetUserData(g_parser, &test_data);
2373   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2374   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2375   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2376     fail("Could not set foreign DTD");
2377   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2378                  "NotStandalonehandler failed to reject");
2379 }
2380 END_TEST
2381 
2382 /* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)2383 START_TEST(test_invalid_foreign_dtd) {
2384   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2385                      "<doc>&entity;</doc>";
2386   ExtFaults test_data
2387       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2388 
2389   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2390   XML_SetUserData(g_parser, &test_data);
2391   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2392   XML_UseForeignDTD(g_parser, XML_TRUE);
2393   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2394                  "Bad DTD should not have been accepted");
2395 }
2396 END_TEST
2397 
2398 /* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)2399 START_TEST(test_foreign_dtd_with_doctype) {
2400   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2401                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2402   const char *text2 = "<doc>&entity;</doc>";
2403   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2404 
2405   /* Check hash salt is passed through too */
2406   XML_SetHashSalt(g_parser, 0x12345678);
2407   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2408   XML_SetUserData(g_parser, &test_data);
2409   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2410   /* Add a default handler to exercise more code paths */
2411   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2412   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2413     fail("Could not set foreign DTD");
2414   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2415       == XML_STATUS_ERROR)
2416     xml_failure(g_parser);
2417 
2418   /* Ensure that trying to set the DTD after parsing has started
2419    * is faulted, even if it's the same setting.
2420    */
2421   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2422       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2423     fail("Failed to reject late foreign DTD setting");
2424   /* Ditto for the hash salt */
2425   if (XML_SetHashSalt(g_parser, 0x23456789))
2426     fail("Failed to reject late hash salt change");
2427 
2428   /* Now finish the parse */
2429   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2430       == XML_STATUS_ERROR)
2431     xml_failure(g_parser);
2432 }
2433 END_TEST
2434 
2435 /* Test XML_UseForeignDTD with no external subset present */
START_TEST(test_foreign_dtd_without_external_subset)2436 START_TEST(test_foreign_dtd_without_external_subset) {
2437   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2438                      "<doc>&foo;</doc>";
2439 
2440   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2441   XML_SetUserData(g_parser, NULL);
2442   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2443   XML_UseForeignDTD(g_parser, XML_TRUE);
2444   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445       == XML_STATUS_ERROR)
2446     xml_failure(g_parser);
2447 }
2448 END_TEST
2449 
START_TEST(test_empty_foreign_dtd)2450 START_TEST(test_empty_foreign_dtd) {
2451   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2452                      "<doc>&entity;</doc>";
2453 
2454   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2455   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2456   XML_UseForeignDTD(g_parser, XML_TRUE);
2457   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2458                  "Undefined entity not faulted");
2459 }
2460 END_TEST
2461 
2462 /* Test XML Base is set and unset appropriately */
START_TEST(test_set_base)2463 START_TEST(test_set_base) {
2464   const XML_Char *old_base;
2465   const XML_Char *new_base = XCS("/local/file/name.xml");
2466 
2467   old_base = XML_GetBase(g_parser);
2468   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2469     fail("Unable to set base");
2470   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2471     fail("Base setting not correct");
2472   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2473     fail("Unable to NULL base");
2474   if (XML_GetBase(g_parser) != NULL)
2475     fail("Base setting not nulled");
2476   XML_SetBase(g_parser, old_base);
2477 }
2478 END_TEST
2479 
2480 /* Test attribute counts, indexing, etc */
START_TEST(test_attributes)2481 START_TEST(test_attributes) {
2482   const char *text = "<!DOCTYPE doc [\n"
2483                      "<!ELEMENT doc (tag)>\n"
2484                      "<!ATTLIST doc id ID #REQUIRED>\n"
2485                      "]>"
2486                      "<doc a='1' id='one' b='2'>"
2487                      "<tag c='3'/>"
2488                      "</doc>";
2489   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2490                          {XCS("b"), XCS("2")},
2491                          {XCS("id"), XCS("one")},
2492                          {NULL, NULL}};
2493   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2494   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2495                         {XCS("tag"), 1, NULL, NULL},
2496                         {NULL, 0, NULL, NULL}};
2497   info[0].attributes = doc_info;
2498   info[1].attributes = tag_info;
2499 
2500   XML_Parser parser = XML_ParserCreate(NULL);
2501   assert_true(parser != NULL);
2502   ParserAndElementInfo parserAndElementInfos = {
2503       parser,
2504       info,
2505   };
2506 
2507   XML_SetStartElementHandler(parser, counting_start_element_handler);
2508   XML_SetUserData(parser, &parserAndElementInfos);
2509   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2510       == XML_STATUS_ERROR)
2511     xml_failure(parser);
2512 
2513   XML_ParserFree(parser);
2514 }
2515 END_TEST
2516 
2517 /* Test reset works correctly in the middle of processing an internal
2518  * entity.  Exercises some obscure code in XML_ParserReset().
2519  */
START_TEST(test_reset_in_entity)2520 START_TEST(test_reset_in_entity) {
2521   if (g_chunkSize != 0) {
2522     // this test does not use SINGLE_BYTES, because of suspension
2523     return;
2524   }
2525 
2526   const char *text = "<!DOCTYPE doc [\n"
2527                      "<!ENTITY wombat 'wom'>\n"
2528                      "<!ENTITY entity 'hi &wom; there'>\n"
2529                      "]>\n"
2530                      "<doc>&entity;</doc>";
2531   XML_ParsingStatus status;
2532 
2533   g_resumable = XML_TRUE;
2534   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2535   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
2536   // we won't know exactly how much input we actually managed to give Expat.
2537   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2538       == XML_STATUS_ERROR)
2539     xml_failure(g_parser);
2540   XML_GetParsingStatus(g_parser, &status);
2541   if (status.parsing != XML_SUSPENDED)
2542     fail("Parsing status not SUSPENDED");
2543   XML_ParserReset(g_parser, NULL);
2544   XML_GetParsingStatus(g_parser, &status);
2545   if (status.parsing != XML_INITIALIZED)
2546     fail("Parsing status doesn't reset to INITIALIZED");
2547 }
2548 END_TEST
2549 
2550 /* Test that resume correctly passes through parse errors */
START_TEST(test_resume_invalid_parse)2551 START_TEST(test_resume_invalid_parse) {
2552   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2553 
2554   g_resumable = XML_TRUE;
2555   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2556   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2557       == XML_STATUS_ERROR)
2558     xml_failure(g_parser);
2559   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2560     fail("Resumed invalid parse not faulted");
2561   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2562     fail("Invalid parse not correctly faulted");
2563 }
2564 END_TEST
2565 
2566 /* Test that re-suspended parses are correctly passed through */
START_TEST(test_resume_resuspended)2567 START_TEST(test_resume_resuspended) {
2568   const char *text = "<doc>Hello<meep/>world</doc>";
2569 
2570   g_resumable = XML_TRUE;
2571   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2572   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2573       == XML_STATUS_ERROR)
2574     xml_failure(g_parser);
2575   g_resumable = XML_TRUE;
2576   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2577   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2578     fail("Resumption not suspended");
2579   /* This one should succeed and finish up */
2580   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2581     xml_failure(g_parser);
2582 }
2583 END_TEST
2584 
2585 /* Test that CDATA shows up correctly through a default handler */
START_TEST(test_cdata_default)2586 START_TEST(test_cdata_default) {
2587   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2588   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2589   CharData storage;
2590 
2591   CharData_Init(&storage);
2592   XML_SetUserData(g_parser, &storage);
2593   XML_SetDefaultHandler(g_parser, accumulate_characters);
2594 
2595   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2596       == XML_STATUS_ERROR)
2597     xml_failure(g_parser);
2598   CharData_CheckXMLChars(&storage, expected);
2599 }
2600 END_TEST
2601 
2602 /* Test resetting a subordinate parser does exactly nothing */
START_TEST(test_subordinate_reset)2603 START_TEST(test_subordinate_reset) {
2604   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2605                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2606                      "<doc>&entity;</doc>";
2607 
2608   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2609   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2610   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2611       == XML_STATUS_ERROR)
2612     xml_failure(g_parser);
2613 }
2614 END_TEST
2615 
2616 /* Test suspending a subordinate parser */
START_TEST(test_subordinate_suspend)2617 START_TEST(test_subordinate_suspend) {
2618   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2619                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2620                      "<doc>&entity;</doc>";
2621 
2622   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2623   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2624   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2625       == XML_STATUS_ERROR)
2626     xml_failure(g_parser);
2627 }
2628 END_TEST
2629 
2630 /* Test suspending a subordinate parser from an XML declaration */
2631 /* Increases code coverage of the tests */
2632 
START_TEST(test_subordinate_xdecl_suspend)2633 START_TEST(test_subordinate_xdecl_suspend) {
2634   const char *text
2635       = "<!DOCTYPE doc [\n"
2636         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2637         "]>\n"
2638         "<doc>&entity;</doc>";
2639 
2640   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2641   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2642   g_resumable = XML_TRUE;
2643   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2644       == XML_STATUS_ERROR)
2645     xml_failure(g_parser);
2646 }
2647 END_TEST
2648 
START_TEST(test_subordinate_xdecl_abort)2649 START_TEST(test_subordinate_xdecl_abort) {
2650   const char *text
2651       = "<!DOCTYPE doc [\n"
2652         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2653         "]>\n"
2654         "<doc>&entity;</doc>";
2655 
2656   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2657   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2658   g_resumable = XML_FALSE;
2659   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2660       == XML_STATUS_ERROR)
2661     xml_failure(g_parser);
2662 }
2663 END_TEST
2664 
2665 /* Test external entity fault handling with suspension */
START_TEST(test_ext_entity_invalid_suspended_parse)2666 START_TEST(test_ext_entity_invalid_suspended_parse) {
2667   const char *text = "<!DOCTYPE doc [\n"
2668                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2669                      "]>\n"
2670                      "<doc>&en;</doc>";
2671   ExtFaults faults[]
2672       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2673           "Incomplete element declaration not faulted", NULL,
2674           XML_ERROR_UNCLOSED_TOKEN},
2675          {/* First two bytes of a three-byte char */
2676           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2677           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2678          {NULL, NULL, NULL, XML_ERROR_NONE}};
2679   ExtFaults *fault;
2680 
2681   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2682     set_subtest("%s", fault->parse_text);
2683     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2684     XML_SetExternalEntityRefHandler(g_parser,
2685                                     external_entity_suspending_faulter);
2686     XML_SetUserData(g_parser, fault);
2687     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2688                    "Parser did not report external entity error");
2689     XML_ParserReset(g_parser, NULL);
2690   }
2691 }
2692 END_TEST
2693 
2694 /* Test setting an explicit encoding */
START_TEST(test_explicit_encoding)2695 START_TEST(test_explicit_encoding) {
2696   const char *text1 = "<doc>Hello ";
2697   const char *text2 = " World</doc>";
2698 
2699   /* Just check that we can set the encoding to NULL before starting */
2700   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2701     fail("Failed to initialise encoding to NULL");
2702   /* Say we are UTF-8 */
2703   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2704     fail("Failed to set explicit encoding");
2705   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2706       == XML_STATUS_ERROR)
2707     xml_failure(g_parser);
2708   /* Try to switch encodings mid-parse */
2709   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2710     fail("Allowed encoding change");
2711   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2712       == XML_STATUS_ERROR)
2713     xml_failure(g_parser);
2714   /* Try now the parse is over */
2715   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2716     fail("Failed to unset encoding");
2717 }
2718 END_TEST
2719 
2720 /* Test handling of trailing CR (rather than newline) */
START_TEST(test_trailing_cr)2721 START_TEST(test_trailing_cr) {
2722   const char *text = "<doc>\r";
2723   int found_cr;
2724 
2725   /* Try with a character handler, for code coverage */
2726   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2727   XML_SetUserData(g_parser, &found_cr);
2728   found_cr = 0;
2729   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2730       == XML_STATUS_OK)
2731     fail("Failed to fault unclosed doc");
2732   if (found_cr == 0)
2733     fail("Did not catch the carriage return");
2734   XML_ParserReset(g_parser, NULL);
2735 
2736   /* Now with a default handler instead */
2737   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2738   XML_SetUserData(g_parser, &found_cr);
2739   found_cr = 0;
2740   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2741       == XML_STATUS_OK)
2742     fail("Failed to fault unclosed doc");
2743   if (found_cr == 0)
2744     fail("Did not catch default carriage return");
2745 }
2746 END_TEST
2747 
2748 /* Test trailing CR in an external entity parse */
START_TEST(test_ext_entity_trailing_cr)2749 START_TEST(test_ext_entity_trailing_cr) {
2750   const char *text = "<!DOCTYPE doc [\n"
2751                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2752                      "]>\n"
2753                      "<doc>&en;</doc>";
2754   int found_cr;
2755 
2756   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2757   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2758   XML_SetUserData(g_parser, &found_cr);
2759   found_cr = 0;
2760   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761       != XML_STATUS_OK)
2762     xml_failure(g_parser);
2763   if (found_cr == 0)
2764     fail("No carriage return found");
2765   XML_ParserReset(g_parser, NULL);
2766 
2767   /* Try again with a different trailing CR */
2768   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2769   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2770   XML_SetUserData(g_parser, &found_cr);
2771   found_cr = 0;
2772   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2773       != XML_STATUS_OK)
2774     xml_failure(g_parser);
2775   if (found_cr == 0)
2776     fail("No carriage return found");
2777 }
2778 END_TEST
2779 
2780 /* Test handling of trailing square bracket */
START_TEST(test_trailing_rsqb)2781 START_TEST(test_trailing_rsqb) {
2782   const char *text8 = "<doc>]";
2783   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2784   int found_rsqb;
2785   int text8_len = (int)strlen(text8);
2786 
2787   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2788   XML_SetUserData(g_parser, &found_rsqb);
2789   found_rsqb = 0;
2790   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2791       == XML_STATUS_OK)
2792     fail("Failed to fault unclosed doc");
2793   if (found_rsqb == 0)
2794     fail("Did not catch the right square bracket");
2795 
2796   /* Try again with a different encoding */
2797   XML_ParserReset(g_parser, NULL);
2798   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2799   XML_SetUserData(g_parser, &found_rsqb);
2800   found_rsqb = 0;
2801   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2802                               XML_TRUE)
2803       == XML_STATUS_OK)
2804     fail("Failed to fault unclosed doc");
2805   if (found_rsqb == 0)
2806     fail("Did not catch the right square bracket");
2807 
2808   /* And finally with a default handler */
2809   XML_ParserReset(g_parser, NULL);
2810   XML_SetDefaultHandler(g_parser, rsqb_handler);
2811   XML_SetUserData(g_parser, &found_rsqb);
2812   found_rsqb = 0;
2813   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2814                               XML_TRUE)
2815       == XML_STATUS_OK)
2816     fail("Failed to fault unclosed doc");
2817   if (found_rsqb == 0)
2818     fail("Did not catch the right square bracket");
2819 }
2820 END_TEST
2821 
2822 /* Test trailing right square bracket in an external entity parse */
START_TEST(test_ext_entity_trailing_rsqb)2823 START_TEST(test_ext_entity_trailing_rsqb) {
2824   const char *text = "<!DOCTYPE doc [\n"
2825                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2826                      "]>\n"
2827                      "<doc>&en;</doc>";
2828   int found_rsqb;
2829 
2830   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2831   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2832   XML_SetUserData(g_parser, &found_rsqb);
2833   found_rsqb = 0;
2834   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2835       != XML_STATUS_OK)
2836     xml_failure(g_parser);
2837   if (found_rsqb == 0)
2838     fail("No right square bracket found");
2839 }
2840 END_TEST
2841 
2842 /* Test CDATA handling in an external entity */
START_TEST(test_ext_entity_good_cdata)2843 START_TEST(test_ext_entity_good_cdata) {
2844   const char *text = "<!DOCTYPE doc [\n"
2845                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2846                      "]>\n"
2847                      "<doc>&en;</doc>";
2848 
2849   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2850   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2851   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2852       != XML_STATUS_OK)
2853     xml_failure(g_parser);
2854 }
2855 END_TEST
2856 
2857 /* Test user parameter settings */
START_TEST(test_user_parameters)2858 START_TEST(test_user_parameters) {
2859   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2860                      "<!-- Primary parse -->\n"
2861                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2862                      "<doc>&entity;";
2863   const char *epilog = "<!-- Back to primary parser -->\n"
2864                        "</doc>";
2865 
2866   g_comment_count = 0;
2867   g_skip_count = 0;
2868   g_xdecl_count = 0;
2869   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2870   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2871   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2872   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2873   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2874   XML_UseParserAsHandlerArg(g_parser);
2875   XML_SetUserData(g_parser, (void *)1);
2876   g_handler_data = g_parser;
2877   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2878       == XML_STATUS_ERROR)
2879     xml_failure(g_parser);
2880   /* Ensure we can't change policy mid-parse */
2881   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2882     fail("Changed param entity parsing policy while parsing");
2883   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2884       == XML_STATUS_ERROR)
2885     xml_failure(g_parser);
2886   if (g_comment_count != 3)
2887     fail("Comment handler not invoked enough times");
2888   if (g_skip_count != 1)
2889     fail("Skip handler not invoked enough times");
2890   if (g_xdecl_count != 1)
2891     fail("XML declaration handler not invoked");
2892 }
2893 END_TEST
2894 
2895 /* Test that an explicit external entity handler argument replaces
2896  * the parser as the first argument.
2897  *
2898  * We do not call the first parameter to the external entity handler
2899  * 'parser' for once, since the first time the handler is called it
2900  * will actually be a text string.  We need to be able to access the
2901  * global 'parser' variable to create our external entity parser from,
2902  * since there are code paths we need to ensure get executed.
2903  */
START_TEST(test_ext_entity_ref_parameter)2904 START_TEST(test_ext_entity_ref_parameter) {
2905   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2906                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2907                      "<doc>&entity;</doc>";
2908 
2909   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2910   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2911   /* Set a handler arg that is not NULL and not parser (which is
2912    * what NULL would cause to be passed.
2913    */
2914   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2915   g_handler_data = text;
2916   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2917       == XML_STATUS_ERROR)
2918     xml_failure(g_parser);
2919 
2920   /* Now try again with unset args */
2921   XML_ParserReset(g_parser, NULL);
2922   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2923   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2924   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2925   g_handler_data = g_parser;
2926   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2927       == XML_STATUS_ERROR)
2928     xml_failure(g_parser);
2929 }
2930 END_TEST
2931 
2932 /* Test the parsing of an empty string */
START_TEST(test_empty_parse)2933 START_TEST(test_empty_parse) {
2934   const char *text = "<doc></doc>";
2935   const char *partial = "<doc>";
2936 
2937   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2938     fail("Parsing empty string faulted");
2939   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2940     fail("Parsing final empty string not faulted");
2941   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2942     fail("Parsing final empty string faulted for wrong reason");
2943 
2944   /* Now try with valid text before the empty end */
2945   XML_ParserReset(g_parser, NULL);
2946   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2947       == XML_STATUS_ERROR)
2948     xml_failure(g_parser);
2949   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2950     fail("Parsing final empty string faulted");
2951 
2952   /* Now try with invalid text before the empty end */
2953   XML_ParserReset(g_parser, NULL);
2954   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2955                               XML_FALSE)
2956       == XML_STATUS_ERROR)
2957     xml_failure(g_parser);
2958   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2959     fail("Parsing final incomplete empty string not faulted");
2960 }
2961 END_TEST
2962 
2963 /* Test XML_Parse for len < 0 */
START_TEST(test_negative_len_parse)2964 START_TEST(test_negative_len_parse) {
2965   const char *const doc = "<root/>";
2966   for (int isFinal = 0; isFinal < 2; isFinal++) {
2967     set_subtest("isFinal=%d", isFinal);
2968 
2969     XML_Parser parser = XML_ParserCreate(NULL);
2970 
2971     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2972       fail("There was not supposed to be any initial parse error.");
2973 
2974     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2975 
2976     if (status != XML_STATUS_ERROR)
2977       fail("Negative len was expected to fail the parse but did not.");
2978 
2979     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2980       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2981 
2982     XML_ParserFree(parser);
2983   }
2984 }
2985 END_TEST
2986 
2987 /* Test XML_ParseBuffer for len < 0 */
START_TEST(test_negative_len_parse_buffer)2988 START_TEST(test_negative_len_parse_buffer) {
2989   const char *const doc = "<root/>";
2990   for (int isFinal = 0; isFinal < 2; isFinal++) {
2991     set_subtest("isFinal=%d", isFinal);
2992 
2993     XML_Parser parser = XML_ParserCreate(NULL);
2994 
2995     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2996       fail("There was not supposed to be any initial parse error.");
2997 
2998     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2999 
3000     if (buffer == NULL)
3001       fail("XML_GetBuffer failed.");
3002 
3003     memcpy(buffer, doc, strlen(doc));
3004 
3005     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
3006 
3007     if (status != XML_STATUS_ERROR)
3008       fail("Negative len was expected to fail the parse but did not.");
3009 
3010     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
3011       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
3012 
3013     XML_ParserFree(parser);
3014   }
3015 }
3016 END_TEST
3017 
3018 /* Test odd corners of the XML_GetBuffer interface */
3019 static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id,long * presult)3020 get_feature(enum XML_FeatureEnum feature_id, long *presult) {
3021   const XML_Feature *feature = XML_GetFeatureList();
3022 
3023   if (feature == NULL)
3024     return XML_STATUS_ERROR;
3025   for (; feature->feature != XML_FEATURE_END; feature++) {
3026     if (feature->feature == feature_id) {
3027       *presult = feature->value;
3028       return XML_STATUS_OK;
3029     }
3030   }
3031   return XML_STATUS_ERROR;
3032 }
3033 
3034 /* Test odd corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_1)3035 START_TEST(test_get_buffer_1) {
3036   const char *text = get_buffer_test_text;
3037   void *buffer;
3038   long context_bytes;
3039 
3040   /* Attempt to allocate a negative length buffer */
3041   if (XML_GetBuffer(g_parser, -12) != NULL)
3042     fail("Negative length buffer not failed");
3043 
3044   /* Now get a small buffer and extend it past valid length */
3045   buffer = XML_GetBuffer(g_parser, 1536);
3046   if (buffer == NULL)
3047     fail("1.5K buffer failed");
3048   assert(buffer != NULL);
3049   memcpy(buffer, text, strlen(text));
3050   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3051       == XML_STATUS_ERROR)
3052     xml_failure(g_parser);
3053   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
3054     fail("INT_MAX buffer not failed");
3055 
3056   /* Now try extending it a more reasonable but still too large
3057    * amount.  The allocator in XML_GetBuffer() doubles the buffer
3058    * size until it exceeds the requested amount or INT_MAX.  If it
3059    * exceeds INT_MAX, it rejects the request, so we want a request
3060    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
3061    * with an extra byte just to ensure that the request is off any
3062    * boundary.  The request will be inflated internally by
3063    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
3064    * request.
3065    */
3066   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
3067     context_bytes = 0;
3068   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
3069     fail("INT_MAX- buffer not failed");
3070 
3071   /* Now try extending it a carefully crafted amount */
3072   if (XML_GetBuffer(g_parser, 1000) == NULL)
3073     fail("1000 buffer failed");
3074 }
3075 END_TEST
3076 
3077 /* Test more corners of the XML_GetBuffer interface */
START_TEST(test_get_buffer_2)3078 START_TEST(test_get_buffer_2) {
3079   const char *text = get_buffer_test_text;
3080   void *buffer;
3081 
3082   /* Now get a decent buffer */
3083   buffer = XML_GetBuffer(g_parser, 1536);
3084   if (buffer == NULL)
3085     fail("1.5K buffer failed");
3086   assert(buffer != NULL);
3087   memcpy(buffer, text, strlen(text));
3088   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
3089       == XML_STATUS_ERROR)
3090     xml_failure(g_parser);
3091 
3092   /* Extend it, to catch a different code path */
3093   if (XML_GetBuffer(g_parser, 1024) == NULL)
3094     fail("1024 buffer failed");
3095 }
3096 END_TEST
3097 
3098 /* Test for signed integer overflow CVE-2022-23852 */
3099 #if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow)3100 START_TEST(test_get_buffer_3_overflow) {
3101   XML_Parser parser = XML_ParserCreate(NULL);
3102   assert(parser != NULL);
3103 
3104   const char *const text = "\n";
3105   const int expectedKeepValue = (int)strlen(text);
3106 
3107   // After this call, variable "keep" in XML_GetBuffer will
3108   // have value expectedKeepValue
3109   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
3110                               XML_FALSE /* isFinal */)
3111       == XML_STATUS_ERROR)
3112     xml_failure(parser);
3113 
3114   assert(expectedKeepValue > 0);
3115   if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
3116     fail("enlarging buffer not failed");
3117 
3118   XML_ParserFree(parser);
3119 }
3120 END_TEST
3121 #endif // XML_CONTEXT_BYTES > 0
3122 
START_TEST(test_buffer_can_grow_to_max)3123 START_TEST(test_buffer_can_grow_to_max) {
3124   const char *const prefixes[] = {
3125       "",
3126       "<",
3127       "<x a='",
3128       "<doc><x a='",
3129       "<document><x a='",
3130       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
3131       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
3132       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
3133       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
3134       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
3135   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
3136   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
3137 #if defined(__MINGW32__) && ! defined(__MINGW64__)
3138   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
3139   // Can we make a big allocation?
3140   for (int i = 1; i <= 2; i++) {
3141     void *const big = malloc(maxbuf);
3142     if (big != NULL) {
3143       free(big);
3144       break;
3145     }
3146     // The big allocation failed. Let's be a little lenient.
3147     maxbuf = maxbuf / 2;
3148     fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf);
3149   }
3150 #endif
3151 
3152   for (int i = 0; i < num_prefixes; ++i) {
3153     set_subtest("\"%s\"", prefixes[i]);
3154     XML_Parser parser = XML_ParserCreate(NULL);
3155 #if XML_GE == 1
3156     assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
3157                 == XML_TRUE); // i.e. deactivate
3158 #endif
3159     const int prefix_len = (int)strlen(prefixes[i]);
3160     const enum XML_Status s
3161         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3162     if (s != XML_STATUS_OK)
3163       xml_failure(parser);
3164 
3165     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3166     // subtracting the whole prefix is easiest, and close enough.
3167     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3168     // The limit should be consistent; no prefix should allow us to
3169     // reach above the max buffer size.
3170     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3171     XML_ParserFree(parser);
3172   }
3173 }
3174 END_TEST
3175 
START_TEST(test_getbuffer_allocates_on_zero_len)3176 START_TEST(test_getbuffer_allocates_on_zero_len) {
3177   for (int first_len = 1; first_len >= 0; first_len--) {
3178     set_subtest("with len=%d first", first_len);
3179     XML_Parser parser = XML_ParserCreate(NULL);
3180     assert_true(parser != NULL);
3181     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3182     assert_true(XML_GetBuffer(parser, 0) != NULL);
3183     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3184       xml_failure(parser);
3185     XML_ParserFree(parser);
3186   }
3187 }
3188 END_TEST
3189 
3190 /* Test position information macros */
START_TEST(test_byte_info_at_end)3191 START_TEST(test_byte_info_at_end) {
3192   const char *text = "<doc></doc>";
3193 
3194   if (XML_GetCurrentByteIndex(g_parser) != -1
3195       || XML_GetCurrentByteCount(g_parser) != 0)
3196     fail("Byte index/count incorrect at start of parse");
3197   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3198       == XML_STATUS_ERROR)
3199     xml_failure(g_parser);
3200   /* At end, the count will be zero and the index the end of string */
3201   if (XML_GetCurrentByteCount(g_parser) != 0)
3202     fail("Terminal byte count incorrect");
3203   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3204     fail("Terminal byte index incorrect");
3205 }
3206 END_TEST
3207 
3208 /* Test position information from errors */
3209 #define PRE_ERROR_STR "<doc></"
3210 #define POST_ERROR_STR "wombat></doc>"
START_TEST(test_byte_info_at_error)3211 START_TEST(test_byte_info_at_error) {
3212   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3213 
3214   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3215       == XML_STATUS_OK)
3216     fail("Syntax error not faulted");
3217   if (XML_GetCurrentByteCount(g_parser) != 0)
3218     fail("Error byte count incorrect");
3219   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3220     fail("Error byte index incorrect");
3221 }
3222 END_TEST
3223 #undef PRE_ERROR_STR
3224 #undef POST_ERROR_STR
3225 
3226 /* Test position information in handler */
3227 #define START_ELEMENT "<e>"
3228 #define CDATA_TEXT "Hello"
3229 #define END_ELEMENT "</e>"
START_TEST(test_byte_info_at_cdata)3230 START_TEST(test_byte_info_at_cdata) {
3231   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3232   int offset, size;
3233   ByteTestData data;
3234 
3235   /* Check initial context is empty */
3236   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3237     fail("Unexpected context at start of parse");
3238 
3239   data.start_element_len = (int)strlen(START_ELEMENT);
3240   data.cdata_len = (int)strlen(CDATA_TEXT);
3241   data.total_string_len = (int)strlen(text);
3242   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3243   XML_SetUserData(g_parser, &data);
3244   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3245     xml_failure(g_parser);
3246 }
3247 END_TEST
3248 #undef START_ELEMENT
3249 #undef CDATA_TEXT
3250 #undef END_ELEMENT
3251 
3252 /* Test predefined entities are correctly recognised */
START_TEST(test_predefined_entities)3253 START_TEST(test_predefined_entities) {
3254   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3255   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3256   const XML_Char *result = XCS("<>&\"'");
3257   CharData storage;
3258 
3259   XML_SetDefaultHandler(g_parser, accumulate_characters);
3260   /* run_character_check uses XML_SetCharacterDataHandler(), which
3261    * unfortunately heads off a code path that we need to exercise.
3262    */
3263   CharData_Init(&storage);
3264   XML_SetUserData(g_parser, &storage);
3265   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3266       == XML_STATUS_ERROR)
3267     xml_failure(g_parser);
3268   /* The default handler doesn't translate the entities */
3269   CharData_CheckXMLChars(&storage, expected);
3270 
3271   /* Now try again and check the translation */
3272   XML_ParserReset(g_parser, NULL);
3273   run_character_check(text, result);
3274 }
3275 END_TEST
3276 
3277 /* Regression test that an invalid tag in an external parameter
3278  * reference in an external DTD is correctly faulted.
3279  *
3280  * Only a few specific tags are legal in DTDs ignoring comments and
3281  * processing instructions, all of which begin with an exclamation
3282  * mark.  "<el/>" is not one of them, so the parser should raise an
3283  * error on encountering it.
3284  */
START_TEST(test_invalid_tag_in_dtd)3285 START_TEST(test_invalid_tag_in_dtd) {
3286   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3287                      "<doc></doc>\n";
3288 
3289   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3290   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3291   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3292                  "Invalid tag IN DTD external param not rejected");
3293 }
3294 END_TEST
3295 
3296 /* Test entities not quite the predefined ones are not mis-recognised */
START_TEST(test_not_predefined_entities)3297 START_TEST(test_not_predefined_entities) {
3298   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3299                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3300   int i = 0;
3301 
3302   while (text[i] != NULL) {
3303     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3304                    "Undefined entity not rejected");
3305     XML_ParserReset(g_parser, NULL);
3306     i++;
3307   }
3308 }
3309 END_TEST
3310 
3311 /* Test conditional inclusion (IGNORE) */
START_TEST(test_ignore_section)3312 START_TEST(test_ignore_section) {
3313   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3314                      "<doc><e>&entity;</e></doc>";
3315   const XML_Char *expected
3316       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3317   CharData storage;
3318 
3319   CharData_Init(&storage);
3320   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3321   XML_SetUserData(g_parser, &storage);
3322   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3323   XML_SetDefaultHandler(g_parser, accumulate_characters);
3324   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3325   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3326   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3327   XML_SetStartElementHandler(g_parser, dummy_start_element);
3328   XML_SetEndElementHandler(g_parser, dummy_end_element);
3329   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3330       == XML_STATUS_ERROR)
3331     xml_failure(g_parser);
3332   CharData_CheckXMLChars(&storage, expected);
3333 }
3334 END_TEST
3335 
START_TEST(test_ignore_section_utf16)3336 START_TEST(test_ignore_section_utf16) {
3337   const char text[] =
3338       /* <!DOCTYPE d SYSTEM 's'> */
3339       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3340       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3341       /* <d><e>&en;</e></d> */
3342       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3343   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3344   CharData storage;
3345 
3346   CharData_Init(&storage);
3347   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3348   XML_SetUserData(g_parser, &storage);
3349   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3350   XML_SetDefaultHandler(g_parser, accumulate_characters);
3351   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3352   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3353   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3354   XML_SetStartElementHandler(g_parser, dummy_start_element);
3355   XML_SetEndElementHandler(g_parser, dummy_end_element);
3356   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3357       == XML_STATUS_ERROR)
3358     xml_failure(g_parser);
3359   CharData_CheckXMLChars(&storage, expected);
3360 }
3361 END_TEST
3362 
START_TEST(test_ignore_section_utf16_be)3363 START_TEST(test_ignore_section_utf16_be) {
3364   const char text[] =
3365       /* <!DOCTYPE d SYSTEM 's'> */
3366       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3367       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3368       /* <d><e>&en;</e></d> */
3369       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3370   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3371   CharData storage;
3372 
3373   CharData_Init(&storage);
3374   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3375   XML_SetUserData(g_parser, &storage);
3376   XML_SetExternalEntityRefHandler(g_parser,
3377                                   external_entity_load_ignore_utf16_be);
3378   XML_SetDefaultHandler(g_parser, accumulate_characters);
3379   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3380   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3381   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3382   XML_SetStartElementHandler(g_parser, dummy_start_element);
3383   XML_SetEndElementHandler(g_parser, dummy_end_element);
3384   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3385       == XML_STATUS_ERROR)
3386     xml_failure(g_parser);
3387   CharData_CheckXMLChars(&storage, expected);
3388 }
3389 END_TEST
3390 
3391 /* Test mis-formatted conditional exclusion */
START_TEST(test_bad_ignore_section)3392 START_TEST(test_bad_ignore_section) {
3393   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3394                      "<doc><e>&entity;</e></doc>";
3395   ExtFaults faults[]
3396       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3397           XML_ERROR_SYNTAX},
3398          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3399           XML_ERROR_INVALID_TOKEN},
3400          {/* FIrst two bytes of a three-byte char */
3401           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3402           XML_ERROR_PARTIAL_CHAR},
3403          {NULL, NULL, NULL, XML_ERROR_NONE}};
3404   ExtFaults *fault;
3405 
3406   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3407     set_subtest("%s", fault->parse_text);
3408     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3409     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3410     XML_SetUserData(g_parser, fault);
3411     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3412                    "Incomplete IGNORE section not failed");
3413     XML_ParserReset(g_parser, NULL);
3414   }
3415 }
3416 END_TEST
3417 
3418 struct bom_testdata {
3419   const char *external;
3420   int split;
3421   XML_Bool nested_callback_happened;
3422 };
3423 
3424 static int XMLCALL
external_bom_checker(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)3425 external_bom_checker(XML_Parser parser, const XML_Char *context,
3426                      const XML_Char *base, const XML_Char *systemId,
3427                      const XML_Char *publicId) {
3428   const char *text;
3429   UNUSED_P(base);
3430   UNUSED_P(systemId);
3431   UNUSED_P(publicId);
3432 
3433   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3434   if (ext_parser == NULL)
3435     fail("Could not create external entity parser");
3436 
3437   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3438     struct bom_testdata *const testdata
3439         = (struct bom_testdata *)XML_GetUserData(parser);
3440     const char *const external = testdata->external;
3441     const int split = testdata->split;
3442     testdata->nested_callback_happened = XML_TRUE;
3443 
3444     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3445         != XML_STATUS_OK) {
3446       xml_failure(ext_parser);
3447     }
3448     text = external + split; // the parse below will continue where we left off.
3449   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3450     text = "<!ELEMENT doc EMPTY>\n"
3451            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3452            "<!ENTITY % e2 '%e1;'>\n";
3453   } else {
3454     fail("unknown systemId");
3455   }
3456 
3457   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3458       != XML_STATUS_OK)
3459     xml_failure(ext_parser);
3460 
3461   XML_ParserFree(ext_parser);
3462   return XML_STATUS_OK;
3463 }
3464 
3465 /* regression test: BOM should be consumed when followed by a partial token. */
START_TEST(test_external_bom_consumed)3466 START_TEST(test_external_bom_consumed) {
3467   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3468                            "<doc></doc>\n";
3469   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3470   const int len = (int)strlen(external);
3471   for (int split = 0; split <= len; ++split) {
3472     set_subtest("split at byte %d", split);
3473 
3474     struct bom_testdata testdata;
3475     testdata.external = external;
3476     testdata.split = split;
3477     testdata.nested_callback_happened = XML_FALSE;
3478 
3479     XML_Parser parser = XML_ParserCreate(NULL);
3480     if (parser == NULL) {
3481       fail("Couldn't create parser");
3482     }
3483     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3484     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3485     XML_SetUserData(parser, &testdata);
3486     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3487         == XML_STATUS_ERROR)
3488       xml_failure(parser);
3489     if (! testdata.nested_callback_happened) {
3490       fail("ref handler not called");
3491     }
3492     XML_ParserFree(parser);
3493   }
3494 }
3495 END_TEST
3496 
3497 /* Test recursive parsing */
START_TEST(test_external_entity_values)3498 START_TEST(test_external_entity_values) {
3499   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3500                      "<doc></doc>\n";
3501   ExtFaults data_004_2[] = {
3502       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3503       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3504        XML_ERROR_INVALID_TOKEN},
3505       {"'wombat", "Unterminated string not faulted", NULL,
3506        XML_ERROR_UNCLOSED_TOKEN},
3507       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3508        XML_ERROR_PARTIAL_CHAR},
3509       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3510       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3511        XML_ERROR_XML_DECL},
3512       {/* UTF-8 BOM */
3513        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3514        XML_ERROR_NONE},
3515       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3516        "Invalid token after text declaration not faulted", NULL,
3517        XML_ERROR_INVALID_TOKEN},
3518       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3519        "Unterminated string after text decl not faulted", NULL,
3520        XML_ERROR_UNCLOSED_TOKEN},
3521       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3522        "Partial UTF-8 character after text decl not faulted", NULL,
3523        XML_ERROR_PARTIAL_CHAR},
3524       {"%e1;", "Recursive parameter entity not faulted", NULL,
3525        XML_ERROR_RECURSIVE_ENTITY_REF},
3526       {NULL, NULL, NULL, XML_ERROR_NONE}};
3527   int i;
3528 
3529   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3530     set_subtest("%s", data_004_2[i].parse_text);
3531     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3532     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3533     XML_SetUserData(g_parser, &data_004_2[i]);
3534     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3535         == XML_STATUS_ERROR)
3536       xml_failure(g_parser);
3537     XML_ParserReset(g_parser, NULL);
3538   }
3539 }
3540 END_TEST
3541 
3542 /* Test the recursive parse interacts with a not standalone handler */
START_TEST(test_ext_entity_not_standalone)3543 START_TEST(test_ext_entity_not_standalone) {
3544   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3545                      "<doc></doc>";
3546 
3547   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3548   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3549   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3550                  "Standalone rejection not caught");
3551 }
3552 END_TEST
3553 
START_TEST(test_ext_entity_value_abort)3554 START_TEST(test_ext_entity_value_abort) {
3555   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3556                      "<doc></doc>\n";
3557 
3558   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3559   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3560   g_resumable = XML_FALSE;
3561   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3562       == XML_STATUS_ERROR)
3563     xml_failure(g_parser);
3564 }
3565 END_TEST
3566 
START_TEST(test_bad_public_doctype)3567 START_TEST(test_bad_public_doctype) {
3568   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3569                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3570                      "<doc></doc>";
3571 
3572   /* Setting a handler provokes a particular code path */
3573   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3574                             dummy_end_doctype_handler);
3575   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3576 }
3577 END_TEST
3578 
3579 /* Test based on ibm/valid/P32/ibm32v04.xml */
START_TEST(test_attribute_enum_value)3580 START_TEST(test_attribute_enum_value) {
3581   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3582                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3583                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3584   ExtTest dtd_data
3585       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3586          "<!ELEMENT a EMPTY>\n"
3587          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3588          NULL, NULL};
3589   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3590 
3591   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3592   XML_SetUserData(g_parser, &dtd_data);
3593   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3594   /* An attribute list handler provokes a different code path */
3595   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3596   run_ext_character_check(text, &dtd_data, expected);
3597 }
3598 END_TEST
3599 
3600 /* Slightly bizarrely, the library seems to silently ignore entity
3601  * definitions for predefined entities, even when they are wrong.  The
3602  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3603  * to happen, so this is currently treated as acceptable.
3604  */
START_TEST(test_predefined_entity_redefinition)3605 START_TEST(test_predefined_entity_redefinition) {
3606   const char *text = "<!DOCTYPE doc [\n"
3607                      "<!ENTITY apos 'foo'>\n"
3608                      "]>\n"
3609                      "<doc>&apos;</doc>";
3610   run_character_check(text, XCS("'"));
3611 }
3612 END_TEST
3613 
3614 /* Test that the parser stops processing the DTD after an unresolved
3615  * parameter entity is encountered.
3616  */
START_TEST(test_dtd_stop_processing)3617 START_TEST(test_dtd_stop_processing) {
3618   const char *text = "<!DOCTYPE doc [\n"
3619                      "%foo;\n"
3620                      "<!ENTITY bar 'bas'>\n"
3621                      "]><doc/>";
3622 
3623   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3624   init_dummy_handlers();
3625   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3626       == XML_STATUS_ERROR)
3627     xml_failure(g_parser);
3628   if (get_dummy_handler_flags() != 0)
3629     fail("DTD processing still going after undefined PE");
3630 }
3631 END_TEST
3632 
3633 /* Test public notations with no system ID */
START_TEST(test_public_notation_no_sysid)3634 START_TEST(test_public_notation_no_sysid) {
3635   const char *text = "<!DOCTYPE doc [\n"
3636                      "<!NOTATION note PUBLIC 'foo'>\n"
3637                      "<!ELEMENT doc EMPTY>\n"
3638                      "]>\n<doc/>";
3639 
3640   init_dummy_handlers();
3641   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3642   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3643       == XML_STATUS_ERROR)
3644     xml_failure(g_parser);
3645   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3646     fail("Notation declaration handler not called");
3647 }
3648 END_TEST
3649 
START_TEST(test_nested_groups)3650 START_TEST(test_nested_groups) {
3651   const char *text
3652       = "<!DOCTYPE doc [\n"
3653         "<!ELEMENT doc "
3654         /* Sixteen elements per line */
3655         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3656         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3657         "))))))))))))))))))))))))))))))))>\n"
3658         "<!ELEMENT e EMPTY>"
3659         "]>\n"
3660         "<doc><e/></doc>";
3661   CharData storage;
3662 
3663   CharData_Init(&storage);
3664   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3665   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3666   XML_SetUserData(g_parser, &storage);
3667   init_dummy_handlers();
3668   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3669       == XML_STATUS_ERROR)
3670     xml_failure(g_parser);
3671   CharData_CheckXMLChars(&storage, XCS("doce"));
3672   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3673     fail("Element handler not fired");
3674 }
3675 END_TEST
3676 
START_TEST(test_group_choice)3677 START_TEST(test_group_choice) {
3678   const char *text = "<!DOCTYPE doc [\n"
3679                      "<!ELEMENT doc (a|b|c)+>\n"
3680                      "<!ELEMENT a EMPTY>\n"
3681                      "<!ELEMENT b (#PCDATA)>\n"
3682                      "<!ELEMENT c ANY>\n"
3683                      "]>\n"
3684                      "<doc>\n"
3685                      "<a/>\n"
3686                      "<b attr='foo'>This is a foo</b>\n"
3687                      "<c></c>\n"
3688                      "</doc>\n";
3689 
3690   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3691   init_dummy_handlers();
3692   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3693       == XML_STATUS_ERROR)
3694     xml_failure(g_parser);
3695   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3696     fail("Element handler flag not raised");
3697 }
3698 END_TEST
3699 
START_TEST(test_standalone_parameter_entity)3700 START_TEST(test_standalone_parameter_entity) {
3701   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3702                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3703                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3704                      "%entity;\n"
3705                      "]>\n"
3706                      "<doc></doc>";
3707   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3708 
3709   XML_SetUserData(g_parser, dtd_data);
3710   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3711   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3712   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3713       == XML_STATUS_ERROR)
3714     xml_failure(g_parser);
3715 }
3716 END_TEST
3717 
3718 /* Test skipping of parameter entity in an external DTD */
3719 /* Derived from ibm/invalid/P69/ibm69i01.xml */
START_TEST(test_skipped_parameter_entity)3720 START_TEST(test_skipped_parameter_entity) {
3721   const char *text = "<?xml version='1.0'?>\n"
3722                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3723                      "<!ELEMENT root (#PCDATA|a)* >\n"
3724                      "]>\n"
3725                      "<root></root>";
3726   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3727 
3728   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3729   XML_SetUserData(g_parser, &dtd_data);
3730   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3731   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3732   init_dummy_handlers();
3733   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3734       == XML_STATUS_ERROR)
3735     xml_failure(g_parser);
3736   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3737     fail("Skip handler not executed");
3738 }
3739 END_TEST
3740 
3741 /* Test recursive parameter entity definition rejected in external DTD */
START_TEST(test_recursive_external_parameter_entity)3742 START_TEST(test_recursive_external_parameter_entity) {
3743   const char *text = "<?xml version='1.0'?>\n"
3744                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3745                      "<!ELEMENT root (#PCDATA|a)* >\n"
3746                      "]>\n"
3747                      "<root></root>";
3748   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3749                         "Recursive external parameter entity not faulted", NULL,
3750                         XML_ERROR_RECURSIVE_ENTITY_REF};
3751 
3752   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3753   XML_SetUserData(g_parser, &dtd_data);
3754   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3755   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3756                  "Recursive external parameter not spotted");
3757 }
3758 END_TEST
3759 
3760 /* Test undefined parameter entity in external entity handler */
START_TEST(test_undefined_ext_entity_in_external_dtd)3761 START_TEST(test_undefined_ext_entity_in_external_dtd) {
3762   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3763                      "<doc></doc>\n";
3764 
3765   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3766   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3767   XML_SetUserData(g_parser, NULL);
3768   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3769       == XML_STATUS_ERROR)
3770     xml_failure(g_parser);
3771 
3772   /* Now repeat without the external entity ref handler invoking
3773    * another copy of itself.
3774    */
3775   XML_ParserReset(g_parser, NULL);
3776   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3777   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3778   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3779   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3780       == XML_STATUS_ERROR)
3781     xml_failure(g_parser);
3782 }
3783 END_TEST
3784 
3785 /* Test suspending the parse on receiving an XML declaration works */
START_TEST(test_suspend_xdecl)3786 START_TEST(test_suspend_xdecl) {
3787   const char *text = long_character_data_text;
3788 
3789   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3790   XML_SetUserData(g_parser, g_parser);
3791   g_resumable = XML_TRUE;
3792   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3793   // we won't know exactly how much input we actually managed to give Expat.
3794   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3795       != XML_STATUS_SUSPENDED)
3796     xml_failure(g_parser);
3797   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3798     xml_failure(g_parser);
3799   /* Attempt to start a new parse while suspended */
3800   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3801       != XML_STATUS_ERROR)
3802     fail("Attempt to parse while suspended not faulted");
3803   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3804     fail("Suspended parse not faulted with correct error");
3805 }
3806 END_TEST
3807 
3808 /* Test aborting the parse in an epilog works */
START_TEST(test_abort_epilog)3809 START_TEST(test_abort_epilog) {
3810   const char *text = "<doc></doc>\n\r\n";
3811   XML_Char trigger_char = XCS('\r');
3812 
3813   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3814   XML_SetUserData(g_parser, &trigger_char);
3815   g_resumable = XML_FALSE;
3816   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3817       != XML_STATUS_ERROR)
3818     fail("Abort not triggered");
3819   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3820     xml_failure(g_parser);
3821 }
3822 END_TEST
3823 
3824 /* Test a different code path for abort in the epilog */
START_TEST(test_abort_epilog_2)3825 START_TEST(test_abort_epilog_2) {
3826   const char *text = "<doc></doc>\n";
3827   XML_Char trigger_char = XCS('\n');
3828 
3829   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3830   XML_SetUserData(g_parser, &trigger_char);
3831   g_resumable = XML_FALSE;
3832   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3833 }
3834 END_TEST
3835 
3836 /* Test suspension from the epilog */
START_TEST(test_suspend_epilog)3837 START_TEST(test_suspend_epilog) {
3838   const char *text = "<doc></doc>\n";
3839   XML_Char trigger_char = XCS('\n');
3840 
3841   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3842   XML_SetUserData(g_parser, &trigger_char);
3843   g_resumable = XML_TRUE;
3844   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3845       != XML_STATUS_SUSPENDED)
3846     xml_failure(g_parser);
3847 }
3848 END_TEST
3849 
START_TEST(test_suspend_in_sole_empty_tag)3850 START_TEST(test_suspend_in_sole_empty_tag) {
3851   const char *text = "<doc/>";
3852   enum XML_Status rc;
3853 
3854   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3855   XML_SetUserData(g_parser, g_parser);
3856   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3857   if (rc == XML_STATUS_ERROR)
3858     xml_failure(g_parser);
3859   else if (rc != XML_STATUS_SUSPENDED)
3860     fail("Suspend not triggered");
3861   rc = XML_ResumeParser(g_parser);
3862   if (rc == XML_STATUS_ERROR)
3863     xml_failure(g_parser);
3864   else if (rc != XML_STATUS_OK)
3865     fail("Resume failed");
3866 }
3867 END_TEST
3868 
START_TEST(test_unfinished_epilog)3869 START_TEST(test_unfinished_epilog) {
3870   const char *text = "<doc></doc><";
3871 
3872   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3873                  "Incomplete epilog entry not faulted");
3874 }
3875 END_TEST
3876 
START_TEST(test_partial_char_in_epilog)3877 START_TEST(test_partial_char_in_epilog) {
3878   const char *text = "<doc></doc>\xe2\x82";
3879 
3880   /* First check that no fault is raised if the parse is not finished */
3881   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3882       == XML_STATUS_ERROR)
3883     xml_failure(g_parser);
3884   /* Now check that it is faulted once we finish */
3885   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3886     fail("Partial character in epilog not faulted");
3887   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3888     xml_failure(g_parser);
3889 }
3890 END_TEST
3891 
3892 /* Test resuming a parse suspended in entity substitution */
START_TEST(test_suspend_resume_internal_entity)3893 START_TEST(test_suspend_resume_internal_entity) {
3894   const char *text
3895       = "<!DOCTYPE doc [\n"
3896         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3897         "]>\n"
3898         "<doc>&foo;</doc>\n";
3899   const XML_Char *expected1 = XCS("Hi");
3900   const XML_Char *expected2 = XCS("HiHo");
3901   CharData storage;
3902 
3903   CharData_Init(&storage);
3904   XML_SetStartElementHandler(g_parser, start_element_suspender);
3905   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3906   XML_SetUserData(g_parser, &storage);
3907   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3908   // we won't know exactly how much input we actually managed to give Expat.
3909   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3910       != XML_STATUS_SUSPENDED)
3911     xml_failure(g_parser);
3912   CharData_CheckXMLChars(&storage, XCS(""));
3913   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3914     xml_failure(g_parser);
3915   CharData_CheckXMLChars(&storage, expected1);
3916   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3917     xml_failure(g_parser);
3918   CharData_CheckXMLChars(&storage, expected2);
3919 }
3920 END_TEST
3921 
START_TEST(test_suspend_resume_internal_entity_issue_629)3922 START_TEST(test_suspend_resume_internal_entity_issue_629) {
3923   const char *const text
3924       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3925         "<"
3926         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3927         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3928         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3929         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3930         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3931         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3932         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3933         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3934         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3935         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3936         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3937         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3938         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3939         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3940         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3941         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3942         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3943         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3944         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3945         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3946         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3947         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3948         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3949         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3950         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3951         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3952         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3953         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3954         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3955         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3956         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3957         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3958         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3959         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3960         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3961         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3962         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3963         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3964         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3965         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3966         "/>"
3967         "</b></a>";
3968   const size_t firstChunkSizeBytes = 54;
3969 
3970   XML_Parser parser = XML_ParserCreate(NULL);
3971   XML_SetUserData(parser, parser);
3972   XML_SetCommentHandler(parser, suspending_comment_handler);
3973 
3974   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3975       != XML_STATUS_SUSPENDED)
3976     xml_failure(parser);
3977   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3978     xml_failure(parser);
3979   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3980                               (int)(strlen(text) - firstChunkSizeBytes),
3981                               XML_TRUE)
3982       != XML_STATUS_OK)
3983     xml_failure(parser);
3984   XML_ParserFree(parser);
3985 }
3986 END_TEST
3987 
3988 /* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error)3989 START_TEST(test_resume_entity_with_syntax_error) {
3990   if (g_chunkSize != 0) {
3991     // this test does not use SINGLE_BYTES, because of suspension
3992     return;
3993   }
3994 
3995   const char *text = "<!DOCTYPE doc [\n"
3996                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3997                      "]>\n"
3998                      "<doc>&foo;</doc>\n";
3999 
4000   XML_SetStartElementHandler(g_parser, start_element_suspender);
4001   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
4002   // we won't know exactly how much input we actually managed to give Expat.
4003   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4004       != XML_STATUS_SUSPENDED)
4005     xml_failure(g_parser);
4006   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
4007     fail("Syntax error in entity not faulted");
4008   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
4009     xml_failure(g_parser);
4010 }
4011 END_TEST
4012 
4013 /* Test suspending and resuming in a parameter entity substitution */
START_TEST(test_suspend_resume_parameter_entity)4014 START_TEST(test_suspend_resume_parameter_entity) {
4015   const char *text = "<!DOCTYPE doc [\n"
4016                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
4017                      "%foo;\n"
4018                      "]>\n"
4019                      "<doc>Hello, world</doc>";
4020   const XML_Char *expected = XCS("Hello, world");
4021   CharData storage;
4022 
4023   CharData_Init(&storage);
4024   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4025   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
4026   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
4027   XML_SetUserData(g_parser, &storage);
4028   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
4029       != XML_STATUS_SUSPENDED)
4030     xml_failure(g_parser);
4031   CharData_CheckXMLChars(&storage, XCS(""));
4032   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
4033     xml_failure(g_parser);
4034   CharData_CheckXMLChars(&storage, expected);
4035 }
4036 END_TEST
4037 
4038 /* Test attempting to use parser after an error is faulted */
START_TEST(test_restart_on_error)4039 START_TEST(test_restart_on_error) {
4040   const char *text = "<$doc><doc></doc>";
4041 
4042   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4043       != XML_STATUS_ERROR)
4044     fail("Invalid tag name not faulted");
4045   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4046     xml_failure(g_parser);
4047   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
4048     fail("Restarting invalid parse not faulted");
4049   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
4050     xml_failure(g_parser);
4051 }
4052 END_TEST
4053 
4054 /* Test that angle brackets in an attribute default value are faulted */
START_TEST(test_reject_lt_in_attribute_value)4055 START_TEST(test_reject_lt_in_attribute_value) {
4056   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
4057                      "<doc></doc>";
4058 
4059   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4060                  "Bad attribute default not faulted");
4061 }
4062 END_TEST
4063 
START_TEST(test_reject_unfinished_param_in_att_value)4064 START_TEST(test_reject_unfinished_param_in_att_value) {
4065   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
4066                      "<doc></doc>";
4067 
4068   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4069                  "Bad attribute default not faulted");
4070 }
4071 END_TEST
4072 
START_TEST(test_trailing_cr_in_att_value)4073 START_TEST(test_trailing_cr_in_att_value) {
4074   const char *text = "<doc a='value\r'/>";
4075 
4076   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4077       == XML_STATUS_ERROR)
4078     xml_failure(g_parser);
4079 }
4080 END_TEST
4081 
4082 /* Try parsing a general entity within a parameter entity in a
4083  * standalone internal DTD.  Covers a corner case in the parser.
4084  */
START_TEST(test_standalone_internal_entity)4085 START_TEST(test_standalone_internal_entity) {
4086   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
4087                      "<!DOCTYPE doc [\n"
4088                      "  <!ELEMENT doc (#PCDATA)>\n"
4089                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
4090                      "  <!ENTITY ge 'AttDefaultValue'>\n"
4091                      "  %pe;\n"
4092                      "]>\n"
4093                      "<doc att2='any'/>";
4094 
4095   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4096   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4097       == XML_STATUS_ERROR)
4098     xml_failure(g_parser);
4099 }
4100 END_TEST
4101 
4102 /* Test that a reference to an unknown external entity is skipped */
START_TEST(test_skipped_external_entity)4103 START_TEST(test_skipped_external_entity) {
4104   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4105                      "<doc></doc>\n";
4106   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
4107                        "<!ENTITY % e2 '%e1;'>\n",
4108                        NULL, NULL};
4109 
4110   XML_SetUserData(g_parser, &test_data);
4111   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4112   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4113   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4114       == XML_STATUS_ERROR)
4115     xml_failure(g_parser);
4116 }
4117 END_TEST
4118 
4119 /* Test a different form of unknown external entity */
START_TEST(test_skipped_null_loaded_ext_entity)4120 START_TEST(test_skipped_null_loaded_ext_entity) {
4121   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4122                      "<doc />";
4123   ExtHdlrData test_data
4124       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4125          "<!ENTITY % pe2 '%pe1;'>\n"
4126          "%pe2;\n",
4127          external_entity_null_loader, NULL};
4128 
4129   XML_SetUserData(g_parser, &test_data);
4130   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4131   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4132   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4133       == XML_STATUS_ERROR)
4134     xml_failure(g_parser);
4135 }
4136 END_TEST
4137 
START_TEST(test_skipped_unloaded_ext_entity)4138 START_TEST(test_skipped_unloaded_ext_entity) {
4139   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
4140                      "<doc />";
4141   ExtHdlrData test_data
4142       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
4143          "<!ENTITY % pe2 '%pe1;'>\n"
4144          "%pe2;\n",
4145          NULL, NULL};
4146 
4147   XML_SetUserData(g_parser, &test_data);
4148   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4149   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
4150   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4151       == XML_STATUS_ERROR)
4152     xml_failure(g_parser);
4153 }
4154 END_TEST
4155 
4156 /* Test that a parameter entity value ending with a carriage return
4157  * has it translated internally into a newline.
4158  */
START_TEST(test_param_entity_with_trailing_cr)4159 START_TEST(test_param_entity_with_trailing_cr) {
4160 #define PARAM_ENTITY_NAME "pe"
4161 #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
4162   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
4163                      "<doc/>";
4164   ExtTest test_data
4165       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4166          "%" PARAM_ENTITY_NAME ";\n",
4167          NULL, NULL};
4168 
4169   XML_SetUserData(g_parser, &test_data);
4170   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4171   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4172   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4173   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4174                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4175   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4176       == XML_STATUS_ERROR)
4177     xml_failure(g_parser);
4178   int entity_match_flag = get_param_entity_match_flag();
4179   if (entity_match_flag == ENTITY_MATCH_FAIL)
4180     fail("Parameter entity CR->NEWLINE conversion failed");
4181   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4182     fail("Parameter entity not parsed");
4183 }
4184 #undef PARAM_ENTITY_NAME
4185 #undef PARAM_ENTITY_CORE_VALUE
4186 END_TEST
4187 
START_TEST(test_invalid_character_entity)4188 START_TEST(test_invalid_character_entity) {
4189   const char *text = "<!DOCTYPE doc [\n"
4190                      "  <!ENTITY entity '&#x110000;'>\n"
4191                      "]>\n"
4192                      "<doc>&entity;</doc>";
4193 
4194   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4195                  "Out of range character reference not faulted");
4196 }
4197 END_TEST
4198 
START_TEST(test_invalid_character_entity_2)4199 START_TEST(test_invalid_character_entity_2) {
4200   const char *text = "<!DOCTYPE doc [\n"
4201                      "  <!ENTITY entity '&#xg0;'>\n"
4202                      "]>\n"
4203                      "<doc>&entity;</doc>";
4204 
4205   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4206                  "Out of range character reference not faulted");
4207 }
4208 END_TEST
4209 
START_TEST(test_invalid_character_entity_3)4210 START_TEST(test_invalid_character_entity_3) {
4211   const char text[] =
4212       /* <!DOCTYPE doc [\n */
4213       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4214       /* U+0E04 = KHO KHWAI
4215        * U+0E08 = CHO CHAN */
4216       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4217       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4218       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4219       /* ]>\n */
4220       "\0]\0>\0\n"
4221       /* <doc>&entity;</doc> */
4222       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4223 
4224   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4225       != XML_STATUS_ERROR)
4226     fail("Invalid start of entity name not faulted");
4227   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4228     xml_failure(g_parser);
4229 }
4230 END_TEST
4231 
START_TEST(test_invalid_character_entity_4)4232 START_TEST(test_invalid_character_entity_4) {
4233   const char *text = "<!DOCTYPE doc [\n"
4234                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4235                      "]>\n"
4236                      "<doc>&entity;</doc>";
4237 
4238   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4239                  "Out of range character reference not faulted");
4240 }
4241 END_TEST
4242 
4243 /* Test that processing instructions are picked up by a default handler */
START_TEST(test_pi_handled_in_default)4244 START_TEST(test_pi_handled_in_default) {
4245   const char *text = "<?test processing instruction?>\n<doc/>";
4246   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4247   CharData storage;
4248 
4249   CharData_Init(&storage);
4250   XML_SetDefaultHandler(g_parser, accumulate_characters);
4251   XML_SetUserData(g_parser, &storage);
4252   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4253       == XML_STATUS_ERROR)
4254     xml_failure(g_parser);
4255   CharData_CheckXMLChars(&storage, expected);
4256 }
4257 END_TEST
4258 
4259 /* Test that comments are picked up by a default handler */
START_TEST(test_comment_handled_in_default)4260 START_TEST(test_comment_handled_in_default) {
4261   const char *text = "<!-- This is a comment -->\n<doc/>";
4262   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4263   CharData storage;
4264 
4265   CharData_Init(&storage);
4266   XML_SetDefaultHandler(g_parser, accumulate_characters);
4267   XML_SetUserData(g_parser, &storage);
4268   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4269       == XML_STATUS_ERROR)
4270     xml_failure(g_parser);
4271   CharData_CheckXMLChars(&storage, expected);
4272 }
4273 END_TEST
4274 
4275 /* Test PIs that look almost but not quite like XML declarations */
START_TEST(test_pi_yml)4276 START_TEST(test_pi_yml) {
4277   const char *text = "<?yml something like data?><doc/>";
4278   const XML_Char *expected = XCS("yml: something like data\n");
4279   CharData storage;
4280 
4281   CharData_Init(&storage);
4282   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4283   XML_SetUserData(g_parser, &storage);
4284   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4285       == XML_STATUS_ERROR)
4286     xml_failure(g_parser);
4287   CharData_CheckXMLChars(&storage, expected);
4288 }
4289 END_TEST
4290 
START_TEST(test_pi_xnl)4291 START_TEST(test_pi_xnl) {
4292   const char *text = "<?xnl nothing like data?><doc/>";
4293   const XML_Char *expected = XCS("xnl: nothing like data\n");
4294   CharData storage;
4295 
4296   CharData_Init(&storage);
4297   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4298   XML_SetUserData(g_parser, &storage);
4299   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4300       == XML_STATUS_ERROR)
4301     xml_failure(g_parser);
4302   CharData_CheckXMLChars(&storage, expected);
4303 }
4304 END_TEST
4305 
START_TEST(test_pi_xmm)4306 START_TEST(test_pi_xmm) {
4307   const char *text = "<?xmm everything like data?><doc/>";
4308   const XML_Char *expected = XCS("xmm: everything like data\n");
4309   CharData storage;
4310 
4311   CharData_Init(&storage);
4312   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4313   XML_SetUserData(g_parser, &storage);
4314   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4315       == XML_STATUS_ERROR)
4316     xml_failure(g_parser);
4317   CharData_CheckXMLChars(&storage, expected);
4318 }
4319 END_TEST
4320 
START_TEST(test_utf16_pi)4321 START_TEST(test_utf16_pi) {
4322   const char text[] =
4323       /* <?{KHO KHWAI}{CHO CHAN}?>
4324        * where {KHO KHWAI} = U+0E04
4325        * and   {CHO CHAN}  = U+0E08
4326        */
4327       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4328       /* <q/> */
4329       "<\0q\0/\0>\0";
4330 #ifdef XML_UNICODE
4331   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4332 #else
4333   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4334 #endif
4335   CharData storage;
4336 
4337   CharData_Init(&storage);
4338   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4339   XML_SetUserData(g_parser, &storage);
4340   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4341       == XML_STATUS_ERROR)
4342     xml_failure(g_parser);
4343   CharData_CheckXMLChars(&storage, expected);
4344 }
4345 END_TEST
4346 
START_TEST(test_utf16_be_pi)4347 START_TEST(test_utf16_be_pi) {
4348   const char text[] =
4349       /* <?{KHO KHWAI}{CHO CHAN}?>
4350        * where {KHO KHWAI} = U+0E04
4351        * and   {CHO CHAN}  = U+0E08
4352        */
4353       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4354       /* <q/> */
4355       "\0<\0q\0/\0>";
4356 #ifdef XML_UNICODE
4357   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4358 #else
4359   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4360 #endif
4361   CharData storage;
4362 
4363   CharData_Init(&storage);
4364   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4365   XML_SetUserData(g_parser, &storage);
4366   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4367       == XML_STATUS_ERROR)
4368     xml_failure(g_parser);
4369   CharData_CheckXMLChars(&storage, expected);
4370 }
4371 END_TEST
4372 
4373 /* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment)4374 START_TEST(test_utf16_be_comment) {
4375   const char text[] =
4376       /* <!-- Comment A --> */
4377       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4378       /* <doc/> */
4379       "\0<\0d\0o\0c\0/\0>";
4380   const XML_Char *expected = XCS(" Comment A ");
4381   CharData storage;
4382 
4383   CharData_Init(&storage);
4384   XML_SetCommentHandler(g_parser, accumulate_comment);
4385   XML_SetUserData(g_parser, &storage);
4386   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4387       == XML_STATUS_ERROR)
4388     xml_failure(g_parser);
4389   CharData_CheckXMLChars(&storage, expected);
4390 }
4391 END_TEST
4392 
START_TEST(test_utf16_le_comment)4393 START_TEST(test_utf16_le_comment) {
4394   const char text[] =
4395       /* <!-- Comment B --> */
4396       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4397       /* <doc/> */
4398       "<\0d\0o\0c\0/\0>\0";
4399   const XML_Char *expected = XCS(" Comment B ");
4400   CharData storage;
4401 
4402   CharData_Init(&storage);
4403   XML_SetCommentHandler(g_parser, accumulate_comment);
4404   XML_SetUserData(g_parser, &storage);
4405   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4406       == XML_STATUS_ERROR)
4407     xml_failure(g_parser);
4408   CharData_CheckXMLChars(&storage, expected);
4409 }
4410 END_TEST
4411 
4412 /* Test that the unknown encoding handler with map entries that expect
4413  * conversion but no conversion function is faulted
4414  */
START_TEST(test_missing_encoding_conversion_fn)4415 START_TEST(test_missing_encoding_conversion_fn) {
4416   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4417                      "<doc>\x81</doc>";
4418 
4419   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4420   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4421    * character introducing a two-byte sequence.  For this, it
4422    * requires a convert function.  The above function call doesn't
4423    * pass one through, so when BadEncodingHandler actually gets
4424    * called it should supply an invalid encoding.
4425    */
4426   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4427                  "Encoding with missing convert() not faulted");
4428 }
4429 END_TEST
4430 
START_TEST(test_failing_encoding_conversion_fn)4431 START_TEST(test_failing_encoding_conversion_fn) {
4432   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4433                      "<doc>\x81</doc>";
4434 
4435   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4436   /* BadEncodingHandler sets up an encoding with every top-bit-set
4437    * character introducing a two-byte sequence.  For this, it
4438    * requires a convert function.  The above function call passes
4439    * one that insists all possible sequences are invalid anyway.
4440    */
4441   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4442                  "Encoding with failing convert() not faulted");
4443 }
4444 END_TEST
4445 
4446 /* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success)4447 START_TEST(test_unknown_encoding_success) {
4448   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4449                      /* Equivalent to <eoc>Hello, world</eoc> */
4450                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4451 
4452   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4453   run_character_check(text, XCS("Hello, world"));
4454 }
4455 END_TEST
4456 
4457 /* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name)4458 START_TEST(test_unknown_encoding_bad_name) {
4459   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4460                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4461 
4462   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4463   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4464                  "Bad name start in unknown encoding not faulted");
4465 }
4466 END_TEST
4467 
4468 /* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2)4469 START_TEST(test_unknown_encoding_bad_name_2) {
4470   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4471                      "<d\xffoc>Hello, world</d\xffoc>";
4472 
4473   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4474   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4475                  "Bad name in unknown encoding not faulted");
4476 }
4477 END_TEST
4478 
4479 /* Test element name that is long enough to fill the conversion buffer
4480  * in an unknown encoding, finishing with an encoded character.
4481  */
START_TEST(test_unknown_encoding_long_name_1)4482 START_TEST(test_unknown_encoding_long_name_1) {
4483   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4484                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4485                      "Hi"
4486                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4487   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4488   CharData storage;
4489 
4490   CharData_Init(&storage);
4491   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4492   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4493   XML_SetUserData(g_parser, &storage);
4494   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4495       == XML_STATUS_ERROR)
4496     xml_failure(g_parser);
4497   CharData_CheckXMLChars(&storage, expected);
4498 }
4499 END_TEST
4500 
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
START_TEST(test_unknown_encoding_long_name_2)4504 START_TEST(test_unknown_encoding_long_name_2) {
4505   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4506                      "<abcdefghabcdefghabcdefghijklmnop>"
4507                      "Hi"
4508                      "</abcdefghabcdefghabcdefghijklmnop>";
4509   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4510   CharData storage;
4511 
4512   CharData_Init(&storage);
4513   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4514   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4515   XML_SetUserData(g_parser, &storage);
4516   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4517       == XML_STATUS_ERROR)
4518     xml_failure(g_parser);
4519   CharData_CheckXMLChars(&storage, expected);
4520 }
4521 END_TEST
4522 
START_TEST(test_invalid_unknown_encoding)4523 START_TEST(test_invalid_unknown_encoding) {
4524   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4525                      "<doc>Hello world</doc>";
4526 
4527   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4528   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4529                  "Invalid unknown encoding not faulted");
4530 }
4531 END_TEST
4532 
START_TEST(test_unknown_ascii_encoding_ok)4533 START_TEST(test_unknown_ascii_encoding_ok) {
4534   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4535                      "<doc>Hello, world</doc>";
4536 
4537   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4538   run_character_check(text, XCS("Hello, world"));
4539 }
4540 END_TEST
4541 
START_TEST(test_unknown_ascii_encoding_fail)4542 START_TEST(test_unknown_ascii_encoding_fail) {
4543   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4544                      "<doc>Hello, \x80 world</doc>";
4545 
4546   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4547   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4548                  "Invalid character not faulted");
4549 }
4550 END_TEST
4551 
START_TEST(test_unknown_encoding_invalid_length)4552 START_TEST(test_unknown_encoding_invalid_length) {
4553   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4554                      "<doc>Hello, world</doc>";
4555 
4556   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4557   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4558                  "Invalid unknown encoding not faulted");
4559 }
4560 END_TEST
4561 
START_TEST(test_unknown_encoding_invalid_topbit)4562 START_TEST(test_unknown_encoding_invalid_topbit) {
4563   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4564                      "<doc>Hello, world</doc>";
4565 
4566   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4567   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4568                  "Invalid unknown encoding not faulted");
4569 }
4570 END_TEST
4571 
START_TEST(test_unknown_encoding_invalid_surrogate)4572 START_TEST(test_unknown_encoding_invalid_surrogate) {
4573   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4574                      "<doc>Hello, \x82 world</doc>";
4575 
4576   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4577   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4578                  "Invalid unknown encoding not faulted");
4579 }
4580 END_TEST
4581 
START_TEST(test_unknown_encoding_invalid_high)4582 START_TEST(test_unknown_encoding_invalid_high) {
4583   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4584                      "<doc>Hello, world</doc>";
4585 
4586   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4587   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4588                  "Invalid unknown encoding not faulted");
4589 }
4590 END_TEST
4591 
START_TEST(test_unknown_encoding_invalid_attr_value)4592 START_TEST(test_unknown_encoding_invalid_attr_value) {
4593   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4594                      "<doc attr='\xff\x30'/>";
4595 
4596   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4597   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4598                  "Invalid attribute valid not faulted");
4599 }
4600 END_TEST
4601 
START_TEST(test_unknown_encoding_user_data_primary)4602 START_TEST(test_unknown_encoding_user_data_primary) {
4603   // This test is based on ideas contributed by Artiphishell Inc.
4604   const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n"
4605                            "<root />\n";
4606   XML_Parser parser = XML_ParserCreate(NULL);
4607   XML_SetUnknownEncodingHandler(parser,
4608                                 user_data_checking_unknown_encoding_handler,
4609                                 (void *)(intptr_t)0xC0FFEE);
4610 
4611   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
4612               == XML_STATUS_OK);
4613 
4614   XML_ParserFree(parser);
4615 }
4616 END_TEST
4617 
START_TEST(test_unknown_encoding_user_data_secondary)4618 START_TEST(test_unknown_encoding_user_data_secondary) {
4619   // This test is based on ideas contributed by Artiphishell Inc.
4620   const char *const text_main = "<!DOCTYPE r [\n"
4621                                 "  <!ENTITY ext SYSTEM 'ext.ent'>\n"
4622                                 "]>\n"
4623                                 "<r>&ext;</r>\n";
4624   const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n"
4625                                     "<e>data</e>";
4626   ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL};
4627   XML_Parser parser = XML_ParserCreate(NULL);
4628   XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
4629   XML_SetUnknownEncodingHandler(parser,
4630                                 user_data_checking_unknown_encoding_handler,
4631                                 (void *)(intptr_t)0xC0FFEE);
4632   XML_SetUserData(parser, &test_data);
4633 
4634   assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main),
4635                                       XML_TRUE)
4636               == XML_STATUS_OK);
4637 
4638   XML_ParserFree(parser);
4639 }
4640 END_TEST
4641 
4642 /* Test an external entity parser set to use latin-1 detects UTF-16
4643  * BOMs correctly.
4644  */
4645 /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
START_TEST(test_ext_entity_latin1_utf16le_bom)4646 START_TEST(test_ext_entity_latin1_utf16le_bom) {
4647   const char *text = "<!DOCTYPE doc [\n"
4648                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4649                      "]>\n"
4650                      "<doc>&en;</doc>";
4651   ExtTest2 test_data
4652       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4653          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4654           *   0x4c = L and 0x20 is a space
4655           */
4656          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4657 #ifdef XML_UNICODE
4658   const XML_Char *expected = XCS("\x00ff\x00feL ");
4659 #else
4660   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4661   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4662 #endif
4663   CharData storage;
4664 
4665   CharData_Init(&storage);
4666   test_data.storage = &storage;
4667   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4668   XML_SetUserData(g_parser, &test_data);
4669   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4670   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4671       == XML_STATUS_ERROR)
4672     xml_failure(g_parser);
4673   CharData_CheckXMLChars(&storage, expected);
4674 }
4675 END_TEST
4676 
START_TEST(test_ext_entity_latin1_utf16be_bom)4677 START_TEST(test_ext_entity_latin1_utf16be_bom) {
4678   const char *text = "<!DOCTYPE doc [\n"
4679                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4680                      "]>\n"
4681                      "<doc>&en;</doc>";
4682   ExtTest2 test_data
4683       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4684          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4685           *   0x4c = L and 0x20 is a space
4686           */
4687          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4688 #ifdef XML_UNICODE
4689   const XML_Char *expected = XCS("\x00fe\x00ff L");
4690 #else
4691   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4692   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4693 #endif
4694   CharData storage;
4695 
4696   CharData_Init(&storage);
4697   test_data.storage = &storage;
4698   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4699   XML_SetUserData(g_parser, &test_data);
4700   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4701   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4702       == XML_STATUS_ERROR)
4703     xml_failure(g_parser);
4704   CharData_CheckXMLChars(&storage, expected);
4705 }
4706 END_TEST
4707 
4708 /* Parsing the full buffer rather than a byte at a time makes a
4709  * difference to the encoding scanning code, so repeat the above tests
4710  * without breaking them down by byte.
4711  */
START_TEST(test_ext_entity_latin1_utf16le_bom2)4712 START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4713   const char *text = "<!DOCTYPE doc [\n"
4714                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4715                      "]>\n"
4716                      "<doc>&en;</doc>";
4717   ExtTest2 test_data
4718       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4719          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4720           *   0x4c = L and 0x20 is a space
4721           */
4722          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4723 #ifdef XML_UNICODE
4724   const XML_Char *expected = XCS("\x00ff\x00feL ");
4725 #else
4726   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4727   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4728 #endif
4729   CharData storage;
4730 
4731   CharData_Init(&storage);
4732   test_data.storage = &storage;
4733   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4734   XML_SetUserData(g_parser, &test_data);
4735   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4736   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4737       == XML_STATUS_ERROR)
4738     xml_failure(g_parser);
4739   CharData_CheckXMLChars(&storage, expected);
4740 }
4741 END_TEST
4742 
START_TEST(test_ext_entity_latin1_utf16be_bom2)4743 START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4744   const char *text = "<!DOCTYPE doc [\n"
4745                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4746                      "]>\n"
4747                      "<doc>&en;</doc>";
4748   ExtTest2 test_data
4749       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4750          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4751           *   0x4c = L and 0x20 is a space
4752           */
4753          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4754 #ifdef XML_UNICODE
4755   const XML_Char *expected = XCS("\x00fe\x00ff L");
4756 #else
4757   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4758   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4759 #endif
4760   CharData storage;
4761 
4762   CharData_Init(&storage);
4763   test_data.storage = &storage;
4764   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4765   XML_SetUserData(g_parser, &test_data);
4766   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4767   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4768       == XML_STATUS_ERROR)
4769     xml_failure(g_parser);
4770   CharData_CheckXMLChars(&storage, expected);
4771 }
4772 END_TEST
4773 
4774 /* Test little-endian UTF-16 given an explicit big-endian encoding */
START_TEST(test_ext_entity_utf16_be)4775 START_TEST(test_ext_entity_utf16_be) {
4776   const char *text = "<!DOCTYPE doc [\n"
4777                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4778                      "]>\n"
4779                      "<doc>&en;</doc>";
4780   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4781 #ifdef XML_UNICODE
4782   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4783 #else
4784   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4785                                  "\xe6\x94\x80"   /* U+6500 */
4786                                  "\xe2\xbc\x80"   /* U+2F00 */
4787                                  "\xe3\xb8\x80"); /* U+3E00 */
4788 #endif
4789   CharData storage;
4790 
4791   CharData_Init(&storage);
4792   test_data.storage = &storage;
4793   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4794   XML_SetUserData(g_parser, &test_data);
4795   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4796   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4797       == XML_STATUS_ERROR)
4798     xml_failure(g_parser);
4799   CharData_CheckXMLChars(&storage, expected);
4800 }
4801 END_TEST
4802 
4803 /* Test big-endian UTF-16 given an explicit little-endian encoding */
START_TEST(test_ext_entity_utf16_le)4804 START_TEST(test_ext_entity_utf16_le) {
4805   const char *text = "<!DOCTYPE doc [\n"
4806                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4807                      "]>\n"
4808                      "<doc>&en;</doc>";
4809   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4810 #ifdef XML_UNICODE
4811   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4812 #else
4813   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4814                                  "\xe6\x94\x80"   /* U+6500 */
4815                                  "\xe2\xbc\x80"   /* U+2F00 */
4816                                  "\xe3\xb8\x80"); /* U+3E00 */
4817 #endif
4818   CharData storage;
4819 
4820   CharData_Init(&storage);
4821   test_data.storage = &storage;
4822   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4823   XML_SetUserData(g_parser, &test_data);
4824   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4825   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4826       == XML_STATUS_ERROR)
4827     xml_failure(g_parser);
4828   CharData_CheckXMLChars(&storage, expected);
4829 }
4830 END_TEST
4831 
4832 /* Test little-endian UTF-16 given no explicit encoding.
4833  * The existing default encoding (UTF-8) is assumed to hold without a
4834  * BOM to contradict it, so the entity value will in fact provoke an
4835  * error because 0x00 is not a valid XML character.  We parse the
4836  * whole buffer in one go rather than feeding it in byte by byte to
4837  * exercise different code paths in the initial scanning routines.
4838  */
START_TEST(test_ext_entity_utf16_unknown)4839 START_TEST(test_ext_entity_utf16_unknown) {
4840   const char *text = "<!DOCTYPE doc [\n"
4841                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4842                      "]>\n"
4843                      "<doc>&en;</doc>";
4844   ExtFaults2 test_data
4845       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4846          XML_ERROR_INVALID_TOKEN};
4847 
4848   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4849   XML_SetUserData(g_parser, &test_data);
4850   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4851                  "Invalid character should not have been accepted");
4852 }
4853 END_TEST
4854 
4855 /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom)4856 START_TEST(test_ext_entity_utf8_non_bom) {
4857   const char *text = "<!DOCTYPE doc [\n"
4858                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4859                      "]>\n"
4860                      "<doc>&en;</doc>";
4861   ExtTest2 test_data
4862       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4863          3, NULL, NULL};
4864 #ifdef XML_UNICODE
4865   const XML_Char *expected = XCS("\xfec0");
4866 #else
4867   const XML_Char *expected = XCS("\xef\xbb\x80");
4868 #endif
4869   CharData storage;
4870 
4871   CharData_Init(&storage);
4872   test_data.storage = &storage;
4873   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4874   XML_SetUserData(g_parser, &test_data);
4875   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4876   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4877       == XML_STATUS_ERROR)
4878     xml_failure(g_parser);
4879   CharData_CheckXMLChars(&storage, expected);
4880 }
4881 END_TEST
4882 
4883 /* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section)4884 START_TEST(test_utf8_in_cdata_section) {
4885   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4886 #ifdef XML_UNICODE
4887   const XML_Char *expected = XCS("one \x00e9 two");
4888 #else
4889   const XML_Char *expected = XCS("one \xc3\xa9 two");
4890 #endif
4891 
4892   run_character_check(text, expected);
4893 }
4894 END_TEST
4895 
/* Test that UTF-8 around a lone ']' in a CDATA section is passed through */
START_TEST(test_utf8_in_cdata_section_2)4897 START_TEST(test_utf8_in_cdata_section_2) {
4898   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4899 #ifdef XML_UNICODE
4900   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4901 #else
4902   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4903 #endif
4904 
4905   run_character_check(text, expected);
4906 }
4907 END_TEST
4908 
START_TEST(test_utf8_in_start_tags)4909 START_TEST(test_utf8_in_start_tags) {
4910   struct test_case {
4911     bool goodName;
4912     bool goodNameStart;
4913     const char *tagName;
4914   };
4915 
4916   // The idea with the tests below is this:
4917   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4918   // go to isNever and are hence not a concern.
4919   //
4920   // We start with a character that is a valid name character
4921   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4922   // single bits at places where (1) the result leaves the UTF-8 encoding space
4923   // and (2) we stay in the same n-byte sequence family.
4924   //
4925   // The flipped bits are highlighted in angle brackets in comments,
4926   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4927   // the most significant bit to 1 to leave UTF-8 encoding space.
4928   struct test_case cases[] = {
4929       // 1-byte UTF-8: [0xxx xxxx]
4930       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4931       {false, false, "\xBA"}, // [<1>011 1010]
4932       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4933       {false, false, "\xB9"}, // [<1>011 1001]
4934 
4935       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4936       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4937                                   // Arabic small waw U+06E5
4938       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4939       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4940       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4941       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4942                                   // combining char U+0301
4943       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4944       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4945       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4946 
4947       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4948       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4949                                       // Devanagari Letter A U+0905
4950       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4951       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4952       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4953       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4954       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4955       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4956                                       // combining char U+0901
4957       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4958       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4959       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4960       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4961       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4962   };
4963   const bool atNameStart[] = {true, false};
4964 
4965   size_t i = 0;
4966   char doc[1024];
4967   size_t failCount = 0;
4968 
4969   // we need all the bytes to be parsed, but we don't want the errors that can
4970   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4971   if (g_reparseDeferralEnabledDefault) {
4972     return;
4973   }
4974 
4975   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4976     size_t j = 0;
4977     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4978       const bool expectedSuccess
4979           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4980       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4981                cases[i].tagName);
4982       XML_Parser parser = XML_ParserCreate(NULL);
4983 
4984       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4985           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4986 
4987       bool success = true;
4988       if ((status == XML_STATUS_OK) != expectedSuccess) {
4989         success = false;
4990       }
4991       if ((status == XML_STATUS_ERROR)
4992           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4993         success = false;
4994       }
4995 
4996       if (! success) {
4997         fprintf(
4998             stderr,
4999             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
5000             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
5001             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
5002         failCount++;
5003       }
5004 
5005       XML_ParserFree(parser);
5006     }
5007   }
5008 
5009   if (failCount > 0) {
5010     fail("UTF-8 regression detected");
5011   }
5012 }
5013 END_TEST
5014 
5015 /* Test trailing spaces in elements are accepted */
START_TEST(test_trailing_spaces_in_elements)5016 START_TEST(test_trailing_spaces_in_elements) {
5017   const char *text = "<doc   >Hi</doc >";
5018   const XML_Char *expected = XCS("doc/doc");
5019   CharData storage;
5020 
5021   CharData_Init(&storage);
5022   XML_SetElementHandler(g_parser, record_element_start_handler,
5023                         record_element_end_handler);
5024   XML_SetUserData(g_parser, &storage);
5025   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5026       == XML_STATUS_ERROR)
5027     xml_failure(g_parser);
5028   CharData_CheckXMLChars(&storage, expected);
5029 }
5030 END_TEST
5031 
START_TEST(test_utf16_attribute)5032 START_TEST(test_utf16_attribute) {
5033   const char text[] =
5034       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
5035        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5036        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5037        */
5038       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
5039   const XML_Char *expected = XCS("a");
5040   CharData storage;
5041 
5042   CharData_Init(&storage);
5043   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5044   XML_SetUserData(g_parser, &storage);
5045   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5046       == XML_STATUS_ERROR)
5047     xml_failure(g_parser);
5048   CharData_CheckXMLChars(&storage, expected);
5049 }
5050 END_TEST
5051 
START_TEST(test_utf16_second_attr)5052 START_TEST(test_utf16_second_attr) {
5053   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
5054    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5055    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5056    */
5057   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
5058                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
5059   const XML_Char *expected = XCS("1");
5060   CharData storage;
5061 
5062   CharData_Init(&storage);
5063   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5064   XML_SetUserData(g_parser, &storage);
5065   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5066       == XML_STATUS_ERROR)
5067     xml_failure(g_parser);
5068   CharData_CheckXMLChars(&storage, expected);
5069 }
5070 END_TEST
5071 
START_TEST(test_attr_after_solidus)5072 START_TEST(test_attr_after_solidus) {
5073   const char *text = "<doc attr1='a' / attr2='b'>";
5074 
5075   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
5076 }
5077 END_TEST
5078 
START_TEST(test_utf16_pe)5079 START_TEST(test_utf16_pe) {
5080   /* <!DOCTYPE doc [
5081    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
5082    * %{KHO KHWAI}{CHO CHAN};
5083    * ]>
5084    * <doc></doc>
5085    *
5086    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5087    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5088    */
5089   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
5090                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
5091                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
5092                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
5093                       "\0%\x0e\x04\x0e\x08\0;\0\n"
5094                       "\0]\0>\0\n"
5095                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
5096 #ifdef XML_UNICODE
5097   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
5098 #else
5099   const XML_Char *expected
5100       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
5101 #endif
5102   CharData storage;
5103 
5104   CharData_Init(&storage);
5105   XML_SetUserData(g_parser, &storage);
5106   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
5107   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5108       == XML_STATUS_ERROR)
5109     xml_failure(g_parser);
5110   CharData_CheckXMLChars(&storage, expected);
5111 }
5112 END_TEST
5113 
5114 /* Test that duff attribute description keywords are rejected */
START_TEST(test_bad_attr_desc_keyword)5115 START_TEST(test_bad_attr_desc_keyword) {
5116   const char *text = "<!DOCTYPE doc [\n"
5117                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
5118                      "]>\n"
5119                      "<doc />";
5120 
5121   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5122                  "Bad keyword !IMPLIED not faulted");
5123 }
5124 END_TEST
5125 
5126 /* Test that an invalid attribute description keyword consisting of
5127  * UTF-16 characters with their top bytes non-zero are correctly
5128  * faulted
5129  */
START_TEST(test_bad_attr_desc_keyword_utf16)5130 START_TEST(test_bad_attr_desc_keyword_utf16) {
5131   /* <!DOCTYPE d [
5132    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
5133    * ]><d/>
5134    *
5135    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
5136    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
5137    */
5138   const char text[]
5139       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5140         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
5141         "\0#\x0e\x04\x0e\x08\0>\0\n"
5142         "\0]\0>\0<\0d\0/\0>";
5143 
5144   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5145       != XML_STATUS_ERROR)
5146     fail("Invalid UTF16 attribute keyword not faulted");
5147   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5148     xml_failure(g_parser);
5149 }
5150 END_TEST
5151 
5152 /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
5153  * using prefix-encoding (see above) to trigger specific code paths
5154  */
START_TEST(test_bad_doctype)5155 START_TEST(test_bad_doctype) {
5156   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
5157                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
5158 
5159   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5160   expect_failure(text, XML_ERROR_SYNTAX,
5161                  "Invalid bytes in DOCTYPE not faulted");
5162 }
5163 END_TEST
5164 
START_TEST(test_bad_doctype_utf8)5165 START_TEST(test_bad_doctype_utf8) {
5166   const char *text = "<!DOCTYPE \xDB\x25"
5167                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
5168   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5169                  "Invalid UTF-8 in DOCTYPE not faulted");
5170 }
5171 END_TEST
5172 
START_TEST(test_bad_doctype_utf16)5173 START_TEST(test_bad_doctype_utf16) {
5174   const char text[] =
5175       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
5176        *
5177        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
5178        * (name character) but not a valid letter (name start character)
5179        */
5180       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
5181       "\x06\xf2"
5182       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
5183 
5184   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5185       != XML_STATUS_ERROR)
5186     fail("Invalid bytes in DOCTYPE not faulted");
5187   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
5188     xml_failure(g_parser);
5189 }
5190 END_TEST
5191 
START_TEST(test_bad_doctype_plus)5192 START_TEST(test_bad_doctype_plus) {
5193   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
5194                      "<1+>&foo;</1+>";
5195 
5196   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5197                  "'+' in document name not faulted");
5198 }
5199 END_TEST
5200 
START_TEST(test_bad_doctype_star)5201 START_TEST(test_bad_doctype_star) {
5202   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
5203                      "<1*>&foo;</1*>";
5204 
5205   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5206                  "'*' in document name not faulted");
5207 }
5208 END_TEST
5209 
START_TEST(test_bad_doctype_query)5210 START_TEST(test_bad_doctype_query) {
5211   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5212                      "<1?>&foo;</1?>";
5213 
5214   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5215                  "'?' in document name not faulted");
5216 }
5217 END_TEST
5218 
START_TEST(test_unknown_encoding_bad_ignore)5219 START_TEST(test_unknown_encoding_bad_ignore) {
5220   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5221                      "<!DOCTYPE doc SYSTEM 'foo'>"
5222                      "<doc><e>&entity;</e></doc>";
5223   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5224                      "Invalid character not faulted", XCS("prefix-conv"),
5225                      XML_ERROR_INVALID_TOKEN};
5226 
5227   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5228   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5229   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5230   XML_SetUserData(g_parser, &fault);
5231   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5232                  "Bad IGNORE section with unknown encoding not failed");
5233 }
5234 END_TEST
5235 
START_TEST(test_entity_in_utf16_be_attr)5236 START_TEST(test_entity_in_utf16_be_attr) {
5237   const char text[] =
5238       /* <e a='&#228; &#x00E4;'></e> */
5239       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5240       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5241 #ifdef XML_UNICODE
5242   const XML_Char *expected = XCS("\x00e4 \x00e4");
5243 #else
5244   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5245 #endif
5246   CharData storage;
5247 
5248   CharData_Init(&storage);
5249   XML_SetUserData(g_parser, &storage);
5250   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5251   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5252       == XML_STATUS_ERROR)
5253     xml_failure(g_parser);
5254   CharData_CheckXMLChars(&storage, expected);
5255 }
5256 END_TEST
5257 
START_TEST(test_entity_in_utf16_le_attr)5258 START_TEST(test_entity_in_utf16_le_attr) {
5259   const char text[] =
5260       /* <e a='&#228; &#x00E4;'></e> */
5261       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5262       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5263 #ifdef XML_UNICODE
5264   const XML_Char *expected = XCS("\x00e4 \x00e4");
5265 #else
5266   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5267 #endif
5268   CharData storage;
5269 
5270   CharData_Init(&storage);
5271   XML_SetUserData(g_parser, &storage);
5272   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5273   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5274       == XML_STATUS_ERROR)
5275     xml_failure(g_parser);
5276   CharData_CheckXMLChars(&storage, expected);
5277 }
5278 END_TEST
5279 
START_TEST(test_entity_public_utf16_be)5280 START_TEST(test_entity_public_utf16_be) {
5281   const char text[] =
5282       /* <!DOCTYPE d [ */
5283       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5284       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5285       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5286       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5287       /* %e; */
5288       "\0%\0e\0;\0\n"
5289       /* ]> */
5290       "\0]\0>\0\n"
5291       /* <d>&j;</d> */
5292       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5293   ExtTest2 test_data
5294       = {/* <!ENTITY j 'baz'> */
5295          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5296   const XML_Char *expected = XCS("baz");
5297   CharData storage;
5298 
5299   CharData_Init(&storage);
5300   test_data.storage = &storage;
5301   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5302   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5303   XML_SetUserData(g_parser, &test_data);
5304   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5305   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5306       == XML_STATUS_ERROR)
5307     xml_failure(g_parser);
5308   CharData_CheckXMLChars(&storage, expected);
5309 }
5310 END_TEST
5311 
START_TEST(test_entity_public_utf16_le)5312 START_TEST(test_entity_public_utf16_le) {
5313   const char text[] =
5314       /* <!DOCTYPE d [ */
5315       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5316       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5317       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5318       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5319       /* %e; */
5320       "%\0e\0;\0\n\0"
5321       /* ]> */
5322       "]\0>\0\n\0"
5323       /* <d>&j;</d> */
5324       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5325   ExtTest2 test_data
5326       = {/* <!ENTITY j 'baz'> */
5327          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5328   const XML_Char *expected = XCS("baz");
5329   CharData storage;
5330 
5331   CharData_Init(&storage);
5332   test_data.storage = &storage;
5333   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5334   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5335   XML_SetUserData(g_parser, &test_data);
5336   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5337   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5338       == XML_STATUS_ERROR)
5339     xml_failure(g_parser);
5340   CharData_CheckXMLChars(&storage, expected);
5341 }
5342 END_TEST
5343 
5344 /* Test that a doctype with neither an internal nor external subset is
5345  * faulted
5346  */
START_TEST(test_short_doctype)5347 START_TEST(test_short_doctype) {
5348   const char *text = "<!DOCTYPE doc></doc>";
5349   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5350                  "DOCTYPE without subset not rejected");
5351 }
5352 END_TEST
5353 
START_TEST(test_short_doctype_2)5354 START_TEST(test_short_doctype_2) {
5355   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5356   expect_failure(text, XML_ERROR_SYNTAX,
5357                  "DOCTYPE without Public ID not rejected");
5358 }
5359 END_TEST
5360 
START_TEST(test_short_doctype_3)5361 START_TEST(test_short_doctype_3) {
5362   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5363   expect_failure(text, XML_ERROR_SYNTAX,
5364                  "DOCTYPE without System ID not rejected");
5365 }
5366 END_TEST
5367 
START_TEST(test_long_doctype)5368 START_TEST(test_long_doctype) {
5369   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5370   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5371 }
5372 END_TEST
5373 
START_TEST(test_bad_entity)5374 START_TEST(test_bad_entity) {
5375   const char *text = "<!DOCTYPE doc [\n"
5376                      "  <!ENTITY foo PUBLIC>\n"
5377                      "]>\n"
5378                      "<doc/>";
5379   expect_failure(text, XML_ERROR_SYNTAX,
5380                  "ENTITY without Public ID is not rejected");
5381 }
5382 END_TEST
5383 
5384 /* Test unquoted value is faulted */
START_TEST(test_bad_entity_2)5385 START_TEST(test_bad_entity_2) {
5386   const char *text = "<!DOCTYPE doc [\n"
5387                      "  <!ENTITY % foo bar>\n"
5388                      "]>\n"
5389                      "<doc/>";
5390   expect_failure(text, XML_ERROR_SYNTAX,
5391                  "ENTITY without Public ID is not rejected");
5392 }
5393 END_TEST
5394 
START_TEST(test_bad_entity_3)5395 START_TEST(test_bad_entity_3) {
5396   const char *text = "<!DOCTYPE doc [\n"
5397                      "  <!ENTITY % foo PUBLIC>\n"
5398                      "]>\n"
5399                      "<doc/>";
5400   expect_failure(text, XML_ERROR_SYNTAX,
5401                  "Parameter ENTITY without Public ID is not rejected");
5402 }
5403 END_TEST
5404 
START_TEST(test_bad_entity_4)5405 START_TEST(test_bad_entity_4) {
5406   const char *text = "<!DOCTYPE doc [\n"
5407                      "  <!ENTITY % foo SYSTEM>\n"
5408                      "]>\n"
5409                      "<doc/>";
5410   expect_failure(text, XML_ERROR_SYNTAX,
5411                  "Parameter ENTITY without Public ID is not rejected");
5412 }
5413 END_TEST
5414 
START_TEST(test_bad_notation)5415 START_TEST(test_bad_notation) {
5416   const char *text = "<!DOCTYPE doc [\n"
5417                      "  <!NOTATION n SYSTEM>\n"
5418                      "]>\n"
5419                      "<doc/>";
5420   expect_failure(text, XML_ERROR_SYNTAX,
5421                  "Notation without System ID is not rejected");
5422 }
5423 END_TEST
5424 
5425 /* Test for issue #11, wrongly suppressed default handler */
START_TEST(test_default_doctype_handler)5426 START_TEST(test_default_doctype_handler) {
5427   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5428                      "  <!ENTITY foo 'bar'>\n"
5429                      "]>\n"
5430                      "<doc>&foo;</doc>";
5431   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5432                               {XCS("'test.dtd'"), 10, XML_FALSE},
5433                               {NULL, 0, XML_FALSE}};
5434   int i;
5435 
5436   XML_SetUserData(g_parser, &test_data);
5437   XML_SetDefaultHandler(g_parser, checking_default_handler);
5438   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5439   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5440       == XML_STATUS_ERROR)
5441     xml_failure(g_parser);
5442   for (i = 0; test_data[i].expected != NULL; i++)
5443     if (! test_data[i].seen)
5444       fail("Default handler not run for public !DOCTYPE");
5445 }
5446 END_TEST
5447 
START_TEST(test_empty_element_abort)5448 START_TEST(test_empty_element_abort) {
5449   const char *text = "<abort/>";
5450 
5451   XML_SetStartElementHandler(g_parser, start_element_suspender);
5452   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5453       != XML_STATUS_ERROR)
5454     fail("Expected to error on abort");
5455 }
5456 END_TEST
5457 
5458 /* Regression test for GH issue #612: unfinished m_declAttributeType
5459  * allocation in ->m_tempPool can corrupt following allocation.
5460  */
START_TEST(test_pool_integrity_with_unfinished_attr)5461 START_TEST(test_pool_integrity_with_unfinished_attr) {
5462   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5463                      "<!DOCTYPE foo [\n"
5464                      "<!ELEMENT foo ANY>\n"
5465                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5466                      "%entp;\n"
5467                      "]>\n"
5468                      "<a></a>\n";
5469   const XML_Char *expected = XCS("COMMENT");
5470   CharData storage;
5471 
5472   CharData_Init(&storage);
5473   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5474   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5475   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5476   XML_SetCommentHandler(g_parser, accumulate_comment);
5477   XML_SetUserData(g_parser, &storage);
5478   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5479       == XML_STATUS_ERROR)
5480     xml_failure(g_parser);
5481   CharData_CheckXMLChars(&storage, expected);
5482 }
5483 END_TEST
5484 
5485 /* Test a possible early return location in internalEntityProcessor */
START_TEST(test_entity_ref_no_elements)5486 START_TEST(test_entity_ref_no_elements) {
5487   const char *const text = "<!DOCTYPE foo [\n"
5488                            "<!ENTITY e1 \"test\">\n"
5489                            "]> <foo>&e1;"; // intentionally missing newline
5490 
5491   XML_Parser parser = XML_ParserCreate(NULL);
5492   assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5493               == XML_STATUS_ERROR);
5494   assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
5495   XML_ParserFree(parser);
5496 }
5497 END_TEST
5498 
5499 /* Tests if chained entity references lead to unbounded recursion */
START_TEST(test_deep_nested_entity)5500 START_TEST(test_deep_nested_entity) {
5501   const size_t N_LINES = 60000;
5502   const size_t SIZE_PER_LINE = 50;
5503 
5504   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5505   if (text == NULL) {
5506     fail("malloc failed");
5507   }
5508 
5509   char *textPtr = text;
5510 
5511   // Create the XML
5512   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5513                       "<!DOCTYPE foo [\n"
5514                       "	<!ENTITY s0 'deepText'>\n");
5515 
5516   for (size_t i = 1; i < N_LINES; ++i) {
5517     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5518                         (long unsigned)i, (long unsigned)(i - 1));
5519   }
5520 
5521   snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
5522            (long unsigned)(N_LINES - 1));
5523 
5524   const XML_Char *const expected = XCS("deepText");
5525 
5526   CharData storage;
5527   CharData_Init(&storage);
5528 
5529   XML_Parser parser = XML_ParserCreate(NULL);
5530 
5531   XML_SetCharacterDataHandler(parser, accumulate_characters);
5532   XML_SetUserData(parser, &storage);
5533 
5534   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5535       == XML_STATUS_ERROR)
5536     xml_failure(parser);
5537 
5538   CharData_CheckXMLChars(&storage, expected);
5539   XML_ParserFree(parser);
5540   free(text);
5541 }
5542 END_TEST
5543 
5544 /* Tests if chained entity references in attributes
5545 lead to unbounded recursion */
START_TEST(test_deep_nested_attribute_entity)5546 START_TEST(test_deep_nested_attribute_entity) {
5547   const size_t N_LINES = 60000;
5548   const size_t SIZE_PER_LINE = 100;
5549 
5550   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5551   if (text == NULL) {
5552     fail("malloc failed");
5553   }
5554 
5555   char *textPtr = text;
5556 
5557   // Create the XML
5558   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5559                       "<!DOCTYPE foo [\n"
5560                       "	<!ENTITY s0 'deepText'>\n");
5561 
5562   for (size_t i = 1; i < N_LINES; ++i) {
5563     textPtr += snprintf(textPtr, SIZE_PER_LINE, "  <!ENTITY s%lu '&s%lu;'>\n",
5564                         (long unsigned)i, (long unsigned)(i - 1));
5565   }
5566 
5567   snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
5568            (long unsigned)(N_LINES - 1));
5569 
5570   AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
5571   ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
5572   info[0].attributes = doc_info;
5573 
5574   XML_Parser parser = XML_ParserCreate(NULL);
5575   ParserAndElementInfo parserPlusElemenInfo = {parser, info};
5576 
5577   XML_SetStartElementHandler(parser, counting_start_element_handler);
5578   XML_SetUserData(parser, &parserPlusElemenInfo);
5579 
5580   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5581       == XML_STATUS_ERROR)
5582     xml_failure(parser);
5583 
5584   XML_ParserFree(parser);
5585   free(text);
5586 }
5587 END_TEST
5588 
START_TEST(test_deep_nested_entity_delayed_interpretation)5589 START_TEST(test_deep_nested_entity_delayed_interpretation) {
5590   const size_t N_LINES = 70000;
5591   const size_t SIZE_PER_LINE = 100;
5592 
5593   char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
5594   if (text == NULL) {
5595     fail("malloc failed");
5596   }
5597 
5598   char *textPtr = text;
5599 
5600   // Create the XML
5601   textPtr += snprintf(textPtr, SIZE_PER_LINE,
5602                       "<!DOCTYPE foo [\n"
5603                       "	<!ENTITY %% s0 'deepText'>\n");
5604 
5605   for (size_t i = 1; i < N_LINES; ++i) {
5606     textPtr += snprintf(textPtr, SIZE_PER_LINE,
5607                         "  <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
5608                         (long unsigned)(i - 1));
5609   }
5610 
5611   snprintf(textPtr, SIZE_PER_LINE,
5612            "  <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
5613            "  %%define_g;\n"
5614            "]>\n"
5615            "<foo/>\n",
5616            (long unsigned)(N_LINES - 1));
5617 
5618   XML_Parser parser = XML_ParserCreate(NULL);
5619 
5620   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5621   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
5622       == XML_STATUS_ERROR)
5623     xml_failure(parser);
5624 
5625   XML_ParserFree(parser);
5626   free(text);
5627 }
5628 END_TEST
5629 
START_TEST(test_nested_entity_suspend)5630 START_TEST(test_nested_entity_suspend) {
5631   const char *const text = "<!DOCTYPE a [\n"
5632                            "  <!ENTITY e1 '<!--e1-->'>\n"
5633                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5634                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5635                            "]>\n"
5636                            "<a><!--start-->&e3;<!--end--></a>";
5637   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5638       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5639   CharData storage;
5640   CharData_Init(&storage);
5641   XML_Parser parser = XML_ParserCreate(NULL);
5642   ParserPlusStorage parserPlusStorage = {parser, &storage};
5643 
5644   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5645   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5646   XML_SetUserData(parser, &parserPlusStorage);
5647 
5648   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5649   while (status == XML_STATUS_SUSPENDED) {
5650     status = XML_ResumeParser(parser);
5651   }
5652   if (status != XML_STATUS_OK)
5653     xml_failure(parser);
5654 
5655   CharData_CheckXMLChars(&storage, expected);
5656   XML_ParserFree(parser);
5657 }
5658 END_TEST
5659 
START_TEST(test_nested_entity_suspend_2)5660 START_TEST(test_nested_entity_suspend_2) {
5661   const char *const text = "<!DOCTYPE doc [\n"
5662                            "  <!ENTITY ge1 'head1Ztail1'>\n"
5663                            "  <!ENTITY ge2 'head2&ge1;tail2'>\n"
5664                            "  <!ENTITY ge3 'head3&ge2;tail3'>\n"
5665                            "]>\n"
5666                            "<doc>&ge3;</doc>";
5667   const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
5668       XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
5669   CharData storage;
5670   CharData_Init(&storage);
5671   XML_Parser parser = XML_ParserCreate(NULL);
5672   ParserPlusStorage parserPlusStorage = {parser, &storage};
5673 
5674   XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
5675   XML_SetUserData(parser, &parserPlusStorage);
5676 
5677   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5678   while (status == XML_STATUS_SUSPENDED) {
5679     status = XML_ResumeParser(parser);
5680   }
5681   if (status != XML_STATUS_OK)
5682     xml_failure(parser);
5683 
5684   CharData_CheckXMLChars(&storage, expected);
5685   XML_ParserFree(parser);
5686 }
5687 END_TEST
5688 
5689 /* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly)5690 START_TEST(test_big_tokens_scale_linearly) {
5691   const struct {
5692     const char *pre;
5693     const char *post;
5694   } text[] = {
5695       {"<a>", "</a>"},                      // assumed good, used as baseline
5696       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5697       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
5698       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
5699       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
5700   };
5701   const int num_cases = sizeof(text) / sizeof(text[0]);
5702   char aaaaaa[4096];
5703   const int fillsize = (int)sizeof(aaaaaa);
5704   const int fillcount = 100;
5705   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5706   const unsigned max_factor = 4;
5707   const unsigned max_scanned = max_factor * approx_bytes;
5708 
5709   memset(aaaaaa, 'a', fillsize);
5710 
5711   if (! g_reparseDeferralEnabledDefault) {
5712     return; // heuristic is disabled; we would get O(n^2) and fail.
5713   }
5714 
5715   for (int i = 0; i < num_cases; ++i) {
5716     XML_Parser parser = XML_ParserCreate(NULL);
5717     assert_true(parser != NULL);
5718     enum XML_Status status;
5719     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5720 
5721     // parse the start text
5722     g_bytesScanned = 0;
5723     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5724                                      (int)strlen(text[i].pre), XML_FALSE);
5725     if (status != XML_STATUS_OK) {
5726       xml_failure(parser);
5727     }
5728 
5729     // parse lots of 'a', failing the test early if it takes too long
5730     unsigned past_max_count = 0;
5731     for (int f = 0; f < fillcount; ++f) {
5732       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5733       if (status != XML_STATUS_OK) {
5734         xml_failure(parser);
5735       }
5736       if (g_bytesScanned > max_scanned) {
5737         // We're not done, and have already passed the limit -- the test will
5738         // definitely fail. This block allows us to save time by failing early.
5739         const unsigned pushed
5740             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5741         fprintf(
5742             stderr,
5743             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5744             f + 1, fillcount, pushed, g_bytesScanned,
5745             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5746         past_max_count++;
5747         // We are failing, but allow a few log prints first. If we don't reach
5748         // a count of five, the test will fail after the loop instead.
5749         assert_true(past_max_count < 5);
5750       }
5751     }
5752 
5753     // parse the end text
5754     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5755                                      (int)strlen(text[i].post), XML_TRUE);
5756     if (status != XML_STATUS_OK) {
5757       xml_failure(parser);
5758     }
5759 
5760     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5761     if (g_bytesScanned > max_scanned) {
5762       fprintf(
5763           stderr,
5764           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5765           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5766           max_factor);
5767       fail("scanned too many bytes");
5768     }
5769 
5770     XML_ParserFree(parser);
5771   }
5772 }
5773 END_TEST
5774 
START_TEST(test_set_reparse_deferral)5775 START_TEST(test_set_reparse_deferral) {
5776   const char *const pre = "<d>";
5777   const char *const start = "<x attr='";
5778   const char *const end = "'></x>";
5779   char eeeeee[100];
5780   const int fillsize = (int)sizeof(eeeeee);
5781   memset(eeeeee, 'e', fillsize);
5782 
5783   for (int enabled = 0; enabled <= 1; enabled += 1) {
5784     set_subtest("deferral=%d", enabled);
5785 
5786     XML_Parser parser = XML_ParserCreate(NULL);
5787     assert_true(parser != NULL);
5788     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5789     // pre-grow the buffer to avoid reparsing due to almost-fullness
5790     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5791 
5792     CharData storage;
5793     CharData_Init(&storage);
5794     XML_SetUserData(parser, &storage);
5795     XML_SetStartElementHandler(parser, start_element_event_handler);
5796 
5797     enum XML_Status status;
5798     // parse the start text
5799     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5800     if (status != XML_STATUS_OK) {
5801       xml_failure(parser);
5802     }
5803     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5804 
5805     // ..and the start of the token
5806     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5807     if (status != XML_STATUS_OK) {
5808       xml_failure(parser);
5809     }
5810     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5811 
5812     // try to parse lots of 'e', but the token isn't finished
5813     for (int c = 0; c < 100; ++c) {
5814       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5815       if (status != XML_STATUS_OK) {
5816         xml_failure(parser);
5817       }
5818     }
5819     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5820 
5821     // end the <x> token.
5822     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5823     if (status != XML_STATUS_OK) {
5824       xml_failure(parser);
5825     }
5826 
5827     if (enabled) {
5828       // In general, we may need to push more data to trigger a reparse attempt,
5829       // but in this test, the data is constructed to always require it.
5830       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5831       // 2x the token length should suffice; the +1 covers the start and end.
5832       for (int c = 0; c < 101; ++c) {
5833         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5834         if (status != XML_STATUS_OK) {
5835           xml_failure(parser);
5836         }
5837       }
5838     }
5839     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5840 
5841     XML_ParserFree(parser);
5842   }
5843 }
5844 END_TEST
5845 
5846 struct element_decl_data {
5847   XML_Parser parser;
5848   int count;
5849 };
5850 
5851 static void
element_decl_counter(void * userData,const XML_Char * name,XML_Content * model)5852 element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5853   UNUSED_P(name);
5854   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5855   testdata->count += 1;
5856   XML_FreeContentModel(testdata->parser, model);
5857 }
5858 
5859 static int
external_inherited_parser(XML_Parser p,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)5860 external_inherited_parser(XML_Parser p, const XML_Char *context,
5861                           const XML_Char *base, const XML_Char *systemId,
5862                           const XML_Char *publicId) {
5863   UNUSED_P(base);
5864   UNUSED_P(systemId);
5865   UNUSED_P(publicId);
5866   const char *const pre = "<!ELEMENT document ANY>\n";
5867   const char *const start = "<!ELEMENT ";
5868   const char *const end = " ANY>\n";
5869   const char *const post = "<!ELEMENT xyz ANY>\n";
5870   const int enabled = *(int *)XML_GetUserData(p);
5871   char eeeeee[100];
5872   char spaces[100];
5873   const int fillsize = (int)sizeof(eeeeee);
5874   assert_true(fillsize == (int)sizeof(spaces));
5875   memset(eeeeee, 'e', fillsize);
5876   memset(spaces, ' ', fillsize);
5877 
5878   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5879   assert_true(parser != NULL);
5880   // pre-grow the buffer to avoid reparsing due to almost-fullness
5881   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5882 
5883   struct element_decl_data testdata;
5884   testdata.parser = parser;
5885   testdata.count = 0;
5886   XML_SetUserData(parser, &testdata);
5887   XML_SetElementDeclHandler(parser, element_decl_counter);
5888 
5889   enum XML_Status status;
5890   // parse the initial text
5891   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5892   if (status != XML_STATUS_OK) {
5893     xml_failure(parser);
5894   }
5895   assert_true(testdata.count == 1); // first element should be done
5896 
5897   // ..and the start of the big token
5898   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5899   if (status != XML_STATUS_OK) {
5900     xml_failure(parser);
5901   }
5902   assert_true(testdata.count == 1); // still just the first one
5903 
5904   // try to parse lots of 'e', but the token isn't finished
5905   for (int c = 0; c < 100; ++c) {
5906     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5907     if (status != XML_STATUS_OK) {
5908       xml_failure(parser);
5909     }
5910   }
5911   assert_true(testdata.count == 1); // *still* just the first one
5912 
5913   // end the big token.
5914   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5915   if (status != XML_STATUS_OK) {
5916     xml_failure(parser);
5917   }
5918 
5919   if (enabled) {
5920     // In general, we may need to push more data to trigger a reparse attempt,
5921     // but in this test, the data is constructed to always require it.
5922     assert_true(testdata.count == 1); // or the test is incorrect
5923     // 2x the token length should suffice; the +1 covers the start and end.
5924     for (int c = 0; c < 101; ++c) {
5925       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5926       if (status != XML_STATUS_OK) {
5927         xml_failure(parser);
5928       }
5929     }
5930   }
5931   assert_true(testdata.count == 2); // the big token should be done
5932 
5933   // parse the final text
5934   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5935   if (status != XML_STATUS_OK) {
5936     xml_failure(parser);
5937   }
5938   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5939 
5940   XML_ParserFree(parser);
5941   return XML_STATUS_OK;
5942 }
5943 
START_TEST(test_reparse_deferral_is_inherited)5944 START_TEST(test_reparse_deferral_is_inherited) {
5945   const char *const text
5946       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5947   for (int enabled = 0; enabled <= 1; ++enabled) {
5948     set_subtest("deferral=%d", enabled);
5949 
5950     XML_Parser parser = XML_ParserCreate(NULL);
5951     assert_true(parser != NULL);
5952     XML_SetUserData(parser, (void *)&enabled);
5953     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5954     // this handler creates a sub-parser and checks that its deferral behavior
5955     // is what we expected, based on the value of `enabled` (in userdata).
5956     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5957     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5958     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5959       xml_failure(parser);
5960 
5961     XML_ParserFree(parser);
5962   }
5963 }
5964 END_TEST
5965 
START_TEST(test_set_reparse_deferral_on_null_parser)5966 START_TEST(test_set_reparse_deferral_on_null_parser) {
5967   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5968   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5969   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5970   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5971   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5972               == XML_FALSE);
5973   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5974               == XML_FALSE);
5975 }
5976 END_TEST
5977 
START_TEST(test_set_reparse_deferral_on_the_fly)5978 START_TEST(test_set_reparse_deferral_on_the_fly) {
5979   const char *const pre = "<d><x attr='";
5980   const char *const end = "'></x>";
5981   char iiiiii[100];
5982   const int fillsize = (int)sizeof(iiiiii);
5983   memset(iiiiii, 'i', fillsize);
5984 
5985   XML_Parser parser = XML_ParserCreate(NULL);
5986   assert_true(parser != NULL);
5987   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5988 
5989   CharData storage;
5990   CharData_Init(&storage);
5991   XML_SetUserData(parser, &storage);
5992   XML_SetStartElementHandler(parser, start_element_event_handler);
5993 
5994   enum XML_Status status;
5995   // parse the start text
5996   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5997   if (status != XML_STATUS_OK) {
5998     xml_failure(parser);
5999   }
6000   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
6001 
6002   // try to parse some 'i', but the token isn't finished
6003   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
6004   if (status != XML_STATUS_OK) {
6005     xml_failure(parser);
6006   }
6007   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
6008 
6009   // end the <x> token.
6010   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
6011   if (status != XML_STATUS_OK) {
6012     xml_failure(parser);
6013   }
6014   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
6015 
6016   // now change the heuristic setting and add *no* data
6017   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
6018   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
6019   status = XML_Parse(parser, "", 0, XML_FALSE);
6020   if (status != XML_STATUS_OK) {
6021     xml_failure(parser);
6022   }
6023   CharData_CheckXMLChars(&storage, XCS("dx"));
6024 
6025   XML_ParserFree(parser);
6026 }
6027 END_TEST
6028 
START_TEST(test_set_bad_reparse_option)6029 START_TEST(test_set_bad_reparse_option) {
6030   XML_Parser parser = XML_ParserCreate(NULL);
6031   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
6032   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
6033   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
6034   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
6035   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
6036   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
6037   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
6038   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
6039   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
6040   XML_ParserFree(parser);
6041 }
6042 END_TEST
6043 
/* Allocation statistics maintained by counting_realloc() (and therefore by
   counting_malloc()); tests reset them to zero before measuring. */
static size_t g_totalAlloc = 0;   /* running sum of all requested sizes */
static size_t g_biggestAlloc = 0; /* largest single requested size */

/* realloc() wrapper that records the requested size in the statistics
   above before forwarding the request. */
static void *
counting_realloc(void *ptr, size_t size) {
  g_totalAlloc += size;
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  return realloc(ptr, size);
}

/* malloc() counterpart, expressed as a counted realloc of NULL. */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
6060 
START_TEST(test_bypass_heuristic_when_close_to_bufsize)6061 START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
6062   if (g_chunkSize != 0) {
6063     // this test does not use SINGLE_BYTES, because it depends on very precise
6064     // buffer fills.
6065     return;
6066   }
6067   if (! g_reparseDeferralEnabledDefault) {
6068     return; // this test is irrelevant when the deferral heuristic is disabled.
6069   }
6070 
6071   const int document_length = 65536;
6072   char *const document = (char *)malloc(document_length);
6073   assert_true(document != NULL);
6074 
6075   const XML_Memory_Handling_Suite memfuncs = {
6076       counting_malloc,
6077       counting_realloc,
6078       free,
6079   };
6080 
6081   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
6082   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
6083   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
6084 
6085   for (const int *leading = leading_list; *leading >= 0; leading++) {
6086     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
6087       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
6088         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
6089                     *fillsize);
6090         // start by checking that the test looks reasonably valid
6091         assert_true(*leading + *bigtoken <= document_length);
6092 
6093         // put 'x' everywhere; some will be overwritten by elements.
6094         memset(document, 'x', document_length);
6095         // maybe add an initial tag
6096         if (*leading) {
6097           assert_true(*leading >= 3); // or the test case is invalid
6098           memcpy(document, "<a>", 3);
6099         }
6100         // add the large token
6101         document[*leading + 0] = '<';
6102         document[*leading + 1] = 'b';
6103         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
6104         document[*leading + *bigtoken - 1] = '>';
6105 
6106         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
6107         const int expected_elem_total = 1 + (*leading ? 1 : 0);
6108 
6109         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
6110         assert_true(parser != NULL);
6111 
6112         CharData storage;
6113         CharData_Init(&storage);
6114         XML_SetUserData(parser, &storage);
6115         XML_SetStartElementHandler(parser, start_element_event_handler);
6116 
6117         g_biggestAlloc = 0;
6118         g_totalAlloc = 0;
6119         int offset = 0;
6120         // fill data until the big token is covered (but not necessarily parsed)
6121         while (offset < *leading + *bigtoken) {
6122           assert_true(offset + *fillsize <= document_length);
6123           const enum XML_Status status
6124               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6125           if (status != XML_STATUS_OK) {
6126             xml_failure(parser);
6127           }
6128           offset += *fillsize;
6129         }
6130         // Now, check that we've had a buffer allocation that could fit the
6131         // context bytes and our big token. In order to detect a special case,
6132         // we need to know how many bytes of our big token were included in the
6133         // first push that contained _any_ bytes of the big token:
6134         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
6135         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
6136           // Special case: we aren't saving any context, and the whole big token
6137           // was covered by a single fill, so Expat may have parsed directly
6138           // from our input pointer, without allocating an internal buffer.
6139         } else if (*leading < XML_CONTEXT_BYTES) {
6140           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
6141         } else {
6142           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
6143         }
6144         // fill data until the big token is actually parsed
6145         while (storage.count < expected_elem_total) {
6146           const size_t alloc_before = g_totalAlloc;
6147           assert_true(offset + *fillsize <= document_length);
6148           const enum XML_Status status
6149               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6150           if (status != XML_STATUS_OK) {
6151             xml_failure(parser);
6152           }
6153           offset += *fillsize;
6154           // since all the bytes of the big token are already in the buffer,
6155           // the bufsize ceiling should make us finish its parsing without any
6156           // further buffer allocations. We assume that there will be no other
6157           // large allocations in this test.
6158           assert_true(g_totalAlloc - alloc_before < 4096);
6159         }
6160         // test-the-test: was our alloc even called?
6161         assert_true(g_totalAlloc > 0);
6162         // test-the-test: there shouldn't be any extra start elements
6163         assert_true(storage.count == expected_elem_total);
6164 
6165         XML_ParserFree(parser);
6166       }
6167     }
6168   }
6169   free(document);
6170 }
6171 END_TEST
6172 
START_TEST(test_varying_buffer_fills)6173 START_TEST(test_varying_buffer_fills) {
6174   const int KiB = 1024;
6175   const int MiB = 1024 * KiB;
6176   const int document_length = 16 * MiB;
6177   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
6178 
6179   if (g_chunkSize != 0) {
6180     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
6181   }
6182 
6183   char *const document = (char *)malloc(document_length);
6184   assert_true(document != NULL);
6185   memset(document, 'x', document_length);
6186   document[0] = '<';
6187   document[1] = 't';
6188   memset(&document[2], ' ', big - 2); // a very spacy token
6189   document[big - 1] = '>';
6190 
6191   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
6192   // When reparse deferral is enabled, the final (negated) value is the expected
6193   // maximum number of bytes scanned in parse attempts.
6194   const int testcases[][30] = {
6195       {8 * MiB, -8 * MiB},
6196       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
6197       // zero-size fills shouldn't trigger the bypass
6198       {4 * MiB, 0, 4 * MiB, -12 * MiB},
6199       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
6200       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
6201       // try to hit the buffer ceiling only once (at the end)
6202       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
6203       // try to hit the same buffer ceiling multiple times
6204       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
6205 
6206       // try to hit every ceiling, by always landing 1K shy of the buffer size
6207       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
6208        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
6209 
6210       // try to avoid every ceiling, by always landing 1B past the buffer size
6211       // the normal 2x heuristic threshold still forces parse attempts.
6212       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6213        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6214        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6215        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6216        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6217        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6218        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
6219        -(10 * MiB + 682 * KiB + 7)},
6220       // try to avoid every ceiling again, except on our last fill.
6221       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
6222        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
6223        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
6224        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
6225        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
6226        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
6227        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
6228        -(10 * MiB + 682 * KiB + 6)},
6229 
6230       // try to hit ceilings on the way multiple times
6231       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
6232        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
6233        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
6234        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
6235        // we'll make a parse attempt at every parse call
6236        -(45 * MiB + 12)},
6237   };
6238   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
6239   for (int test_i = 0; test_i < testcount; test_i++) {
6240     const int *fillsize = testcases[test_i];
6241     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
6242                 fillsize[2], fillsize[3]);
6243     XML_Parser parser = XML_ParserCreate(NULL);
6244     assert_true(parser != NULL);
6245 
6246     CharData storage;
6247     CharData_Init(&storage);
6248     XML_SetUserData(parser, &storage);
6249     XML_SetStartElementHandler(parser, start_element_event_handler);
6250 
6251     g_bytesScanned = 0;
6252     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
6253     int offset = 0;
6254     while (*fillsize >= 0) {
6255       assert_true(offset + *fillsize <= document_length); // or test is invalid
6256       const enum XML_Status status
6257           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
6258       if (status != XML_STATUS_OK) {
6259         xml_failure(parser);
6260       }
6261       offset += *fillsize;
6262       fillsize++;
6263       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
6264       worstcase_bytes += offset; // we might've tried to parse all pending bytes
6265     }
6266     assert_true(storage.count == 1); // the big token should've been parsed
6267     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
6268     if (g_reparseDeferralEnabledDefault) {
6269       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
6270       const unsigned max_bytes_scanned = -*fillsize;
6271       if (g_bytesScanned > max_bytes_scanned) {
6272         fprintf(stderr,
6273                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
6274                 g_bytesScanned, max_bytes_scanned);
6275         fail("too many bytes scanned in parse attempts");
6276       }
6277     }
6278     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
6279 
6280     XML_ParserFree(parser);
6281   }
6282   free(document);
6283 }
6284 END_TEST
6285 
START_TEST(test_empty_ext_param_entity_in_value)6286 START_TEST(test_empty_ext_param_entity_in_value) {
6287   const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>";
6288   ExtOption options[] = {
6289       {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">"
6290                        "<!ENTITY ge \"%pe;\">"},
6291       {XCS("empty"), ""},
6292       {NULL, NULL},
6293   };
6294 
6295   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
6296   XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner);
6297   XML_SetUserData(g_parser, options);
6298   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6299       == XML_STATUS_ERROR)
6300     xml_failure(g_parser);
6301 }
6302 END_TEST
6303 
6304 void
make_basic_test_case(Suite * s)6305 make_basic_test_case(Suite *s) {
6306   TCase *tc_basic = tcase_create("basic tests");
6307 
6308   suite_add_tcase(s, tc_basic);
6309   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
6310 
6311   tcase_add_test(tc_basic, test_nul_byte);
6312   tcase_add_test(tc_basic, test_u0000_char);
6313   tcase_add_test(tc_basic, test_siphash_self);
6314   tcase_add_test(tc_basic, test_siphash_spec);
6315   tcase_add_test(tc_basic, test_bom_utf8);
6316   tcase_add_test(tc_basic, test_bom_utf16_be);
6317   tcase_add_test(tc_basic, test_bom_utf16_le);
6318   tcase_add_test(tc_basic, test_nobom_utf16_le);
6319   tcase_add_test(tc_basic, test_hash_collision);
6320   tcase_add_test(tc_basic, test_hash_salt_setter);
6321   tcase_add_test(tc_basic, test_illegal_utf8);
6322   tcase_add_test(tc_basic, test_utf8_auto_align);
6323   tcase_add_test(tc_basic, test_utf16);
6324   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
6325   tcase_add_test(tc_basic, test_not_utf16);
6326   tcase_add_test(tc_basic, test_bad_encoding);
6327   tcase_add_test(tc_basic, test_latin1_umlauts);
6328   tcase_add_test(tc_basic, test_long_utf8_character);
6329   tcase_add_test(tc_basic, test_long_latin1_attribute);
6330   tcase_add_test(tc_basic, test_long_ascii_attribute);
6331   /* Regression test for SF bug #491986. */
6332   tcase_add_test(tc_basic, test_danish_latin1);
6333   /* Regression test for SF bug #514281. */
6334   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
6335   tcase_add_test(tc_basic, test_french_charref_decimal);
6336   tcase_add_test(tc_basic, test_french_latin1);
6337   tcase_add_test(tc_basic, test_french_utf8);
6338   tcase_add_test(tc_basic, test_utf8_false_rejection);
6339   tcase_add_test(tc_basic, test_line_number_after_parse);
6340   tcase_add_test(tc_basic, test_column_number_after_parse);
6341   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
6342   tcase_add_test(tc_basic, test_line_number_after_error);
6343   tcase_add_test(tc_basic, test_column_number_after_error);
6344   tcase_add_test(tc_basic, test_really_long_lines);
6345   tcase_add_test(tc_basic, test_really_long_encoded_lines);
6346   tcase_add_test(tc_basic, test_end_element_events);
6347   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
6348   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
6349   tcase_add_test(tc_basic, test_xmldecl_misplaced);
6350   tcase_add_test(tc_basic, test_xmldecl_invalid);
6351   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
6352   tcase_add_test(tc_basic, test_xmldecl_missing_value);
6353   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
6354   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
6355   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
6356   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
6357   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
6358   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
6359   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
6360   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
6361   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
6362   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
6363   tcase_add_test(tc_basic,
6364                  test_wfc_undeclared_entity_with_external_subset_standalone);
6365   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
6366   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
6367   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
6368   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
6369   tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
6370   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
6371   tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
6372   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
6373   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
6374   tcase_add_test(tc_basic, test_dtd_attr_handling);
6375   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
6376   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
6377   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
6378   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
6379   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
6380   tcase_add_test(tc_basic, test_good_cdata_ascii);
6381   tcase_add_test(tc_basic, test_good_cdata_utf16);
6382   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
6383   tcase_add_test(tc_basic, test_long_cdata_utf16);
6384   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
6385   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
6386   tcase_add_test(tc_basic, test_bad_cdata);
6387   tcase_add_test(tc_basic, test_bad_cdata_utf16);
6388   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
6389   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
6390   tcase_add_test(tc_basic, test_memory_allocation);
6391   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
6392   tcase_add_test(tc_basic, test_dtd_elements);
6393   tcase_add_test(tc_basic, test_dtd_elements_nesting);
6394   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
6395   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6396   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6397   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6398   tcase_add_test__ifdef_xml_dtd(tc_basic,
6399                                 test_foreign_dtd_without_external_subset);
6400   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6401   tcase_add_test(tc_basic, test_set_base);
6402   tcase_add_test(tc_basic, test_attributes);
6403   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6404   tcase_add_test(tc_basic, test_resume_invalid_parse);
6405   tcase_add_test(tc_basic, test_resume_resuspended);
6406   tcase_add_test(tc_basic, test_cdata_default);
6407   tcase_add_test(tc_basic, test_subordinate_reset);
6408   tcase_add_test(tc_basic, test_subordinate_suspend);
6409   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6410   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6411   tcase_add_test__ifdef_xml_dtd(tc_basic,
6412                                 test_ext_entity_invalid_suspended_parse);
6413   tcase_add_test(tc_basic, test_explicit_encoding);
6414   tcase_add_test(tc_basic, test_trailing_cr);
6415   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6416   tcase_add_test(tc_basic, test_trailing_rsqb);
6417   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6418   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6419   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6420   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6421   tcase_add_test(tc_basic, test_empty_parse);
6422   tcase_add_test(tc_basic, test_negative_len_parse);
6423   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6424   tcase_add_test(tc_basic, test_get_buffer_1);
6425   tcase_add_test(tc_basic, test_get_buffer_2);
6426 #if XML_CONTEXT_BYTES > 0
6427   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6428 #endif
6429   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6430   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6431   tcase_add_test(tc_basic, test_byte_info_at_end);
6432   tcase_add_test(tc_basic, test_byte_info_at_error);
6433   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6434   tcase_add_test(tc_basic, test_predefined_entities);
6435   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6436   tcase_add_test(tc_basic, test_not_predefined_entities);
6437   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6438   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6439   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6440   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6441   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6442   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6443   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6444   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6445   tcase_add_test(tc_basic, test_bad_public_doctype);
6446   tcase_add_test(tc_basic, test_attribute_enum_value);
6447   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6448   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6449   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6450   tcase_add_test(tc_basic, test_nested_groups);
6451   tcase_add_test(tc_basic, test_group_choice);
6452   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6453   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6454   tcase_add_test__ifdef_xml_dtd(tc_basic,
6455                                 test_recursive_external_parameter_entity);
6456   tcase_add_test__ifdef_xml_dtd(tc_basic,
6457                                 test_recursive_external_parameter_entity_2);
6458   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6459   tcase_add_test(tc_basic, test_suspend_xdecl);
6460   tcase_add_test(tc_basic, test_abort_epilog);
6461   tcase_add_test(tc_basic, test_abort_epilog_2);
6462   tcase_add_test(tc_basic, test_suspend_epilog);
6463   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6464   tcase_add_test(tc_basic, test_unfinished_epilog);
6465   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6466   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6467   tcase_add_test__ifdef_xml_dtd(tc_basic,
6468                                 test_suspend_resume_internal_entity_issue_629);
6469   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6470   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6471   tcase_add_test(tc_basic, test_restart_on_error);
6472   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6473   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6474   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6475   tcase_add_test(tc_basic, test_standalone_internal_entity);
6476   tcase_add_test(tc_basic, test_skipped_external_entity);
6477   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6478   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6479   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6480   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6481   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6482   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6483   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6484   tcase_add_test(tc_basic, test_pi_handled_in_default);
6485   tcase_add_test(tc_basic, test_comment_handled_in_default);
6486   tcase_add_test(tc_basic, test_pi_yml);
6487   tcase_add_test(tc_basic, test_pi_xnl);
6488   tcase_add_test(tc_basic, test_pi_xmm);
6489   tcase_add_test(tc_basic, test_utf16_pi);
6490   tcase_add_test(tc_basic, test_utf16_be_pi);
6491   tcase_add_test(tc_basic, test_utf16_be_comment);
6492   tcase_add_test(tc_basic, test_utf16_le_comment);
6493   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6494   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6495   tcase_add_test(tc_basic, test_unknown_encoding_success);
6496   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6497   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6498   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6499   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6500   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6501   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6502   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6503   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6504   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6505   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6506   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6507   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6508   tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary);
6509   tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary);
6510   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6511   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6512   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6513   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6514   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6515   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6516   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6517   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6518   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6519   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6520   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6521   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6522   tcase_add_test(tc_basic, test_utf16_attribute);
6523   tcase_add_test(tc_basic, test_utf16_second_attr);
6524   tcase_add_test(tc_basic, test_attr_after_solidus);
6525   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6526   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6527   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6528   tcase_add_test(tc_basic, test_bad_doctype);
6529   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6530   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6531   tcase_add_test(tc_basic, test_bad_doctype_plus);
6532   tcase_add_test(tc_basic, test_bad_doctype_star);
6533   tcase_add_test(tc_basic, test_bad_doctype_query);
6534   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6535   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6536   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6537   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6538   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6539   tcase_add_test(tc_basic, test_short_doctype);
6540   tcase_add_test(tc_basic, test_short_doctype_2);
6541   tcase_add_test(tc_basic, test_short_doctype_3);
6542   tcase_add_test(tc_basic, test_long_doctype);
6543   tcase_add_test(tc_basic, test_bad_entity);
6544   tcase_add_test(tc_basic, test_bad_entity_2);
6545   tcase_add_test(tc_basic, test_bad_entity_3);
6546   tcase_add_test(tc_basic, test_bad_entity_4);
6547   tcase_add_test(tc_basic, test_bad_notation);
6548   tcase_add_test(tc_basic, test_default_doctype_handler);
6549   tcase_add_test(tc_basic, test_empty_element_abort);
6550   tcase_add_test__ifdef_xml_dtd(tc_basic,
6551                                 test_pool_integrity_with_unfinished_attr);
6552   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value);
6553   tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
6554   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
6555   tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
6556   tcase_add_test__if_xml_ge(tc_basic,
6557                             test_deep_nested_entity_delayed_interpretation);
6558   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6559   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
6560   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6561   tcase_add_test(tc_basic, test_set_reparse_deferral);
6562   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6563   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6564   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6565   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6566   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6567   tcase_add_test(tc_basic, test_varying_buffer_fills);
6568 }
6569