xref: /freebsd/contrib/expat/tests/basic_tests.c (revision 908f215e80fa482aa953c39afa6bb516f561fc00)
14543ef51SXin LI /* Tests in the "basic" test case for the Expat test suite
24543ef51SXin LI                             __  __            _
34543ef51SXin LI                          ___\ \/ /_ __   __ _| |_
44543ef51SXin LI                         / _ \\  /| '_ \ / _` | __|
54543ef51SXin LI                        |  __//  \| |_) | (_| | |_
64543ef51SXin LI                         \___/_/\_\ .__/ \__,_|\__|
74543ef51SXin LI                                  |_| XML parser
84543ef51SXin LI 
94543ef51SXin LI    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
104543ef51SXin LI    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
114543ef51SXin LI    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
124543ef51SXin LI    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
134543ef51SXin LI    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
144543ef51SXin LI    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
154543ef51SXin LI    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
164543ef51SXin LI    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
174543ef51SXin LI    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
184543ef51SXin LI    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
194543ef51SXin LI    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
204543ef51SXin LI    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
214543ef51SXin LI    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
224543ef51SXin LI    Licensed under the MIT license:
234543ef51SXin LI 
244543ef51SXin LI    Permission is  hereby granted,  free of charge,  to any  person obtaining
254543ef51SXin LI    a  copy  of  this  software   and  associated  documentation  files  (the
264543ef51SXin LI    "Software"),  to  deal in  the  Software  without restriction,  including
274543ef51SXin LI    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
284543ef51SXin LI    distribute, sublicense, and/or sell copies of the Software, and to permit
294543ef51SXin LI    persons  to whom  the Software  is  furnished to  do so,  subject to  the
304543ef51SXin LI    following conditions:
314543ef51SXin LI 
324543ef51SXin LI    The above copyright  notice and this permission notice  shall be included
334543ef51SXin LI    in all copies or substantial portions of the Software.
344543ef51SXin LI 
354543ef51SXin LI    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
364543ef51SXin LI    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
374543ef51SXin LI    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
384543ef51SXin LI    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
394543ef51SXin LI    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
404543ef51SXin LI    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
414543ef51SXin LI    USE OR OTHER DEALINGS IN THE SOFTWARE.
424543ef51SXin LI */
434543ef51SXin LI 
444543ef51SXin LI #if defined(NDEBUG)
454543ef51SXin LI #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
464543ef51SXin LI #endif
474543ef51SXin LI 
484543ef51SXin LI #include <assert.h>
494543ef51SXin LI 
504543ef51SXin LI #include <stdio.h>
514543ef51SXin LI #include <string.h>
524543ef51SXin LI #include <time.h>
534543ef51SXin LI 
544543ef51SXin LI #if ! defined(__cplusplus)
554543ef51SXin LI #  include <stdbool.h>
564543ef51SXin LI #endif
574543ef51SXin LI 
584543ef51SXin LI #include "expat_config.h"
594543ef51SXin LI 
604543ef51SXin LI #include "expat.h"
614543ef51SXin LI #include "internal.h"
624543ef51SXin LI #include "minicheck.h"
634543ef51SXin LI #include "structdata.h"
644543ef51SXin LI #include "common.h"
654543ef51SXin LI #include "dummy.h"
664543ef51SXin LI #include "handlers.h"
674543ef51SXin LI #include "siphash.h"
684543ef51SXin LI #include "basic_tests.h"
694543ef51SXin LI 
704543ef51SXin LI static void
714543ef51SXin LI basic_setup(void) {
724543ef51SXin LI   g_parser = XML_ParserCreate(NULL);
734543ef51SXin LI   if (g_parser == NULL)
744543ef51SXin LI     fail("Parser not created.");
754543ef51SXin LI }
764543ef51SXin LI 
774543ef51SXin LI /*
784543ef51SXin LI  * Character & encoding tests.
794543ef51SXin LI  */
804543ef51SXin LI 
814543ef51SXin LI START_TEST(test_nul_byte) {
824543ef51SXin LI   char text[] = "<doc>\0</doc>";
834543ef51SXin LI 
844543ef51SXin LI   /* test that a NUL byte (in US-ASCII data) is an error */
854543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
864543ef51SXin LI       == XML_STATUS_OK)
874543ef51SXin LI     fail("Parser did not report error on NUL-byte.");
884543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
894543ef51SXin LI     xml_failure(g_parser);
904543ef51SXin LI }
914543ef51SXin LI END_TEST
924543ef51SXin LI 
934543ef51SXin LI START_TEST(test_u0000_char) {
944543ef51SXin LI   /* test that a NUL byte (in US-ASCII data) is an error */
954543ef51SXin LI   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
964543ef51SXin LI                  "Parser did not report error on NUL-byte.");
974543ef51SXin LI }
984543ef51SXin LI END_TEST
994543ef51SXin LI 
1004543ef51SXin LI START_TEST(test_siphash_self) {
1014543ef51SXin LI   if (! sip24_valid())
1024543ef51SXin LI     fail("SipHash self-test failed");
1034543ef51SXin LI }
1044543ef51SXin LI END_TEST
1054543ef51SXin LI 
1064543ef51SXin LI START_TEST(test_siphash_spec) {
1074543ef51SXin LI   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
1084543ef51SXin LI   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
1094543ef51SXin LI                          "\x0a\x0b\x0c\x0d\x0e";
1104543ef51SXin LI   const size_t len = sizeof(message) - 1;
1114543ef51SXin LI   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
1124543ef51SXin LI   struct siphash state;
1134543ef51SXin LI   struct sipkey key;
1144543ef51SXin LI 
1154543ef51SXin LI   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
1164543ef51SXin LI                   "\x0a\x0b\x0c\x0d\x0e\x0f");
1174543ef51SXin LI   sip24_init(&state, &key);
1184543ef51SXin LI 
1194543ef51SXin LI   /* Cover spread across calls */
1204543ef51SXin LI   sip24_update(&state, message, 4);
1214543ef51SXin LI   sip24_update(&state, message + 4, len - 4);
1224543ef51SXin LI 
1234543ef51SXin LI   /* Cover null length */
1244543ef51SXin LI   sip24_update(&state, message, 0);
1254543ef51SXin LI 
1264543ef51SXin LI   if (sip24_final(&state) != expected)
1274543ef51SXin LI     fail("sip24_final failed spec test\n");
1284543ef51SXin LI 
1294543ef51SXin LI   /* Cover wrapper */
1304543ef51SXin LI   if (siphash24(message, len, &key) != expected)
1314543ef51SXin LI     fail("siphash24 failed spec test\n");
1324543ef51SXin LI }
1334543ef51SXin LI END_TEST
1344543ef51SXin LI 
1354543ef51SXin LI START_TEST(test_bom_utf8) {
1364543ef51SXin LI   /* This test is really just making sure we don't core on a UTF-8 BOM. */
1374543ef51SXin LI   const char *text = "\357\273\277<e/>";
1384543ef51SXin LI 
1394543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1404543ef51SXin LI       == XML_STATUS_ERROR)
1414543ef51SXin LI     xml_failure(g_parser);
1424543ef51SXin LI }
1434543ef51SXin LI END_TEST
1444543ef51SXin LI 
1454543ef51SXin LI START_TEST(test_bom_utf16_be) {
1464543ef51SXin LI   char text[] = "\376\377\0<\0e\0/\0>";
1474543ef51SXin LI 
1484543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
1494543ef51SXin LI       == XML_STATUS_ERROR)
1504543ef51SXin LI     xml_failure(g_parser);
1514543ef51SXin LI }
1524543ef51SXin LI END_TEST
1534543ef51SXin LI 
1544543ef51SXin LI START_TEST(test_bom_utf16_le) {
1554543ef51SXin LI   char text[] = "\377\376<\0e\0/\0>\0";
1564543ef51SXin LI 
1574543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
1584543ef51SXin LI       == XML_STATUS_ERROR)
1594543ef51SXin LI     xml_failure(g_parser);
1604543ef51SXin LI }
1614543ef51SXin LI END_TEST
1624543ef51SXin LI 
1634543ef51SXin LI START_TEST(test_nobom_utf16_le) {
1644543ef51SXin LI   char text[] = " \0<\0e\0/\0>\0";
1654543ef51SXin LI 
1664543ef51SXin LI   if (g_chunkSize == 1) {
1674543ef51SXin LI     // TODO: with just the first byte, we can't tell the difference between
1684543ef51SXin LI     // UTF-16-LE and UTF-8. Avoid the failure for now.
1694543ef51SXin LI     return;
1704543ef51SXin LI   }
1714543ef51SXin LI 
1724543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
1734543ef51SXin LI       == XML_STATUS_ERROR)
1744543ef51SXin LI     xml_failure(g_parser);
1754543ef51SXin LI }
1764543ef51SXin LI END_TEST
1774543ef51SXin LI 
1784543ef51SXin LI START_TEST(test_hash_collision) {
1794543ef51SXin LI   /* For full coverage of the lookup routine, we need to ensure a
1804543ef51SXin LI    * hash collision even though we can only tell that we have one
1814543ef51SXin LI    * through breakpoint debugging or coverage statistics.  The
1824543ef51SXin LI    * following will cause a hash collision on machines with a 64-bit
1834543ef51SXin LI    * long type; others will have to experiment.  The full coverage
1844543ef51SXin LI    * tests invoked from qa.sh usually provide a hash collision, but
1854543ef51SXin LI    * not always.  This is an attempt to provide insurance.
1864543ef51SXin LI    */
1874543ef51SXin LI #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
1884543ef51SXin LI   const char *text
1894543ef51SXin LI       = "<doc>\n"
1904543ef51SXin LI         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
1914543ef51SXin LI         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
1924543ef51SXin LI         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
1934543ef51SXin LI         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
1944543ef51SXin LI         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
1954543ef51SXin LI         "<d8>This triggers the table growth and collides with b2</d8>\n"
1964543ef51SXin LI         "</doc>\n";
1974543ef51SXin LI 
1984543ef51SXin LI   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
1994543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2004543ef51SXin LI       == XML_STATUS_ERROR)
2014543ef51SXin LI     xml_failure(g_parser);
2024543ef51SXin LI }
2034543ef51SXin LI END_TEST
2044543ef51SXin LI #undef COLLIDING_HASH_SALT
2054543ef51SXin LI 
2064543ef51SXin LI /* Regression test for SF bug #491986. */
2074543ef51SXin LI START_TEST(test_danish_latin1) {
2084543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
2094543ef51SXin LI                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
2104543ef51SXin LI #ifdef XML_UNICODE
2114543ef51SXin LI   const XML_Char *expected
2124543ef51SXin LI       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
2134543ef51SXin LI #else
2144543ef51SXin LI   const XML_Char *expected
2154543ef51SXin LI       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
2164543ef51SXin LI #endif
2174543ef51SXin LI   run_character_check(text, expected);
2184543ef51SXin LI }
2194543ef51SXin LI END_TEST
2204543ef51SXin LI 
2214543ef51SXin LI /* Regression test for SF bug #514281. */
2224543ef51SXin LI START_TEST(test_french_charref_hexidecimal) {
2234543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
2244543ef51SXin LI                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
2254543ef51SXin LI #ifdef XML_UNICODE
2264543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
2274543ef51SXin LI #else
2284543ef51SXin LI   const XML_Char *expected
2294543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
2304543ef51SXin LI #endif
2314543ef51SXin LI   run_character_check(text, expected);
2324543ef51SXin LI }
2334543ef51SXin LI END_TEST
2344543ef51SXin LI 
2354543ef51SXin LI START_TEST(test_french_charref_decimal) {
2364543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
2374543ef51SXin LI                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
2384543ef51SXin LI #ifdef XML_UNICODE
2394543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
2404543ef51SXin LI #else
2414543ef51SXin LI   const XML_Char *expected
2424543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
2434543ef51SXin LI #endif
2444543ef51SXin LI   run_character_check(text, expected);
2454543ef51SXin LI }
2464543ef51SXin LI END_TEST
2474543ef51SXin LI 
2484543ef51SXin LI START_TEST(test_french_latin1) {
2494543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
2504543ef51SXin LI                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
2514543ef51SXin LI #ifdef XML_UNICODE
2524543ef51SXin LI   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
2534543ef51SXin LI #else
2544543ef51SXin LI   const XML_Char *expected
2554543ef51SXin LI       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
2564543ef51SXin LI #endif
2574543ef51SXin LI   run_character_check(text, expected);
2584543ef51SXin LI }
2594543ef51SXin LI END_TEST
2604543ef51SXin LI 
2614543ef51SXin LI START_TEST(test_french_utf8) {
2624543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
2634543ef51SXin LI                      "<doc>\xC3\xA9</doc>";
2644543ef51SXin LI #ifdef XML_UNICODE
2654543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
2664543ef51SXin LI #else
2674543ef51SXin LI   const XML_Char *expected = XCS("\xC3\xA9");
2684543ef51SXin LI #endif
2694543ef51SXin LI   run_character_check(text, expected);
2704543ef51SXin LI }
2714543ef51SXin LI END_TEST
2724543ef51SXin LI 
2734543ef51SXin LI /* Regression test for SF bug #600479.
2744543ef51SXin LI    XXX There should be a test that exercises all legal XML Unicode
2754543ef51SXin LI    characters as PCDATA and attribute value content, and XML Name
2764543ef51SXin LI    characters as part of element and attribute names.
2774543ef51SXin LI */
2784543ef51SXin LI START_TEST(test_utf8_false_rejection) {
2794543ef51SXin LI   const char *text = "<doc>\xEF\xBA\xBF</doc>";
2804543ef51SXin LI #ifdef XML_UNICODE
2814543ef51SXin LI   const XML_Char *expected = XCS("\xfebf");
2824543ef51SXin LI #else
2834543ef51SXin LI   const XML_Char *expected = XCS("\xEF\xBA\xBF");
2844543ef51SXin LI #endif
2854543ef51SXin LI   run_character_check(text, expected);
2864543ef51SXin LI }
2874543ef51SXin LI END_TEST
2884543ef51SXin LI 
2894543ef51SXin LI /* Regression test for SF bug #477667.
2904543ef51SXin LI    This test assures that any 8-bit character followed by a 7-bit
2914543ef51SXin LI    character will not be mistakenly interpreted as a valid UTF-8
2924543ef51SXin LI    sequence.
2934543ef51SXin LI */
2944543ef51SXin LI START_TEST(test_illegal_utf8) {
2954543ef51SXin LI   char text[100];
2964543ef51SXin LI   int i;
2974543ef51SXin LI 
2984543ef51SXin LI   for (i = 128; i <= 255; ++i) {
2994543ef51SXin LI     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
3004543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3014543ef51SXin LI         == XML_STATUS_OK) {
3024543ef51SXin LI       snprintf(text, sizeof(text),
3034543ef51SXin LI                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
3044543ef51SXin LI                i);
3054543ef51SXin LI       fail(text);
3064543ef51SXin LI     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3074543ef51SXin LI       xml_failure(g_parser);
3084543ef51SXin LI     /* Reset the parser since we use the same parser repeatedly. */
3094543ef51SXin LI     XML_ParserReset(g_parser, NULL);
3104543ef51SXin LI   }
3114543ef51SXin LI }
3124543ef51SXin LI END_TEST
3134543ef51SXin LI 
3144543ef51SXin LI /* Examples, not masks: */
3154543ef51SXin LI #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
3164543ef51SXin LI #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
3174543ef51SXin LI #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
3184543ef51SXin LI #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
3194543ef51SXin LI #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
3204543ef51SXin LI 
3214543ef51SXin LI START_TEST(test_utf8_auto_align) {
3224543ef51SXin LI   struct TestCase {
3234543ef51SXin LI     ptrdiff_t expectedMovementInChars;
3244543ef51SXin LI     const char *input;
3254543ef51SXin LI   };
3264543ef51SXin LI 
3274543ef51SXin LI   struct TestCase cases[] = {
3284543ef51SXin LI       {00, ""},
3294543ef51SXin LI 
3304543ef51SXin LI       {00, UTF8_LEAD_1},
3314543ef51SXin LI 
3324543ef51SXin LI       {-1, UTF8_LEAD_2},
3334543ef51SXin LI       {00, UTF8_LEAD_2 UTF8_FOLLOW},
3344543ef51SXin LI 
3354543ef51SXin LI       {-1, UTF8_LEAD_3},
3364543ef51SXin LI       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
3374543ef51SXin LI       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
3384543ef51SXin LI 
3394543ef51SXin LI       {-1, UTF8_LEAD_4},
3404543ef51SXin LI       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
3414543ef51SXin LI       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
3424543ef51SXin LI       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
3434543ef51SXin LI   };
3444543ef51SXin LI 
3454543ef51SXin LI   size_t i = 0;
3464543ef51SXin LI   bool success = true;
3474543ef51SXin LI   for (; i < sizeof(cases) / sizeof(*cases); i++) {
3484543ef51SXin LI     const char *fromLim = cases[i].input + strlen(cases[i].input);
3494543ef51SXin LI     const char *const fromLimInitially = fromLim;
3504543ef51SXin LI     ptrdiff_t actualMovementInChars;
3514543ef51SXin LI 
3524543ef51SXin LI     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
3534543ef51SXin LI 
3544543ef51SXin LI     actualMovementInChars = (fromLim - fromLimInitially);
3554543ef51SXin LI     if (actualMovementInChars != cases[i].expectedMovementInChars) {
3564543ef51SXin LI       size_t j = 0;
3574543ef51SXin LI       success = false;
3584543ef51SXin LI       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
3594543ef51SXin LI              ", actually moved by %2d chars: \"",
3604543ef51SXin LI              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
3614543ef51SXin LI              (int)actualMovementInChars);
3624543ef51SXin LI       for (; j < strlen(cases[i].input); j++) {
3634543ef51SXin LI         printf("\\x%02x", (unsigned char)cases[i].input[j]);
3644543ef51SXin LI       }
3654543ef51SXin LI       printf("\"\n");
3664543ef51SXin LI     }
3674543ef51SXin LI   }
3684543ef51SXin LI 
3694543ef51SXin LI   if (! success) {
3704543ef51SXin LI     fail("UTF-8 auto-alignment is not bullet-proof\n");
3714543ef51SXin LI   }
3724543ef51SXin LI }
3734543ef51SXin LI END_TEST
3744543ef51SXin LI 
3754543ef51SXin LI START_TEST(test_utf16) {
3764543ef51SXin LI   /* <?xml version="1.0" encoding="UTF-16"?>
3774543ef51SXin LI    *  <doc a='123'>some {A} text</doc>
3784543ef51SXin LI    *
3794543ef51SXin LI    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
3804543ef51SXin LI    */
3814543ef51SXin LI   char text[]
3824543ef51SXin LI       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
3834543ef51SXin LI         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
3844543ef51SXin LI         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
3854543ef51SXin LI         "\000'\000?\000>\000\n"
3864543ef51SXin LI         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
3874543ef51SXin LI         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
3884543ef51SXin LI         "<\000/\000d\000o\000c\000>";
3894543ef51SXin LI #ifdef XML_UNICODE
3904543ef51SXin LI   const XML_Char *expected = XCS("some \xff21 text");
3914543ef51SXin LI #else
3924543ef51SXin LI   const XML_Char *expected = XCS("some \357\274\241 text");
3934543ef51SXin LI #endif
3944543ef51SXin LI   CharData storage;
3954543ef51SXin LI 
3964543ef51SXin LI   CharData_Init(&storage);
3974543ef51SXin LI   XML_SetUserData(g_parser, &storage);
3984543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3994543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
4004543ef51SXin LI       == XML_STATUS_ERROR)
4014543ef51SXin LI     xml_failure(g_parser);
4024543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
4034543ef51SXin LI }
4044543ef51SXin LI END_TEST
4054543ef51SXin LI 
4064543ef51SXin LI START_TEST(test_utf16_le_epilog_newline) {
4074543ef51SXin LI   unsigned int first_chunk_bytes = 17;
4084543ef51SXin LI   char text[] = "\xFF\xFE"                  /* BOM */
4094543ef51SXin LI                 "<\000e\000/\000>\000"      /* document element */
4104543ef51SXin LI                 "\r\000\n\000\r\000\n\000"; /* epilog */
4114543ef51SXin LI 
4124543ef51SXin LI   if (first_chunk_bytes >= sizeof(text) - 1)
4134543ef51SXin LI     fail("bad value of first_chunk_bytes");
4144543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
4154543ef51SXin LI       == XML_STATUS_ERROR)
4164543ef51SXin LI     xml_failure(g_parser);
4174543ef51SXin LI   else {
4184543ef51SXin LI     enum XML_Status rc;
4194543ef51SXin LI     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
4204543ef51SXin LI                                  sizeof(text) - first_chunk_bytes - 1,
4214543ef51SXin LI                                  XML_TRUE);
4224543ef51SXin LI     if (rc == XML_STATUS_ERROR)
4234543ef51SXin LI       xml_failure(g_parser);
4244543ef51SXin LI   }
4254543ef51SXin LI }
4264543ef51SXin LI END_TEST
4274543ef51SXin LI 
4284543ef51SXin LI /* Test that an outright lie in the encoding is faulted */
4294543ef51SXin LI START_TEST(test_not_utf16) {
4304543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
4314543ef51SXin LI                      "<doc>Hi</doc>";
4324543ef51SXin LI 
4334543ef51SXin LI   /* Use a handler to provoke the appropriate code paths */
4344543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
4354543ef51SXin LI   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
4364543ef51SXin LI                  "UTF-16 declared in UTF-8 not faulted");
4374543ef51SXin LI }
4384543ef51SXin LI END_TEST
4394543ef51SXin LI 
4404543ef51SXin LI /* Test that an unknown encoding is rejected */
4414543ef51SXin LI START_TEST(test_bad_encoding) {
4424543ef51SXin LI   const char *text = "<doc>Hi</doc>";
4434543ef51SXin LI 
4444543ef51SXin LI   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
4454543ef51SXin LI     fail("XML_SetEncoding failed");
4464543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4474543ef51SXin LI                  "Unknown encoding not faulted");
4484543ef51SXin LI }
4494543ef51SXin LI END_TEST
4504543ef51SXin LI 
4514543ef51SXin LI /* Regression test for SF bug #481609, #774028. */
4524543ef51SXin LI START_TEST(test_latin1_umlauts) {
4534543ef51SXin LI   const char *text
4544543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
4554543ef51SXin LI         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
4564543ef51SXin LI         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
4574543ef51SXin LI #ifdef XML_UNICODE
4584543ef51SXin LI   /* Expected results in UTF-16 */
4594543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
4604543ef51SXin LI       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
4614543ef51SXin LI #else
4624543ef51SXin LI   /* Expected results in UTF-8 */
4634543ef51SXin LI   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
4644543ef51SXin LI       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
4654543ef51SXin LI #endif
4664543ef51SXin LI 
4674543ef51SXin LI   run_character_check(text, expected);
4684543ef51SXin LI   XML_ParserReset(g_parser, NULL);
4694543ef51SXin LI   run_attribute_check(text, expected);
4704543ef51SXin LI   /* Repeat with a default handler */
4714543ef51SXin LI   XML_ParserReset(g_parser, NULL);
4724543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
4734543ef51SXin LI   run_character_check(text, expected);
4744543ef51SXin LI   XML_ParserReset(g_parser, NULL);
4754543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
4764543ef51SXin LI   run_attribute_check(text, expected);
4774543ef51SXin LI }
4784543ef51SXin LI END_TEST
4794543ef51SXin LI 
4804543ef51SXin LI /* Test that an element name with a 4-byte UTF-8 character is rejected */
4814543ef51SXin LI START_TEST(test_long_utf8_character) {
4824543ef51SXin LI   const char *text
4834543ef51SXin LI       = "<?xml version='1.0' encoding='utf-8'?>\n"
4844543ef51SXin LI         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
4854543ef51SXin LI         "<do\xf0\x90\x80\x80/>";
4864543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4874543ef51SXin LI                  "4-byte UTF-8 character in element name not faulted");
4884543ef51SXin LI }
4894543ef51SXin LI END_TEST
4904543ef51SXin LI 
4914543ef51SXin LI /* Test that a long latin-1 attribute (too long to convert in one go)
4924543ef51SXin LI  * is correctly converted
4934543ef51SXin LI  */
4944543ef51SXin LI START_TEST(test_long_latin1_attribute) {
4954543ef51SXin LI   const char *text
4964543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
4974543ef51SXin LI         "<doc att='"
4984543ef51SXin LI         /* 64 characters per line */
4994543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5004543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5014543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5024543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5034543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5044543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5054543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5064543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5074543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5084543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5094543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5104543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5114543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5124543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5134543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5144543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
5154543ef51SXin LI         /* Last character splits across a buffer boundary */
5164543ef51SXin LI         "\xe4'>\n</doc>";
5174543ef51SXin LI 
5184543ef51SXin LI   const XML_Char *expected =
5194543ef51SXin LI       /* 64 characters per line */
5204543ef51SXin LI       /* clang-format off */
5214543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5224543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5234543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5244543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5254543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5264543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5274543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5284543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5294543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5304543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5314543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5324543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5334543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5344543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5354543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5364543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
5374543ef51SXin LI   /* clang-format on */
5384543ef51SXin LI #ifdef XML_UNICODE
5394543ef51SXin LI                                                   XCS("\x00e4");
5404543ef51SXin LI #else
5414543ef51SXin LI                                                   XCS("\xc3\xa4");
5424543ef51SXin LI #endif
5434543ef51SXin LI 
5444543ef51SXin LI   run_attribute_check(text, expected);
5454543ef51SXin LI }
5464543ef51SXin LI END_TEST
5474543ef51SXin LI 
5484543ef51SXin LI /* Test that a long ASCII attribute (too long to convert in one go)
5494543ef51SXin LI  * is correctly converted
5504543ef51SXin LI  */
5514543ef51SXin LI START_TEST(test_long_ascii_attribute) {
5524543ef51SXin LI   const char *text
5534543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii'?>\n"
5544543ef51SXin LI         "<doc att='"
5554543ef51SXin LI         /* 64 characters per line */
5564543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5574543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5584543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5594543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5604543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5614543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5624543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5634543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5644543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5654543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5664543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5674543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5684543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5694543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5704543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5714543ef51SXin LI         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
5724543ef51SXin LI         "01234'>\n</doc>";
5734543ef51SXin LI   const XML_Char *expected =
5744543ef51SXin LI       /* 64 characters per line */
5754543ef51SXin LI       /* clang-format off */
5764543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5774543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5784543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5794543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5804543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5814543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5824543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5834543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5844543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5854543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5864543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5874543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5884543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5894543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5904543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5914543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
5924543ef51SXin LI         XCS("01234");
5934543ef51SXin LI   /* clang-format on */
5944543ef51SXin LI 
5954543ef51SXin LI   run_attribute_check(text, expected);
5964543ef51SXin LI }
5974543ef51SXin LI END_TEST
5984543ef51SXin LI 
5994543ef51SXin LI /* Regression test #1 for SF bug #653180. */
6004543ef51SXin LI START_TEST(test_line_number_after_parse) {
6014543ef51SXin LI   const char *text = "<tag>\n"
6024543ef51SXin LI                      "\n"
6034543ef51SXin LI                      "\n</tag>";
6044543ef51SXin LI   XML_Size lineno;
6054543ef51SXin LI 
6064543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6074543ef51SXin LI       == XML_STATUS_ERROR)
6084543ef51SXin LI     xml_failure(g_parser);
6094543ef51SXin LI   lineno = XML_GetCurrentLineNumber(g_parser);
6104543ef51SXin LI   if (lineno != 4) {
6114543ef51SXin LI     char buffer[100];
6124543ef51SXin LI     snprintf(buffer, sizeof(buffer),
6134543ef51SXin LI              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
6144543ef51SXin LI     fail(buffer);
6154543ef51SXin LI   }
6164543ef51SXin LI }
6174543ef51SXin LI END_TEST
6184543ef51SXin LI 
6194543ef51SXin LI /* Regression test #2 for SF bug #653180. */
6204543ef51SXin LI START_TEST(test_column_number_after_parse) {
6214543ef51SXin LI   const char *text = "<tag></tag>";
6224543ef51SXin LI   XML_Size colno;
6234543ef51SXin LI 
6244543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6254543ef51SXin LI       == XML_STATUS_ERROR)
6264543ef51SXin LI     xml_failure(g_parser);
6274543ef51SXin LI   colno = XML_GetCurrentColumnNumber(g_parser);
6284543ef51SXin LI   if (colno != 11) {
6294543ef51SXin LI     char buffer[100];
6304543ef51SXin LI     snprintf(buffer, sizeof(buffer),
6314543ef51SXin LI              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
6324543ef51SXin LI     fail(buffer);
6334543ef51SXin LI   }
6344543ef51SXin LI }
6354543ef51SXin LI END_TEST
6364543ef51SXin LI 
6374543ef51SXin LI /* Regression test #3 for SF bug #653180. */
6384543ef51SXin LI START_TEST(test_line_and_column_numbers_inside_handlers) {
6394543ef51SXin LI   const char *text = "<a>\n"      /* Unix end-of-line */
6404543ef51SXin LI                      "  <b>\r\n"  /* Windows end-of-line */
6414543ef51SXin LI                      "    <c/>\r" /* Mac OS end-of-line */
6424543ef51SXin LI                      "  </b>\n"
6434543ef51SXin LI                      "  <d>\n"
6444543ef51SXin LI                      "    <f/>\n"
6454543ef51SXin LI                      "  </d>\n"
6464543ef51SXin LI                      "</a>";
6474543ef51SXin LI   const StructDataEntry expected[]
6484543ef51SXin LI       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
6494543ef51SXin LI          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
6504543ef51SXin LI          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
6514543ef51SXin LI          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
6524543ef51SXin LI          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
6534543ef51SXin LI   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
6544543ef51SXin LI   StructData storage;
6554543ef51SXin LI 
6564543ef51SXin LI   StructData_Init(&storage);
6574543ef51SXin LI   XML_SetUserData(g_parser, &storage);
6584543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
6594543ef51SXin LI   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
6604543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6614543ef51SXin LI       == XML_STATUS_ERROR)
6624543ef51SXin LI     xml_failure(g_parser);
6634543ef51SXin LI 
6644543ef51SXin LI   StructData_CheckItems(&storage, expected, expected_count);
6654543ef51SXin LI   StructData_Dispose(&storage);
6664543ef51SXin LI }
6674543ef51SXin LI END_TEST
6684543ef51SXin LI 
6694543ef51SXin LI /* Regression test #4 for SF bug #653180. */
6704543ef51SXin LI START_TEST(test_line_number_after_error) {
6714543ef51SXin LI   const char *text = "<a>\n"
6724543ef51SXin LI                      "  <b>\n"
6734543ef51SXin LI                      "  </a>"; /* missing </b> */
6744543ef51SXin LI   XML_Size lineno;
6754543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6764543ef51SXin LI       != XML_STATUS_ERROR)
6774543ef51SXin LI     fail("Expected a parse error");
6784543ef51SXin LI 
6794543ef51SXin LI   lineno = XML_GetCurrentLineNumber(g_parser);
6804543ef51SXin LI   if (lineno != 3) {
6814543ef51SXin LI     char buffer[100];
6824543ef51SXin LI     snprintf(buffer, sizeof(buffer),
6834543ef51SXin LI              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
6844543ef51SXin LI     fail(buffer);
6854543ef51SXin LI   }
6864543ef51SXin LI }
6874543ef51SXin LI END_TEST
6884543ef51SXin LI 
6894543ef51SXin LI /* Regression test #5 for SF bug #653180. */
6904543ef51SXin LI START_TEST(test_column_number_after_error) {
6914543ef51SXin LI   const char *text = "<a>\n"
6924543ef51SXin LI                      "  <b>\n"
6934543ef51SXin LI                      "  </a>"; /* missing </b> */
6944543ef51SXin LI   XML_Size colno;
6954543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
6964543ef51SXin LI       != XML_STATUS_ERROR)
6974543ef51SXin LI     fail("Expected a parse error");
6984543ef51SXin LI 
6994543ef51SXin LI   colno = XML_GetCurrentColumnNumber(g_parser);
7004543ef51SXin LI   if (colno != 4) {
7014543ef51SXin LI     char buffer[100];
7024543ef51SXin LI     snprintf(buffer, sizeof(buffer),
7034543ef51SXin LI              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
7044543ef51SXin LI     fail(buffer);
7054543ef51SXin LI   }
7064543ef51SXin LI }
7074543ef51SXin LI END_TEST
7084543ef51SXin LI 
7094543ef51SXin LI /* Regression test for SF bug #478332. */
7104543ef51SXin LI START_TEST(test_really_long_lines) {
7114543ef51SXin LI   /* This parses an input line longer than INIT_DATA_BUF_SIZE
7124543ef51SXin LI      characters long (defined to be 1024 in xmlparse.c).  We take a
7134543ef51SXin LI      really cheesy approach to building the input buffer, because
7144543ef51SXin LI      this avoids writing bugs in buffer-filling code.
7154543ef51SXin LI   */
7164543ef51SXin LI   const char *text
7174543ef51SXin LI       = "<e>"
7184543ef51SXin LI         /* 64 chars */
7194543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7204543ef51SXin LI         /* until we have at least 1024 characters on the line: */
7214543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7224543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7234543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7244543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7254543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7264543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7274543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7284543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7294543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7304543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7314543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7324543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7334543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7344543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7354543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7364543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7374543ef51SXin LI         "</e>";
7384543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
7394543ef51SXin LI       == XML_STATUS_ERROR)
7404543ef51SXin LI     xml_failure(g_parser);
7414543ef51SXin LI }
7424543ef51SXin LI END_TEST
7434543ef51SXin LI 
7444543ef51SXin LI /* Test cdata processing across a buffer boundary */
7454543ef51SXin LI START_TEST(test_really_long_encoded_lines) {
7464543ef51SXin LI   /* As above, except that we want to provoke an output buffer
7474543ef51SXin LI    * overflow with a non-trivial encoding.  For this we need to pass
7484543ef51SXin LI    * the whole cdata in one go, not byte-by-byte.
7494543ef51SXin LI    */
7504543ef51SXin LI   void *buffer;
7514543ef51SXin LI   const char *text
7524543ef51SXin LI       = "<?xml version='1.0' encoding='iso-8859-1'?>"
7534543ef51SXin LI         "<e>"
7544543ef51SXin LI         /* 64 chars */
7554543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7564543ef51SXin LI         /* until we have at least 1024 characters on the line: */
7574543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7584543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7594543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7604543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7614543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7624543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7634543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7644543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7654543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7664543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7674543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7684543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7694543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7704543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7714543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7724543ef51SXin LI         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
7734543ef51SXin LI         "</e>";
7744543ef51SXin LI   int parse_len = (int)strlen(text);
7754543ef51SXin LI 
7764543ef51SXin LI   /* Need a cdata handler to provoke the code path we want to test */
7774543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
7784543ef51SXin LI   buffer = XML_GetBuffer(g_parser, parse_len);
7794543ef51SXin LI   if (buffer == NULL)
7804543ef51SXin LI     fail("Could not allocate parse buffer");
7814543ef51SXin LI   assert(buffer != NULL);
7824543ef51SXin LI   memcpy(buffer, text, parse_len);
7834543ef51SXin LI   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
7844543ef51SXin LI     xml_failure(g_parser);
7854543ef51SXin LI }
7864543ef51SXin LI END_TEST
7874543ef51SXin LI 
7884543ef51SXin LI /*
7894543ef51SXin LI  * Element event tests.
7904543ef51SXin LI  */
7914543ef51SXin LI 
7924543ef51SXin LI START_TEST(test_end_element_events) {
7934543ef51SXin LI   const char *text = "<a><b><c/></b><d><f/></d></a>";
7944543ef51SXin LI   const XML_Char *expected = XCS("/c/b/f/d/a");
7954543ef51SXin LI   CharData storage;
7964543ef51SXin LI 
7974543ef51SXin LI   CharData_Init(&storage);
7984543ef51SXin LI   XML_SetUserData(g_parser, &storage);
7994543ef51SXin LI   XML_SetEndElementHandler(g_parser, end_element_event_handler);
8004543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
8014543ef51SXin LI       == XML_STATUS_ERROR)
8024543ef51SXin LI     xml_failure(g_parser);
8034543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
8044543ef51SXin LI }
8054543ef51SXin LI END_TEST
8064543ef51SXin LI 
8074543ef51SXin LI /*
8084543ef51SXin LI  * Attribute tests.
8094543ef51SXin LI  */
8104543ef51SXin LI 
8114543ef51SXin LI /* Helper used by the following tests; this checks any "attr" and "refs"
8124543ef51SXin LI    attributes to make sure whitespace has been normalized.
8134543ef51SXin LI 
8144543ef51SXin LI    Return true if whitespace has been normalized in a string, using
8154543ef51SXin LI    the rules for attribute value normalization.  The 'is_cdata' flag
8164543ef51SXin LI    is needed since CDATA attributes don't need to have multiple
8174543ef51SXin LI    whitespace characters collapsed to a single space, while other
8184543ef51SXin LI    attribute data types do.  (Section 3.3.3 of the recommendation.)
8194543ef51SXin LI */
8204543ef51SXin LI static int
8214543ef51SXin LI is_whitespace_normalized(const XML_Char *s, int is_cdata) {
8224543ef51SXin LI   int blanks = 0;
8234543ef51SXin LI   int at_start = 1;
8244543ef51SXin LI   while (*s) {
8254543ef51SXin LI     if (*s == XCS(' '))
8264543ef51SXin LI       ++blanks;
8274543ef51SXin LI     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
8284543ef51SXin LI       return 0;
8294543ef51SXin LI     else {
8304543ef51SXin LI       if (at_start) {
8314543ef51SXin LI         at_start = 0;
8324543ef51SXin LI         if (blanks && ! is_cdata)
8334543ef51SXin LI           /* illegal leading blanks */
8344543ef51SXin LI           return 0;
8354543ef51SXin LI       } else if (blanks > 1 && ! is_cdata)
8364543ef51SXin LI         return 0;
8374543ef51SXin LI       blanks = 0;
8384543ef51SXin LI     }
8394543ef51SXin LI     ++s;
8404543ef51SXin LI   }
8414543ef51SXin LI   if (blanks && ! is_cdata)
8424543ef51SXin LI     return 0;
8434543ef51SXin LI   return 1;
8444543ef51SXin LI }
8454543ef51SXin LI 
8464543ef51SXin LI /* Check the attribute whitespace checker: */
8474543ef51SXin LI START_TEST(test_helper_is_whitespace_normalized) {
8484543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc"), 0));
8494543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc"), 1));
8504543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
8514543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
8524543ef51SXin LI   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
8534543ef51SXin LI   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
8544543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
8554543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
8564543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
8574543ef51SXin LI   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
8584543ef51SXin LI   assert(! is_whitespace_normalized(XCS(" "), 0));
8594543ef51SXin LI   assert(is_whitespace_normalized(XCS(" "), 1));
8604543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\t"), 0));
8614543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\t"), 1));
8624543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\n"), 0));
8634543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\n"), 1));
8644543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\r"), 0));
8654543ef51SXin LI   assert(! is_whitespace_normalized(XCS("\r"), 1));
8664543ef51SXin LI   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
8674543ef51SXin LI }
8684543ef51SXin LI END_TEST
8694543ef51SXin LI 
8704543ef51SXin LI static void XMLCALL
8714543ef51SXin LI check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
8724543ef51SXin LI                                           const XML_Char **atts) {
8734543ef51SXin LI   int i;
8744543ef51SXin LI   UNUSED_P(userData);
8754543ef51SXin LI   UNUSED_P(name);
8764543ef51SXin LI   for (i = 0; atts[i] != NULL; i += 2) {
8774543ef51SXin LI     const XML_Char *attrname = atts[i];
8784543ef51SXin LI     const XML_Char *value = atts[i + 1];
8794543ef51SXin LI     if (xcstrcmp(XCS("attr"), attrname) == 0
8804543ef51SXin LI         || xcstrcmp(XCS("ents"), attrname) == 0
8814543ef51SXin LI         || xcstrcmp(XCS("refs"), attrname) == 0) {
8824543ef51SXin LI       if (! is_whitespace_normalized(value, 0)) {
8834543ef51SXin LI         char buffer[256];
8844543ef51SXin LI         snprintf(buffer, sizeof(buffer),
8854543ef51SXin LI                  "attribute value not normalized: %" XML_FMT_STR
8864543ef51SXin LI                  "='%" XML_FMT_STR "'",
8874543ef51SXin LI                  attrname, value);
8884543ef51SXin LI         fail(buffer);
8894543ef51SXin LI       }
8904543ef51SXin LI     }
8914543ef51SXin LI   }
8924543ef51SXin LI }
8934543ef51SXin LI 
8944543ef51SXin LI START_TEST(test_attr_whitespace_normalization) {
8954543ef51SXin LI   const char *text
8964543ef51SXin LI       = "<!DOCTYPE doc [\n"
8974543ef51SXin LI         "  <!ATTLIST doc\n"
8984543ef51SXin LI         "            attr NMTOKENS #REQUIRED\n"
8994543ef51SXin LI         "            ents ENTITIES #REQUIRED\n"
9004543ef51SXin LI         "            refs IDREFS   #REQUIRED>\n"
9014543ef51SXin LI         "]>\n"
9024543ef51SXin LI         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
9034543ef51SXin LI         "     ents=' ent-1   \t\r\n"
9044543ef51SXin LI         "            ent-2  ' >\n"
9054543ef51SXin LI         "  <e id='id-1'/>\n"
9064543ef51SXin LI         "  <e id='id-2'/>\n"
9074543ef51SXin LI         "</doc>";
9084543ef51SXin LI 
9094543ef51SXin LI   XML_SetStartElementHandler(g_parser,
9104543ef51SXin LI                              check_attr_contains_normalized_whitespace);
9114543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
9124543ef51SXin LI       == XML_STATUS_ERROR)
9134543ef51SXin LI     xml_failure(g_parser);
9144543ef51SXin LI }
9154543ef51SXin LI END_TEST
9164543ef51SXin LI 
9174543ef51SXin LI /*
9184543ef51SXin LI  * XML declaration tests.
9194543ef51SXin LI  */
9204543ef51SXin LI 
9214543ef51SXin LI START_TEST(test_xmldecl_misplaced) {
9224543ef51SXin LI   expect_failure("\n"
9234543ef51SXin LI                  "<?xml version='1.0'?>\n"
9244543ef51SXin LI                  "<a/>",
9254543ef51SXin LI                  XML_ERROR_MISPLACED_XML_PI,
9264543ef51SXin LI                  "failed to report misplaced XML declaration");
9274543ef51SXin LI }
9284543ef51SXin LI END_TEST
9294543ef51SXin LI 
9304543ef51SXin LI START_TEST(test_xmldecl_invalid) {
9314543ef51SXin LI   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
9324543ef51SXin LI                  "Failed to report invalid XML declaration");
9334543ef51SXin LI }
9344543ef51SXin LI END_TEST
9354543ef51SXin LI 
9364543ef51SXin LI START_TEST(test_xmldecl_missing_attr) {
9374543ef51SXin LI   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
9384543ef51SXin LI                  "Failed to report missing XML declaration attribute");
9394543ef51SXin LI }
9404543ef51SXin LI END_TEST
9414543ef51SXin LI 
9424543ef51SXin LI START_TEST(test_xmldecl_missing_value) {
9434543ef51SXin LI   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
9444543ef51SXin LI                  "<doc/>",
9454543ef51SXin LI                  XML_ERROR_XML_DECL,
9464543ef51SXin LI                  "Failed to report missing attribute value");
9474543ef51SXin LI }
9484543ef51SXin LI END_TEST
9494543ef51SXin LI 
9504543ef51SXin LI /* Regression test for SF bug #584832. */
9514543ef51SXin LI START_TEST(test_unknown_encoding_internal_entity) {
9524543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
9534543ef51SXin LI                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
9544543ef51SXin LI                      "<test a='&foo;'/>";
9554543ef51SXin LI 
9564543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
9574543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
9584543ef51SXin LI       == XML_STATUS_ERROR)
9594543ef51SXin LI     xml_failure(g_parser);
9604543ef51SXin LI }
9614543ef51SXin LI END_TEST
9624543ef51SXin LI 
9634543ef51SXin LI /* Test unrecognised encoding handler */
9644543ef51SXin LI START_TEST(test_unrecognised_encoding_internal_entity) {
9654543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
9664543ef51SXin LI                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
9674543ef51SXin LI                      "<test a='&foo;'/>";
9684543ef51SXin LI 
9694543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
9704543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
9714543ef51SXin LI       != XML_STATUS_ERROR)
9724543ef51SXin LI     fail("Unrecognised encoding not rejected");
9734543ef51SXin LI }
9744543ef51SXin LI END_TEST
9754543ef51SXin LI 
9764543ef51SXin LI /* Regression test for SF bug #620106. */
9774543ef51SXin LI START_TEST(test_ext_entity_set_encoding) {
9784543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
9794543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
9804543ef51SXin LI                      "]>\n"
9814543ef51SXin LI                      "<doc>&en;</doc>";
9824543ef51SXin LI   ExtTest test_data
9834543ef51SXin LI       = {/* This text says it's an unsupported encoding, but it's really
9844543ef51SXin LI             UTF-8, which we tell Expat using XML_SetEncoding().
9854543ef51SXin LI          */
9864543ef51SXin LI          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
9874543ef51SXin LI #ifdef XML_UNICODE
9884543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
9894543ef51SXin LI #else
9904543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9");
9914543ef51SXin LI #endif
9924543ef51SXin LI 
9934543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
9944543ef51SXin LI   run_ext_character_check(text, &test_data, expected);
9954543ef51SXin LI }
9964543ef51SXin LI END_TEST
9974543ef51SXin LI 
9984543ef51SXin LI /* Test external entities with no handler */
9994543ef51SXin LI START_TEST(test_ext_entity_no_handler) {
10004543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
10014543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
10024543ef51SXin LI                      "]>\n"
10034543ef51SXin LI                      "<doc>&en;</doc>";
10044543ef51SXin LI 
10054543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
10064543ef51SXin LI   run_character_check(text, XCS(""));
10074543ef51SXin LI }
10084543ef51SXin LI END_TEST
10094543ef51SXin LI 
10104543ef51SXin LI /* Test UTF-8 BOM is accepted */
10114543ef51SXin LI START_TEST(test_ext_entity_set_bom) {
10124543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
10134543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
10144543ef51SXin LI                      "]>\n"
10154543ef51SXin LI                      "<doc>&en;</doc>";
10164543ef51SXin LI   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
10174543ef51SXin LI                        "<?xml encoding='iso-8859-3'?>"
10184543ef51SXin LI                        "\xC3\xA9",
10194543ef51SXin LI                        XCS("utf-8"), NULL};
10204543ef51SXin LI #ifdef XML_UNICODE
10214543ef51SXin LI   const XML_Char *expected = XCS("\x00e9");
10224543ef51SXin LI #else
10234543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9");
10244543ef51SXin LI #endif
10254543ef51SXin LI 
10264543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
10274543ef51SXin LI   run_ext_character_check(text, &test_data, expected);
10284543ef51SXin LI }
10294543ef51SXin LI END_TEST
10304543ef51SXin LI 
10314543ef51SXin LI /* Test that bad encodings are faulted */
10324543ef51SXin LI START_TEST(test_ext_entity_bad_encoding) {
10334543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
10344543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
10354543ef51SXin LI                      "]>\n"
10364543ef51SXin LI                      "<doc>&en;</doc>";
10374543ef51SXin LI   ExtFaults fault
10384543ef51SXin LI       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
10394543ef51SXin LI          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
10404543ef51SXin LI 
10414543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
10424543ef51SXin LI   XML_SetUserData(g_parser, &fault);
10434543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
10444543ef51SXin LI                  "Bad encoding should not have been accepted");
10454543ef51SXin LI }
10464543ef51SXin LI END_TEST
10474543ef51SXin LI 
10484543ef51SXin LI /* Try handing an invalid encoding to an external entity parser */
10494543ef51SXin LI START_TEST(test_ext_entity_bad_encoding_2) {
10504543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
10514543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
10524543ef51SXin LI                      "<doc>&entity;</doc>";
10534543ef51SXin LI   ExtFaults fault
10544543ef51SXin LI       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
10554543ef51SXin LI          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
10564543ef51SXin LI 
10574543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
10584543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
10594543ef51SXin LI   XML_SetUserData(g_parser, &fault);
10604543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
10614543ef51SXin LI                  "Bad encoding not faulted in external entity handler");
10624543ef51SXin LI }
10634543ef51SXin LI END_TEST
10644543ef51SXin LI 
10654543ef51SXin LI /* Test that no error is reported for unknown entities if we don't
10664543ef51SXin LI    read an external subset.  This was fixed in Expat 1.95.5.
10674543ef51SXin LI */
10684543ef51SXin LI START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
10694543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
10704543ef51SXin LI                      "<doc>&entity;</doc>";
10714543ef51SXin LI 
10724543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
10734543ef51SXin LI       == XML_STATUS_ERROR)
10744543ef51SXin LI     xml_failure(g_parser);
10754543ef51SXin LI }
10764543ef51SXin LI END_TEST
10774543ef51SXin LI 
10784543ef51SXin LI /* Test that an error is reported for unknown entities if we don't
10794543ef51SXin LI    have an external subset.
10804543ef51SXin LI */
10814543ef51SXin LI START_TEST(test_wfc_undeclared_entity_no_external_subset) {
10824543ef51SXin LI   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
10834543ef51SXin LI                  "Parser did not report undefined entity w/out a DTD.");
10844543ef51SXin LI }
10854543ef51SXin LI END_TEST
10864543ef51SXin LI 
10874543ef51SXin LI /* Test that an error is reported for unknown entities if we don't
10884543ef51SXin LI    read an external subset, but have been declared standalone.
10894543ef51SXin LI */
10904543ef51SXin LI START_TEST(test_wfc_undeclared_entity_standalone) {
10914543ef51SXin LI   const char *text
10924543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
10934543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
10944543ef51SXin LI         "<doc>&entity;</doc>";
10954543ef51SXin LI 
10964543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
10974543ef51SXin LI                  "Parser did not report undefined entity (standalone).");
10984543ef51SXin LI }
10994543ef51SXin LI END_TEST
11004543ef51SXin LI 
11014543ef51SXin LI /* Test that an error is reported for unknown entities if we have read
11024543ef51SXin LI    an external subset, and standalone is true.
11034543ef51SXin LI */
11044543ef51SXin LI START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
11054543ef51SXin LI   const char *text
11064543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
11074543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
11084543ef51SXin LI         "<doc>&entity;</doc>";
11094543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
11104543ef51SXin LI 
11114543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
11124543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
11134543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
11144543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
11154543ef51SXin LI                  "Parser did not report undefined entity (external DTD).");
11164543ef51SXin LI }
11174543ef51SXin LI END_TEST
11184543ef51SXin LI 
11194543ef51SXin LI /* Test that external entity handling is not done if the parsing flag
11204543ef51SXin LI  * is set to UNLESS_STANDALONE
11214543ef51SXin LI  */
11224543ef51SXin LI START_TEST(test_entity_with_external_subset_unless_standalone) {
11234543ef51SXin LI   const char *text
11244543ef51SXin LI       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
11254543ef51SXin LI         "<!DOCTYPE doc SYSTEM 'foo'>\n"
11264543ef51SXin LI         "<doc>&entity;</doc>";
11274543ef51SXin LI   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
11284543ef51SXin LI 
11294543ef51SXin LI   XML_SetParamEntityParsing(g_parser,
11304543ef51SXin LI                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
11314543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
11324543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
11334543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
11344543ef51SXin LI                  "Parser did not report undefined entity");
11354543ef51SXin LI }
11364543ef51SXin LI END_TEST
11374543ef51SXin LI 
11384543ef51SXin LI /* Test that no error is reported for unknown entities if we have read
11394543ef51SXin LI    an external subset, and standalone is false.
11404543ef51SXin LI */
11414543ef51SXin LI START_TEST(test_wfc_undeclared_entity_with_external_subset) {
11424543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
11434543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
11444543ef51SXin LI                      "<doc>&entity;</doc>";
11454543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
11464543ef51SXin LI 
11474543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
11484543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
11494543ef51SXin LI   run_ext_character_check(text, &test_data, XCS(""));
11504543ef51SXin LI }
11514543ef51SXin LI END_TEST
11524543ef51SXin LI 
11534543ef51SXin LI /* Test that an error is reported if our NotStandalone handler fails */
11544543ef51SXin LI START_TEST(test_not_standalone_handler_reject) {
11554543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
11564543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
11574543ef51SXin LI                      "<doc>&entity;</doc>";
11584543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
11594543ef51SXin LI 
11604543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
11614543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
11624543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
11634543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
11644543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
11654543ef51SXin LI                  "NotStandalone handler failed to reject");
11664543ef51SXin LI 
11674543ef51SXin LI   /* Try again but without external entity handling */
11684543ef51SXin LI   XML_ParserReset(g_parser, NULL);
11694543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
11704543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
11714543ef51SXin LI                  "NotStandalone handler failed to reject");
11724543ef51SXin LI }
11734543ef51SXin LI END_TEST
11744543ef51SXin LI 
11754543ef51SXin LI /* Test that no error is reported if our NotStandalone handler succeeds */
11764543ef51SXin LI START_TEST(test_not_standalone_handler_accept) {
11774543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
11784543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
11794543ef51SXin LI                      "<doc>&entity;</doc>";
11804543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
11814543ef51SXin LI 
11824543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
11834543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
11844543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
11854543ef51SXin LI   run_ext_character_check(text, &test_data, XCS(""));
11864543ef51SXin LI 
11874543ef51SXin LI   /* Repeat without the external entity handler */
11884543ef51SXin LI   XML_ParserReset(g_parser, NULL);
11894543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
11904543ef51SXin LI   run_character_check(text, XCS(""));
11914543ef51SXin LI }
11924543ef51SXin LI END_TEST
11934543ef51SXin LI 
11944543ef51SXin LI START_TEST(test_wfc_no_recursive_entity_refs) {
11954543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
11964543ef51SXin LI                      "  <!ENTITY entity '&#38;entity;'>\n"
11974543ef51SXin LI                      "]>\n"
11984543ef51SXin LI                      "<doc>&entity;</doc>";
11994543ef51SXin LI 
12004543ef51SXin LI   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
12014543ef51SXin LI                  "Parser did not report recursive entity reference.");
12024543ef51SXin LI }
12034543ef51SXin LI END_TEST
12044543ef51SXin LI 
1205ffd294a1SEnji Cooper START_TEST(test_recursive_external_parameter_entity_2) {
1206ffd294a1SEnji Cooper   struct TestCase {
1207ffd294a1SEnji Cooper     const char *doc;
1208ffd294a1SEnji Cooper     enum XML_Status expectedStatus;
1209ffd294a1SEnji Cooper   };
1210ffd294a1SEnji Cooper 
1211ffd294a1SEnji Cooper   struct TestCase cases[] = {
1212ffd294a1SEnji Cooper       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213ffd294a1SEnji Cooper       {"<!ENTITY % p1 '%p1;'>"
1214ffd294a1SEnji Cooper        "<!ENTITY % p1 'first declaration wins'>",
1215ffd294a1SEnji Cooper        XML_STATUS_ERROR},
1216ffd294a1SEnji Cooper       {"<!ENTITY % p1 'first declaration wins'>"
1217ffd294a1SEnji Cooper        "<!ENTITY % p1 '%p1;'>",
1218ffd294a1SEnji Cooper        XML_STATUS_OK},
1219ffd294a1SEnji Cooper       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220ffd294a1SEnji Cooper   };
1221ffd294a1SEnji Cooper 
1222ffd294a1SEnji Cooper   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223ffd294a1SEnji Cooper     const char *const doc = cases[i].doc;
1224ffd294a1SEnji Cooper     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225ffd294a1SEnji Cooper     set_subtest("%s", doc);
1226ffd294a1SEnji Cooper 
1227ffd294a1SEnji Cooper     XML_Parser parser = XML_ParserCreate(NULL);
1228ffd294a1SEnji Cooper     assert_true(parser != NULL);
1229ffd294a1SEnji Cooper 
1230ffd294a1SEnji Cooper     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231ffd294a1SEnji Cooper     assert_true(ext_parser != NULL);
1232ffd294a1SEnji Cooper 
1233ffd294a1SEnji Cooper     const enum XML_Status actualStatus
1234ffd294a1SEnji Cooper         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235ffd294a1SEnji Cooper 
1236ffd294a1SEnji Cooper     assert_true(actualStatus == expectedStatus);
1237ffd294a1SEnji Cooper     if (actualStatus != XML_STATUS_OK) {
1238ffd294a1SEnji Cooper       assert_true(XML_GetErrorCode(ext_parser)
1239ffd294a1SEnji Cooper                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1240ffd294a1SEnji Cooper     }
1241ffd294a1SEnji Cooper 
1242ffd294a1SEnji Cooper     XML_ParserFree(ext_parser);
1243ffd294a1SEnji Cooper     XML_ParserFree(parser);
1244ffd294a1SEnji Cooper   }
1245ffd294a1SEnji Cooper }
1246ffd294a1SEnji Cooper END_TEST
1247ffd294a1SEnji Cooper 
12484543ef51SXin LI /* Test incomplete external entities are faulted */
12494543ef51SXin LI START_TEST(test_ext_entity_invalid_parse) {
12504543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
12514543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
12524543ef51SXin LI                      "]>\n"
12534543ef51SXin LI                      "<doc>&en;</doc>";
12544543ef51SXin LI   const ExtFaults faults[]
12554543ef51SXin LI       = {{"<", "Incomplete element declaration not faulted", NULL,
12564543ef51SXin LI           XML_ERROR_UNCLOSED_TOKEN},
12574543ef51SXin LI          {"<\xe2\x82", /* First two bytes of a three-byte char */
12584543ef51SXin LI           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
12594543ef51SXin LI          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
12604543ef51SXin LI           XML_ERROR_PARTIAL_CHAR},
12614543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
12624543ef51SXin LI   const ExtFaults *fault = faults;
12634543ef51SXin LI 
12644543ef51SXin LI   for (; fault->parse_text != NULL; fault++) {
12654543ef51SXin LI     set_subtest("\"%s\"", fault->parse_text);
12664543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
12674543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
12684543ef51SXin LI     XML_SetUserData(g_parser, (void *)fault);
12694543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
12704543ef51SXin LI                    "Parser did not report external entity error");
12714543ef51SXin LI     XML_ParserReset(g_parser, NULL);
12724543ef51SXin LI   }
12734543ef51SXin LI }
12744543ef51SXin LI END_TEST
12754543ef51SXin LI 
12764543ef51SXin LI /* Regression test for SF bug #483514. */
12774543ef51SXin LI START_TEST(test_dtd_default_handling) {
12784543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
12794543ef51SXin LI                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
12804543ef51SXin LI                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
12814543ef51SXin LI                      "<!ELEMENT doc EMPTY>\n"
12824543ef51SXin LI                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
12834543ef51SXin LI                      "<?pi in dtd?>\n"
12844543ef51SXin LI                      "<!--comment in dtd-->\n"
12854543ef51SXin LI                      "]><doc/>";
12864543ef51SXin LI 
12874543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
12884543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
12894543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
12904543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
12914543ef51SXin LI   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
12924543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
12934543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
12944543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
12954543ef51SXin LI   XML_SetCommentHandler(g_parser, dummy_comment_handler);
12964543ef51SXin LI   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
12974543ef51SXin LI   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
12984543ef51SXin LI   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
12994543ef51SXin LI }
13004543ef51SXin LI END_TEST
13014543ef51SXin LI 
13024543ef51SXin LI /* Test handling of attribute declarations */
13034543ef51SXin LI START_TEST(test_dtd_attr_handling) {
13044543ef51SXin LI   const char *prolog = "<!DOCTYPE doc [\n"
13054543ef51SXin LI                        "<!ELEMENT doc EMPTY>\n";
13064543ef51SXin LI   AttTest attr_data[]
13074543ef51SXin LI       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
13084543ef51SXin LI           "]>"
13094543ef51SXin LI           "<doc a='two'/>",
13104543ef51SXin LI           XCS("doc"), XCS("a"),
13114543ef51SXin LI           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
13124543ef51SXin LI           NULL, XML_TRUE},
13134543ef51SXin LI          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
13144543ef51SXin LI           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
13154543ef51SXin LI           "]>"
13164543ef51SXin LI           "<doc/>",
13174543ef51SXin LI           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
13184543ef51SXin LI          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
13194543ef51SXin LI           "]>"
13204543ef51SXin LI           "<doc/>",
13214543ef51SXin LI           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
13224543ef51SXin LI          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
13234543ef51SXin LI           "]>"
13244543ef51SXin LI           "<doc/>",
13254543ef51SXin LI           XCS("doc"), XCS("a"), XCS("CDATA"),
13264543ef51SXin LI #ifdef XML_UNICODE
13274543ef51SXin LI           XCS("\x06f2"),
13284543ef51SXin LI #else
13294543ef51SXin LI           XCS("\xdb\xb2"),
13304543ef51SXin LI #endif
13314543ef51SXin LI           XML_FALSE},
13324543ef51SXin LI          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
13334543ef51SXin LI   AttTest *test;
13344543ef51SXin LI 
13354543ef51SXin LI   for (test = attr_data; test->definition != NULL; test++) {
13364543ef51SXin LI     set_subtest("%s", test->definition);
13374543ef51SXin LI     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
13384543ef51SXin LI     XML_SetUserData(g_parser, test);
13394543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
13404543ef51SXin LI                                 XML_FALSE)
13414543ef51SXin LI         == XML_STATUS_ERROR)
13424543ef51SXin LI       xml_failure(g_parser);
13434543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
13444543ef51SXin LI                                 (int)strlen(test->definition), XML_TRUE)
13454543ef51SXin LI         == XML_STATUS_ERROR)
13464543ef51SXin LI       xml_failure(g_parser);
13474543ef51SXin LI     XML_ParserReset(g_parser, NULL);
13484543ef51SXin LI   }
13494543ef51SXin LI }
13504543ef51SXin LI END_TEST
13514543ef51SXin LI 
13524543ef51SXin LI /* See related SF bug #673791.
13534543ef51SXin LI    When namespace processing is enabled, setting the namespace URI for
13544543ef51SXin LI    a prefix is not allowed; this test ensures that it *is* allowed
13554543ef51SXin LI    when namespace processing is not enabled.
13564543ef51SXin LI    (See Namespaces in XML, section 2.)
13574543ef51SXin LI */
13584543ef51SXin LI START_TEST(test_empty_ns_without_namespaces) {
13594543ef51SXin LI   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
13604543ef51SXin LI                      "  <e xmlns:prefix=''/>\n"
13614543ef51SXin LI                      "</doc>";
13624543ef51SXin LI 
13634543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
13644543ef51SXin LI       == XML_STATUS_ERROR)
13654543ef51SXin LI     xml_failure(g_parser);
13664543ef51SXin LI }
13674543ef51SXin LI END_TEST
13684543ef51SXin LI 
13694543ef51SXin LI /* Regression test for SF bug #824420.
13704543ef51SXin LI    Checks that an xmlns:prefix attribute set in an attribute's default
13714543ef51SXin LI    value isn't misinterpreted.
13724543ef51SXin LI */
13734543ef51SXin LI START_TEST(test_ns_in_attribute_default_without_namespaces) {
13744543ef51SXin LI   const char *text = "<!DOCTYPE e:element [\n"
13754543ef51SXin LI                      "  <!ATTLIST e:element\n"
13764543ef51SXin LI                      "    xmlns:e CDATA 'http://example.org/'>\n"
13774543ef51SXin LI                      "      ]>\n"
13784543ef51SXin LI                      "<e:element/>";
13794543ef51SXin LI 
13804543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
13814543ef51SXin LI       == XML_STATUS_ERROR)
13824543ef51SXin LI     xml_failure(g_parser);
13834543ef51SXin LI }
13844543ef51SXin LI END_TEST
13854543ef51SXin LI 
13864543ef51SXin LI /* Regression test for SF bug #1515266: missing check of stopped
13874543ef51SXin LI    parser in doContext() 'for' loop. */
13884543ef51SXin LI START_TEST(test_stop_parser_between_char_data_calls) {
13894543ef51SXin LI   /* The sample data must be big enough that there are two calls to
13904543ef51SXin LI      the character data handler from within the inner "for" loop of
13914543ef51SXin LI      the XML_TOK_DATA_CHARS case in doContent(), and the character
13924543ef51SXin LI      handler must stop the parser and clear the character data
13934543ef51SXin LI      handler.
13944543ef51SXin LI   */
13954543ef51SXin LI   const char *text = long_character_data_text;
13964543ef51SXin LI 
13974543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
13984543ef51SXin LI   g_resumable = XML_FALSE;
13994543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14004543ef51SXin LI       != XML_STATUS_ERROR)
14014543ef51SXin LI     xml_failure(g_parser);
14024543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
14034543ef51SXin LI     xml_failure(g_parser);
14044543ef51SXin LI }
14054543ef51SXin LI END_TEST
14064543ef51SXin LI 
14074543ef51SXin LI /* Regression test for SF bug #1515266: missing check of stopped
14084543ef51SXin LI    parser in doContext() 'for' loop. */
14094543ef51SXin LI START_TEST(test_suspend_parser_between_char_data_calls) {
14104543ef51SXin LI   /* The sample data must be big enough that there are two calls to
14114543ef51SXin LI      the character data handler from within the inner "for" loop of
14124543ef51SXin LI      the XML_TOK_DATA_CHARS case in doContent(), and the character
14134543ef51SXin LI      handler must stop the parser and clear the character data
14144543ef51SXin LI      handler.
14154543ef51SXin LI   */
14164543ef51SXin LI   const char *text = long_character_data_text;
14174543ef51SXin LI 
14184543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
14194543ef51SXin LI   g_resumable = XML_TRUE;
14204543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14214543ef51SXin LI       != XML_STATUS_SUSPENDED)
14224543ef51SXin LI     xml_failure(g_parser);
14234543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
14244543ef51SXin LI     xml_failure(g_parser);
14254543ef51SXin LI   /* Try parsing directly */
14264543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14274543ef51SXin LI       != XML_STATUS_ERROR)
14284543ef51SXin LI     fail("Attempt to continue parse while suspended not faulted");
14294543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
14304543ef51SXin LI     fail("Suspended parse not faulted with correct error");
14314543ef51SXin LI }
14324543ef51SXin LI END_TEST
14334543ef51SXin LI 
14344543ef51SXin LI /* Test repeated calls to XML_StopParser are handled correctly */
14354543ef51SXin LI START_TEST(test_repeated_stop_parser_between_char_data_calls) {
14364543ef51SXin LI   const char *text = long_character_data_text;
14374543ef51SXin LI 
14384543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
14394543ef51SXin LI   g_resumable = XML_FALSE;
14404543ef51SXin LI   g_abortable = XML_FALSE;
14414543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14424543ef51SXin LI       != XML_STATUS_ERROR)
14434543ef51SXin LI     fail("Failed to double-stop parser");
14444543ef51SXin LI 
14454543ef51SXin LI   XML_ParserReset(g_parser, NULL);
14464543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
14474543ef51SXin LI   g_resumable = XML_TRUE;
14484543ef51SXin LI   g_abortable = XML_FALSE;
14494543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14504543ef51SXin LI       != XML_STATUS_SUSPENDED)
14514543ef51SXin LI     fail("Failed to double-suspend parser");
14524543ef51SXin LI 
14534543ef51SXin LI   XML_ParserReset(g_parser, NULL);
14544543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
14554543ef51SXin LI   g_resumable = XML_TRUE;
14564543ef51SXin LI   g_abortable = XML_TRUE;
14574543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14584543ef51SXin LI       != XML_STATUS_ERROR)
14594543ef51SXin LI     fail("Failed to suspend-abort parser");
14604543ef51SXin LI }
14614543ef51SXin LI END_TEST
14624543ef51SXin LI 
14634543ef51SXin LI START_TEST(test_good_cdata_ascii) {
14644543ef51SXin LI   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
14654543ef51SXin LI   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
14664543ef51SXin LI 
14674543ef51SXin LI   CharData storage;
14684543ef51SXin LI   CharData_Init(&storage);
14694543ef51SXin LI   XML_SetUserData(g_parser, &storage);
14704543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
14714543ef51SXin LI   /* Add start and end handlers for coverage */
14724543ef51SXin LI   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
14734543ef51SXin LI   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
14744543ef51SXin LI 
14754543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14764543ef51SXin LI       == XML_STATUS_ERROR)
14774543ef51SXin LI     xml_failure(g_parser);
14784543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
14794543ef51SXin LI 
14804543ef51SXin LI   /* Try again, this time with a default handler */
14814543ef51SXin LI   XML_ParserReset(g_parser, NULL);
14824543ef51SXin LI   CharData_Init(&storage);
14834543ef51SXin LI   XML_SetUserData(g_parser, &storage);
14844543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
14854543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
14864543ef51SXin LI 
14874543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
14884543ef51SXin LI       == XML_STATUS_ERROR)
14894543ef51SXin LI     xml_failure(g_parser);
14904543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
14914543ef51SXin LI }
14924543ef51SXin LI END_TEST
14934543ef51SXin LI 
14944543ef51SXin LI START_TEST(test_good_cdata_utf16) {
14954543ef51SXin LI   /* Test data is:
14964543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
14974543ef51SXin LI    *   <a><![CDATA[hello]]></a>
14984543ef51SXin LI    */
14994543ef51SXin LI   const char text[]
15004543ef51SXin LI       = "\0<\0?\0x\0m\0l\0"
15014543ef51SXin LI         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
15024543ef51SXin LI         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
15034543ef51SXin LI         "1\0"
15044543ef51SXin LI         "6\0'"
15054543ef51SXin LI         "\0?\0>\0\n"
15064543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
15074543ef51SXin LI   const XML_Char *expected = XCS("hello");
15084543ef51SXin LI 
15094543ef51SXin LI   CharData storage;
15104543ef51SXin LI   CharData_Init(&storage);
15114543ef51SXin LI   XML_SetUserData(g_parser, &storage);
15124543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
15134543ef51SXin LI 
15144543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
15154543ef51SXin LI       == XML_STATUS_ERROR)
15164543ef51SXin LI     xml_failure(g_parser);
15174543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
15184543ef51SXin LI }
15194543ef51SXin LI END_TEST
15204543ef51SXin LI 
15214543ef51SXin LI START_TEST(test_good_cdata_utf16_le) {
15224543ef51SXin LI   /* Test data is:
15234543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
15244543ef51SXin LI    *   <a><![CDATA[hello]]></a>
15254543ef51SXin LI    */
15264543ef51SXin LI   const char text[]
15274543ef51SXin LI       = "<\0?\0x\0m\0l\0"
15284543ef51SXin LI         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
15294543ef51SXin LI         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
15304543ef51SXin LI         "1\0"
15314543ef51SXin LI         "6\0'"
15324543ef51SXin LI         "\0?\0>\0\n"
15334543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
15344543ef51SXin LI   const XML_Char *expected = XCS("hello");
15354543ef51SXin LI 
15364543ef51SXin LI   CharData storage;
15374543ef51SXin LI   CharData_Init(&storage);
15384543ef51SXin LI   XML_SetUserData(g_parser, &storage);
15394543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
15404543ef51SXin LI 
15414543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
15424543ef51SXin LI       == XML_STATUS_ERROR)
15434543ef51SXin LI     xml_failure(g_parser);
15444543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
15454543ef51SXin LI }
15464543ef51SXin LI END_TEST
15474543ef51SXin LI 
15484543ef51SXin LI /* Test UTF16 conversion of a long cdata string */
15494543ef51SXin LI 
15504543ef51SXin LI /* 16 characters: handy macro to reduce visual clutter */
15514543ef51SXin LI #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
15524543ef51SXin LI 
15534543ef51SXin LI START_TEST(test_long_cdata_utf16) {
15544543ef51SXin LI   /* Test data is:
15554543ef51SXin LI    * <?xlm version='1.0' encoding='utf-16'?>
15564543ef51SXin LI    * <a><![CDATA[
15574543ef51SXin LI    * ABCDEFGHIJKLMNOP
15584543ef51SXin LI    * ]]></a>
15594543ef51SXin LI    */
15604543ef51SXin LI   const char text[]
15614543ef51SXin LI       = "\0<\0?\0x\0m\0l\0 "
15624543ef51SXin LI         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
15634543ef51SXin LI         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
15644543ef51SXin LI         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
15654543ef51SXin LI       /* 64 characters per line */
15664543ef51SXin LI       /* clang-format off */
15674543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15684543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15694543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15704543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15714543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15724543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15734543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15744543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15754543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15764543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15774543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15784543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15794543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15804543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15814543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15824543ef51SXin LI         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
15834543ef51SXin LI         A_TO_P_IN_UTF16
15844543ef51SXin LI         /* clang-format on */
15854543ef51SXin LI         "\0]\0]\0>\0<\0/\0a\0>";
15864543ef51SXin LI   const XML_Char *expected =
15874543ef51SXin LI       /* clang-format off */
15884543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15894543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15904543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15914543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15924543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15934543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15944543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15954543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15964543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15974543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15984543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
15994543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
16004543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
16014543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
16024543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
16034543ef51SXin LI         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
16044543ef51SXin LI         XCS("ABCDEFGHIJKLMNOP");
16054543ef51SXin LI   /* clang-format on */
16064543ef51SXin LI   CharData storage;
16074543ef51SXin LI   void *buffer;
16084543ef51SXin LI 
16094543ef51SXin LI   CharData_Init(&storage);
16104543ef51SXin LI   XML_SetUserData(g_parser, &storage);
16114543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
16124543ef51SXin LI   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
16134543ef51SXin LI   if (buffer == NULL)
16144543ef51SXin LI     fail("Could not allocate parse buffer");
16154543ef51SXin LI   assert(buffer != NULL);
16164543ef51SXin LI   memcpy(buffer, text, sizeof(text) - 1);
16174543ef51SXin LI   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
16184543ef51SXin LI     xml_failure(g_parser);
16194543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
16204543ef51SXin LI }
16214543ef51SXin LI END_TEST
16224543ef51SXin LI 
16234543ef51SXin LI /* Test handling of multiple unit UTF-16 characters */
16244543ef51SXin LI START_TEST(test_multichar_cdata_utf16) {
16254543ef51SXin LI   /* Test data is:
16264543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
16274543ef51SXin LI    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
16284543ef51SXin LI    *
16294543ef51SXin LI    * where {MINIM} is U+1d15e (a minim or half-note)
16304543ef51SXin LI    *   UTF-16: 0xd834 0xdd5e
16314543ef51SXin LI    *   UTF-8:  0xf0 0x9d 0x85 0x9e
16324543ef51SXin LI    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
16334543ef51SXin LI    *   UTF-16: 0xd834 0xdd5f
16344543ef51SXin LI    *   UTF-8:  0xf0 0x9d 0x85 0x9f
16354543ef51SXin LI    */
16364543ef51SXin LI   const char text[] = "\0<\0?\0x\0m\0l\0"
16374543ef51SXin LI                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
16384543ef51SXin LI                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
16394543ef51SXin LI                       "1\0"
16404543ef51SXin LI                       "6\0'"
16414543ef51SXin LI                       "\0?\0>\0\n"
16424543ef51SXin LI                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
16434543ef51SXin LI                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
16444543ef51SXin LI                       "\0]\0]\0>\0<\0/\0a\0>";
16454543ef51SXin LI #ifdef XML_UNICODE
16464543ef51SXin LI   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
16474543ef51SXin LI #else
16484543ef51SXin LI   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
16494543ef51SXin LI #endif
16504543ef51SXin LI   CharData storage;
16514543ef51SXin LI 
16524543ef51SXin LI   CharData_Init(&storage);
16534543ef51SXin LI   XML_SetUserData(g_parser, &storage);
16544543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
16554543ef51SXin LI 
16564543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
16574543ef51SXin LI       == XML_STATUS_ERROR)
16584543ef51SXin LI     xml_failure(g_parser);
16594543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
16604543ef51SXin LI }
16614543ef51SXin LI END_TEST
16624543ef51SXin LI 
16634543ef51SXin LI /* Test that an element name with a UTF-16 surrogate pair is rejected */
16644543ef51SXin LI START_TEST(test_utf16_bad_surrogate_pair) {
16654543ef51SXin LI   /* Test data is:
16664543ef51SXin LI    *   <?xml version='1.0' encoding='utf-16'?>
16674543ef51SXin LI    *   <a><![CDATA[{BADLINB}]]></a>
16684543ef51SXin LI    *
16694543ef51SXin LI    * where {BADLINB} is U+10000 (the first Linear B character)
16704543ef51SXin LI    * with the UTF-16 surrogate pair in the wrong order, i.e.
16714543ef51SXin LI    *   0xdc00 0xd800
16724543ef51SXin LI    */
16734543ef51SXin LI   const char text[] = "\0<\0?\0x\0m\0l\0"
16744543ef51SXin LI                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
16754543ef51SXin LI                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
16764543ef51SXin LI                       "1\0"
16774543ef51SXin LI                       "6\0'"
16784543ef51SXin LI                       "\0?\0>\0\n"
16794543ef51SXin LI                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
16804543ef51SXin LI                       "\xdc\x00\xd8\x00"
16814543ef51SXin LI                       "\0]\0]\0>\0<\0/\0a\0>";
16824543ef51SXin LI 
16834543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
16844543ef51SXin LI       != XML_STATUS_ERROR)
16854543ef51SXin LI     fail("Reversed UTF-16 surrogate pair not faulted");
16864543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
16874543ef51SXin LI     xml_failure(g_parser);
16884543ef51SXin LI }
16894543ef51SXin LI END_TEST
16904543ef51SXin LI 
16914543ef51SXin LI START_TEST(test_bad_cdata) {
16924543ef51SXin LI   struct CaseData {
16934543ef51SXin LI     const char *text;
16944543ef51SXin LI     enum XML_Error expectedError;
16954543ef51SXin LI   };
16964543ef51SXin LI 
16974543ef51SXin LI   struct CaseData cases[]
16984543ef51SXin LI       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
16994543ef51SXin LI          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
17004543ef51SXin LI          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
17014543ef51SXin LI          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
17024543ef51SXin LI          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
17034543ef51SXin LI          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
17044543ef51SXin LI          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
17054543ef51SXin LI          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
17064543ef51SXin LI 
17074543ef51SXin LI          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
17084543ef51SXin LI          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
17094543ef51SXin LI          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
17104543ef51SXin LI 
17114543ef51SXin LI          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
17124543ef51SXin LI          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
17134543ef51SXin LI          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
17144543ef51SXin LI          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
17154543ef51SXin LI          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
17164543ef51SXin LI          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
17174543ef51SXin LI          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
17184543ef51SXin LI 
17194543ef51SXin LI          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
17204543ef51SXin LI          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
17214543ef51SXin LI          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
17224543ef51SXin LI 
17234543ef51SXin LI   size_t i = 0;
17244543ef51SXin LI   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
17254543ef51SXin LI     set_subtest("%s", cases[i].text);
17264543ef51SXin LI     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
17274543ef51SXin LI         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
17284543ef51SXin LI     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
17294543ef51SXin LI 
17304543ef51SXin LI     assert(actualStatus == XML_STATUS_ERROR);
17314543ef51SXin LI 
17324543ef51SXin LI     if (actualError != cases[i].expectedError) {
17334543ef51SXin LI       char message[100];
17344543ef51SXin LI       snprintf(message, sizeof(message),
17354543ef51SXin LI                "Expected error %d but got error %d for case %u: \"%s\"\n",
17364543ef51SXin LI                cases[i].expectedError, actualError, (unsigned int)i + 1,
17374543ef51SXin LI                cases[i].text);
17384543ef51SXin LI       fail(message);
17394543ef51SXin LI     }
17404543ef51SXin LI 
17414543ef51SXin LI     XML_ParserReset(g_parser, NULL);
17424543ef51SXin LI   }
17434543ef51SXin LI }
17444543ef51SXin LI END_TEST
17454543ef51SXin LI 
17464543ef51SXin LI /* Test failures in UTF-16 CDATA */
17474543ef51SXin LI START_TEST(test_bad_cdata_utf16) {
17484543ef51SXin LI   struct CaseData {
17494543ef51SXin LI     size_t text_bytes;
17504543ef51SXin LI     const char *text;
17514543ef51SXin LI     enum XML_Error expected_error;
17524543ef51SXin LI   };
17534543ef51SXin LI 
17544543ef51SXin LI   const char prolog[] = "\0<\0?\0x\0m\0l\0"
17554543ef51SXin LI                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
17564543ef51SXin LI                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
17574543ef51SXin LI                         "1\0"
17584543ef51SXin LI                         "6\0'"
17594543ef51SXin LI                         "\0?\0>\0\n"
17604543ef51SXin LI                         "\0<\0a\0>";
17614543ef51SXin LI   struct CaseData cases[] = {
17624543ef51SXin LI       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
17634543ef51SXin LI       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
17644543ef51SXin LI       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
17654543ef51SXin LI       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
17664543ef51SXin LI       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
17674543ef51SXin LI       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
17684543ef51SXin LI       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
17694543ef51SXin LI       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
17704543ef51SXin LI       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
17714543ef51SXin LI       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
17724543ef51SXin LI       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
17734543ef51SXin LI       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
17744543ef51SXin LI       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
17754543ef51SXin LI       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
17764543ef51SXin LI       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
17774543ef51SXin LI       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
17784543ef51SXin LI       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
17794543ef51SXin LI       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
17804543ef51SXin LI       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
17814543ef51SXin LI       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
17824543ef51SXin LI       /* Now add a four-byte UTF-16 character */
17834543ef51SXin LI       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
17844543ef51SXin LI        XML_ERROR_UNCLOSED_CDATA_SECTION},
17854543ef51SXin LI       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
17864543ef51SXin LI       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
17874543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
17884543ef51SXin LI       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
17894543ef51SXin LI        XML_ERROR_UNCLOSED_CDATA_SECTION}};
17904543ef51SXin LI   size_t i;
17914543ef51SXin LI 
17924543ef51SXin LI   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
17934543ef51SXin LI     set_subtest("case %lu", (long unsigned)(i + 1));
17944543ef51SXin LI     enum XML_Status actual_status;
17954543ef51SXin LI     enum XML_Error actual_error;
17964543ef51SXin LI 
17974543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
17984543ef51SXin LI                                 XML_FALSE)
17994543ef51SXin LI         == XML_STATUS_ERROR)
18004543ef51SXin LI       xml_failure(g_parser);
18014543ef51SXin LI     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
18024543ef51SXin LI                                             (int)cases[i].text_bytes, XML_TRUE);
18034543ef51SXin LI     assert(actual_status == XML_STATUS_ERROR);
18044543ef51SXin LI     actual_error = XML_GetErrorCode(g_parser);
18054543ef51SXin LI     if (actual_error != cases[i].expected_error) {
18064543ef51SXin LI       char message[1024];
18074543ef51SXin LI 
18084543ef51SXin LI       snprintf(message, sizeof(message),
18094543ef51SXin LI                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
18104543ef51SXin LI                ") for case %lu\n",
18114543ef51SXin LI                cases[i].expected_error,
18124543ef51SXin LI                XML_ErrorString(cases[i].expected_error), actual_error,
18134543ef51SXin LI                XML_ErrorString(actual_error), (long unsigned)(i + 1));
18144543ef51SXin LI       fail(message);
18154543ef51SXin LI     }
18164543ef51SXin LI     XML_ParserReset(g_parser, NULL);
18174543ef51SXin LI   }
18184543ef51SXin LI }
18194543ef51SXin LI END_TEST
18204543ef51SXin LI 
18214543ef51SXin LI /* Test stopping the parser in cdata handler */
18224543ef51SXin LI START_TEST(test_stop_parser_between_cdata_calls) {
18234543ef51SXin LI   const char *text = long_cdata_text;
18244543ef51SXin LI 
18254543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
18264543ef51SXin LI   g_resumable = XML_FALSE;
18274543ef51SXin LI   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
18284543ef51SXin LI }
18294543ef51SXin LI END_TEST
18304543ef51SXin LI 
18314543ef51SXin LI /* Test suspending the parser in cdata handler */
18324543ef51SXin LI START_TEST(test_suspend_parser_between_cdata_calls) {
18334543ef51SXin LI   const char *text = long_cdata_text;
18344543ef51SXin LI   enum XML_Status result;
18354543ef51SXin LI 
18364543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
18374543ef51SXin LI   g_resumable = XML_TRUE;
18384543ef51SXin LI   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
18394543ef51SXin LI   if (result != XML_STATUS_SUSPENDED) {
18404543ef51SXin LI     if (result == XML_STATUS_ERROR)
18414543ef51SXin LI       xml_failure(g_parser);
18424543ef51SXin LI     fail("Parse not suspended in CDATA handler");
18434543ef51SXin LI   }
18444543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
18454543ef51SXin LI     xml_failure(g_parser);
18464543ef51SXin LI }
18474543ef51SXin LI END_TEST
18484543ef51SXin LI 
18494543ef51SXin LI /* Test memory allocation functions */
18504543ef51SXin LI START_TEST(test_memory_allocation) {
18514543ef51SXin LI   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
18524543ef51SXin LI   char *p;
18534543ef51SXin LI 
18544543ef51SXin LI   if (buffer == NULL) {
18554543ef51SXin LI     fail("Allocation failed");
18564543ef51SXin LI   } else {
18574543ef51SXin LI     /* Try writing to memory; some OSes try to cheat! */
18584543ef51SXin LI     buffer[0] = 'T';
18594543ef51SXin LI     buffer[1] = 'E';
18604543ef51SXin LI     buffer[2] = 'S';
18614543ef51SXin LI     buffer[3] = 'T';
18624543ef51SXin LI     buffer[4] = '\0';
18634543ef51SXin LI     if (strcmp(buffer, "TEST") != 0) {
18644543ef51SXin LI       fail("Memory not writable");
18654543ef51SXin LI     } else {
18664543ef51SXin LI       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
18674543ef51SXin LI       if (p == NULL) {
18684543ef51SXin LI         fail("Reallocation failed");
18694543ef51SXin LI       } else {
18704543ef51SXin LI         /* Write again, just to be sure */
18714543ef51SXin LI         buffer = p;
18724543ef51SXin LI         buffer[0] = 'V';
18734543ef51SXin LI         if (strcmp(buffer, "VEST") != 0) {
18744543ef51SXin LI           fail("Reallocated memory not writable");
18754543ef51SXin LI         }
18764543ef51SXin LI       }
18774543ef51SXin LI     }
18784543ef51SXin LI     XML_MemFree(g_parser, buffer);
18794543ef51SXin LI   }
18804543ef51SXin LI }
18814543ef51SXin LI END_TEST
18824543ef51SXin LI 
18834543ef51SXin LI /* Test XML_DefaultCurrent() passes handling on correctly */
18844543ef51SXin LI START_TEST(test_default_current) {
18854543ef51SXin LI   const char *text = "<doc>hell]</doc>";
18864543ef51SXin LI   const char *entity_text = "<!DOCTYPE doc [\n"
18874543ef51SXin LI                             "<!ENTITY entity '&#37;'>\n"
18884543ef51SXin LI                             "]>\n"
18894543ef51SXin LI                             "<doc>&entity;</doc>";
18904543ef51SXin LI 
18914543ef51SXin LI   set_subtest("with defaulting");
18924543ef51SXin LI   {
18934543ef51SXin LI     struct handler_record_list storage;
18944543ef51SXin LI     storage.count = 0;
18954543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
18964543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
18974543ef51SXin LI     XML_SetUserData(g_parser, &storage);
18984543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
18994543ef51SXin LI         == XML_STATUS_ERROR)
19004543ef51SXin LI       xml_failure(g_parser);
19014543ef51SXin LI     int i = 0;
19024543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
19034543ef51SXin LI     // we should have gotten one or more cdata callbacks, totaling 5 chars
19044543ef51SXin LI     int cdata_len_remaining = 5;
19054543ef51SXin LI     while (cdata_len_remaining > 0) {
19064543ef51SXin LI       const struct handler_record_entry *c_entry
19074543ef51SXin LI           = handler_record_get(&storage, i++);
19084543ef51SXin LI       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
19094543ef51SXin LI       assert_true(c_entry->arg > 0);
19104543ef51SXin LI       assert_true(c_entry->arg <= cdata_len_remaining);
19114543ef51SXin LI       cdata_len_remaining -= c_entry->arg;
19124543ef51SXin LI       // default handler must follow, with the exact same len argument.
19134543ef51SXin LI       assert_record_handler_called(&storage, i++, "record_default_handler",
19144543ef51SXin LI                                    c_entry->arg);
19154543ef51SXin LI     }
19164543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
19174543ef51SXin LI     assert_true(storage.count == i);
19184543ef51SXin LI   }
19194543ef51SXin LI 
19204543ef51SXin LI   /* Again, without the defaulting */
19214543ef51SXin LI   set_subtest("no defaulting");
19224543ef51SXin LI   {
19234543ef51SXin LI     struct handler_record_list storage;
19244543ef51SXin LI     storage.count = 0;
19254543ef51SXin LI     XML_ParserReset(g_parser, NULL);
19264543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
19274543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
19284543ef51SXin LI     XML_SetUserData(g_parser, &storage);
19294543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
19304543ef51SXin LI         == XML_STATUS_ERROR)
19314543ef51SXin LI       xml_failure(g_parser);
19324543ef51SXin LI     int i = 0;
19334543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
19344543ef51SXin LI     // we should have gotten one or more cdata callbacks, totaling 5 chars
19354543ef51SXin LI     int cdata_len_remaining = 5;
19364543ef51SXin LI     while (cdata_len_remaining > 0) {
19374543ef51SXin LI       const struct handler_record_entry *c_entry
19384543ef51SXin LI           = handler_record_get(&storage, i++);
19394543ef51SXin LI       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
19404543ef51SXin LI       assert_true(c_entry->arg > 0);
19414543ef51SXin LI       assert_true(c_entry->arg <= cdata_len_remaining);
19424543ef51SXin LI       cdata_len_remaining -= c_entry->arg;
19434543ef51SXin LI     }
19444543ef51SXin LI     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
19454543ef51SXin LI     assert_true(storage.count == i);
19464543ef51SXin LI   }
19474543ef51SXin LI 
19484543ef51SXin LI   /* Now with an internal entity to complicate matters */
19494543ef51SXin LI   set_subtest("with internal entity");
19504543ef51SXin LI   {
19514543ef51SXin LI     struct handler_record_list storage;
19524543ef51SXin LI     storage.count = 0;
19534543ef51SXin LI     XML_ParserReset(g_parser, NULL);
19544543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
19554543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
19564543ef51SXin LI     XML_SetUserData(g_parser, &storage);
19574543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
19584543ef51SXin LI                                 XML_TRUE)
19594543ef51SXin LI         == XML_STATUS_ERROR)
19604543ef51SXin LI       xml_failure(g_parser);
19614543ef51SXin LI     /* The default handler suppresses the entity */
19624543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
19634543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
19644543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
19654543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
19664543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
19674543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
19684543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
19694543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
19704543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
19714543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
19724543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
19734543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
19744543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
19754543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
19764543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
19774543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
19784543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
19794543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
19804543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
19814543ef51SXin LI     assert_true(storage.count == 19);
19824543ef51SXin LI   }
19834543ef51SXin LI 
19844543ef51SXin LI   /* Again, with a skip handler */
19854543ef51SXin LI   set_subtest("with skip handler");
19864543ef51SXin LI   {
19874543ef51SXin LI     struct handler_record_list storage;
19884543ef51SXin LI     storage.count = 0;
19894543ef51SXin LI     XML_ParserReset(g_parser, NULL);
19904543ef51SXin LI     XML_SetDefaultHandler(g_parser, record_default_handler);
19914543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
19924543ef51SXin LI     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
19934543ef51SXin LI     XML_SetUserData(g_parser, &storage);
19944543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
19954543ef51SXin LI                                 XML_TRUE)
19964543ef51SXin LI         == XML_STATUS_ERROR)
19974543ef51SXin LI       xml_failure(g_parser);
19984543ef51SXin LI     /* The default handler suppresses the entity */
19994543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
20004543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
20014543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
20024543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
20034543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
20044543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
20054543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
20064543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
20074543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
20084543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
20094543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
20104543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
20114543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
20124543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
20134543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
20144543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
20154543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
20164543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
20174543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
20184543ef51SXin LI     assert_true(storage.count == 19);
20194543ef51SXin LI   }
20204543ef51SXin LI 
20214543ef51SXin LI   /* This time, allow the entity through */
20224543ef51SXin LI   set_subtest("allow entity");
20234543ef51SXin LI   {
20244543ef51SXin LI     struct handler_record_list storage;
20254543ef51SXin LI     storage.count = 0;
20264543ef51SXin LI     XML_ParserReset(g_parser, NULL);
20274543ef51SXin LI     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
20284543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
20294543ef51SXin LI     XML_SetUserData(g_parser, &storage);
20304543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
20314543ef51SXin LI                                 XML_TRUE)
20324543ef51SXin LI         == XML_STATUS_ERROR)
20334543ef51SXin LI       xml_failure(g_parser);
20344543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
20354543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
20364543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
20374543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
20384543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
20394543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
20404543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
20414543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
20424543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
20434543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
20444543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
20454543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
20464543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
20474543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
20484543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
20494543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
20504543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
20514543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
20524543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
20534543ef51SXin LI     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
20544543ef51SXin LI     assert_true(storage.count == 20);
20554543ef51SXin LI   }
20564543ef51SXin LI 
20574543ef51SXin LI   /* Finally, without passing the cdata to the default handler */
20584543ef51SXin LI   set_subtest("not passing cdata");
20594543ef51SXin LI   {
20604543ef51SXin LI     struct handler_record_list storage;
20614543ef51SXin LI     storage.count = 0;
20624543ef51SXin LI     XML_ParserReset(g_parser, NULL);
20634543ef51SXin LI     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
20644543ef51SXin LI     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
20654543ef51SXin LI     XML_SetUserData(g_parser, &storage);
20664543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
20674543ef51SXin LI                                 XML_TRUE)
20684543ef51SXin LI         == XML_STATUS_ERROR)
20694543ef51SXin LI       xml_failure(g_parser);
20704543ef51SXin LI     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
20714543ef51SXin LI     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
20724543ef51SXin LI     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
20734543ef51SXin LI     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
20744543ef51SXin LI     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
20754543ef51SXin LI     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
20764543ef51SXin LI     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
20774543ef51SXin LI     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
20784543ef51SXin LI     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
20794543ef51SXin LI     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
20804543ef51SXin LI     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
20814543ef51SXin LI     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
20824543ef51SXin LI     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
20834543ef51SXin LI     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
20844543ef51SXin LI     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
20854543ef51SXin LI     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
20864543ef51SXin LI     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
20874543ef51SXin LI     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
20884543ef51SXin LI                                  1);
20894543ef51SXin LI     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
20904543ef51SXin LI     assert_true(storage.count == 19);
20914543ef51SXin LI   }
20924543ef51SXin LI }
20934543ef51SXin LI END_TEST
20944543ef51SXin LI 
20954543ef51SXin LI /* Test DTD element parsing code paths */
20964543ef51SXin LI START_TEST(test_dtd_elements) {
20974543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
20984543ef51SXin LI                      "<!ELEMENT doc (chapter)>\n"
20994543ef51SXin LI                      "<!ELEMENT chapter (#PCDATA)>\n"
21004543ef51SXin LI                      "]>\n"
21014543ef51SXin LI                      "<doc><chapter>Wombats are go</chapter></doc>";
21024543ef51SXin LI 
21034543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
21044543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
21054543ef51SXin LI       == XML_STATUS_ERROR)
21064543ef51SXin LI     xml_failure(g_parser);
21074543ef51SXin LI }
21084543ef51SXin LI END_TEST
21094543ef51SXin LI 
21104543ef51SXin LI static void XMLCALL
21114543ef51SXin LI element_decl_check_model(void *userData, const XML_Char *name,
21124543ef51SXin LI                          XML_Content *model) {
21134543ef51SXin LI   UNUSED_P(userData);
21144543ef51SXin LI   uint32_t errorFlags = 0;
21154543ef51SXin LI 
21164543ef51SXin LI   /* Expected model array structure is this:
21174543ef51SXin LI    * [0] (type 6, quant 0)
21184543ef51SXin LI    *   [1] (type 5, quant 0)
21194543ef51SXin LI    *     [3] (type 4, quant 0, name "bar")
21204543ef51SXin LI    *     [4] (type 4, quant 0, name "foo")
21214543ef51SXin LI    *     [5] (type 4, quant 3, name "xyz")
21224543ef51SXin LI    *   [2] (type 4, quant 2, name "zebra")
21234543ef51SXin LI    */
21244543ef51SXin LI   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
21254543ef51SXin LI   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
21264543ef51SXin LI 
21274543ef51SXin LI   if (model != NULL) {
21284543ef51SXin LI     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
21294543ef51SXin LI     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
21304543ef51SXin LI     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
21314543ef51SXin LI     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
21324543ef51SXin LI     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
21334543ef51SXin LI 
21344543ef51SXin LI     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
21354543ef51SXin LI     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
21364543ef51SXin LI     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
21374543ef51SXin LI     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
21384543ef51SXin LI     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
21394543ef51SXin LI 
21404543ef51SXin LI     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
21414543ef51SXin LI     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
21424543ef51SXin LI     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
21434543ef51SXin LI     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
21444543ef51SXin LI     errorFlags
21454543ef51SXin LI         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
21464543ef51SXin LI 
21474543ef51SXin LI     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
21484543ef51SXin LI     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
21494543ef51SXin LI     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
21504543ef51SXin LI     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
21514543ef51SXin LI     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
21524543ef51SXin LI 
21534543ef51SXin LI     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
21544543ef51SXin LI     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
21554543ef51SXin LI     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
21564543ef51SXin LI     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
21574543ef51SXin LI     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
21584543ef51SXin LI 
21594543ef51SXin LI     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
21604543ef51SXin LI     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
21614543ef51SXin LI     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
21624543ef51SXin LI     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
21634543ef51SXin LI     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
21644543ef51SXin LI   }
21654543ef51SXin LI 
21664543ef51SXin LI   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
21674543ef51SXin LI   XML_FreeContentModel(g_parser, model);
21684543ef51SXin LI }
21694543ef51SXin LI 
21704543ef51SXin LI START_TEST(test_dtd_elements_nesting) {
21714543ef51SXin LI   // Payload inspired by a test in Perl's XML::Parser
21724543ef51SXin LI   const char *text = "<!DOCTYPE foo [\n"
21734543ef51SXin LI                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
21744543ef51SXin LI                      "]>\n"
21754543ef51SXin LI                      "<foo/>";
21764543ef51SXin LI 
21774543ef51SXin LI   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
21784543ef51SXin LI 
21794543ef51SXin LI   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
21804543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
21814543ef51SXin LI       == XML_STATUS_ERROR)
21824543ef51SXin LI     xml_failure(g_parser);
21834543ef51SXin LI 
21844543ef51SXin LI   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
21854543ef51SXin LI     fail("Element declaration model regression detected");
21864543ef51SXin LI }
21874543ef51SXin LI END_TEST
21884543ef51SXin LI 
21894543ef51SXin LI /* Test foreign DTD handling */
21904543ef51SXin LI START_TEST(test_set_foreign_dtd) {
21914543ef51SXin LI   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
21924543ef51SXin LI   const char *text2 = "<doc>&entity;</doc>";
21934543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
21944543ef51SXin LI 
21954543ef51SXin LI   /* Check hash salt is passed through too */
21964543ef51SXin LI   XML_SetHashSalt(g_parser, 0x12345678);
21974543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
21984543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
21994543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
22004543ef51SXin LI   /* Add a default handler to exercise more code paths */
22014543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
22024543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
22034543ef51SXin LI     fail("Could not set foreign DTD");
22044543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
22054543ef51SXin LI       == XML_STATUS_ERROR)
22064543ef51SXin LI     xml_failure(g_parser);
22074543ef51SXin LI 
22084543ef51SXin LI   /* Ensure that trying to set the DTD after parsing has started
22094543ef51SXin LI    * is faulted, even if it's the same setting.
22104543ef51SXin LI    */
22114543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE)
22124543ef51SXin LI       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
22134543ef51SXin LI     fail("Failed to reject late foreign DTD setting");
22144543ef51SXin LI   /* Ditto for the hash salt */
22154543ef51SXin LI   if (XML_SetHashSalt(g_parser, 0x23456789))
22164543ef51SXin LI     fail("Failed to reject late hash salt change");
22174543ef51SXin LI 
22184543ef51SXin LI   /* Now finish the parse */
22194543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
22204543ef51SXin LI       == XML_STATUS_ERROR)
22214543ef51SXin LI     xml_failure(g_parser);
22224543ef51SXin LI }
22234543ef51SXin LI END_TEST
22244543ef51SXin LI 
22254543ef51SXin LI /* Test foreign DTD handling with a failing NotStandalone handler */
22264543ef51SXin LI START_TEST(test_foreign_dtd_not_standalone) {
22274543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
22284543ef51SXin LI                      "<doc>&entity;</doc>";
22294543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
22304543ef51SXin LI 
22314543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
22324543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
22334543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
22344543ef51SXin LI   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
22354543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
22364543ef51SXin LI     fail("Could not set foreign DTD");
22374543ef51SXin LI   expect_failure(text, XML_ERROR_NOT_STANDALONE,
22384543ef51SXin LI                  "NotStandalonehandler failed to reject");
22394543ef51SXin LI }
22404543ef51SXin LI END_TEST
22414543ef51SXin LI 
22424543ef51SXin LI /* Test invalid character in a foreign DTD is faulted */
22434543ef51SXin LI START_TEST(test_invalid_foreign_dtd) {
22444543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
22454543ef51SXin LI                      "<doc>&entity;</doc>";
22464543ef51SXin LI   ExtFaults test_data
22474543ef51SXin LI       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
22484543ef51SXin LI 
22494543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
22504543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
22514543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
22524543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
22534543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
22544543ef51SXin LI                  "Bad DTD should not have been accepted");
22554543ef51SXin LI }
22564543ef51SXin LI END_TEST
22574543ef51SXin LI 
22584543ef51SXin LI /* Test foreign DTD use with a doctype */
22594543ef51SXin LI START_TEST(test_foreign_dtd_with_doctype) {
22604543ef51SXin LI   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
22614543ef51SXin LI                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
22624543ef51SXin LI   const char *text2 = "<doc>&entity;</doc>";
22634543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
22644543ef51SXin LI 
22654543ef51SXin LI   /* Check hash salt is passed through too */
22664543ef51SXin LI   XML_SetHashSalt(g_parser, 0x12345678);
22674543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
22684543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
22694543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
22704543ef51SXin LI   /* Add a default handler to exercise more code paths */
22714543ef51SXin LI   XML_SetDefaultHandler(g_parser, dummy_default_handler);
22724543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
22734543ef51SXin LI     fail("Could not set foreign DTD");
22744543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
22754543ef51SXin LI       == XML_STATUS_ERROR)
22764543ef51SXin LI     xml_failure(g_parser);
22774543ef51SXin LI 
22784543ef51SXin LI   /* Ensure that trying to set the DTD after parsing has started
22794543ef51SXin LI    * is faulted, even if it's the same setting.
22804543ef51SXin LI    */
22814543ef51SXin LI   if (XML_UseForeignDTD(g_parser, XML_TRUE)
22824543ef51SXin LI       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
22834543ef51SXin LI     fail("Failed to reject late foreign DTD setting");
22844543ef51SXin LI   /* Ditto for the hash salt */
22854543ef51SXin LI   if (XML_SetHashSalt(g_parser, 0x23456789))
22864543ef51SXin LI     fail("Failed to reject late hash salt change");
22874543ef51SXin LI 
22884543ef51SXin LI   /* Now finish the parse */
22894543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
22904543ef51SXin LI       == XML_STATUS_ERROR)
22914543ef51SXin LI     xml_failure(g_parser);
22924543ef51SXin LI }
22934543ef51SXin LI END_TEST
22944543ef51SXin LI 
22954543ef51SXin LI /* Test XML_UseForeignDTD with no external subset present */
22964543ef51SXin LI START_TEST(test_foreign_dtd_without_external_subset) {
22974543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
22984543ef51SXin LI                      "<doc>&foo;</doc>";
22994543ef51SXin LI 
23004543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
23014543ef51SXin LI   XML_SetUserData(g_parser, NULL);
23024543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
23034543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
23044543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
23054543ef51SXin LI       == XML_STATUS_ERROR)
23064543ef51SXin LI     xml_failure(g_parser);
23074543ef51SXin LI }
23084543ef51SXin LI END_TEST
23094543ef51SXin LI 
23104543ef51SXin LI START_TEST(test_empty_foreign_dtd) {
23114543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
23124543ef51SXin LI                      "<doc>&entity;</doc>";
23134543ef51SXin LI 
23144543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
23154543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
23164543ef51SXin LI   XML_UseForeignDTD(g_parser, XML_TRUE);
23174543ef51SXin LI   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
23184543ef51SXin LI                  "Undefined entity not faulted");
23194543ef51SXin LI }
23204543ef51SXin LI END_TEST
23214543ef51SXin LI 
23224543ef51SXin LI /* Test XML Base is set and unset appropriately */
23234543ef51SXin LI START_TEST(test_set_base) {
23244543ef51SXin LI   const XML_Char *old_base;
23254543ef51SXin LI   const XML_Char *new_base = XCS("/local/file/name.xml");
23264543ef51SXin LI 
23274543ef51SXin LI   old_base = XML_GetBase(g_parser);
23284543ef51SXin LI   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
23294543ef51SXin LI     fail("Unable to set base");
23304543ef51SXin LI   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
23314543ef51SXin LI     fail("Base setting not correct");
23324543ef51SXin LI   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
23334543ef51SXin LI     fail("Unable to NULL base");
23344543ef51SXin LI   if (XML_GetBase(g_parser) != NULL)
23354543ef51SXin LI     fail("Base setting not nulled");
23364543ef51SXin LI   XML_SetBase(g_parser, old_base);
23374543ef51SXin LI }
23384543ef51SXin LI END_TEST
23394543ef51SXin LI 
23404543ef51SXin LI /* Test attribute counts, indexing, etc */
23414543ef51SXin LI START_TEST(test_attributes) {
23424543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
23434543ef51SXin LI                      "<!ELEMENT doc (tag)>\n"
23444543ef51SXin LI                      "<!ATTLIST doc id ID #REQUIRED>\n"
23454543ef51SXin LI                      "]>"
23464543ef51SXin LI                      "<doc a='1' id='one' b='2'>"
23474543ef51SXin LI                      "<tag c='3'/>"
23484543ef51SXin LI                      "</doc>";
23494543ef51SXin LI   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
23504543ef51SXin LI                          {XCS("b"), XCS("2")},
23514543ef51SXin LI                          {XCS("id"), XCS("one")},
23524543ef51SXin LI                          {NULL, NULL}};
23534543ef51SXin LI   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
23544543ef51SXin LI   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
23554543ef51SXin LI                         {XCS("tag"), 1, NULL, NULL},
23564543ef51SXin LI                         {NULL, 0, NULL, NULL}};
23574543ef51SXin LI   info[0].attributes = doc_info;
23584543ef51SXin LI   info[1].attributes = tag_info;
23594543ef51SXin LI 
2360*908f215eSXin LI   XML_Parser parser = XML_ParserCreate(NULL);
2361*908f215eSXin LI   assert_true(parser != NULL);
2362*908f215eSXin LI   ParserAndElementInfo parserAndElementInfos = {
2363*908f215eSXin LI       parser,
2364*908f215eSXin LI       info,
2365*908f215eSXin LI   };
2366*908f215eSXin LI 
2367*908f215eSXin LI   XML_SetStartElementHandler(parser, counting_start_element_handler);
2368*908f215eSXin LI   XML_SetUserData(parser, &parserAndElementInfos);
2369*908f215eSXin LI   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
23704543ef51SXin LI       == XML_STATUS_ERROR)
2371*908f215eSXin LI     xml_failure(parser);
2372*908f215eSXin LI 
2373*908f215eSXin LI   XML_ParserFree(parser);
23744543ef51SXin LI }
23754543ef51SXin LI END_TEST
23764543ef51SXin LI 
23774543ef51SXin LI /* Test reset works correctly in the middle of processing an internal
23784543ef51SXin LI  * entity.  Exercises some obscure code in XML_ParserReset().
23794543ef51SXin LI  */
23804543ef51SXin LI START_TEST(test_reset_in_entity) {
23814543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
23824543ef51SXin LI                      "<!ENTITY wombat 'wom'>\n"
23834543ef51SXin LI                      "<!ENTITY entity 'hi &wom; there'>\n"
23844543ef51SXin LI                      "]>\n"
23854543ef51SXin LI                      "<doc>&entity;</doc>";
23864543ef51SXin LI   XML_ParsingStatus status;
23874543ef51SXin LI 
23884543ef51SXin LI   g_resumable = XML_TRUE;
23894543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
23904543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
23914543ef51SXin LI       == XML_STATUS_ERROR)
23924543ef51SXin LI     xml_failure(g_parser);
23934543ef51SXin LI   XML_GetParsingStatus(g_parser, &status);
23944543ef51SXin LI   if (status.parsing != XML_SUSPENDED)
23954543ef51SXin LI     fail("Parsing status not SUSPENDED");
23964543ef51SXin LI   XML_ParserReset(g_parser, NULL);
23974543ef51SXin LI   XML_GetParsingStatus(g_parser, &status);
23984543ef51SXin LI   if (status.parsing != XML_INITIALIZED)
23994543ef51SXin LI     fail("Parsing status doesn't reset to INITIALIZED");
24004543ef51SXin LI }
24014543ef51SXin LI END_TEST
24024543ef51SXin LI 
24034543ef51SXin LI /* Test that resume correctly passes through parse errors */
24044543ef51SXin LI START_TEST(test_resume_invalid_parse) {
24054543ef51SXin LI   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
24064543ef51SXin LI 
24074543ef51SXin LI   g_resumable = XML_TRUE;
24084543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
24094543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
24104543ef51SXin LI       == XML_STATUS_ERROR)
24114543ef51SXin LI     xml_failure(g_parser);
24124543ef51SXin LI   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
24134543ef51SXin LI     fail("Resumed invalid parse not faulted");
24144543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
24154543ef51SXin LI     fail("Invalid parse not correctly faulted");
24164543ef51SXin LI }
24174543ef51SXin LI END_TEST
24184543ef51SXin LI 
24194543ef51SXin LI /* Test that re-suspended parses are correctly passed through */
24204543ef51SXin LI START_TEST(test_resume_resuspended) {
24214543ef51SXin LI   const char *text = "<doc>Hello<meep/>world</doc>";
24224543ef51SXin LI 
24234543ef51SXin LI   g_resumable = XML_TRUE;
24244543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
24254543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
24264543ef51SXin LI       == XML_STATUS_ERROR)
24274543ef51SXin LI     xml_failure(g_parser);
24284543ef51SXin LI   g_resumable = XML_TRUE;
24294543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
24304543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
24314543ef51SXin LI     fail("Resumption not suspended");
24324543ef51SXin LI   /* This one should succeed and finish up */
24334543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
24344543ef51SXin LI     xml_failure(g_parser);
24354543ef51SXin LI }
24364543ef51SXin LI END_TEST
24374543ef51SXin LI 
24384543ef51SXin LI /* Test that CDATA shows up correctly through a default handler */
24394543ef51SXin LI START_TEST(test_cdata_default) {
24404543ef51SXin LI   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
24414543ef51SXin LI   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
24424543ef51SXin LI   CharData storage;
24434543ef51SXin LI 
24444543ef51SXin LI   CharData_Init(&storage);
24454543ef51SXin LI   XML_SetUserData(g_parser, &storage);
24464543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
24474543ef51SXin LI 
24484543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
24494543ef51SXin LI       == XML_STATUS_ERROR)
24504543ef51SXin LI     xml_failure(g_parser);
24514543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
24524543ef51SXin LI }
24534543ef51SXin LI END_TEST
24544543ef51SXin LI 
24554543ef51SXin LI /* Test resetting a subordinate parser does exactly nothing */
24564543ef51SXin LI START_TEST(test_subordinate_reset) {
24574543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
24584543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
24594543ef51SXin LI                      "<doc>&entity;</doc>";
24604543ef51SXin LI 
24614543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
24624543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
24634543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
24644543ef51SXin LI       == XML_STATUS_ERROR)
24654543ef51SXin LI     xml_failure(g_parser);
24664543ef51SXin LI }
24674543ef51SXin LI END_TEST
24684543ef51SXin LI 
24694543ef51SXin LI /* Test suspending a subordinate parser */
24704543ef51SXin LI START_TEST(test_subordinate_suspend) {
24714543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
24724543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
24734543ef51SXin LI                      "<doc>&entity;</doc>";
24744543ef51SXin LI 
24754543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
24764543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
24774543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
24784543ef51SXin LI       == XML_STATUS_ERROR)
24794543ef51SXin LI     xml_failure(g_parser);
24804543ef51SXin LI }
24814543ef51SXin LI END_TEST
24824543ef51SXin LI 
24834543ef51SXin LI /* Test suspending a subordinate parser from an XML declaration */
24844543ef51SXin LI /* Increases code coverage of the tests */
24854543ef51SXin LI 
24864543ef51SXin LI START_TEST(test_subordinate_xdecl_suspend) {
24874543ef51SXin LI   const char *text
24884543ef51SXin LI       = "<!DOCTYPE doc [\n"
24894543ef51SXin LI         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
24904543ef51SXin LI         "]>\n"
24914543ef51SXin LI         "<doc>&entity;</doc>";
24924543ef51SXin LI 
24934543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
24944543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
24954543ef51SXin LI   g_resumable = XML_TRUE;
24964543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
24974543ef51SXin LI       == XML_STATUS_ERROR)
24984543ef51SXin LI     xml_failure(g_parser);
24994543ef51SXin LI }
25004543ef51SXin LI END_TEST
25014543ef51SXin LI 
25024543ef51SXin LI START_TEST(test_subordinate_xdecl_abort) {
25034543ef51SXin LI   const char *text
25044543ef51SXin LI       = "<!DOCTYPE doc [\n"
25054543ef51SXin LI         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
25064543ef51SXin LI         "]>\n"
25074543ef51SXin LI         "<doc>&entity;</doc>";
25084543ef51SXin LI 
25094543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
25104543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
25114543ef51SXin LI   g_resumable = XML_FALSE;
25124543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
25134543ef51SXin LI       == XML_STATUS_ERROR)
25144543ef51SXin LI     xml_failure(g_parser);
25154543ef51SXin LI }
25164543ef51SXin LI END_TEST
25174543ef51SXin LI 
25184543ef51SXin LI /* Test external entity fault handling with suspension */
25194543ef51SXin LI START_TEST(test_ext_entity_invalid_suspended_parse) {
25204543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
25214543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
25224543ef51SXin LI                      "]>\n"
25234543ef51SXin LI                      "<doc>&en;</doc>";
25244543ef51SXin LI   ExtFaults faults[]
25254543ef51SXin LI       = {{"<?xml version='1.0' encoding='us-ascii'?><",
25264543ef51SXin LI           "Incomplete element declaration not faulted", NULL,
25274543ef51SXin LI           XML_ERROR_UNCLOSED_TOKEN},
25284543ef51SXin LI          {/* First two bytes of a three-byte char */
25294543ef51SXin LI           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
25304543ef51SXin LI           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
25314543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
25324543ef51SXin LI   ExtFaults *fault;
25334543ef51SXin LI 
25344543ef51SXin LI   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
25354543ef51SXin LI     set_subtest("%s", fault->parse_text);
25364543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
25374543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser,
25384543ef51SXin LI                                     external_entity_suspending_faulter);
25394543ef51SXin LI     XML_SetUserData(g_parser, fault);
25404543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
25414543ef51SXin LI                    "Parser did not report external entity error");
25424543ef51SXin LI     XML_ParserReset(g_parser, NULL);
25434543ef51SXin LI   }
25444543ef51SXin LI }
25454543ef51SXin LI END_TEST
25464543ef51SXin LI 
25474543ef51SXin LI /* Test setting an explicit encoding */
25484543ef51SXin LI START_TEST(test_explicit_encoding) {
25494543ef51SXin LI   const char *text1 = "<doc>Hello ";
25504543ef51SXin LI   const char *text2 = " World</doc>";
25514543ef51SXin LI 
25524543ef51SXin LI   /* Just check that we can set the encoding to NULL before starting */
25534543ef51SXin LI   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
25544543ef51SXin LI     fail("Failed to initialise encoding to NULL");
25554543ef51SXin LI   /* Say we are UTF-8 */
25564543ef51SXin LI   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
25574543ef51SXin LI     fail("Failed to set explicit encoding");
25584543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
25594543ef51SXin LI       == XML_STATUS_ERROR)
25604543ef51SXin LI     xml_failure(g_parser);
25614543ef51SXin LI   /* Try to switch encodings mid-parse */
25624543ef51SXin LI   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
25634543ef51SXin LI     fail("Allowed encoding change");
25644543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
25654543ef51SXin LI       == XML_STATUS_ERROR)
25664543ef51SXin LI     xml_failure(g_parser);
25674543ef51SXin LI   /* Try now the parse is over */
25684543ef51SXin LI   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
25694543ef51SXin LI     fail("Failed to unset encoding");
25704543ef51SXin LI }
25714543ef51SXin LI END_TEST
25724543ef51SXin LI 
25734543ef51SXin LI /* Test handling of trailing CR (rather than newline) */
25744543ef51SXin LI START_TEST(test_trailing_cr) {
25754543ef51SXin LI   const char *text = "<doc>\r";
25764543ef51SXin LI   int found_cr;
25774543ef51SXin LI 
25784543ef51SXin LI   /* Try with a character handler, for code coverage */
25794543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
25804543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
25814543ef51SXin LI   found_cr = 0;
25824543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
25834543ef51SXin LI       == XML_STATUS_OK)
25844543ef51SXin LI     fail("Failed to fault unclosed doc");
25854543ef51SXin LI   if (found_cr == 0)
25864543ef51SXin LI     fail("Did not catch the carriage return");
25874543ef51SXin LI   XML_ParserReset(g_parser, NULL);
25884543ef51SXin LI 
25894543ef51SXin LI   /* Now with a default handler instead */
25904543ef51SXin LI   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
25914543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
25924543ef51SXin LI   found_cr = 0;
25934543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
25944543ef51SXin LI       == XML_STATUS_OK)
25954543ef51SXin LI     fail("Failed to fault unclosed doc");
25964543ef51SXin LI   if (found_cr == 0)
25974543ef51SXin LI     fail("Did not catch default carriage return");
25984543ef51SXin LI }
25994543ef51SXin LI END_TEST
26004543ef51SXin LI 
26014543ef51SXin LI /* Test trailing CR in an external entity parse */
26024543ef51SXin LI START_TEST(test_ext_entity_trailing_cr) {
26034543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
26044543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
26054543ef51SXin LI                      "]>\n"
26064543ef51SXin LI                      "<doc>&en;</doc>";
26074543ef51SXin LI   int found_cr;
26084543ef51SXin LI 
26094543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
26104543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
26114543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
26124543ef51SXin LI   found_cr = 0;
26134543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
26144543ef51SXin LI       != XML_STATUS_OK)
26154543ef51SXin LI     xml_failure(g_parser);
26164543ef51SXin LI   if (found_cr == 0)
26174543ef51SXin LI     fail("No carriage return found");
26184543ef51SXin LI   XML_ParserReset(g_parser, NULL);
26194543ef51SXin LI 
26204543ef51SXin LI   /* Try again with a different trailing CR */
26214543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
26224543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
26234543ef51SXin LI   XML_SetUserData(g_parser, &found_cr);
26244543ef51SXin LI   found_cr = 0;
26254543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
26264543ef51SXin LI       != XML_STATUS_OK)
26274543ef51SXin LI     xml_failure(g_parser);
26284543ef51SXin LI   if (found_cr == 0)
26294543ef51SXin LI     fail("No carriage return found");
26304543ef51SXin LI }
26314543ef51SXin LI END_TEST
26324543ef51SXin LI 
26334543ef51SXin LI /* Test handling of trailing square bracket */
26344543ef51SXin LI START_TEST(test_trailing_rsqb) {
26354543ef51SXin LI   const char *text8 = "<doc>]";
26364543ef51SXin LI   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
26374543ef51SXin LI   int found_rsqb;
26384543ef51SXin LI   int text8_len = (int)strlen(text8);
26394543ef51SXin LI 
26404543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
26414543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
26424543ef51SXin LI   found_rsqb = 0;
26434543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
26444543ef51SXin LI       == XML_STATUS_OK)
26454543ef51SXin LI     fail("Failed to fault unclosed doc");
26464543ef51SXin LI   if (found_rsqb == 0)
26474543ef51SXin LI     fail("Did not catch the right square bracket");
26484543ef51SXin LI 
26494543ef51SXin LI   /* Try again with a different encoding */
26504543ef51SXin LI   XML_ParserReset(g_parser, NULL);
26514543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
26524543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
26534543ef51SXin LI   found_rsqb = 0;
26544543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
26554543ef51SXin LI                               XML_TRUE)
26564543ef51SXin LI       == XML_STATUS_OK)
26574543ef51SXin LI     fail("Failed to fault unclosed doc");
26584543ef51SXin LI   if (found_rsqb == 0)
26594543ef51SXin LI     fail("Did not catch the right square bracket");
26604543ef51SXin LI 
26614543ef51SXin LI   /* And finally with a default handler */
26624543ef51SXin LI   XML_ParserReset(g_parser, NULL);
26634543ef51SXin LI   XML_SetDefaultHandler(g_parser, rsqb_handler);
26644543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
26654543ef51SXin LI   found_rsqb = 0;
26664543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
26674543ef51SXin LI                               XML_TRUE)
26684543ef51SXin LI       == XML_STATUS_OK)
26694543ef51SXin LI     fail("Failed to fault unclosed doc");
26704543ef51SXin LI   if (found_rsqb == 0)
26714543ef51SXin LI     fail("Did not catch the right square bracket");
26724543ef51SXin LI }
26734543ef51SXin LI END_TEST
26744543ef51SXin LI 
26754543ef51SXin LI /* Test trailing right square bracket in an external entity parse */
26764543ef51SXin LI START_TEST(test_ext_entity_trailing_rsqb) {
26774543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
26784543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
26794543ef51SXin LI                      "]>\n"
26804543ef51SXin LI                      "<doc>&en;</doc>";
26814543ef51SXin LI   int found_rsqb;
26824543ef51SXin LI 
26834543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
26844543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
26854543ef51SXin LI   XML_SetUserData(g_parser, &found_rsqb);
26864543ef51SXin LI   found_rsqb = 0;
26874543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
26884543ef51SXin LI       != XML_STATUS_OK)
26894543ef51SXin LI     xml_failure(g_parser);
26904543ef51SXin LI   if (found_rsqb == 0)
26914543ef51SXin LI     fail("No right square bracket found");
26924543ef51SXin LI }
26934543ef51SXin LI END_TEST
26944543ef51SXin LI 
26954543ef51SXin LI /* Test CDATA handling in an external entity */
26964543ef51SXin LI START_TEST(test_ext_entity_good_cdata) {
26974543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
26984543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
26994543ef51SXin LI                      "]>\n"
27004543ef51SXin LI                      "<doc>&en;</doc>";
27014543ef51SXin LI 
27024543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
27034543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
27044543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
27054543ef51SXin LI       != XML_STATUS_OK)
27064543ef51SXin LI     xml_failure(g_parser);
27074543ef51SXin LI }
27084543ef51SXin LI END_TEST
27094543ef51SXin LI 
27104543ef51SXin LI /* Test user parameter settings */
27114543ef51SXin LI START_TEST(test_user_parameters) {
27124543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
27134543ef51SXin LI                      "<!-- Primary parse -->\n"
27144543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
27154543ef51SXin LI                      "<doc>&entity;";
27164543ef51SXin LI   const char *epilog = "<!-- Back to primary parser -->\n"
27174543ef51SXin LI                        "</doc>";
27184543ef51SXin LI 
27194543ef51SXin LI   g_comment_count = 0;
27204543ef51SXin LI   g_skip_count = 0;
27214543ef51SXin LI   g_xdecl_count = 0;
27224543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
27234543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
27244543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
27254543ef51SXin LI   XML_SetCommentHandler(g_parser, data_check_comment_handler);
27264543ef51SXin LI   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
27274543ef51SXin LI   XML_UseParserAsHandlerArg(g_parser);
27284543ef51SXin LI   XML_SetUserData(g_parser, (void *)1);
27294543ef51SXin LI   g_handler_data = g_parser;
27304543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
27314543ef51SXin LI       == XML_STATUS_ERROR)
27324543ef51SXin LI     xml_failure(g_parser);
27334543ef51SXin LI   /* Ensure we can't change policy mid-parse */
27344543ef51SXin LI   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
27354543ef51SXin LI     fail("Changed param entity parsing policy while parsing");
27364543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
27374543ef51SXin LI       == XML_STATUS_ERROR)
27384543ef51SXin LI     xml_failure(g_parser);
27394543ef51SXin LI   if (g_comment_count != 3)
27404543ef51SXin LI     fail("Comment handler not invoked enough times");
27414543ef51SXin LI   if (g_skip_count != 1)
27424543ef51SXin LI     fail("Skip handler not invoked enough times");
27434543ef51SXin LI   if (g_xdecl_count != 1)
27444543ef51SXin LI     fail("XML declaration handler not invoked");
27454543ef51SXin LI }
27464543ef51SXin LI END_TEST
27474543ef51SXin LI 
27484543ef51SXin LI /* Test that an explicit external entity handler argument replaces
27494543ef51SXin LI  * the parser as the first argument.
27504543ef51SXin LI  *
27514543ef51SXin LI  * We do not call the first parameter to the external entity handler
27524543ef51SXin LI  * 'parser' for once, since the first time the handler is called it
27534543ef51SXin LI  * will actually be a text string.  We need to be able to access the
27544543ef51SXin LI  * global 'parser' variable to create our external entity parser from,
27554543ef51SXin LI  * since there are code paths we need to ensure get executed.
27564543ef51SXin LI  */
27574543ef51SXin LI START_TEST(test_ext_entity_ref_parameter) {
27584543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
27594543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
27604543ef51SXin LI                      "<doc>&entity;</doc>";
27614543ef51SXin LI 
27624543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
27634543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
27644543ef51SXin LI   /* Set a handler arg that is not NULL and not parser (which is
27654543ef51SXin LI    * what NULL would cause to be passed.
27664543ef51SXin LI    */
27674543ef51SXin LI   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
27684543ef51SXin LI   g_handler_data = text;
27694543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
27704543ef51SXin LI       == XML_STATUS_ERROR)
27714543ef51SXin LI     xml_failure(g_parser);
27724543ef51SXin LI 
27734543ef51SXin LI   /* Now try again with unset args */
27744543ef51SXin LI   XML_ParserReset(g_parser, NULL);
27754543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
27764543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
27774543ef51SXin LI   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
27784543ef51SXin LI   g_handler_data = g_parser;
27794543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
27804543ef51SXin LI       == XML_STATUS_ERROR)
27814543ef51SXin LI     xml_failure(g_parser);
27824543ef51SXin LI }
27834543ef51SXin LI END_TEST
27844543ef51SXin LI 
27854543ef51SXin LI /* Test the parsing of an empty string */
27864543ef51SXin LI START_TEST(test_empty_parse) {
27874543ef51SXin LI   const char *text = "<doc></doc>";
27884543ef51SXin LI   const char *partial = "<doc>";
27894543ef51SXin LI 
27904543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
27914543ef51SXin LI     fail("Parsing empty string faulted");
27924543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
27934543ef51SXin LI     fail("Parsing final empty string not faulted");
27944543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
27954543ef51SXin LI     fail("Parsing final empty string faulted for wrong reason");
27964543ef51SXin LI 
27974543ef51SXin LI   /* Now try with valid text before the empty end */
27984543ef51SXin LI   XML_ParserReset(g_parser, NULL);
27994543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
28004543ef51SXin LI       == XML_STATUS_ERROR)
28014543ef51SXin LI     xml_failure(g_parser);
28024543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
28034543ef51SXin LI     fail("Parsing final empty string faulted");
28044543ef51SXin LI 
28054543ef51SXin LI   /* Now try with invalid text before the empty end */
28064543ef51SXin LI   XML_ParserReset(g_parser, NULL);
28074543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
28084543ef51SXin LI                               XML_FALSE)
28094543ef51SXin LI       == XML_STATUS_ERROR)
28104543ef51SXin LI     xml_failure(g_parser);
28114543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
28124543ef51SXin LI     fail("Parsing final incomplete empty string not faulted");
28134543ef51SXin LI }
28144543ef51SXin LI END_TEST
28154543ef51SXin LI 
2816ffd294a1SEnji Cooper /* Test XML_Parse for len < 0 */
2817ffd294a1SEnji Cooper START_TEST(test_negative_len_parse) {
2818ffd294a1SEnji Cooper   const char *const doc = "<root/>";
2819ffd294a1SEnji Cooper   for (int isFinal = 0; isFinal < 2; isFinal++) {
2820ffd294a1SEnji Cooper     set_subtest("isFinal=%d", isFinal);
2821ffd294a1SEnji Cooper 
2822ffd294a1SEnji Cooper     XML_Parser parser = XML_ParserCreate(NULL);
2823ffd294a1SEnji Cooper 
2824ffd294a1SEnji Cooper     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2825ffd294a1SEnji Cooper       fail("There was not supposed to be any initial parse error.");
2826ffd294a1SEnji Cooper 
2827ffd294a1SEnji Cooper     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2828ffd294a1SEnji Cooper 
2829ffd294a1SEnji Cooper     if (status != XML_STATUS_ERROR)
2830ffd294a1SEnji Cooper       fail("Negative len was expected to fail the parse but did not.");
2831ffd294a1SEnji Cooper 
2832ffd294a1SEnji Cooper     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2833ffd294a1SEnji Cooper       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2834ffd294a1SEnji Cooper 
2835ffd294a1SEnji Cooper     XML_ParserFree(parser);
2836ffd294a1SEnji Cooper   }
2837ffd294a1SEnji Cooper }
2838ffd294a1SEnji Cooper END_TEST
2839ffd294a1SEnji Cooper 
2840ffd294a1SEnji Cooper /* Test XML_ParseBuffer for len < 0 */
2841ffd294a1SEnji Cooper START_TEST(test_negative_len_parse_buffer) {
2842ffd294a1SEnji Cooper   const char *const doc = "<root/>";
2843ffd294a1SEnji Cooper   for (int isFinal = 0; isFinal < 2; isFinal++) {
2844ffd294a1SEnji Cooper     set_subtest("isFinal=%d", isFinal);
2845ffd294a1SEnji Cooper 
2846ffd294a1SEnji Cooper     XML_Parser parser = XML_ParserCreate(NULL);
2847ffd294a1SEnji Cooper 
2848ffd294a1SEnji Cooper     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
2849ffd294a1SEnji Cooper       fail("There was not supposed to be any initial parse error.");
2850ffd294a1SEnji Cooper 
2851ffd294a1SEnji Cooper     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2852ffd294a1SEnji Cooper 
2853ffd294a1SEnji Cooper     if (buffer == NULL)
2854ffd294a1SEnji Cooper       fail("XML_GetBuffer failed.");
2855ffd294a1SEnji Cooper 
2856ffd294a1SEnji Cooper     memcpy(buffer, doc, strlen(doc));
2857ffd294a1SEnji Cooper 
2858ffd294a1SEnji Cooper     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2859ffd294a1SEnji Cooper 
2860ffd294a1SEnji Cooper     if (status != XML_STATUS_ERROR)
2861ffd294a1SEnji Cooper       fail("Negative len was expected to fail the parse but did not.");
2862ffd294a1SEnji Cooper 
2863ffd294a1SEnji Cooper     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
2864ffd294a1SEnji Cooper       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2865ffd294a1SEnji Cooper 
2866ffd294a1SEnji Cooper     XML_ParserFree(parser);
2867ffd294a1SEnji Cooper   }
2868ffd294a1SEnji Cooper }
2869ffd294a1SEnji Cooper END_TEST
2870ffd294a1SEnji Cooper 
28714543ef51SXin LI /* Test odd corners of the XML_GetBuffer interface */
28724543ef51SXin LI static enum XML_Status
28734543ef51SXin LI get_feature(enum XML_FeatureEnum feature_id, long *presult) {
28744543ef51SXin LI   const XML_Feature *feature = XML_GetFeatureList();
28754543ef51SXin LI 
28764543ef51SXin LI   if (feature == NULL)
28774543ef51SXin LI     return XML_STATUS_ERROR;
28784543ef51SXin LI   for (; feature->feature != XML_FEATURE_END; feature++) {
28794543ef51SXin LI     if (feature->feature == feature_id) {
28804543ef51SXin LI       *presult = feature->value;
28814543ef51SXin LI       return XML_STATUS_OK;
28824543ef51SXin LI     }
28834543ef51SXin LI   }
28844543ef51SXin LI   return XML_STATUS_ERROR;
28854543ef51SXin LI }
28864543ef51SXin LI 
28874543ef51SXin LI /* Test odd corners of the XML_GetBuffer interface */
28884543ef51SXin LI START_TEST(test_get_buffer_1) {
28894543ef51SXin LI   const char *text = get_buffer_test_text;
28904543ef51SXin LI   void *buffer;
28914543ef51SXin LI   long context_bytes;
28924543ef51SXin LI 
28934543ef51SXin LI   /* Attempt to allocate a negative length buffer */
28944543ef51SXin LI   if (XML_GetBuffer(g_parser, -12) != NULL)
28954543ef51SXin LI     fail("Negative length buffer not failed");
28964543ef51SXin LI 
28974543ef51SXin LI   /* Now get a small buffer and extend it past valid length */
28984543ef51SXin LI   buffer = XML_GetBuffer(g_parser, 1536);
28994543ef51SXin LI   if (buffer == NULL)
29004543ef51SXin LI     fail("1.5K buffer failed");
29014543ef51SXin LI   assert(buffer != NULL);
29024543ef51SXin LI   memcpy(buffer, text, strlen(text));
29034543ef51SXin LI   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
29044543ef51SXin LI       == XML_STATUS_ERROR)
29054543ef51SXin LI     xml_failure(g_parser);
29064543ef51SXin LI   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
29074543ef51SXin LI     fail("INT_MAX buffer not failed");
29084543ef51SXin LI 
29094543ef51SXin LI   /* Now try extending it a more reasonable but still too large
29104543ef51SXin LI    * amount.  The allocator in XML_GetBuffer() doubles the buffer
29114543ef51SXin LI    * size until it exceeds the requested amount or INT_MAX.  If it
29124543ef51SXin LI    * exceeds INT_MAX, it rejects the request, so we want a request
29134543ef51SXin LI    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
29144543ef51SXin LI    * with an extra byte just to ensure that the request is off any
29154543ef51SXin LI    * boundary.  The request will be inflated internally by
29164543ef51SXin LI    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
29174543ef51SXin LI    * request.
29184543ef51SXin LI    */
29194543ef51SXin LI   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
29204543ef51SXin LI     context_bytes = 0;
29214543ef51SXin LI   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
29224543ef51SXin LI     fail("INT_MAX- buffer not failed");
29234543ef51SXin LI 
29244543ef51SXin LI   /* Now try extending it a carefully crafted amount */
29254543ef51SXin LI   if (XML_GetBuffer(g_parser, 1000) == NULL)
29264543ef51SXin LI     fail("1000 buffer failed");
29274543ef51SXin LI }
29284543ef51SXin LI END_TEST
29294543ef51SXin LI 
29304543ef51SXin LI /* Test more corners of the XML_GetBuffer interface */
29314543ef51SXin LI START_TEST(test_get_buffer_2) {
29324543ef51SXin LI   const char *text = get_buffer_test_text;
29334543ef51SXin LI   void *buffer;
29344543ef51SXin LI 
29354543ef51SXin LI   /* Now get a decent buffer */
29364543ef51SXin LI   buffer = XML_GetBuffer(g_parser, 1536);
29374543ef51SXin LI   if (buffer == NULL)
29384543ef51SXin LI     fail("1.5K buffer failed");
29394543ef51SXin LI   assert(buffer != NULL);
29404543ef51SXin LI   memcpy(buffer, text, strlen(text));
29414543ef51SXin LI   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
29424543ef51SXin LI       == XML_STATUS_ERROR)
29434543ef51SXin LI     xml_failure(g_parser);
29444543ef51SXin LI 
29454543ef51SXin LI   /* Extend it, to catch a different code path */
29464543ef51SXin LI   if (XML_GetBuffer(g_parser, 1024) == NULL)
29474543ef51SXin LI     fail("1024 buffer failed");
29484543ef51SXin LI }
29494543ef51SXin LI END_TEST
29504543ef51SXin LI 
/* Test for signed integer overflow CVE-2022-23852.
 *
 * Only built when XML_CONTEXT_BYTES > 0.  After a short non-final parse,
 * a buffer request of INT_MAX - keep + 1 bytes must be rejected.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int expectedKeepValue = (int)strlen(text);
  enum XML_Status status;

  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  // Once this parse has run, XML_GetBuffer's internal variable "keep"
  // holds expectedKeepValue
  status = _XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                                   XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
29754543ef51SXin LI 
29764543ef51SXin LI START_TEST(test_buffer_can_grow_to_max) {
29774543ef51SXin LI   const char *const prefixes[] = {
29784543ef51SXin LI       "",
29794543ef51SXin LI       "<",
29804543ef51SXin LI       "<x a='",
29814543ef51SXin LI       "<doc><x a='",
29824543ef51SXin LI       "<document><x a='",
29834543ef51SXin LI       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
29844543ef51SXin LI       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
29854543ef51SXin LI       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
29864543ef51SXin LI       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
29874543ef51SXin LI       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
29884543ef51SXin LI   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
29894543ef51SXin LI   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
29904543ef51SXin LI #if defined(__MINGW32__) && ! defined(__MINGW64__)
29914543ef51SXin LI   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
29924543ef51SXin LI   // Can we make a big allocation?
29934543ef51SXin LI   void *big = malloc(maxbuf);
29944543ef51SXin LI   if (! big) {
29954543ef51SXin LI     // The big allocation failed. Let's be a little lenient.
29964543ef51SXin LI     maxbuf = maxbuf / 2;
29974543ef51SXin LI   }
29984543ef51SXin LI   free(big);
29994543ef51SXin LI #endif
30004543ef51SXin LI 
30014543ef51SXin LI   for (int i = 0; i < num_prefixes; ++i) {
30024543ef51SXin LI     set_subtest("\"%s\"", prefixes[i]);
30034543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
30044543ef51SXin LI     const int prefix_len = (int)strlen(prefixes[i]);
30054543ef51SXin LI     const enum XML_Status s
30064543ef51SXin LI         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
30074543ef51SXin LI     if (s != XML_STATUS_OK)
30084543ef51SXin LI       xml_failure(parser);
30094543ef51SXin LI 
30104543ef51SXin LI     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
30114543ef51SXin LI     // subtracting the whole prefix is easiest, and close enough.
30124543ef51SXin LI     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
30134543ef51SXin LI     // The limit should be consistent; no prefix should allow us to
30144543ef51SXin LI     // reach above the max buffer size.
30154543ef51SXin LI     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
30164543ef51SXin LI     XML_ParserFree(parser);
30174543ef51SXin LI   }
30184543ef51SXin LI }
30194543ef51SXin LI END_TEST
30204543ef51SXin LI 
30214543ef51SXin LI START_TEST(test_getbuffer_allocates_on_zero_len) {
30224543ef51SXin LI   for (int first_len = 1; first_len >= 0; first_len--) {
30234543ef51SXin LI     set_subtest("with len=%d first", first_len);
30244543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
30254543ef51SXin LI     assert_true(parser != NULL);
30264543ef51SXin LI     assert_true(XML_GetBuffer(parser, first_len) != NULL);
30274543ef51SXin LI     assert_true(XML_GetBuffer(parser, 0) != NULL);
30284543ef51SXin LI     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
30294543ef51SXin LI       xml_failure(parser);
30304543ef51SXin LI     XML_ParserFree(parser);
30314543ef51SXin LI   }
30324543ef51SXin LI }
30334543ef51SXin LI END_TEST
30344543ef51SXin LI 
30354543ef51SXin LI /* Test position information macros */
30364543ef51SXin LI START_TEST(test_byte_info_at_end) {
30374543ef51SXin LI   const char *text = "<doc></doc>";
30384543ef51SXin LI 
30394543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != -1
30404543ef51SXin LI       || XML_GetCurrentByteCount(g_parser) != 0)
30414543ef51SXin LI     fail("Byte index/count incorrect at start of parse");
30424543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
30434543ef51SXin LI       == XML_STATUS_ERROR)
30444543ef51SXin LI     xml_failure(g_parser);
30454543ef51SXin LI   /* At end, the count will be zero and the index the end of string */
30464543ef51SXin LI   if (XML_GetCurrentByteCount(g_parser) != 0)
30474543ef51SXin LI     fail("Terminal byte count incorrect");
30484543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
30494543ef51SXin LI     fail("Terminal byte index incorrect");
30504543ef51SXin LI }
30514543ef51SXin LI END_TEST
30524543ef51SXin LI 
30534543ef51SXin LI /* Test position information from errors */
30544543ef51SXin LI #define PRE_ERROR_STR "<doc></"
30554543ef51SXin LI #define POST_ERROR_STR "wombat></doc>"
30564543ef51SXin LI START_TEST(test_byte_info_at_error) {
30574543ef51SXin LI   const char *text = PRE_ERROR_STR POST_ERROR_STR;
30584543ef51SXin LI 
30594543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
30604543ef51SXin LI       == XML_STATUS_OK)
30614543ef51SXin LI     fail("Syntax error not faulted");
30624543ef51SXin LI   if (XML_GetCurrentByteCount(g_parser) != 0)
30634543ef51SXin LI     fail("Error byte count incorrect");
30644543ef51SXin LI   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
30654543ef51SXin LI     fail("Error byte index incorrect");
30664543ef51SXin LI }
30674543ef51SXin LI END_TEST
30684543ef51SXin LI #undef PRE_ERROR_STR
30694543ef51SXin LI #undef POST_ERROR_STR
30704543ef51SXin LI 
30714543ef51SXin LI /* Test position information in handler */
30724543ef51SXin LI #define START_ELEMENT "<e>"
30734543ef51SXin LI #define CDATA_TEXT "Hello"
30744543ef51SXin LI #define END_ELEMENT "</e>"
30754543ef51SXin LI START_TEST(test_byte_info_at_cdata) {
30764543ef51SXin LI   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
30774543ef51SXin LI   int offset, size;
30784543ef51SXin LI   ByteTestData data;
30794543ef51SXin LI 
30804543ef51SXin LI   /* Check initial context is empty */
30814543ef51SXin LI   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
30824543ef51SXin LI     fail("Unexpected context at start of parse");
30834543ef51SXin LI 
30844543ef51SXin LI   data.start_element_len = (int)strlen(START_ELEMENT);
30854543ef51SXin LI   data.cdata_len = (int)strlen(CDATA_TEXT);
30864543ef51SXin LI   data.total_string_len = (int)strlen(text);
30874543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
30884543ef51SXin LI   XML_SetUserData(g_parser, &data);
30894543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
30904543ef51SXin LI     xml_failure(g_parser);
30914543ef51SXin LI }
30924543ef51SXin LI END_TEST
30934543ef51SXin LI #undef START_ELEMENT
30944543ef51SXin LI #undef CDATA_TEXT
30954543ef51SXin LI #undef END_ELEMENT
30964543ef51SXin LI 
30974543ef51SXin LI /* Test predefined entities are correctly recognised */
30984543ef51SXin LI START_TEST(test_predefined_entities) {
30994543ef51SXin LI   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
31004543ef51SXin LI   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
31014543ef51SXin LI   const XML_Char *result = XCS("<>&\"'");
31024543ef51SXin LI   CharData storage;
31034543ef51SXin LI 
31044543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
31054543ef51SXin LI   /* run_character_check uses XML_SetCharacterDataHandler(), which
31064543ef51SXin LI    * unfortunately heads off a code path that we need to exercise.
31074543ef51SXin LI    */
31084543ef51SXin LI   CharData_Init(&storage);
31094543ef51SXin LI   XML_SetUserData(g_parser, &storage);
31104543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
31114543ef51SXin LI       == XML_STATUS_ERROR)
31124543ef51SXin LI     xml_failure(g_parser);
31134543ef51SXin LI   /* The default handler doesn't translate the entities */
31144543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
31154543ef51SXin LI 
31164543ef51SXin LI   /* Now try again and check the translation */
31174543ef51SXin LI   XML_ParserReset(g_parser, NULL);
31184543ef51SXin LI   run_character_check(text, result);
31194543ef51SXin LI }
31204543ef51SXin LI END_TEST
31214543ef51SXin LI 
31224543ef51SXin LI /* Regression test that an invalid tag in an external parameter
31234543ef51SXin LI  * reference in an external DTD is correctly faulted.
31244543ef51SXin LI  *
31254543ef51SXin LI  * Only a few specific tags are legal in DTDs ignoring comments and
31264543ef51SXin LI  * processing instructions, all of which begin with an exclamation
31274543ef51SXin LI  * mark.  "<el/>" is not one of them, so the parser should raise an
31284543ef51SXin LI  * error on encountering it.
31294543ef51SXin LI  */
31304543ef51SXin LI START_TEST(test_invalid_tag_in_dtd) {
31314543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
31324543ef51SXin LI                      "<doc></doc>\n";
31334543ef51SXin LI 
31344543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
31354543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
31364543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
31374543ef51SXin LI                  "Invalid tag IN DTD external param not rejected");
31384543ef51SXin LI }
31394543ef51SXin LI END_TEST
31404543ef51SXin LI 
31414543ef51SXin LI /* Test entities not quite the predefined ones are not mis-recognised */
31424543ef51SXin LI START_TEST(test_not_predefined_entities) {
31434543ef51SXin LI   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
31444543ef51SXin LI                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
31454543ef51SXin LI   int i = 0;
31464543ef51SXin LI 
31474543ef51SXin LI   while (text[i] != NULL) {
31484543ef51SXin LI     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
31494543ef51SXin LI                    "Undefined entity not rejected");
31504543ef51SXin LI     XML_ParserReset(g_parser, NULL);
31514543ef51SXin LI     i++;
31524543ef51SXin LI   }
31534543ef51SXin LI }
31544543ef51SXin LI END_TEST
31554543ef51SXin LI 
31564543ef51SXin LI /* Test conditional inclusion (IGNORE) */
31574543ef51SXin LI START_TEST(test_ignore_section) {
31584543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
31594543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
31604543ef51SXin LI   const XML_Char *expected
31614543ef51SXin LI       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
31624543ef51SXin LI   CharData storage;
31634543ef51SXin LI 
31644543ef51SXin LI   CharData_Init(&storage);
31654543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
31664543ef51SXin LI   XML_SetUserData(g_parser, &storage);
31674543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
31684543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
31694543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
31704543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
31714543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
31724543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
31734543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
31744543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
31754543ef51SXin LI       == XML_STATUS_ERROR)
31764543ef51SXin LI     xml_failure(g_parser);
31774543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
31784543ef51SXin LI }
31794543ef51SXin LI END_TEST
31804543ef51SXin LI 
31814543ef51SXin LI START_TEST(test_ignore_section_utf16) {
31824543ef51SXin LI   const char text[] =
31834543ef51SXin LI       /* <!DOCTYPE d SYSTEM 's'> */
31844543ef51SXin LI       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
31854543ef51SXin LI       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
31864543ef51SXin LI       /* <d><e>&en;</e></d> */
31874543ef51SXin LI       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
31884543ef51SXin LI   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
31894543ef51SXin LI   CharData storage;
31904543ef51SXin LI 
31914543ef51SXin LI   CharData_Init(&storage);
31924543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
31934543ef51SXin LI   XML_SetUserData(g_parser, &storage);
31944543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
31954543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
31964543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
31974543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
31984543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
31994543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
32004543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
32014543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
32024543ef51SXin LI       == XML_STATUS_ERROR)
32034543ef51SXin LI     xml_failure(g_parser);
32044543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
32054543ef51SXin LI }
32064543ef51SXin LI END_TEST
32074543ef51SXin LI 
32084543ef51SXin LI START_TEST(test_ignore_section_utf16_be) {
32094543ef51SXin LI   const char text[] =
32104543ef51SXin LI       /* <!DOCTYPE d SYSTEM 's'> */
32114543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
32124543ef51SXin LI       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
32134543ef51SXin LI       /* <d><e>&en;</e></d> */
32144543ef51SXin LI       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
32154543ef51SXin LI   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
32164543ef51SXin LI   CharData storage;
32174543ef51SXin LI 
32184543ef51SXin LI   CharData_Init(&storage);
32194543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
32204543ef51SXin LI   XML_SetUserData(g_parser, &storage);
32214543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser,
32224543ef51SXin LI                                   external_entity_load_ignore_utf16_be);
32234543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
32244543ef51SXin LI   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
32254543ef51SXin LI   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
32264543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
32274543ef51SXin LI   XML_SetStartElementHandler(g_parser, dummy_start_element);
32284543ef51SXin LI   XML_SetEndElementHandler(g_parser, dummy_end_element);
32294543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
32304543ef51SXin LI       == XML_STATUS_ERROR)
32314543ef51SXin LI     xml_failure(g_parser);
32324543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
32334543ef51SXin LI }
32344543ef51SXin LI END_TEST
32354543ef51SXin LI 
32364543ef51SXin LI /* Test mis-formatted conditional exclusion */
32374543ef51SXin LI START_TEST(test_bad_ignore_section) {
32384543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
32394543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
32404543ef51SXin LI   ExtFaults faults[]
32414543ef51SXin LI       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
32424543ef51SXin LI           XML_ERROR_SYNTAX},
32434543ef51SXin LI          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
32444543ef51SXin LI           XML_ERROR_INVALID_TOKEN},
32454543ef51SXin LI          {/* FIrst two bytes of a three-byte char */
32464543ef51SXin LI           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
32474543ef51SXin LI           XML_ERROR_PARTIAL_CHAR},
32484543ef51SXin LI          {NULL, NULL, NULL, XML_ERROR_NONE}};
32494543ef51SXin LI   ExtFaults *fault;
32504543ef51SXin LI 
32514543ef51SXin LI   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
32524543ef51SXin LI     set_subtest("%s", fault->parse_text);
32534543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
32544543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
32554543ef51SXin LI     XML_SetUserData(g_parser, fault);
32564543ef51SXin LI     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
32574543ef51SXin LI                    "Incomplete IGNORE section not failed");
32584543ef51SXin LI     XML_ParserReset(g_parser, NULL);
32594543ef51SXin LI   }
32604543ef51SXin LI }
32614543ef51SXin LI END_TEST
32624543ef51SXin LI 
32634543ef51SXin LI struct bom_testdata {
32644543ef51SXin LI   const char *external;
32654543ef51SXin LI   int split;
32664543ef51SXin LI   XML_Bool nested_callback_happened;
32674543ef51SXin LI };
32684543ef51SXin LI 
32694543ef51SXin LI static int XMLCALL
32704543ef51SXin LI external_bom_checker(XML_Parser parser, const XML_Char *context,
32714543ef51SXin LI                      const XML_Char *base, const XML_Char *systemId,
32724543ef51SXin LI                      const XML_Char *publicId) {
32734543ef51SXin LI   const char *text;
32744543ef51SXin LI   UNUSED_P(base);
32754543ef51SXin LI   UNUSED_P(systemId);
32764543ef51SXin LI   UNUSED_P(publicId);
32774543ef51SXin LI 
32784543ef51SXin LI   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
32794543ef51SXin LI   if (ext_parser == NULL)
32804543ef51SXin LI     fail("Could not create external entity parser");
32814543ef51SXin LI 
32824543ef51SXin LI   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
32834543ef51SXin LI     struct bom_testdata *const testdata
32844543ef51SXin LI         = (struct bom_testdata *)XML_GetUserData(parser);
32854543ef51SXin LI     const char *const external = testdata->external;
32864543ef51SXin LI     const int split = testdata->split;
32874543ef51SXin LI     testdata->nested_callback_happened = XML_TRUE;
32884543ef51SXin LI 
32894543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
32904543ef51SXin LI         != XML_STATUS_OK) {
32914543ef51SXin LI       xml_failure(ext_parser);
32924543ef51SXin LI     }
32934543ef51SXin LI     text = external + split; // the parse below will continue where we left off.
32944543ef51SXin LI   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
32954543ef51SXin LI     text = "<!ELEMENT doc EMPTY>\n"
32964543ef51SXin LI            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
32974543ef51SXin LI            "<!ENTITY % e2 '%e1;'>\n";
32984543ef51SXin LI   } else {
32994543ef51SXin LI     fail("unknown systemId");
33004543ef51SXin LI   }
33014543ef51SXin LI 
33024543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
33034543ef51SXin LI       != XML_STATUS_OK)
33044543ef51SXin LI     xml_failure(ext_parser);
33054543ef51SXin LI 
33064543ef51SXin LI   XML_ParserFree(ext_parser);
33074543ef51SXin LI   return XML_STATUS_OK;
33084543ef51SXin LI }
33094543ef51SXin LI 
33104543ef51SXin LI /* regression test: BOM should be consumed when followed by a partial token. */
33114543ef51SXin LI START_TEST(test_external_bom_consumed) {
33124543ef51SXin LI   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
33134543ef51SXin LI                            "<doc></doc>\n";
33144543ef51SXin LI   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
33154543ef51SXin LI   const int len = (int)strlen(external);
33164543ef51SXin LI   for (int split = 0; split <= len; ++split) {
33174543ef51SXin LI     set_subtest("split at byte %d", split);
33184543ef51SXin LI 
33194543ef51SXin LI     struct bom_testdata testdata;
33204543ef51SXin LI     testdata.external = external;
33214543ef51SXin LI     testdata.split = split;
33224543ef51SXin LI     testdata.nested_callback_happened = XML_FALSE;
33234543ef51SXin LI 
33244543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
33254543ef51SXin LI     if (parser == NULL) {
33264543ef51SXin LI       fail("Couldn't create parser");
33274543ef51SXin LI     }
33284543ef51SXin LI     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
33294543ef51SXin LI     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
33304543ef51SXin LI     XML_SetUserData(parser, &testdata);
33314543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
33324543ef51SXin LI         == XML_STATUS_ERROR)
33334543ef51SXin LI       xml_failure(parser);
33344543ef51SXin LI     if (! testdata.nested_callback_happened) {
33354543ef51SXin LI       fail("ref handler not called");
33364543ef51SXin LI     }
33374543ef51SXin LI     XML_ParserFree(parser);
33384543ef51SXin LI   }
33394543ef51SXin LI }
33404543ef51SXin LI END_TEST
33414543ef51SXin LI 
33424543ef51SXin LI /* Test recursive parsing */
33434543ef51SXin LI START_TEST(test_external_entity_values) {
33444543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
33454543ef51SXin LI                      "<doc></doc>\n";
33464543ef51SXin LI   ExtFaults data_004_2[] = {
33474543ef51SXin LI       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
33484543ef51SXin LI       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
33494543ef51SXin LI        XML_ERROR_INVALID_TOKEN},
33504543ef51SXin LI       {"'wombat", "Unterminated string not faulted", NULL,
33514543ef51SXin LI        XML_ERROR_UNCLOSED_TOKEN},
33524543ef51SXin LI       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
33534543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
33544543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
33554543ef51SXin LI       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
33564543ef51SXin LI        XML_ERROR_XML_DECL},
33574543ef51SXin LI       {/* UTF-8 BOM */
33584543ef51SXin LI        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
33594543ef51SXin LI        XML_ERROR_NONE},
33604543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n$",
33614543ef51SXin LI        "Invalid token after text declaration not faulted", NULL,
33624543ef51SXin LI        XML_ERROR_INVALID_TOKEN},
33634543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
33644543ef51SXin LI        "Unterminated string after text decl not faulted", NULL,
33654543ef51SXin LI        XML_ERROR_UNCLOSED_TOKEN},
33664543ef51SXin LI       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
33674543ef51SXin LI        "Partial UTF-8 character after text decl not faulted", NULL,
33684543ef51SXin LI        XML_ERROR_PARTIAL_CHAR},
33694543ef51SXin LI       {"%e1;", "Recursive parameter entity not faulted", NULL,
33704543ef51SXin LI        XML_ERROR_RECURSIVE_ENTITY_REF},
33714543ef51SXin LI       {NULL, NULL, NULL, XML_ERROR_NONE}};
33724543ef51SXin LI   int i;
33734543ef51SXin LI 
33744543ef51SXin LI   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
33754543ef51SXin LI     set_subtest("%s", data_004_2[i].parse_text);
33764543ef51SXin LI     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
33774543ef51SXin LI     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
33784543ef51SXin LI     XML_SetUserData(g_parser, &data_004_2[i]);
33794543ef51SXin LI     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
33804543ef51SXin LI         == XML_STATUS_ERROR)
33814543ef51SXin LI       xml_failure(g_parser);
33824543ef51SXin LI     XML_ParserReset(g_parser, NULL);
33834543ef51SXin LI   }
33844543ef51SXin LI }
33854543ef51SXin LI END_TEST
33864543ef51SXin LI 
33874543ef51SXin LI /* Test the recursive parse interacts with a not standalone handler */
33884543ef51SXin LI START_TEST(test_ext_entity_not_standalone) {
33894543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
33904543ef51SXin LI                      "<doc></doc>";
33914543ef51SXin LI 
33924543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
33934543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
33944543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
33954543ef51SXin LI                  "Standalone rejection not caught");
33964543ef51SXin LI }
33974543ef51SXin LI END_TEST
33984543ef51SXin LI 
33994543ef51SXin LI START_TEST(test_ext_entity_value_abort) {
34004543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
34014543ef51SXin LI                      "<doc></doc>\n";
34024543ef51SXin LI 
34034543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
34044543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
34054543ef51SXin LI   g_resumable = XML_FALSE;
34064543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
34074543ef51SXin LI       == XML_STATUS_ERROR)
34084543ef51SXin LI     xml_failure(g_parser);
34094543ef51SXin LI }
34104543ef51SXin LI END_TEST
34114543ef51SXin LI 
34124543ef51SXin LI START_TEST(test_bad_public_doctype) {
34134543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
34144543ef51SXin LI                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
34154543ef51SXin LI                      "<doc></doc>";
34164543ef51SXin LI 
34174543ef51SXin LI   /* Setting a handler provokes a particular code path */
34184543ef51SXin LI   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
34194543ef51SXin LI                             dummy_end_doctype_handler);
34204543ef51SXin LI   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
34214543ef51SXin LI }
34224543ef51SXin LI END_TEST
34234543ef51SXin LI 
34244543ef51SXin LI /* Test based on ibm/valid/P32/ibm32v04.xml */
34254543ef51SXin LI START_TEST(test_attribute_enum_value) {
34264543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='no'?>\n"
34274543ef51SXin LI                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
34284543ef51SXin LI                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
34294543ef51SXin LI   ExtTest dtd_data
34304543ef51SXin LI       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
34314543ef51SXin LI          "<!ELEMENT a EMPTY>\n"
34324543ef51SXin LI          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
34334543ef51SXin LI          NULL, NULL};
34344543ef51SXin LI   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
34354543ef51SXin LI 
34364543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
34374543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
34384543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
34394543ef51SXin LI   /* An attribute list handler provokes a different code path */
34404543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
34414543ef51SXin LI   run_ext_character_check(text, &dtd_data, expected);
34424543ef51SXin LI }
34434543ef51SXin LI END_TEST
34444543ef51SXin LI 
34454543ef51SXin LI /* Slightly bizarrely, the library seems to silently ignore entity
34464543ef51SXin LI  * definitions for predefined entities, even when they are wrong.  The
34474543ef51SXin LI  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
34484543ef51SXin LI  * to happen, so this is currently treated as acceptable.
34494543ef51SXin LI  */
34504543ef51SXin LI START_TEST(test_predefined_entity_redefinition) {
34514543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
34524543ef51SXin LI                      "<!ENTITY apos 'foo'>\n"
34534543ef51SXin LI                      "]>\n"
34544543ef51SXin LI                      "<doc>&apos;</doc>";
34554543ef51SXin LI   run_character_check(text, XCS("'"));
34564543ef51SXin LI }
34574543ef51SXin LI END_TEST
34584543ef51SXin LI 
34594543ef51SXin LI /* Test that the parser stops processing the DTD after an unresolved
34604543ef51SXin LI  * parameter entity is encountered.
34614543ef51SXin LI  */
34624543ef51SXin LI START_TEST(test_dtd_stop_processing) {
34634543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
34644543ef51SXin LI                      "%foo;\n"
34654543ef51SXin LI                      "<!ENTITY bar 'bas'>\n"
34664543ef51SXin LI                      "]><doc/>";
34674543ef51SXin LI 
34684543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
34694543ef51SXin LI   init_dummy_handlers();
34704543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
34714543ef51SXin LI       == XML_STATUS_ERROR)
34724543ef51SXin LI     xml_failure(g_parser);
34734543ef51SXin LI   if (get_dummy_handler_flags() != 0)
34744543ef51SXin LI     fail("DTD processing still going after undefined PE");
34754543ef51SXin LI }
34764543ef51SXin LI END_TEST
34774543ef51SXin LI 
34784543ef51SXin LI /* Test public notations with no system ID */
34794543ef51SXin LI START_TEST(test_public_notation_no_sysid) {
34804543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
34814543ef51SXin LI                      "<!NOTATION note PUBLIC 'foo'>\n"
34824543ef51SXin LI                      "<!ELEMENT doc EMPTY>\n"
34834543ef51SXin LI                      "]>\n<doc/>";
34844543ef51SXin LI 
34854543ef51SXin LI   init_dummy_handlers();
34864543ef51SXin LI   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
34874543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
34884543ef51SXin LI       == XML_STATUS_ERROR)
34894543ef51SXin LI     xml_failure(g_parser);
34904543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
34914543ef51SXin LI     fail("Notation declaration handler not called");
34924543ef51SXin LI }
34934543ef51SXin LI END_TEST
34944543ef51SXin LI 
34954543ef51SXin LI START_TEST(test_nested_groups) {
34964543ef51SXin LI   const char *text
34974543ef51SXin LI       = "<!DOCTYPE doc [\n"
34984543ef51SXin LI         "<!ELEMENT doc "
34994543ef51SXin LI         /* Sixteen elements per line */
35004543ef51SXin LI         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
35014543ef51SXin LI         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
35024543ef51SXin LI         "))))))))))))))))))))))))))))))))>\n"
35034543ef51SXin LI         "<!ELEMENT e EMPTY>"
35044543ef51SXin LI         "]>\n"
35054543ef51SXin LI         "<doc><e/></doc>";
35064543ef51SXin LI   CharData storage;
35074543ef51SXin LI 
35084543ef51SXin LI   CharData_Init(&storage);
35094543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
35104543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
35114543ef51SXin LI   XML_SetUserData(g_parser, &storage);
35124543ef51SXin LI   init_dummy_handlers();
35134543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
35144543ef51SXin LI       == XML_STATUS_ERROR)
35154543ef51SXin LI     xml_failure(g_parser);
35164543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("doce"));
35174543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
35184543ef51SXin LI     fail("Element handler not fired");
35194543ef51SXin LI }
35204543ef51SXin LI END_TEST
35214543ef51SXin LI 
35224543ef51SXin LI START_TEST(test_group_choice) {
35234543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
35244543ef51SXin LI                      "<!ELEMENT doc (a|b|c)+>\n"
35254543ef51SXin LI                      "<!ELEMENT a EMPTY>\n"
35264543ef51SXin LI                      "<!ELEMENT b (#PCDATA)>\n"
35274543ef51SXin LI                      "<!ELEMENT c ANY>\n"
35284543ef51SXin LI                      "]>\n"
35294543ef51SXin LI                      "<doc>\n"
35304543ef51SXin LI                      "<a/>\n"
35314543ef51SXin LI                      "<b attr='foo'>This is a foo</b>\n"
35324543ef51SXin LI                      "<c></c>\n"
35334543ef51SXin LI                      "</doc>\n";
35344543ef51SXin LI 
35354543ef51SXin LI   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
35364543ef51SXin LI   init_dummy_handlers();
35374543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
35384543ef51SXin LI       == XML_STATUS_ERROR)
35394543ef51SXin LI     xml_failure(g_parser);
35404543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
35414543ef51SXin LI     fail("Element handler flag not raised");
35424543ef51SXin LI }
35434543ef51SXin LI END_TEST
35444543ef51SXin LI 
35454543ef51SXin LI START_TEST(test_standalone_parameter_entity) {
35464543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
35474543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
35484543ef51SXin LI                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
35494543ef51SXin LI                      "%entity;\n"
35504543ef51SXin LI                      "]>\n"
35514543ef51SXin LI                      "<doc></doc>";
35524543ef51SXin LI   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
35534543ef51SXin LI 
35544543ef51SXin LI   XML_SetUserData(g_parser, dtd_data);
35554543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
35564543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
35574543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
35584543ef51SXin LI       == XML_STATUS_ERROR)
35594543ef51SXin LI     xml_failure(g_parser);
35604543ef51SXin LI }
35614543ef51SXin LI END_TEST
35624543ef51SXin LI 
35634543ef51SXin LI /* Test skipping of parameter entity in an external DTD */
35644543ef51SXin LI /* Derived from ibm/invalid/P69/ibm69i01.xml */
35654543ef51SXin LI START_TEST(test_skipped_parameter_entity) {
35664543ef51SXin LI   const char *text = "<?xml version='1.0'?>\n"
35674543ef51SXin LI                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
35684543ef51SXin LI                      "<!ELEMENT root (#PCDATA|a)* >\n"
35694543ef51SXin LI                      "]>\n"
35704543ef51SXin LI                      "<root></root>";
35714543ef51SXin LI   ExtTest dtd_data = {"%pe2;", NULL, NULL};
35724543ef51SXin LI 
35734543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
35744543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
35754543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
35764543ef51SXin LI   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
35774543ef51SXin LI   init_dummy_handlers();
35784543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
35794543ef51SXin LI       == XML_STATUS_ERROR)
35804543ef51SXin LI     xml_failure(g_parser);
35814543ef51SXin LI   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
35824543ef51SXin LI     fail("Skip handler not executed");
35834543ef51SXin LI }
35844543ef51SXin LI END_TEST
35854543ef51SXin LI 
35864543ef51SXin LI /* Test recursive parameter entity definition rejected in external DTD */
35874543ef51SXin LI START_TEST(test_recursive_external_parameter_entity) {
35884543ef51SXin LI   const char *text = "<?xml version='1.0'?>\n"
35894543ef51SXin LI                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
35904543ef51SXin LI                      "<!ELEMENT root (#PCDATA|a)* >\n"
35914543ef51SXin LI                      "]>\n"
35924543ef51SXin LI                      "<root></root>";
35934543ef51SXin LI   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
35944543ef51SXin LI                         "Recursive external parameter entity not faulted", NULL,
35954543ef51SXin LI                         XML_ERROR_RECURSIVE_ENTITY_REF};
35964543ef51SXin LI 
35974543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
35984543ef51SXin LI   XML_SetUserData(g_parser, &dtd_data);
35994543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
36004543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
36014543ef51SXin LI                  "Recursive external parameter not spotted");
36024543ef51SXin LI }
36034543ef51SXin LI END_TEST
36044543ef51SXin LI 
36054543ef51SXin LI /* Test undefined parameter entity in external entity handler */
36064543ef51SXin LI START_TEST(test_undefined_ext_entity_in_external_dtd) {
36074543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
36084543ef51SXin LI                      "<doc></doc>\n";
36094543ef51SXin LI 
36104543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
36114543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
36124543ef51SXin LI   XML_SetUserData(g_parser, NULL);
36134543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36144543ef51SXin LI       == XML_STATUS_ERROR)
36154543ef51SXin LI     xml_failure(g_parser);
36164543ef51SXin LI 
36174543ef51SXin LI   /* Now repeat without the external entity ref handler invoking
36184543ef51SXin LI    * another copy of itself.
36194543ef51SXin LI    */
36204543ef51SXin LI   XML_ParserReset(g_parser, NULL);
36214543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
36224543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
36234543ef51SXin LI   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
36244543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36254543ef51SXin LI       == XML_STATUS_ERROR)
36264543ef51SXin LI     xml_failure(g_parser);
36274543ef51SXin LI }
36284543ef51SXin LI END_TEST
36294543ef51SXin LI 
36304543ef51SXin LI /* Test suspending the parse on receiving an XML declaration works */
36314543ef51SXin LI START_TEST(test_suspend_xdecl) {
36324543ef51SXin LI   const char *text = long_character_data_text;
36334543ef51SXin LI 
36344543ef51SXin LI   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
36354543ef51SXin LI   XML_SetUserData(g_parser, g_parser);
36364543ef51SXin LI   g_resumable = XML_TRUE;
36374543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36384543ef51SXin LI       != XML_STATUS_SUSPENDED)
36394543ef51SXin LI     xml_failure(g_parser);
36404543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
36414543ef51SXin LI     xml_failure(g_parser);
36424543ef51SXin LI   /* Attempt to start a new parse while suspended */
36434543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36444543ef51SXin LI       != XML_STATUS_ERROR)
36454543ef51SXin LI     fail("Attempt to parse while suspended not faulted");
36464543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
36474543ef51SXin LI     fail("Suspended parse not faulted with correct error");
36484543ef51SXin LI }
36494543ef51SXin LI END_TEST
36504543ef51SXin LI 
36514543ef51SXin LI /* Test aborting the parse in an epilog works */
36524543ef51SXin LI START_TEST(test_abort_epilog) {
36534543ef51SXin LI   const char *text = "<doc></doc>\n\r\n";
36544543ef51SXin LI   XML_Char trigger_char = XCS('\r');
36554543ef51SXin LI 
36564543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
36574543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
36584543ef51SXin LI   g_resumable = XML_FALSE;
36594543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36604543ef51SXin LI       != XML_STATUS_ERROR)
36614543ef51SXin LI     fail("Abort not triggered");
36624543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
36634543ef51SXin LI     xml_failure(g_parser);
36644543ef51SXin LI }
36654543ef51SXin LI END_TEST
36664543ef51SXin LI 
36674543ef51SXin LI /* Test a different code path for abort in the epilog */
36684543ef51SXin LI START_TEST(test_abort_epilog_2) {
36694543ef51SXin LI   const char *text = "<doc></doc>\n";
36704543ef51SXin LI   XML_Char trigger_char = XCS('\n');
36714543ef51SXin LI 
36724543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
36734543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
36744543ef51SXin LI   g_resumable = XML_FALSE;
36754543ef51SXin LI   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
36764543ef51SXin LI }
36774543ef51SXin LI END_TEST
36784543ef51SXin LI 
36794543ef51SXin LI /* Test suspension from the epilog */
36804543ef51SXin LI START_TEST(test_suspend_epilog) {
36814543ef51SXin LI   const char *text = "<doc></doc>\n";
36824543ef51SXin LI   XML_Char trigger_char = XCS('\n');
36834543ef51SXin LI 
36844543ef51SXin LI   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
36854543ef51SXin LI   XML_SetUserData(g_parser, &trigger_char);
36864543ef51SXin LI   g_resumable = XML_TRUE;
36874543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
36884543ef51SXin LI       != XML_STATUS_SUSPENDED)
36894543ef51SXin LI     xml_failure(g_parser);
36904543ef51SXin LI }
36914543ef51SXin LI END_TEST
36924543ef51SXin LI 
36934543ef51SXin LI START_TEST(test_suspend_in_sole_empty_tag) {
36944543ef51SXin LI   const char *text = "<doc/>";
36954543ef51SXin LI   enum XML_Status rc;
36964543ef51SXin LI 
36974543ef51SXin LI   XML_SetEndElementHandler(g_parser, suspending_end_handler);
36984543ef51SXin LI   XML_SetUserData(g_parser, g_parser);
36994543ef51SXin LI   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
37004543ef51SXin LI   if (rc == XML_STATUS_ERROR)
37014543ef51SXin LI     xml_failure(g_parser);
37024543ef51SXin LI   else if (rc != XML_STATUS_SUSPENDED)
37034543ef51SXin LI     fail("Suspend not triggered");
37044543ef51SXin LI   rc = XML_ResumeParser(g_parser);
37054543ef51SXin LI   if (rc == XML_STATUS_ERROR)
37064543ef51SXin LI     xml_failure(g_parser);
37074543ef51SXin LI   else if (rc != XML_STATUS_OK)
37084543ef51SXin LI     fail("Resume failed");
37094543ef51SXin LI }
37104543ef51SXin LI END_TEST
37114543ef51SXin LI 
37124543ef51SXin LI START_TEST(test_unfinished_epilog) {
37134543ef51SXin LI   const char *text = "<doc></doc><";
37144543ef51SXin LI 
37154543ef51SXin LI   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
37164543ef51SXin LI                  "Incomplete epilog entry not faulted");
37174543ef51SXin LI }
37184543ef51SXin LI END_TEST
37194543ef51SXin LI 
37204543ef51SXin LI START_TEST(test_partial_char_in_epilog) {
37214543ef51SXin LI   const char *text = "<doc></doc>\xe2\x82";
37224543ef51SXin LI 
37234543ef51SXin LI   /* First check that no fault is raised if the parse is not finished */
37244543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
37254543ef51SXin LI       == XML_STATUS_ERROR)
37264543ef51SXin LI     xml_failure(g_parser);
37274543ef51SXin LI   /* Now check that it is faulted once we finish */
37284543ef51SXin LI   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
37294543ef51SXin LI     fail("Partial character in epilog not faulted");
37304543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
37314543ef51SXin LI     xml_failure(g_parser);
37324543ef51SXin LI }
37334543ef51SXin LI END_TEST
37344543ef51SXin LI 
37354543ef51SXin LI /* Test resuming a parse suspended in entity substitution */
37364543ef51SXin LI START_TEST(test_suspend_resume_internal_entity) {
37374543ef51SXin LI   const char *text
37384543ef51SXin LI       = "<!DOCTYPE doc [\n"
37394543ef51SXin LI         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
37404543ef51SXin LI         "]>\n"
37414543ef51SXin LI         "<doc>&foo;</doc>\n";
37424543ef51SXin LI   const XML_Char *expected1 = XCS("Hi");
37434543ef51SXin LI   const XML_Char *expected2 = XCS("HiHo");
37444543ef51SXin LI   CharData storage;
37454543ef51SXin LI 
37464543ef51SXin LI   CharData_Init(&storage);
37474543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
37484543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
37494543ef51SXin LI   XML_SetUserData(g_parser, &storage);
37504543ef51SXin LI   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
37514543ef51SXin LI   // we won't know exactly how much input we actually managed to give Expat.
37524543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
37534543ef51SXin LI       != XML_STATUS_SUSPENDED)
37544543ef51SXin LI     xml_failure(g_parser);
37554543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS(""));
37564543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
37574543ef51SXin LI     xml_failure(g_parser);
37584543ef51SXin LI   CharData_CheckXMLChars(&storage, expected1);
37594543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
37604543ef51SXin LI     xml_failure(g_parser);
37614543ef51SXin LI   CharData_CheckXMLChars(&storage, expected2);
37624543ef51SXin LI }
37634543ef51SXin LI END_TEST
37644543ef51SXin LI 
37654543ef51SXin LI START_TEST(test_suspend_resume_internal_entity_issue_629) {
37664543ef51SXin LI   const char *const text
37674543ef51SXin LI       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
37684543ef51SXin LI         "<"
37694543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37704543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37714543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37724543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37734543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37744543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37754543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37764543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37774543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37784543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37794543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37804543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37814543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37824543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37834543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37844543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37854543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37864543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37874543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37884543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37894543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37904543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37914543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37924543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37934543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37944543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37954543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37964543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37974543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37984543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
37994543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38004543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38014543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38024543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38034543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38044543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38054543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38064543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38074543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38084543ef51SXin LI         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
38094543ef51SXin LI         "/>"
38104543ef51SXin LI         "</b></a>";
38114543ef51SXin LI   const size_t firstChunkSizeBytes = 54;
38124543ef51SXin LI 
38134543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
38144543ef51SXin LI   XML_SetUserData(parser, parser);
38154543ef51SXin LI   XML_SetCommentHandler(parser, suspending_comment_handler);
38164543ef51SXin LI 
38174543ef51SXin LI   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
38184543ef51SXin LI       != XML_STATUS_SUSPENDED)
38194543ef51SXin LI     xml_failure(parser);
38204543ef51SXin LI   if (XML_ResumeParser(parser) != XML_STATUS_OK)
38214543ef51SXin LI     xml_failure(parser);
38224543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
38234543ef51SXin LI                               (int)(strlen(text) - firstChunkSizeBytes),
38244543ef51SXin LI                               XML_TRUE)
38254543ef51SXin LI       != XML_STATUS_OK)
38264543ef51SXin LI     xml_failure(parser);
38274543ef51SXin LI   XML_ParserFree(parser);
38284543ef51SXin LI }
38294543ef51SXin LI END_TEST
38304543ef51SXin LI 
38314543ef51SXin LI /* Test syntax error is caught at parse resumption */
38324543ef51SXin LI START_TEST(test_resume_entity_with_syntax_error) {
38334543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
38344543ef51SXin LI                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
38354543ef51SXin LI                      "]>\n"
38364543ef51SXin LI                      "<doc>&foo;</doc>\n";
38374543ef51SXin LI 
38384543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
38394543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
38404543ef51SXin LI       != XML_STATUS_SUSPENDED)
38414543ef51SXin LI     xml_failure(g_parser);
38424543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
38434543ef51SXin LI     fail("Syntax error in entity not faulted");
38444543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
38454543ef51SXin LI     xml_failure(g_parser);
38464543ef51SXin LI }
38474543ef51SXin LI END_TEST
38484543ef51SXin LI 
38494543ef51SXin LI /* Test suspending and resuming in a parameter entity substitution */
38504543ef51SXin LI START_TEST(test_suspend_resume_parameter_entity) {
38514543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
38524543ef51SXin LI                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
38534543ef51SXin LI                      "%foo;\n"
38544543ef51SXin LI                      "]>\n"
38554543ef51SXin LI                      "<doc>Hello, world</doc>";
38564543ef51SXin LI   const XML_Char *expected = XCS("Hello, world");
38574543ef51SXin LI   CharData storage;
38584543ef51SXin LI 
38594543ef51SXin LI   CharData_Init(&storage);
38604543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
38614543ef51SXin LI   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
38624543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
38634543ef51SXin LI   XML_SetUserData(g_parser, &storage);
38644543ef51SXin LI   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
38654543ef51SXin LI       != XML_STATUS_SUSPENDED)
38664543ef51SXin LI     xml_failure(g_parser);
38674543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS(""));
38684543ef51SXin LI   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
38694543ef51SXin LI     xml_failure(g_parser);
38704543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
38714543ef51SXin LI }
38724543ef51SXin LI END_TEST
38734543ef51SXin LI 
38744543ef51SXin LI /* Test attempting to use parser after an error is faulted */
38754543ef51SXin LI START_TEST(test_restart_on_error) {
38764543ef51SXin LI   const char *text = "<$doc><doc></doc>";
38774543ef51SXin LI 
38784543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
38794543ef51SXin LI       != XML_STATUS_ERROR)
38804543ef51SXin LI     fail("Invalid tag name not faulted");
38814543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
38824543ef51SXin LI     xml_failure(g_parser);
38834543ef51SXin LI   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
38844543ef51SXin LI     fail("Restarting invalid parse not faulted");
38854543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
38864543ef51SXin LI     xml_failure(g_parser);
38874543ef51SXin LI }
38884543ef51SXin LI END_TEST
38894543ef51SXin LI 
38904543ef51SXin LI /* Test that angle brackets in an attribute default value are faulted */
38914543ef51SXin LI START_TEST(test_reject_lt_in_attribute_value) {
38924543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
38934543ef51SXin LI                      "<doc></doc>";
38944543ef51SXin LI 
38954543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
38964543ef51SXin LI                  "Bad attribute default not faulted");
38974543ef51SXin LI }
38984543ef51SXin LI END_TEST
38994543ef51SXin LI 
39004543ef51SXin LI START_TEST(test_reject_unfinished_param_in_att_value) {
39014543ef51SXin LI   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
39024543ef51SXin LI                      "<doc></doc>";
39034543ef51SXin LI 
39044543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
39054543ef51SXin LI                  "Bad attribute default not faulted");
39064543ef51SXin LI }
39074543ef51SXin LI END_TEST
39084543ef51SXin LI 
39094543ef51SXin LI START_TEST(test_trailing_cr_in_att_value) {
39104543ef51SXin LI   const char *text = "<doc a='value\r'/>";
39114543ef51SXin LI 
39124543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
39134543ef51SXin LI       == XML_STATUS_ERROR)
39144543ef51SXin LI     xml_failure(g_parser);
39154543ef51SXin LI }
39164543ef51SXin LI END_TEST
39174543ef51SXin LI 
39184543ef51SXin LI /* Try parsing a general entity within a parameter entity in a
39194543ef51SXin LI  * standalone internal DTD.  Covers a corner case in the parser.
39204543ef51SXin LI  */
39214543ef51SXin LI START_TEST(test_standalone_internal_entity) {
39224543ef51SXin LI   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
39234543ef51SXin LI                      "<!DOCTYPE doc [\n"
39244543ef51SXin LI                      "  <!ELEMENT doc (#PCDATA)>\n"
39254543ef51SXin LI                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
39264543ef51SXin LI                      "  <!ENTITY ge 'AttDefaultValue'>\n"
39274543ef51SXin LI                      "  %pe;\n"
39284543ef51SXin LI                      "]>\n"
39294543ef51SXin LI                      "<doc att2='any'/>";
39304543ef51SXin LI 
39314543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
39324543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
39334543ef51SXin LI       == XML_STATUS_ERROR)
39344543ef51SXin LI     xml_failure(g_parser);
39354543ef51SXin LI }
39364543ef51SXin LI END_TEST
39374543ef51SXin LI 
39384543ef51SXin LI /* Test that a reference to an unknown external entity is skipped */
39394543ef51SXin LI START_TEST(test_skipped_external_entity) {
39404543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
39414543ef51SXin LI                      "<doc></doc>\n";
39424543ef51SXin LI   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
39434543ef51SXin LI                        "<!ENTITY % e2 '%e1;'>\n",
39444543ef51SXin LI                        NULL, NULL};
39454543ef51SXin LI 
39464543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
39474543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
39484543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
39494543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
39504543ef51SXin LI       == XML_STATUS_ERROR)
39514543ef51SXin LI     xml_failure(g_parser);
39524543ef51SXin LI }
39534543ef51SXin LI END_TEST
39544543ef51SXin LI 
39554543ef51SXin LI /* Test a different form of unknown external entity */
39564543ef51SXin LI START_TEST(test_skipped_null_loaded_ext_entity) {
39574543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
39584543ef51SXin LI                      "<doc />";
39594543ef51SXin LI   ExtHdlrData test_data
39604543ef51SXin LI       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
39614543ef51SXin LI          "<!ENTITY % pe2 '%pe1;'>\n"
39624543ef51SXin LI          "%pe2;\n",
39634543ef51SXin LI          external_entity_null_loader};
39644543ef51SXin LI 
39654543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
39664543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
39674543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
39684543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
39694543ef51SXin LI       == XML_STATUS_ERROR)
39704543ef51SXin LI     xml_failure(g_parser);
39714543ef51SXin LI }
39724543ef51SXin LI END_TEST
39734543ef51SXin LI 
39744543ef51SXin LI START_TEST(test_skipped_unloaded_ext_entity) {
39754543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
39764543ef51SXin LI                      "<doc />";
39774543ef51SXin LI   ExtHdlrData test_data
39784543ef51SXin LI       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
39794543ef51SXin LI          "<!ENTITY % pe2 '%pe1;'>\n"
39804543ef51SXin LI          "%pe2;\n",
39814543ef51SXin LI          NULL};
39824543ef51SXin LI 
39834543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
39844543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
39854543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
39864543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
39874543ef51SXin LI       == XML_STATUS_ERROR)
39884543ef51SXin LI     xml_failure(g_parser);
39894543ef51SXin LI }
39904543ef51SXin LI END_TEST
39914543ef51SXin LI 
39924543ef51SXin LI /* Test that a parameter entity value ending with a carriage return
39934543ef51SXin LI  * has it translated internally into a newline.
39944543ef51SXin LI  */
39954543ef51SXin LI START_TEST(test_param_entity_with_trailing_cr) {
39964543ef51SXin LI #define PARAM_ENTITY_NAME "pe"
39974543ef51SXin LI #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
39984543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
39994543ef51SXin LI                      "<doc/>";
40004543ef51SXin LI   ExtTest test_data
40014543ef51SXin LI       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
40024543ef51SXin LI          "%" PARAM_ENTITY_NAME ";\n",
40034543ef51SXin LI          NULL, NULL};
40044543ef51SXin LI 
40054543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
40064543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
40074543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
40084543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
40094543ef51SXin LI   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
40104543ef51SXin LI                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
40114543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
40124543ef51SXin LI       == XML_STATUS_ERROR)
40134543ef51SXin LI     xml_failure(g_parser);
40144543ef51SXin LI   int entity_match_flag = get_param_entity_match_flag();
40154543ef51SXin LI   if (entity_match_flag == ENTITY_MATCH_FAIL)
40164543ef51SXin LI     fail("Parameter entity CR->NEWLINE conversion failed");
40174543ef51SXin LI   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
40184543ef51SXin LI     fail("Parameter entity not parsed");
40194543ef51SXin LI }
40204543ef51SXin LI #undef PARAM_ENTITY_NAME
40214543ef51SXin LI #undef PARAM_ENTITY_CORE_VALUE
40224543ef51SXin LI END_TEST
40234543ef51SXin LI 
40244543ef51SXin LI START_TEST(test_invalid_character_entity) {
40254543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
40264543ef51SXin LI                      "  <!ENTITY entity '&#x110000;'>\n"
40274543ef51SXin LI                      "]>\n"
40284543ef51SXin LI                      "<doc>&entity;</doc>";
40294543ef51SXin LI 
40304543ef51SXin LI   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
40314543ef51SXin LI                  "Out of range character reference not faulted");
40324543ef51SXin LI }
40334543ef51SXin LI END_TEST
40344543ef51SXin LI 
40354543ef51SXin LI START_TEST(test_invalid_character_entity_2) {
40364543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
40374543ef51SXin LI                      "  <!ENTITY entity '&#xg0;'>\n"
40384543ef51SXin LI                      "]>\n"
40394543ef51SXin LI                      "<doc>&entity;</doc>";
40404543ef51SXin LI 
40414543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
40424543ef51SXin LI                  "Out of range character reference not faulted");
40434543ef51SXin LI }
40444543ef51SXin LI END_TEST
40454543ef51SXin LI 
40464543ef51SXin LI START_TEST(test_invalid_character_entity_3) {
40474543ef51SXin LI   const char text[] =
40484543ef51SXin LI       /* <!DOCTYPE doc [\n */
40494543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
40504543ef51SXin LI       /* U+0E04 = KHO KHWAI
40514543ef51SXin LI        * U+0E08 = CHO CHAN */
40524543ef51SXin LI       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
40534543ef51SXin LI       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
40544543ef51SXin LI       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
40554543ef51SXin LI       /* ]>\n */
40564543ef51SXin LI       "\0]\0>\0\n"
40574543ef51SXin LI       /* <doc>&entity;</doc> */
40584543ef51SXin LI       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
40594543ef51SXin LI 
40604543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
40614543ef51SXin LI       != XML_STATUS_ERROR)
40624543ef51SXin LI     fail("Invalid start of entity name not faulted");
40634543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
40644543ef51SXin LI     xml_failure(g_parser);
40654543ef51SXin LI }
40664543ef51SXin LI END_TEST
40674543ef51SXin LI 
40684543ef51SXin LI START_TEST(test_invalid_character_entity_4) {
40694543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
40704543ef51SXin LI                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
40714543ef51SXin LI                      "]>\n"
40724543ef51SXin LI                      "<doc>&entity;</doc>";
40734543ef51SXin LI 
40744543ef51SXin LI   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
40754543ef51SXin LI                  "Out of range character reference not faulted");
40764543ef51SXin LI }
40774543ef51SXin LI END_TEST
40784543ef51SXin LI 
40794543ef51SXin LI /* Test that processing instructions are picked up by a default handler */
40804543ef51SXin LI START_TEST(test_pi_handled_in_default) {
40814543ef51SXin LI   const char *text = "<?test processing instruction?>\n<doc/>";
40824543ef51SXin LI   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
40834543ef51SXin LI   CharData storage;
40844543ef51SXin LI 
40854543ef51SXin LI   CharData_Init(&storage);
40864543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
40874543ef51SXin LI   XML_SetUserData(g_parser, &storage);
40884543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
40894543ef51SXin LI       == XML_STATUS_ERROR)
40904543ef51SXin LI     xml_failure(g_parser);
40914543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
40924543ef51SXin LI }
40934543ef51SXin LI END_TEST
40944543ef51SXin LI 
40954543ef51SXin LI /* Test that comments are picked up by a default handler */
40964543ef51SXin LI START_TEST(test_comment_handled_in_default) {
40974543ef51SXin LI   const char *text = "<!-- This is a comment -->\n<doc/>";
40984543ef51SXin LI   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
40994543ef51SXin LI   CharData storage;
41004543ef51SXin LI 
41014543ef51SXin LI   CharData_Init(&storage);
41024543ef51SXin LI   XML_SetDefaultHandler(g_parser, accumulate_characters);
41034543ef51SXin LI   XML_SetUserData(g_parser, &storage);
41044543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
41054543ef51SXin LI       == XML_STATUS_ERROR)
41064543ef51SXin LI     xml_failure(g_parser);
41074543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
41084543ef51SXin LI }
41094543ef51SXin LI END_TEST
41104543ef51SXin LI 
41114543ef51SXin LI /* Test PIs that look almost but not quite like XML declarations */
41124543ef51SXin LI START_TEST(test_pi_yml) {
41134543ef51SXin LI   const char *text = "<?yml something like data?><doc/>";
41144543ef51SXin LI   const XML_Char *expected = XCS("yml: something like data\n");
41154543ef51SXin LI   CharData storage;
41164543ef51SXin LI 
41174543ef51SXin LI   CharData_Init(&storage);
41184543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
41194543ef51SXin LI   XML_SetUserData(g_parser, &storage);
41204543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
41214543ef51SXin LI       == XML_STATUS_ERROR)
41224543ef51SXin LI     xml_failure(g_parser);
41234543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
41244543ef51SXin LI }
41254543ef51SXin LI END_TEST
41264543ef51SXin LI 
41274543ef51SXin LI START_TEST(test_pi_xnl) {
41284543ef51SXin LI   const char *text = "<?xnl nothing like data?><doc/>";
41294543ef51SXin LI   const XML_Char *expected = XCS("xnl: nothing like data\n");
41304543ef51SXin LI   CharData storage;
41314543ef51SXin LI 
41324543ef51SXin LI   CharData_Init(&storage);
41334543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
41344543ef51SXin LI   XML_SetUserData(g_parser, &storage);
41354543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
41364543ef51SXin LI       == XML_STATUS_ERROR)
41374543ef51SXin LI     xml_failure(g_parser);
41384543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
41394543ef51SXin LI }
41404543ef51SXin LI END_TEST
41414543ef51SXin LI 
41424543ef51SXin LI START_TEST(test_pi_xmm) {
41434543ef51SXin LI   const char *text = "<?xmm everything like data?><doc/>";
41444543ef51SXin LI   const XML_Char *expected = XCS("xmm: everything like data\n");
41454543ef51SXin LI   CharData storage;
41464543ef51SXin LI 
41474543ef51SXin LI   CharData_Init(&storage);
41484543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
41494543ef51SXin LI   XML_SetUserData(g_parser, &storage);
41504543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
41514543ef51SXin LI       == XML_STATUS_ERROR)
41524543ef51SXin LI     xml_failure(g_parser);
41534543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
41544543ef51SXin LI }
41554543ef51SXin LI END_TEST
41564543ef51SXin LI 
41574543ef51SXin LI START_TEST(test_utf16_pi) {
41584543ef51SXin LI   const char text[] =
41594543ef51SXin LI       /* <?{KHO KHWAI}{CHO CHAN}?>
41604543ef51SXin LI        * where {KHO KHWAI} = U+0E04
41614543ef51SXin LI        * and   {CHO CHAN}  = U+0E08
41624543ef51SXin LI        */
41634543ef51SXin LI       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
41644543ef51SXin LI       /* <q/> */
41654543ef51SXin LI       "<\0q\0/\0>\0";
41664543ef51SXin LI #ifdef XML_UNICODE
41674543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
41684543ef51SXin LI #else
41694543ef51SXin LI   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
41704543ef51SXin LI #endif
41714543ef51SXin LI   CharData storage;
41724543ef51SXin LI 
41734543ef51SXin LI   CharData_Init(&storage);
41744543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
41754543ef51SXin LI   XML_SetUserData(g_parser, &storage);
41764543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
41774543ef51SXin LI       == XML_STATUS_ERROR)
41784543ef51SXin LI     xml_failure(g_parser);
41794543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
41804543ef51SXin LI }
41814543ef51SXin LI END_TEST
41824543ef51SXin LI 
41834543ef51SXin LI START_TEST(test_utf16_be_pi) {
41844543ef51SXin LI   const char text[] =
41854543ef51SXin LI       /* <?{KHO KHWAI}{CHO CHAN}?>
41864543ef51SXin LI        * where {KHO KHWAI} = U+0E04
41874543ef51SXin LI        * and   {CHO CHAN}  = U+0E08
41884543ef51SXin LI        */
41894543ef51SXin LI       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
41904543ef51SXin LI       /* <q/> */
41914543ef51SXin LI       "\0<\0q\0/\0>";
41924543ef51SXin LI #ifdef XML_UNICODE
41934543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
41944543ef51SXin LI #else
41954543ef51SXin LI   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
41964543ef51SXin LI #endif
41974543ef51SXin LI   CharData storage;
41984543ef51SXin LI 
41994543ef51SXin LI   CharData_Init(&storage);
42004543ef51SXin LI   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
42014543ef51SXin LI   XML_SetUserData(g_parser, &storage);
42024543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
42034543ef51SXin LI       == XML_STATUS_ERROR)
42044543ef51SXin LI     xml_failure(g_parser);
42054543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
42064543ef51SXin LI }
42074543ef51SXin LI END_TEST
42084543ef51SXin LI 
42094543ef51SXin LI /* Test that comments can be picked up and translated */
42104543ef51SXin LI START_TEST(test_utf16_be_comment) {
42114543ef51SXin LI   const char text[] =
42124543ef51SXin LI       /* <!-- Comment A --> */
42134543ef51SXin LI       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
42144543ef51SXin LI       /* <doc/> */
42154543ef51SXin LI       "\0<\0d\0o\0c\0/\0>";
42164543ef51SXin LI   const XML_Char *expected = XCS(" Comment A ");
42174543ef51SXin LI   CharData storage;
42184543ef51SXin LI 
42194543ef51SXin LI   CharData_Init(&storage);
42204543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
42214543ef51SXin LI   XML_SetUserData(g_parser, &storage);
42224543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
42234543ef51SXin LI       == XML_STATUS_ERROR)
42244543ef51SXin LI     xml_failure(g_parser);
42254543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
42264543ef51SXin LI }
42274543ef51SXin LI END_TEST
42284543ef51SXin LI 
42294543ef51SXin LI START_TEST(test_utf16_le_comment) {
42304543ef51SXin LI   const char text[] =
42314543ef51SXin LI       /* <!-- Comment B --> */
42324543ef51SXin LI       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
42334543ef51SXin LI       /* <doc/> */
42344543ef51SXin LI       "<\0d\0o\0c\0/\0>\0";
42354543ef51SXin LI   const XML_Char *expected = XCS(" Comment B ");
42364543ef51SXin LI   CharData storage;
42374543ef51SXin LI 
42384543ef51SXin LI   CharData_Init(&storage);
42394543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
42404543ef51SXin LI   XML_SetUserData(g_parser, &storage);
42414543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
42424543ef51SXin LI       == XML_STATUS_ERROR)
42434543ef51SXin LI     xml_failure(g_parser);
42444543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
42454543ef51SXin LI }
42464543ef51SXin LI END_TEST
42474543ef51SXin LI 
42484543ef51SXin LI /* Test that the unknown encoding handler with map entries that expect
42494543ef51SXin LI  * conversion but no conversion function is faulted
42504543ef51SXin LI  */
42514543ef51SXin LI START_TEST(test_missing_encoding_conversion_fn) {
42524543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
42534543ef51SXin LI                      "<doc>\x81</doc>";
42544543ef51SXin LI 
42554543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
42564543ef51SXin LI   /* MiscEncodingHandler sets up an encoding with every top-bit-set
42574543ef51SXin LI    * character introducing a two-byte sequence.  For this, it
42584543ef51SXin LI    * requires a convert function.  The above function call doesn't
42594543ef51SXin LI    * pass one through, so when BadEncodingHandler actually gets
42604543ef51SXin LI    * called it should supply an invalid encoding.
42614543ef51SXin LI    */
42624543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
42634543ef51SXin LI                  "Encoding with missing convert() not faulted");
42644543ef51SXin LI }
42654543ef51SXin LI END_TEST
42664543ef51SXin LI 
42674543ef51SXin LI START_TEST(test_failing_encoding_conversion_fn) {
42684543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
42694543ef51SXin LI                      "<doc>\x81</doc>";
42704543ef51SXin LI 
42714543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
42724543ef51SXin LI   /* BadEncodingHandler sets up an encoding with every top-bit-set
42734543ef51SXin LI    * character introducing a two-byte sequence.  For this, it
42744543ef51SXin LI    * requires a convert function.  The above function call passes
42754543ef51SXin LI    * one that insists all possible sequences are invalid anyway.
42764543ef51SXin LI    */
42774543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
42784543ef51SXin LI                  "Encoding with failing convert() not faulted");
42794543ef51SXin LI }
42804543ef51SXin LI END_TEST
42814543ef51SXin LI 
42824543ef51SXin LI /* Test unknown encoding conversions */
42834543ef51SXin LI START_TEST(test_unknown_encoding_success) {
42844543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
42854543ef51SXin LI                      /* Equivalent to <eoc>Hello, world</eoc> */
42864543ef51SXin LI                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
42874543ef51SXin LI 
42884543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
42894543ef51SXin LI   run_character_check(text, XCS("Hello, world"));
42904543ef51SXin LI }
42914543ef51SXin LI END_TEST
42924543ef51SXin LI 
42934543ef51SXin LI /* Test bad name character in unknown encoding */
42944543ef51SXin LI START_TEST(test_unknown_encoding_bad_name) {
42954543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
42964543ef51SXin LI                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
42974543ef51SXin LI 
42984543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
42994543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
43004543ef51SXin LI                  "Bad name start in unknown encoding not faulted");
43014543ef51SXin LI }
43024543ef51SXin LI END_TEST
43034543ef51SXin LI 
43044543ef51SXin LI /* Test bad mid-name character in unknown encoding */
43054543ef51SXin LI START_TEST(test_unknown_encoding_bad_name_2) {
43064543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
43074543ef51SXin LI                      "<d\xffoc>Hello, world</d\xffoc>";
43084543ef51SXin LI 
43094543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43104543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
43114543ef51SXin LI                  "Bad name in unknown encoding not faulted");
43124543ef51SXin LI }
43134543ef51SXin LI END_TEST
43144543ef51SXin LI 
43154543ef51SXin LI /* Test element name that is long enough to fill the conversion buffer
43164543ef51SXin LI  * in an unknown encoding, finishing with an encoded character.
43174543ef51SXin LI  */
43184543ef51SXin LI START_TEST(test_unknown_encoding_long_name_1) {
43194543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
43204543ef51SXin LI                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
43214543ef51SXin LI                      "Hi"
43224543ef51SXin LI                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
43234543ef51SXin LI   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
43244543ef51SXin LI   CharData storage;
43254543ef51SXin LI 
43264543ef51SXin LI   CharData_Init(&storage);
43274543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43284543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
43294543ef51SXin LI   XML_SetUserData(g_parser, &storage);
43304543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
43314543ef51SXin LI       == XML_STATUS_ERROR)
43324543ef51SXin LI     xml_failure(g_parser);
43334543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
43344543ef51SXin LI }
43354543ef51SXin LI END_TEST
43364543ef51SXin LI 
43374543ef51SXin LI /* Test element name that is long enough to fill the conversion buffer
43384543ef51SXin LI  * in an unknown encoding, finishing with an simple character.
43394543ef51SXin LI  */
43404543ef51SXin LI START_TEST(test_unknown_encoding_long_name_2) {
43414543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
43424543ef51SXin LI                      "<abcdefghabcdefghabcdefghijklmnop>"
43434543ef51SXin LI                      "Hi"
43444543ef51SXin LI                      "</abcdefghabcdefghabcdefghijklmnop>";
43454543ef51SXin LI   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
43464543ef51SXin LI   CharData storage;
43474543ef51SXin LI 
43484543ef51SXin LI   CharData_Init(&storage);
43494543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43504543ef51SXin LI   XML_SetStartElementHandler(g_parser, record_element_start_handler);
43514543ef51SXin LI   XML_SetUserData(g_parser, &storage);
43524543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
43534543ef51SXin LI       == XML_STATUS_ERROR)
43544543ef51SXin LI     xml_failure(g_parser);
43554543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
43564543ef51SXin LI }
43574543ef51SXin LI END_TEST
43584543ef51SXin LI 
43594543ef51SXin LI START_TEST(test_invalid_unknown_encoding) {
43604543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
43614543ef51SXin LI                      "<doc>Hello world</doc>";
43624543ef51SXin LI 
43634543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43644543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
43654543ef51SXin LI                  "Invalid unknown encoding not faulted");
43664543ef51SXin LI }
43674543ef51SXin LI END_TEST
43684543ef51SXin LI 
43694543ef51SXin LI START_TEST(test_unknown_ascii_encoding_ok) {
43704543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
43714543ef51SXin LI                      "<doc>Hello, world</doc>";
43724543ef51SXin LI 
43734543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43744543ef51SXin LI   run_character_check(text, XCS("Hello, world"));
43754543ef51SXin LI }
43764543ef51SXin LI END_TEST
43774543ef51SXin LI 
43784543ef51SXin LI START_TEST(test_unknown_ascii_encoding_fail) {
43794543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
43804543ef51SXin LI                      "<doc>Hello, \x80 world</doc>";
43814543ef51SXin LI 
43824543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43834543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
43844543ef51SXin LI                  "Invalid character not faulted");
43854543ef51SXin LI }
43864543ef51SXin LI END_TEST
43874543ef51SXin LI 
43884543ef51SXin LI START_TEST(test_unknown_encoding_invalid_length) {
43894543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
43904543ef51SXin LI                      "<doc>Hello, world</doc>";
43914543ef51SXin LI 
43924543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
43934543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
43944543ef51SXin LI                  "Invalid unknown encoding not faulted");
43954543ef51SXin LI }
43964543ef51SXin LI END_TEST
43974543ef51SXin LI 
43984543ef51SXin LI START_TEST(test_unknown_encoding_invalid_topbit) {
43994543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
44004543ef51SXin LI                      "<doc>Hello, world</doc>";
44014543ef51SXin LI 
44024543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
44034543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
44044543ef51SXin LI                  "Invalid unknown encoding not faulted");
44054543ef51SXin LI }
44064543ef51SXin LI END_TEST
44074543ef51SXin LI 
44084543ef51SXin LI START_TEST(test_unknown_encoding_invalid_surrogate) {
44094543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
44104543ef51SXin LI                      "<doc>Hello, \x82 world</doc>";
44114543ef51SXin LI 
44124543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
44134543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
44144543ef51SXin LI                  "Invalid unknown encoding not faulted");
44154543ef51SXin LI }
44164543ef51SXin LI END_TEST
44174543ef51SXin LI 
44184543ef51SXin LI START_TEST(test_unknown_encoding_invalid_high) {
44194543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
44204543ef51SXin LI                      "<doc>Hello, world</doc>";
44214543ef51SXin LI 
44224543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
44234543ef51SXin LI   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
44244543ef51SXin LI                  "Invalid unknown encoding not faulted");
44254543ef51SXin LI }
44264543ef51SXin LI END_TEST
44274543ef51SXin LI 
44284543ef51SXin LI START_TEST(test_unknown_encoding_invalid_attr_value) {
44294543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
44304543ef51SXin LI                      "<doc attr='\xff\x30'/>";
44314543ef51SXin LI 
44324543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
44334543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
44344543ef51SXin LI                  "Invalid attribute valid not faulted");
44354543ef51SXin LI }
44364543ef51SXin LI END_TEST
44374543ef51SXin LI 
44384543ef51SXin LI /* Test an external entity parser set to use latin-1 detects UTF-16
44394543ef51SXin LI  * BOMs correctly.
44404543ef51SXin LI  */
44414543ef51SXin LI /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
44424543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16le_bom) {
44434543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
44444543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
44454543ef51SXin LI                      "]>\n"
44464543ef51SXin LI                      "<doc>&en;</doc>";
44474543ef51SXin LI   ExtTest2 test_data
44484543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
44494543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
44504543ef51SXin LI           *   0x4c = L and 0x20 is a space
44514543ef51SXin LI           */
44524543ef51SXin LI          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
44534543ef51SXin LI #ifdef XML_UNICODE
44544543ef51SXin LI   const XML_Char *expected = XCS("\x00ff\x00feL ");
44554543ef51SXin LI #else
44564543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
44574543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
44584543ef51SXin LI #endif
44594543ef51SXin LI   CharData storage;
44604543ef51SXin LI 
44614543ef51SXin LI   CharData_Init(&storage);
44624543ef51SXin LI   test_data.storage = &storage;
44634543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
44644543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
44654543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
44664543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
44674543ef51SXin LI       == XML_STATUS_ERROR)
44684543ef51SXin LI     xml_failure(g_parser);
44694543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
44704543ef51SXin LI }
44714543ef51SXin LI END_TEST
44724543ef51SXin LI 
44734543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16be_bom) {
44744543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
44754543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
44764543ef51SXin LI                      "]>\n"
44774543ef51SXin LI                      "<doc>&en;</doc>";
44784543ef51SXin LI   ExtTest2 test_data
44794543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
44804543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
44814543ef51SXin LI           *   0x4c = L and 0x20 is a space
44824543ef51SXin LI           */
44834543ef51SXin LI          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
44844543ef51SXin LI #ifdef XML_UNICODE
44854543ef51SXin LI   const XML_Char *expected = XCS("\x00fe\x00ff L");
44864543ef51SXin LI #else
44874543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
44884543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
44894543ef51SXin LI #endif
44904543ef51SXin LI   CharData storage;
44914543ef51SXin LI 
44924543ef51SXin LI   CharData_Init(&storage);
44934543ef51SXin LI   test_data.storage = &storage;
44944543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
44954543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
44964543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
44974543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
44984543ef51SXin LI       == XML_STATUS_ERROR)
44994543ef51SXin LI     xml_failure(g_parser);
45004543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
45014543ef51SXin LI }
45024543ef51SXin LI END_TEST
45034543ef51SXin LI 
45044543ef51SXin LI /* Parsing the full buffer rather than a byte at a time makes a
45054543ef51SXin LI  * difference to the encoding scanning code, so repeat the above tests
45064543ef51SXin LI  * without breaking them down by byte.
45074543ef51SXin LI  */
45084543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16le_bom2) {
45094543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
45104543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
45114543ef51SXin LI                      "]>\n"
45124543ef51SXin LI                      "<doc>&en;</doc>";
45134543ef51SXin LI   ExtTest2 test_data
45144543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
45154543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
45164543ef51SXin LI           *   0x4c = L and 0x20 is a space
45174543ef51SXin LI           */
45184543ef51SXin LI          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
45194543ef51SXin LI #ifdef XML_UNICODE
45204543ef51SXin LI   const XML_Char *expected = XCS("\x00ff\x00feL ");
45214543ef51SXin LI #else
45224543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
45234543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
45244543ef51SXin LI #endif
45254543ef51SXin LI   CharData storage;
45264543ef51SXin LI 
45274543ef51SXin LI   CharData_Init(&storage);
45284543ef51SXin LI   test_data.storage = &storage;
45294543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
45304543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
45314543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
45324543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
45334543ef51SXin LI       == XML_STATUS_ERROR)
45344543ef51SXin LI     xml_failure(g_parser);
45354543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
45364543ef51SXin LI }
45374543ef51SXin LI END_TEST
45384543ef51SXin LI 
45394543ef51SXin LI START_TEST(test_ext_entity_latin1_utf16be_bom2) {
45404543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
45414543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
45424543ef51SXin LI                      "]>\n"
45434543ef51SXin LI                      "<doc>&en;</doc>";
45444543ef51SXin LI   ExtTest2 test_data
45454543ef51SXin LI       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
45464543ef51SXin LI          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
45474543ef51SXin LI           *   0x4c = L and 0x20 is a space
45484543ef51SXin LI           */
45494543ef51SXin LI          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
45504543ef51SXin LI #ifdef XML_UNICODE
45514543ef51SXin LI   const XML_Char *expected = XCS("\x00fe\x00ff L");
45524543ef51SXin LI #else
45534543ef51SXin LI   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
45544543ef51SXin LI   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
45554543ef51SXin LI #endif
45564543ef51SXin LI   CharData storage;
45574543ef51SXin LI 
45584543ef51SXin LI   CharData_Init(&storage);
45594543ef51SXin LI   test_data.storage = &storage;
45604543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
45614543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
45624543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
45634543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
45644543ef51SXin LI       == XML_STATUS_ERROR)
45654543ef51SXin LI     xml_failure(g_parser);
45664543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
45674543ef51SXin LI }
45684543ef51SXin LI END_TEST
45694543ef51SXin LI 
45704543ef51SXin LI /* Test little-endian UTF-16 given an explicit big-endian encoding */
45714543ef51SXin LI START_TEST(test_ext_entity_utf16_be) {
45724543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
45734543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
45744543ef51SXin LI                      "]>\n"
45754543ef51SXin LI                      "<doc>&en;</doc>";
45764543ef51SXin LI   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
45774543ef51SXin LI #ifdef XML_UNICODE
45784543ef51SXin LI   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
45794543ef51SXin LI #else
45804543ef51SXin LI   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
45814543ef51SXin LI                                  "\xe6\x94\x80"   /* U+6500 */
45824543ef51SXin LI                                  "\xe2\xbc\x80"   /* U+2F00 */
45834543ef51SXin LI                                  "\xe3\xb8\x80"); /* U+3E00 */
45844543ef51SXin LI #endif
45854543ef51SXin LI   CharData storage;
45864543ef51SXin LI 
45874543ef51SXin LI   CharData_Init(&storage);
45884543ef51SXin LI   test_data.storage = &storage;
45894543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
45904543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
45914543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
45924543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
45934543ef51SXin LI       == XML_STATUS_ERROR)
45944543ef51SXin LI     xml_failure(g_parser);
45954543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
45964543ef51SXin LI }
45974543ef51SXin LI END_TEST
45984543ef51SXin LI 
45994543ef51SXin LI /* Test big-endian UTF-16 given an explicit little-endian encoding */
46004543ef51SXin LI START_TEST(test_ext_entity_utf16_le) {
46014543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
46024543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
46034543ef51SXin LI                      "]>\n"
46044543ef51SXin LI                      "<doc>&en;</doc>";
46054543ef51SXin LI   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
46064543ef51SXin LI #ifdef XML_UNICODE
46074543ef51SXin LI   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
46084543ef51SXin LI #else
46094543ef51SXin LI   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
46104543ef51SXin LI                                  "\xe6\x94\x80"   /* U+6500 */
46114543ef51SXin LI                                  "\xe2\xbc\x80"   /* U+2F00 */
46124543ef51SXin LI                                  "\xe3\xb8\x80"); /* U+3E00 */
46134543ef51SXin LI #endif
46144543ef51SXin LI   CharData storage;
46154543ef51SXin LI 
46164543ef51SXin LI   CharData_Init(&storage);
46174543ef51SXin LI   test_data.storage = &storage;
46184543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
46194543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
46204543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
46214543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
46224543ef51SXin LI       == XML_STATUS_ERROR)
46234543ef51SXin LI     xml_failure(g_parser);
46244543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
46254543ef51SXin LI }
46264543ef51SXin LI END_TEST
46274543ef51SXin LI 
46284543ef51SXin LI /* Test little-endian UTF-16 given no explicit encoding.
46294543ef51SXin LI  * The existing default encoding (UTF-8) is assumed to hold without a
46304543ef51SXin LI  * BOM to contradict it, so the entity value will in fact provoke an
46314543ef51SXin LI  * error because 0x00 is not a valid XML character.  We parse the
46324543ef51SXin LI  * whole buffer in one go rather than feeding it in byte by byte to
46334543ef51SXin LI  * exercise different code paths in the initial scanning routines.
46344543ef51SXin LI  */
46354543ef51SXin LI START_TEST(test_ext_entity_utf16_unknown) {
46364543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
46374543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
46384543ef51SXin LI                      "]>\n"
46394543ef51SXin LI                      "<doc>&en;</doc>";
46404543ef51SXin LI   ExtFaults2 test_data
46414543ef51SXin LI       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
46424543ef51SXin LI          XML_ERROR_INVALID_TOKEN};
46434543ef51SXin LI 
46444543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
46454543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
46464543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
46474543ef51SXin LI                  "Invalid character should not have been accepted");
46484543ef51SXin LI }
46494543ef51SXin LI END_TEST
46504543ef51SXin LI 
46514543ef51SXin LI /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
46524543ef51SXin LI START_TEST(test_ext_entity_utf8_non_bom) {
46534543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
46544543ef51SXin LI                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
46554543ef51SXin LI                      "]>\n"
46564543ef51SXin LI                      "<doc>&en;</doc>";
46574543ef51SXin LI   ExtTest2 test_data
46584543ef51SXin LI       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
46594543ef51SXin LI          3, NULL, NULL};
46604543ef51SXin LI #ifdef XML_UNICODE
46614543ef51SXin LI   const XML_Char *expected = XCS("\xfec0");
46624543ef51SXin LI #else
46634543ef51SXin LI   const XML_Char *expected = XCS("\xef\xbb\x80");
46644543ef51SXin LI #endif
46654543ef51SXin LI   CharData storage;
46664543ef51SXin LI 
46674543ef51SXin LI   CharData_Init(&storage);
46684543ef51SXin LI   test_data.storage = &storage;
46694543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
46704543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
46714543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
46724543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
46734543ef51SXin LI       == XML_STATUS_ERROR)
46744543ef51SXin LI     xml_failure(g_parser);
46754543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
46764543ef51SXin LI }
46774543ef51SXin LI END_TEST
46784543ef51SXin LI 
46794543ef51SXin LI /* Test that UTF-8 in a CDATA section is correctly passed through */
46804543ef51SXin LI START_TEST(test_utf8_in_cdata_section) {
46814543ef51SXin LI   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
46824543ef51SXin LI #ifdef XML_UNICODE
46834543ef51SXin LI   const XML_Char *expected = XCS("one \x00e9 two");
46844543ef51SXin LI #else
46854543ef51SXin LI   const XML_Char *expected = XCS("one \xc3\xa9 two");
46864543ef51SXin LI #endif
46874543ef51SXin LI 
46884543ef51SXin LI   run_character_check(text, expected);
46894543ef51SXin LI }
46904543ef51SXin LI END_TEST
46914543ef51SXin LI 
46924543ef51SXin LI /* Test that little-endian UTF-16 in a CDATA section is handled */
46934543ef51SXin LI START_TEST(test_utf8_in_cdata_section_2) {
46944543ef51SXin LI   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
46954543ef51SXin LI #ifdef XML_UNICODE
46964543ef51SXin LI   const XML_Char *expected = XCS("\x00e9]\x00e9two");
46974543ef51SXin LI #else
46984543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
46994543ef51SXin LI #endif
47004543ef51SXin LI 
47014543ef51SXin LI   run_character_check(text, expected);
47024543ef51SXin LI }
47034543ef51SXin LI END_TEST
47044543ef51SXin LI 
47054543ef51SXin LI START_TEST(test_utf8_in_start_tags) {
47064543ef51SXin LI   struct test_case {
47074543ef51SXin LI     bool goodName;
47084543ef51SXin LI     bool goodNameStart;
47094543ef51SXin LI     const char *tagName;
47104543ef51SXin LI   };
47114543ef51SXin LI 
47124543ef51SXin LI   // The idea with the tests below is this:
47134543ef51SXin LI   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
47144543ef51SXin LI   // go to isNever and are hence not a concern.
47154543ef51SXin LI   //
47164543ef51SXin LI   // We start with a character that is a valid name character
47174543ef51SXin LI   // (or even name-start character, see XML 1.0r4 spec) and then we flip
47184543ef51SXin LI   // single bits at places where (1) the result leaves the UTF-8 encoding space
47194543ef51SXin LI   // and (2) we stay in the same n-byte sequence family.
47204543ef51SXin LI   //
47214543ef51SXin LI   // The flipped bits are highlighted in angle brackets in comments,
47224543ef51SXin LI   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
47234543ef51SXin LI   // the most significant bit to 1 to leave UTF-8 encoding space.
47244543ef51SXin LI   struct test_case cases[] = {
47254543ef51SXin LI       // 1-byte UTF-8: [0xxx xxxx]
47264543ef51SXin LI       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
47274543ef51SXin LI       {false, false, "\xBA"}, // [<1>011 1010]
47284543ef51SXin LI       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
47294543ef51SXin LI       {false, false, "\xB9"}, // [<1>011 1001]
47304543ef51SXin LI 
47314543ef51SXin LI       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
47324543ef51SXin LI       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
47334543ef51SXin LI                                   // Arabic small waw U+06E5
47344543ef51SXin LI       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
47354543ef51SXin LI       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
47364543ef51SXin LI       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
47374543ef51SXin LI       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
47384543ef51SXin LI                                   // combining char U+0301
47394543ef51SXin LI       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
47404543ef51SXin LI       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
47414543ef51SXin LI       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
47424543ef51SXin LI 
47434543ef51SXin LI       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
47444543ef51SXin LI       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
47454543ef51SXin LI                                       // Devanagari Letter A U+0905
47464543ef51SXin LI       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
47474543ef51SXin LI       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
47484543ef51SXin LI       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
47494543ef51SXin LI       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
47504543ef51SXin LI       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
47514543ef51SXin LI       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
47524543ef51SXin LI                                       // combining char U+0901
47534543ef51SXin LI       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
47544543ef51SXin LI       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
47554543ef51SXin LI       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
47564543ef51SXin LI       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
47574543ef51SXin LI       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
47584543ef51SXin LI   };
47594543ef51SXin LI   const bool atNameStart[] = {true, false};
47604543ef51SXin LI 
47614543ef51SXin LI   size_t i = 0;
47624543ef51SXin LI   char doc[1024];
47634543ef51SXin LI   size_t failCount = 0;
47644543ef51SXin LI 
47654543ef51SXin LI   // we need all the bytes to be parsed, but we don't want the errors that can
47664543ef51SXin LI   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
47674543ef51SXin LI   if (g_reparseDeferralEnabledDefault) {
47684543ef51SXin LI     return;
47694543ef51SXin LI   }
47704543ef51SXin LI 
47714543ef51SXin LI   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
47724543ef51SXin LI     size_t j = 0;
47734543ef51SXin LI     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
47744543ef51SXin LI       const bool expectedSuccess
47754543ef51SXin LI           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
47764543ef51SXin LI       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
47774543ef51SXin LI                cases[i].tagName);
47784543ef51SXin LI       XML_Parser parser = XML_ParserCreate(NULL);
47794543ef51SXin LI 
47804543ef51SXin LI       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
47814543ef51SXin LI           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
47824543ef51SXin LI 
47834543ef51SXin LI       bool success = true;
47844543ef51SXin LI       if ((status == XML_STATUS_OK) != expectedSuccess) {
47854543ef51SXin LI         success = false;
47864543ef51SXin LI       }
47874543ef51SXin LI       if ((status == XML_STATUS_ERROR)
47884543ef51SXin LI           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
47894543ef51SXin LI         success = false;
47904543ef51SXin LI       }
47914543ef51SXin LI 
47924543ef51SXin LI       if (! success) {
47934543ef51SXin LI         fprintf(
47944543ef51SXin LI             stderr,
47954543ef51SXin LI             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
47964543ef51SXin LI             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
47974543ef51SXin LI             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
47984543ef51SXin LI         failCount++;
47994543ef51SXin LI       }
48004543ef51SXin LI 
48014543ef51SXin LI       XML_ParserFree(parser);
48024543ef51SXin LI     }
48034543ef51SXin LI   }
48044543ef51SXin LI 
48054543ef51SXin LI   if (failCount > 0) {
48064543ef51SXin LI     fail("UTF-8 regression detected");
48074543ef51SXin LI   }
48084543ef51SXin LI }
48094543ef51SXin LI END_TEST
48104543ef51SXin LI 
48114543ef51SXin LI /* Test trailing spaces in elements are accepted */
48124543ef51SXin LI START_TEST(test_trailing_spaces_in_elements) {
48134543ef51SXin LI   const char *text = "<doc   >Hi</doc >";
48144543ef51SXin LI   const XML_Char *expected = XCS("doc/doc");
48154543ef51SXin LI   CharData storage;
48164543ef51SXin LI 
48174543ef51SXin LI   CharData_Init(&storage);
48184543ef51SXin LI   XML_SetElementHandler(g_parser, record_element_start_handler,
48194543ef51SXin LI                         record_element_end_handler);
48204543ef51SXin LI   XML_SetUserData(g_parser, &storage);
48214543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
48224543ef51SXin LI       == XML_STATUS_ERROR)
48234543ef51SXin LI     xml_failure(g_parser);
48244543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
48254543ef51SXin LI }
48264543ef51SXin LI END_TEST
48274543ef51SXin LI 
48284543ef51SXin LI START_TEST(test_utf16_attribute) {
48294543ef51SXin LI   const char text[] =
48304543ef51SXin LI       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
48314543ef51SXin LI        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
48324543ef51SXin LI        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
48334543ef51SXin LI        */
48344543ef51SXin LI       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
48354543ef51SXin LI   const XML_Char *expected = XCS("a");
48364543ef51SXin LI   CharData storage;
48374543ef51SXin LI 
48384543ef51SXin LI   CharData_Init(&storage);
48394543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
48404543ef51SXin LI   XML_SetUserData(g_parser, &storage);
48414543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
48424543ef51SXin LI       == XML_STATUS_ERROR)
48434543ef51SXin LI     xml_failure(g_parser);
48444543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
48454543ef51SXin LI }
48464543ef51SXin LI END_TEST
48474543ef51SXin LI 
48484543ef51SXin LI START_TEST(test_utf16_second_attr) {
48494543ef51SXin LI   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
48504543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
48514543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
48524543ef51SXin LI    */
48534543ef51SXin LI   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
48544543ef51SXin LI                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
48554543ef51SXin LI   const XML_Char *expected = XCS("1");
48564543ef51SXin LI   CharData storage;
48574543ef51SXin LI 
48584543ef51SXin LI   CharData_Init(&storage);
48594543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
48604543ef51SXin LI   XML_SetUserData(g_parser, &storage);
48614543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
48624543ef51SXin LI       == XML_STATUS_ERROR)
48634543ef51SXin LI     xml_failure(g_parser);
48644543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
48654543ef51SXin LI }
48664543ef51SXin LI END_TEST
48674543ef51SXin LI 
48684543ef51SXin LI START_TEST(test_attr_after_solidus) {
48694543ef51SXin LI   const char *text = "<doc attr1='a' / attr2='b'>";
48704543ef51SXin LI 
48714543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
48724543ef51SXin LI }
48734543ef51SXin LI END_TEST
48744543ef51SXin LI 
48754543ef51SXin LI START_TEST(test_utf16_pe) {
48764543ef51SXin LI   /* <!DOCTYPE doc [
48774543ef51SXin LI    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
48784543ef51SXin LI    * %{KHO KHWAI}{CHO CHAN};
48794543ef51SXin LI    * ]>
48804543ef51SXin LI    * <doc></doc>
48814543ef51SXin LI    *
48824543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
48834543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
48844543ef51SXin LI    */
48854543ef51SXin LI   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
48864543ef51SXin LI                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
48874543ef51SXin LI                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
48884543ef51SXin LI                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
48894543ef51SXin LI                       "\0%\x0e\x04\x0e\x08\0;\0\n"
48904543ef51SXin LI                       "\0]\0>\0\n"
48914543ef51SXin LI                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
48924543ef51SXin LI #ifdef XML_UNICODE
48934543ef51SXin LI   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
48944543ef51SXin LI #else
48954543ef51SXin LI   const XML_Char *expected
48964543ef51SXin LI       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
48974543ef51SXin LI #endif
48984543ef51SXin LI   CharData storage;
48994543ef51SXin LI 
49004543ef51SXin LI   CharData_Init(&storage);
49014543ef51SXin LI   XML_SetUserData(g_parser, &storage);
49024543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
49034543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
49044543ef51SXin LI       == XML_STATUS_ERROR)
49054543ef51SXin LI     xml_failure(g_parser);
49064543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
49074543ef51SXin LI }
49084543ef51SXin LI END_TEST
49094543ef51SXin LI 
49104543ef51SXin LI /* Test that duff attribute description keywords are rejected */
49114543ef51SXin LI START_TEST(test_bad_attr_desc_keyword) {
49124543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
49134543ef51SXin LI                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
49144543ef51SXin LI                      "]>\n"
49154543ef51SXin LI                      "<doc />";
49164543ef51SXin LI 
49174543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
49184543ef51SXin LI                  "Bad keyword !IMPLIED not faulted");
49194543ef51SXin LI }
49204543ef51SXin LI END_TEST
49214543ef51SXin LI 
49224543ef51SXin LI /* Test that an invalid attribute description keyword consisting of
49234543ef51SXin LI  * UTF-16 characters with their top bytes non-zero are correctly
49244543ef51SXin LI  * faulted
49254543ef51SXin LI  */
49264543ef51SXin LI START_TEST(test_bad_attr_desc_keyword_utf16) {
49274543ef51SXin LI   /* <!DOCTYPE d [
49284543ef51SXin LI    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
49294543ef51SXin LI    * ]><d/>
49304543ef51SXin LI    *
49314543ef51SXin LI    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
49324543ef51SXin LI    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
49334543ef51SXin LI    */
49344543ef51SXin LI   const char text[]
49354543ef51SXin LI       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
49364543ef51SXin LI         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
49374543ef51SXin LI         "\0#\x0e\x04\x0e\x08\0>\0\n"
49384543ef51SXin LI         "\0]\0>\0<\0d\0/\0>";
49394543ef51SXin LI 
49404543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
49414543ef51SXin LI       != XML_STATUS_ERROR)
49424543ef51SXin LI     fail("Invalid UTF16 attribute keyword not faulted");
49434543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
49444543ef51SXin LI     xml_failure(g_parser);
49454543ef51SXin LI }
49464543ef51SXin LI END_TEST
49474543ef51SXin LI 
49484543ef51SXin LI /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
49494543ef51SXin LI  * using prefix-encoding (see above) to trigger specific code paths
49504543ef51SXin LI  */
49514543ef51SXin LI START_TEST(test_bad_doctype) {
49524543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
49534543ef51SXin LI                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
49544543ef51SXin LI 
49554543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
49564543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
49574543ef51SXin LI                  "Invalid bytes in DOCTYPE not faulted");
49584543ef51SXin LI }
49594543ef51SXin LI END_TEST
49604543ef51SXin LI 
49614543ef51SXin LI START_TEST(test_bad_doctype_utf8) {
49624543ef51SXin LI   const char *text = "<!DOCTYPE \xDB\x25"
49634543ef51SXin LI                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
49644543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
49654543ef51SXin LI                  "Invalid UTF-8 in DOCTYPE not faulted");
49664543ef51SXin LI }
49674543ef51SXin LI END_TEST
49684543ef51SXin LI 
49694543ef51SXin LI START_TEST(test_bad_doctype_utf16) {
49704543ef51SXin LI   const char text[] =
49714543ef51SXin LI       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
49724543ef51SXin LI        *
49734543ef51SXin LI        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
49744543ef51SXin LI        * (name character) but not a valid letter (name start character)
49754543ef51SXin LI        */
49764543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
49774543ef51SXin LI       "\x06\xf2"
49784543ef51SXin LI       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
49794543ef51SXin LI 
49804543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
49814543ef51SXin LI       != XML_STATUS_ERROR)
49824543ef51SXin LI     fail("Invalid bytes in DOCTYPE not faulted");
49834543ef51SXin LI   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
49844543ef51SXin LI     xml_failure(g_parser);
49854543ef51SXin LI }
49864543ef51SXin LI END_TEST
49874543ef51SXin LI 
49884543ef51SXin LI START_TEST(test_bad_doctype_plus) {
49894543ef51SXin LI   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
49904543ef51SXin LI                      "<1+>&foo;</1+>";
49914543ef51SXin LI 
49924543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
49934543ef51SXin LI                  "'+' in document name not faulted");
49944543ef51SXin LI }
49954543ef51SXin LI END_TEST
49964543ef51SXin LI 
49974543ef51SXin LI START_TEST(test_bad_doctype_star) {
49984543ef51SXin LI   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
49994543ef51SXin LI                      "<1*>&foo;</1*>";
50004543ef51SXin LI 
50014543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
50024543ef51SXin LI                  "'*' in document name not faulted");
50034543ef51SXin LI }
50044543ef51SXin LI END_TEST
50054543ef51SXin LI 
50064543ef51SXin LI START_TEST(test_bad_doctype_query) {
50074543ef51SXin LI   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
50084543ef51SXin LI                      "<1?>&foo;</1?>";
50094543ef51SXin LI 
50104543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
50114543ef51SXin LI                  "'?' in document name not faulted");
50124543ef51SXin LI }
50134543ef51SXin LI END_TEST
50144543ef51SXin LI 
50154543ef51SXin LI START_TEST(test_unknown_encoding_bad_ignore) {
50164543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
50174543ef51SXin LI                      "<!DOCTYPE doc SYSTEM 'foo'>"
50184543ef51SXin LI                      "<doc><e>&entity;</e></doc>";
50194543ef51SXin LI   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
50204543ef51SXin LI                      "Invalid character not faulted", XCS("prefix-conv"),
50214543ef51SXin LI                      XML_ERROR_INVALID_TOKEN};
50224543ef51SXin LI 
50234543ef51SXin LI   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
50244543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
50254543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
50264543ef51SXin LI   XML_SetUserData(g_parser, &fault);
50274543ef51SXin LI   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
50284543ef51SXin LI                  "Bad IGNORE section with unknown encoding not failed");
50294543ef51SXin LI }
50304543ef51SXin LI END_TEST
50314543ef51SXin LI 
50324543ef51SXin LI START_TEST(test_entity_in_utf16_be_attr) {
50334543ef51SXin LI   const char text[] =
50344543ef51SXin LI       /* <e a='&#228; &#x00E4;'></e> */
50354543ef51SXin LI       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
50364543ef51SXin LI       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
50374543ef51SXin LI #ifdef XML_UNICODE
50384543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00e4");
50394543ef51SXin LI #else
50404543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
50414543ef51SXin LI #endif
50424543ef51SXin LI   CharData storage;
50434543ef51SXin LI 
50444543ef51SXin LI   CharData_Init(&storage);
50454543ef51SXin LI   XML_SetUserData(g_parser, &storage);
50464543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
50474543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
50484543ef51SXin LI       == XML_STATUS_ERROR)
50494543ef51SXin LI     xml_failure(g_parser);
50504543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
50514543ef51SXin LI }
50524543ef51SXin LI END_TEST
50534543ef51SXin LI 
50544543ef51SXin LI START_TEST(test_entity_in_utf16_le_attr) {
50554543ef51SXin LI   const char text[] =
50564543ef51SXin LI       /* <e a='&#228; &#x00E4;'></e> */
50574543ef51SXin LI       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
50584543ef51SXin LI       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
50594543ef51SXin LI #ifdef XML_UNICODE
50604543ef51SXin LI   const XML_Char *expected = XCS("\x00e4 \x00e4");
50614543ef51SXin LI #else
50624543ef51SXin LI   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
50634543ef51SXin LI #endif
50644543ef51SXin LI   CharData storage;
50654543ef51SXin LI 
50664543ef51SXin LI   CharData_Init(&storage);
50674543ef51SXin LI   XML_SetUserData(g_parser, &storage);
50684543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
50694543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
50704543ef51SXin LI       == XML_STATUS_ERROR)
50714543ef51SXin LI     xml_failure(g_parser);
50724543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
50734543ef51SXin LI }
50744543ef51SXin LI END_TEST
50754543ef51SXin LI 
50764543ef51SXin LI START_TEST(test_entity_public_utf16_be) {
50774543ef51SXin LI   const char text[] =
50784543ef51SXin LI       /* <!DOCTYPE d [ */
50794543ef51SXin LI       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
50804543ef51SXin LI       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
50814543ef51SXin LI       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
50824543ef51SXin LI       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
50834543ef51SXin LI       /* %e; */
50844543ef51SXin LI       "\0%\0e\0;\0\n"
50854543ef51SXin LI       /* ]> */
50864543ef51SXin LI       "\0]\0>\0\n"
50874543ef51SXin LI       /* <d>&j;</d> */
50884543ef51SXin LI       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
50894543ef51SXin LI   ExtTest2 test_data
50904543ef51SXin LI       = {/* <!ENTITY j 'baz'> */
50914543ef51SXin LI          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
50924543ef51SXin LI   const XML_Char *expected = XCS("baz");
50934543ef51SXin LI   CharData storage;
50944543ef51SXin LI 
50954543ef51SXin LI   CharData_Init(&storage);
50964543ef51SXin LI   test_data.storage = &storage;
50974543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
50984543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
50994543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
51004543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
51014543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
51024543ef51SXin LI       == XML_STATUS_ERROR)
51034543ef51SXin LI     xml_failure(g_parser);
51044543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
51054543ef51SXin LI }
51064543ef51SXin LI END_TEST
51074543ef51SXin LI 
51084543ef51SXin LI START_TEST(test_entity_public_utf16_le) {
51094543ef51SXin LI   const char text[] =
51104543ef51SXin LI       /* <!DOCTYPE d [ */
51114543ef51SXin LI       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
51124543ef51SXin LI       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
51134543ef51SXin LI       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
51144543ef51SXin LI       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
51154543ef51SXin LI       /* %e; */
51164543ef51SXin LI       "%\0e\0;\0\n\0"
51174543ef51SXin LI       /* ]> */
51184543ef51SXin LI       "]\0>\0\n\0"
51194543ef51SXin LI       /* <d>&j;</d> */
51204543ef51SXin LI       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
51214543ef51SXin LI   ExtTest2 test_data
51224543ef51SXin LI       = {/* <!ENTITY j 'baz'> */
51234543ef51SXin LI          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
51244543ef51SXin LI   const XML_Char *expected = XCS("baz");
51254543ef51SXin LI   CharData storage;
51264543ef51SXin LI 
51274543ef51SXin LI   CharData_Init(&storage);
51284543ef51SXin LI   test_data.storage = &storage;
51294543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
51304543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
51314543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
51324543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
51334543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
51344543ef51SXin LI       == XML_STATUS_ERROR)
51354543ef51SXin LI     xml_failure(g_parser);
51364543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
51374543ef51SXin LI }
51384543ef51SXin LI END_TEST
51394543ef51SXin LI 
51404543ef51SXin LI /* Test that a doctype with neither an internal nor external subset is
51414543ef51SXin LI  * faulted
51424543ef51SXin LI  */
51434543ef51SXin LI START_TEST(test_short_doctype) {
51444543ef51SXin LI   const char *text = "<!DOCTYPE doc></doc>";
51454543ef51SXin LI   expect_failure(text, XML_ERROR_INVALID_TOKEN,
51464543ef51SXin LI                  "DOCTYPE without subset not rejected");
51474543ef51SXin LI }
51484543ef51SXin LI END_TEST
51494543ef51SXin LI 
51504543ef51SXin LI START_TEST(test_short_doctype_2) {
51514543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
51524543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
51534543ef51SXin LI                  "DOCTYPE without Public ID not rejected");
51544543ef51SXin LI }
51554543ef51SXin LI END_TEST
51564543ef51SXin LI 
51574543ef51SXin LI START_TEST(test_short_doctype_3) {
51584543ef51SXin LI   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
51594543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
51604543ef51SXin LI                  "DOCTYPE without System ID not rejected");
51614543ef51SXin LI }
51624543ef51SXin LI END_TEST
51634543ef51SXin LI 
51644543ef51SXin LI START_TEST(test_long_doctype) {
51654543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
51664543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
51674543ef51SXin LI }
51684543ef51SXin LI END_TEST
51694543ef51SXin LI 
51704543ef51SXin LI START_TEST(test_bad_entity) {
51714543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
51724543ef51SXin LI                      "  <!ENTITY foo PUBLIC>\n"
51734543ef51SXin LI                      "]>\n"
51744543ef51SXin LI                      "<doc/>";
51754543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
51764543ef51SXin LI                  "ENTITY without Public ID is not rejected");
51774543ef51SXin LI }
51784543ef51SXin LI END_TEST
51794543ef51SXin LI 
51804543ef51SXin LI /* Test unquoted value is faulted */
51814543ef51SXin LI START_TEST(test_bad_entity_2) {
51824543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
51834543ef51SXin LI                      "  <!ENTITY % foo bar>\n"
51844543ef51SXin LI                      "]>\n"
51854543ef51SXin LI                      "<doc/>";
51864543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
51874543ef51SXin LI                  "ENTITY without Public ID is not rejected");
51884543ef51SXin LI }
51894543ef51SXin LI END_TEST
51904543ef51SXin LI 
51914543ef51SXin LI START_TEST(test_bad_entity_3) {
51924543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
51934543ef51SXin LI                      "  <!ENTITY % foo PUBLIC>\n"
51944543ef51SXin LI                      "]>\n"
51954543ef51SXin LI                      "<doc/>";
51964543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
51974543ef51SXin LI                  "Parameter ENTITY without Public ID is not rejected");
51984543ef51SXin LI }
51994543ef51SXin LI END_TEST
52004543ef51SXin LI 
52014543ef51SXin LI START_TEST(test_bad_entity_4) {
52024543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
52034543ef51SXin LI                      "  <!ENTITY % foo SYSTEM>\n"
52044543ef51SXin LI                      "]>\n"
52054543ef51SXin LI                      "<doc/>";
52064543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
52074543ef51SXin LI                  "Parameter ENTITY without Public ID is not rejected");
52084543ef51SXin LI }
52094543ef51SXin LI END_TEST
52104543ef51SXin LI 
52114543ef51SXin LI START_TEST(test_bad_notation) {
52124543ef51SXin LI   const char *text = "<!DOCTYPE doc [\n"
52134543ef51SXin LI                      "  <!NOTATION n SYSTEM>\n"
52144543ef51SXin LI                      "]>\n"
52154543ef51SXin LI                      "<doc/>";
52164543ef51SXin LI   expect_failure(text, XML_ERROR_SYNTAX,
52174543ef51SXin LI                  "Notation without System ID is not rejected");
52184543ef51SXin LI }
52194543ef51SXin LI END_TEST
52204543ef51SXin LI 
52214543ef51SXin LI /* Test for issue #11, wrongly suppressed default handler */
52224543ef51SXin LI START_TEST(test_default_doctype_handler) {
52234543ef51SXin LI   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
52244543ef51SXin LI                      "  <!ENTITY foo 'bar'>\n"
52254543ef51SXin LI                      "]>\n"
52264543ef51SXin LI                      "<doc>&foo;</doc>";
52274543ef51SXin LI   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
52284543ef51SXin LI                               {XCS("'test.dtd'"), 10, XML_FALSE},
52294543ef51SXin LI                               {NULL, 0, XML_FALSE}};
52304543ef51SXin LI   int i;
52314543ef51SXin LI 
52324543ef51SXin LI   XML_SetUserData(g_parser, &test_data);
52334543ef51SXin LI   XML_SetDefaultHandler(g_parser, checking_default_handler);
52344543ef51SXin LI   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
52354543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
52364543ef51SXin LI       == XML_STATUS_ERROR)
52374543ef51SXin LI     xml_failure(g_parser);
52384543ef51SXin LI   for (i = 0; test_data[i].expected != NULL; i++)
52394543ef51SXin LI     if (! test_data[i].seen)
52404543ef51SXin LI       fail("Default handler not run for public !DOCTYPE");
52414543ef51SXin LI }
52424543ef51SXin LI END_TEST
52434543ef51SXin LI 
52444543ef51SXin LI START_TEST(test_empty_element_abort) {
52454543ef51SXin LI   const char *text = "<abort/>";
52464543ef51SXin LI 
52474543ef51SXin LI   XML_SetStartElementHandler(g_parser, start_element_suspender);
52484543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
52494543ef51SXin LI       != XML_STATUS_ERROR)
52504543ef51SXin LI     fail("Expected to error on abort");
52514543ef51SXin LI }
52524543ef51SXin LI END_TEST
52534543ef51SXin LI 
52544543ef51SXin LI /* Regression test for GH issue #612: unfinished m_declAttributeType
52554543ef51SXin LI  * allocation in ->m_tempPool can corrupt following allocation.
52564543ef51SXin LI  */
52574543ef51SXin LI START_TEST(test_pool_integrity_with_unfinished_attr) {
52584543ef51SXin LI   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
52594543ef51SXin LI                      "<!DOCTYPE foo [\n"
52604543ef51SXin LI                      "<!ELEMENT foo ANY>\n"
52614543ef51SXin LI                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
52624543ef51SXin LI                      "%entp;\n"
52634543ef51SXin LI                      "]>\n"
52644543ef51SXin LI                      "<a></a>\n";
52654543ef51SXin LI   const XML_Char *expected = XCS("COMMENT");
52664543ef51SXin LI   CharData storage;
52674543ef51SXin LI 
52684543ef51SXin LI   CharData_Init(&storage);
52694543ef51SXin LI   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
52704543ef51SXin LI   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
52714543ef51SXin LI   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
52724543ef51SXin LI   XML_SetCommentHandler(g_parser, accumulate_comment);
52734543ef51SXin LI   XML_SetUserData(g_parser, &storage);
52744543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
52754543ef51SXin LI       == XML_STATUS_ERROR)
52764543ef51SXin LI     xml_failure(g_parser);
52774543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
52784543ef51SXin LI }
52794543ef51SXin LI END_TEST
52804543ef51SXin LI 
52814543ef51SXin LI START_TEST(test_nested_entity_suspend) {
52824543ef51SXin LI   const char *const text = "<!DOCTYPE a [\n"
52834543ef51SXin LI                            "  <!ENTITY e1 '<!--e1-->'>\n"
52844543ef51SXin LI                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
52854543ef51SXin LI                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
52864543ef51SXin LI                            "]>\n"
52874543ef51SXin LI                            "<a><!--start-->&e3;<!--end--></a>";
52884543ef51SXin LI   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
52894543ef51SXin LI       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
52904543ef51SXin LI   CharData storage;
52914543ef51SXin LI   CharData_Init(&storage);
52924543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
52934543ef51SXin LI   ParserPlusStorage parserPlusStorage = {parser, &storage};
52944543ef51SXin LI 
52954543ef51SXin LI   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
52964543ef51SXin LI   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
52974543ef51SXin LI   XML_SetUserData(parser, &parserPlusStorage);
52984543ef51SXin LI 
52994543ef51SXin LI   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
53004543ef51SXin LI   while (status == XML_STATUS_SUSPENDED) {
53014543ef51SXin LI     status = XML_ResumeParser(parser);
53024543ef51SXin LI   }
53034543ef51SXin LI   if (status != XML_STATUS_OK)
53044543ef51SXin LI     xml_failure(parser);
53054543ef51SXin LI 
53064543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
53074543ef51SXin LI   XML_ParserFree(parser);
53084543ef51SXin LI }
53094543ef51SXin LI END_TEST
53104543ef51SXin LI 
53114543ef51SXin LI /* Regression test for quadratic parsing on large tokens */
5312ffd294a1SEnji Cooper START_TEST(test_big_tokens_scale_linearly) {
53134543ef51SXin LI   const struct {
53144543ef51SXin LI     const char *pre;
53154543ef51SXin LI     const char *post;
53164543ef51SXin LI   } text[] = {
53174543ef51SXin LI       {"<a>", "</a>"},                      // assumed good, used as baseline
53184543ef51SXin LI       {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
53194543ef51SXin LI       {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
53204543ef51SXin LI       {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
53214543ef51SXin LI       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
53224543ef51SXin LI   };
53234543ef51SXin LI   const int num_cases = sizeof(text) / sizeof(text[0]);
53244543ef51SXin LI   char aaaaaa[4096];
53254543ef51SXin LI   const int fillsize = (int)sizeof(aaaaaa);
53264543ef51SXin LI   const int fillcount = 100;
5327ffd294a1SEnji Cooper   const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5328ffd294a1SEnji Cooper   const unsigned max_factor = 4;
5329ffd294a1SEnji Cooper   const unsigned max_scanned = max_factor * approx_bytes;
53304543ef51SXin LI 
53314543ef51SXin LI   memset(aaaaaa, 'a', fillsize);
53324543ef51SXin LI 
53334543ef51SXin LI   if (! g_reparseDeferralEnabledDefault) {
53344543ef51SXin LI     return; // heuristic is disabled; we would get O(n^2) and fail.
53354543ef51SXin LI   }
53364543ef51SXin LI 
53374543ef51SXin LI   for (int i = 0; i < num_cases; ++i) {
53384543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
53394543ef51SXin LI     assert_true(parser != NULL);
53404543ef51SXin LI     enum XML_Status status;
5341ffd294a1SEnji Cooper     set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
53424543ef51SXin LI 
53434543ef51SXin LI     // parse the start text
5344ffd294a1SEnji Cooper     g_bytesScanned = 0;
53454543ef51SXin LI     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
53464543ef51SXin LI                                      (int)strlen(text[i].pre), XML_FALSE);
53474543ef51SXin LI     if (status != XML_STATUS_OK) {
53484543ef51SXin LI       xml_failure(parser);
53494543ef51SXin LI     }
5350ffd294a1SEnji Cooper 
53514543ef51SXin LI     // parse lots of 'a', failing the test early if it takes too long
5352ffd294a1SEnji Cooper     unsigned past_max_count = 0;
53534543ef51SXin LI     for (int f = 0; f < fillcount; ++f) {
53544543ef51SXin LI       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
53554543ef51SXin LI       if (status != XML_STATUS_OK) {
53564543ef51SXin LI         xml_failure(parser);
53574543ef51SXin LI       }
5358ffd294a1SEnji Cooper       if (g_bytesScanned > max_scanned) {
5359ffd294a1SEnji Cooper         // We're not done, and have already passed the limit -- the test will
5360ffd294a1SEnji Cooper         // definitely fail. This block allows us to save time by failing early.
5361ffd294a1SEnji Cooper         const unsigned pushed
5362ffd294a1SEnji Cooper             = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
53634543ef51SXin LI         fprintf(
53644543ef51SXin LI             stderr,
5365ffd294a1SEnji Cooper             "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5366ffd294a1SEnji Cooper             f + 1, fillcount, pushed, g_bytesScanned,
5367ffd294a1SEnji Cooper             g_bytesScanned / (double)pushed, max_scanned, max_factor);
5368ffd294a1SEnji Cooper         past_max_count++;
5369ffd294a1SEnji Cooper         // We are failing, but allow a few log prints first. If we don't reach
5370ffd294a1SEnji Cooper         // a count of five, the test will fail after the loop instead.
5371ffd294a1SEnji Cooper         assert_true(past_max_count < 5);
53724543ef51SXin LI       }
53734543ef51SXin LI     }
5374ffd294a1SEnji Cooper 
53754543ef51SXin LI     // parse the end text
53764543ef51SXin LI     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
53774543ef51SXin LI                                      (int)strlen(text[i].post), XML_TRUE);
53784543ef51SXin LI     if (status != XML_STATUS_OK) {
53794543ef51SXin LI       xml_failure(parser);
53804543ef51SXin LI     }
53814543ef51SXin LI 
5382ffd294a1SEnji Cooper     assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5383ffd294a1SEnji Cooper     if (g_bytesScanned > max_scanned) {
5384ffd294a1SEnji Cooper       fprintf(
5385ffd294a1SEnji Cooper           stderr,
5386ffd294a1SEnji Cooper           "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5387ffd294a1SEnji Cooper           g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
5388ffd294a1SEnji Cooper           max_factor);
5389ffd294a1SEnji Cooper       fail("scanned too many bytes");
53904543ef51SXin LI     }
53914543ef51SXin LI 
53924543ef51SXin LI     XML_ParserFree(parser);
53934543ef51SXin LI   }
53944543ef51SXin LI }
53954543ef51SXin LI END_TEST
53964543ef51SXin LI 
53974543ef51SXin LI START_TEST(test_set_reparse_deferral) {
53984543ef51SXin LI   const char *const pre = "<d>";
53994543ef51SXin LI   const char *const start = "<x attr='";
54004543ef51SXin LI   const char *const end = "'></x>";
54014543ef51SXin LI   char eeeeee[100];
54024543ef51SXin LI   const int fillsize = (int)sizeof(eeeeee);
54034543ef51SXin LI   memset(eeeeee, 'e', fillsize);
54044543ef51SXin LI 
54054543ef51SXin LI   for (int enabled = 0; enabled <= 1; enabled += 1) {
54064543ef51SXin LI     set_subtest("deferral=%d", enabled);
54074543ef51SXin LI 
54084543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
54094543ef51SXin LI     assert_true(parser != NULL);
54104543ef51SXin LI     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
54114543ef51SXin LI     // pre-grow the buffer to avoid reparsing due to almost-fullness
54124543ef51SXin LI     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
54134543ef51SXin LI 
54144543ef51SXin LI     CharData storage;
54154543ef51SXin LI     CharData_Init(&storage);
54164543ef51SXin LI     XML_SetUserData(parser, &storage);
54174543ef51SXin LI     XML_SetStartElementHandler(parser, start_element_event_handler);
54184543ef51SXin LI 
54194543ef51SXin LI     enum XML_Status status;
54204543ef51SXin LI     // parse the start text
54214543ef51SXin LI     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
54224543ef51SXin LI     if (status != XML_STATUS_OK) {
54234543ef51SXin LI       xml_failure(parser);
54244543ef51SXin LI     }
54254543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
54264543ef51SXin LI 
54274543ef51SXin LI     // ..and the start of the token
54284543ef51SXin LI     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
54294543ef51SXin LI     if (status != XML_STATUS_OK) {
54304543ef51SXin LI       xml_failure(parser);
54314543ef51SXin LI     }
54324543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
54334543ef51SXin LI 
54344543ef51SXin LI     // try to parse lots of 'e', but the token isn't finished
54354543ef51SXin LI     for (int c = 0; c < 100; ++c) {
54364543ef51SXin LI       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
54374543ef51SXin LI       if (status != XML_STATUS_OK) {
54384543ef51SXin LI         xml_failure(parser);
54394543ef51SXin LI       }
54404543ef51SXin LI     }
54414543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
54424543ef51SXin LI 
54434543ef51SXin LI     // end the <x> token.
54444543ef51SXin LI     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
54454543ef51SXin LI     if (status != XML_STATUS_OK) {
54464543ef51SXin LI       xml_failure(parser);
54474543ef51SXin LI     }
54484543ef51SXin LI 
54494543ef51SXin LI     if (enabled) {
54504543ef51SXin LI       // In general, we may need to push more data to trigger a reparse attempt,
54514543ef51SXin LI       // but in this test, the data is constructed to always require it.
54524543ef51SXin LI       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
54534543ef51SXin LI       // 2x the token length should suffice; the +1 covers the start and end.
54544543ef51SXin LI       for (int c = 0; c < 101; ++c) {
54554543ef51SXin LI         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
54564543ef51SXin LI         if (status != XML_STATUS_OK) {
54574543ef51SXin LI           xml_failure(parser);
54584543ef51SXin LI         }
54594543ef51SXin LI       }
54604543ef51SXin LI     }
54614543ef51SXin LI     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
54624543ef51SXin LI 
54634543ef51SXin LI     XML_ParserFree(parser);
54644543ef51SXin LI   }
54654543ef51SXin LI }
54664543ef51SXin LI END_TEST
54674543ef51SXin LI 
54684543ef51SXin LI struct element_decl_data {
54694543ef51SXin LI   XML_Parser parser;
54704543ef51SXin LI   int count;
54714543ef51SXin LI };
54724543ef51SXin LI 
54734543ef51SXin LI static void
54744543ef51SXin LI element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
54754543ef51SXin LI   UNUSED_P(name);
54764543ef51SXin LI   struct element_decl_data *testdata = (struct element_decl_data *)userData;
54774543ef51SXin LI   testdata->count += 1;
54784543ef51SXin LI   XML_FreeContentModel(testdata->parser, model);
54794543ef51SXin LI }
54804543ef51SXin LI 
54814543ef51SXin LI static int
54824543ef51SXin LI external_inherited_parser(XML_Parser p, const XML_Char *context,
54834543ef51SXin LI                           const XML_Char *base, const XML_Char *systemId,
54844543ef51SXin LI                           const XML_Char *publicId) {
54854543ef51SXin LI   UNUSED_P(base);
54864543ef51SXin LI   UNUSED_P(systemId);
54874543ef51SXin LI   UNUSED_P(publicId);
54884543ef51SXin LI   const char *const pre = "<!ELEMENT document ANY>\n";
54894543ef51SXin LI   const char *const start = "<!ELEMENT ";
54904543ef51SXin LI   const char *const end = " ANY>\n";
54914543ef51SXin LI   const char *const post = "<!ELEMENT xyz ANY>\n";
54924543ef51SXin LI   const int enabled = *(int *)XML_GetUserData(p);
54934543ef51SXin LI   char eeeeee[100];
54944543ef51SXin LI   char spaces[100];
54954543ef51SXin LI   const int fillsize = (int)sizeof(eeeeee);
54964543ef51SXin LI   assert_true(fillsize == (int)sizeof(spaces));
54974543ef51SXin LI   memset(eeeeee, 'e', fillsize);
54984543ef51SXin LI   memset(spaces, ' ', fillsize);
54994543ef51SXin LI 
55004543ef51SXin LI   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
55014543ef51SXin LI   assert_true(parser != NULL);
55024543ef51SXin LI   // pre-grow the buffer to avoid reparsing due to almost-fullness
55034543ef51SXin LI   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
55044543ef51SXin LI 
55054543ef51SXin LI   struct element_decl_data testdata;
55064543ef51SXin LI   testdata.parser = parser;
55074543ef51SXin LI   testdata.count = 0;
55084543ef51SXin LI   XML_SetUserData(parser, &testdata);
55094543ef51SXin LI   XML_SetElementDeclHandler(parser, element_decl_counter);
55104543ef51SXin LI 
55114543ef51SXin LI   enum XML_Status status;
55124543ef51SXin LI   // parse the initial text
55134543ef51SXin LI   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
55144543ef51SXin LI   if (status != XML_STATUS_OK) {
55154543ef51SXin LI     xml_failure(parser);
55164543ef51SXin LI   }
55174543ef51SXin LI   assert_true(testdata.count == 1); // first element should be done
55184543ef51SXin LI 
55194543ef51SXin LI   // ..and the start of the big token
55204543ef51SXin LI   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
55214543ef51SXin LI   if (status != XML_STATUS_OK) {
55224543ef51SXin LI     xml_failure(parser);
55234543ef51SXin LI   }
55244543ef51SXin LI   assert_true(testdata.count == 1); // still just the first one
55254543ef51SXin LI 
55264543ef51SXin LI   // try to parse lots of 'e', but the token isn't finished
55274543ef51SXin LI   for (int c = 0; c < 100; ++c) {
55284543ef51SXin LI     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
55294543ef51SXin LI     if (status != XML_STATUS_OK) {
55304543ef51SXin LI       xml_failure(parser);
55314543ef51SXin LI     }
55324543ef51SXin LI   }
55334543ef51SXin LI   assert_true(testdata.count == 1); // *still* just the first one
55344543ef51SXin LI 
55354543ef51SXin LI   // end the big token.
55364543ef51SXin LI   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
55374543ef51SXin LI   if (status != XML_STATUS_OK) {
55384543ef51SXin LI     xml_failure(parser);
55394543ef51SXin LI   }
55404543ef51SXin LI 
55414543ef51SXin LI   if (enabled) {
55424543ef51SXin LI     // In general, we may need to push more data to trigger a reparse attempt,
55434543ef51SXin LI     // but in this test, the data is constructed to always require it.
55444543ef51SXin LI     assert_true(testdata.count == 1); // or the test is incorrect
55454543ef51SXin LI     // 2x the token length should suffice; the +1 covers the start and end.
55464543ef51SXin LI     for (int c = 0; c < 101; ++c) {
55474543ef51SXin LI       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
55484543ef51SXin LI       if (status != XML_STATUS_OK) {
55494543ef51SXin LI         xml_failure(parser);
55504543ef51SXin LI       }
55514543ef51SXin LI     }
55524543ef51SXin LI   }
55534543ef51SXin LI   assert_true(testdata.count == 2); // the big token should be done
55544543ef51SXin LI 
55554543ef51SXin LI   // parse the final text
55564543ef51SXin LI   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
55574543ef51SXin LI   if (status != XML_STATUS_OK) {
55584543ef51SXin LI     xml_failure(parser);
55594543ef51SXin LI   }
55604543ef51SXin LI   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
55614543ef51SXin LI 
55624543ef51SXin LI   XML_ParserFree(parser);
55634543ef51SXin LI   return XML_STATUS_OK;
55644543ef51SXin LI }
55654543ef51SXin LI 
55664543ef51SXin LI START_TEST(test_reparse_deferral_is_inherited) {
55674543ef51SXin LI   const char *const text
55684543ef51SXin LI       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
55694543ef51SXin LI   for (int enabled = 0; enabled <= 1; ++enabled) {
55704543ef51SXin LI     set_subtest("deferral=%d", enabled);
55714543ef51SXin LI 
55724543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
55734543ef51SXin LI     assert_true(parser != NULL);
55744543ef51SXin LI     XML_SetUserData(parser, (void *)&enabled);
55754543ef51SXin LI     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
55764543ef51SXin LI     // this handler creates a sub-parser and checks that its deferral behavior
55774543ef51SXin LI     // is what we expected, based on the value of `enabled` (in userdata).
55784543ef51SXin LI     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
55794543ef51SXin LI     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
55804543ef51SXin LI     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
55814543ef51SXin LI       xml_failure(parser);
55824543ef51SXin LI 
55834543ef51SXin LI     XML_ParserFree(parser);
55844543ef51SXin LI   }
55854543ef51SXin LI }
55864543ef51SXin LI END_TEST
55874543ef51SXin LI 
55884543ef51SXin LI START_TEST(test_set_reparse_deferral_on_null_parser) {
55894543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
55904543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
55914543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
55924543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
55934543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
55944543ef51SXin LI               == XML_FALSE);
55954543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
55964543ef51SXin LI               == XML_FALSE);
55974543ef51SXin LI }
55984543ef51SXin LI END_TEST
55994543ef51SXin LI 
56004543ef51SXin LI START_TEST(test_set_reparse_deferral_on_the_fly) {
56014543ef51SXin LI   const char *const pre = "<d><x attr='";
56024543ef51SXin LI   const char *const end = "'></x>";
56034543ef51SXin LI   char iiiiii[100];
56044543ef51SXin LI   const int fillsize = (int)sizeof(iiiiii);
56054543ef51SXin LI   memset(iiiiii, 'i', fillsize);
56064543ef51SXin LI 
56074543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
56084543ef51SXin LI   assert_true(parser != NULL);
56094543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
56104543ef51SXin LI 
56114543ef51SXin LI   CharData storage;
56124543ef51SXin LI   CharData_Init(&storage);
56134543ef51SXin LI   XML_SetUserData(parser, &storage);
56144543ef51SXin LI   XML_SetStartElementHandler(parser, start_element_event_handler);
56154543ef51SXin LI 
56164543ef51SXin LI   enum XML_Status status;
56174543ef51SXin LI   // parse the start text
56184543ef51SXin LI   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
56194543ef51SXin LI   if (status != XML_STATUS_OK) {
56204543ef51SXin LI     xml_failure(parser);
56214543ef51SXin LI   }
56224543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
56234543ef51SXin LI 
56244543ef51SXin LI   // try to parse some 'i', but the token isn't finished
56254543ef51SXin LI   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
56264543ef51SXin LI   if (status != XML_STATUS_OK) {
56274543ef51SXin LI     xml_failure(parser);
56284543ef51SXin LI   }
56294543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
56304543ef51SXin LI 
56314543ef51SXin LI   // end the <x> token.
56324543ef51SXin LI   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
56334543ef51SXin LI   if (status != XML_STATUS_OK) {
56344543ef51SXin LI     xml_failure(parser);
56354543ef51SXin LI   }
56364543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
56374543ef51SXin LI 
56384543ef51SXin LI   // now change the heuristic setting and add *no* data
56394543ef51SXin LI   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
56404543ef51SXin LI   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
56414543ef51SXin LI   status = XML_Parse(parser, "", 0, XML_FALSE);
56424543ef51SXin LI   if (status != XML_STATUS_OK) {
56434543ef51SXin LI     xml_failure(parser);
56444543ef51SXin LI   }
56454543ef51SXin LI   CharData_CheckXMLChars(&storage, XCS("dx"));
56464543ef51SXin LI 
56474543ef51SXin LI   XML_ParserFree(parser);
56484543ef51SXin LI }
56494543ef51SXin LI END_TEST
56504543ef51SXin LI 
56514543ef51SXin LI START_TEST(test_set_bad_reparse_option) {
56524543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
56534543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
56544543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
56554543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
56564543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
56574543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
56584543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
56594543ef51SXin LI   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
56604543ef51SXin LI   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
56614543ef51SXin LI   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
56624543ef51SXin LI   XML_ParserFree(parser);
56634543ef51SXin LI }
56644543ef51SXin LI END_TEST
56654543ef51SXin LI 
/* Statistics kept by the counting allocator below: the running sum of all
   requested sizes, and the largest single request seen so far. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
   before handing the request on to realloc(). */
static void *
counting_realloc(void *ptr, size_t size) {
  g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart; goes through counting_realloc so that the request is
   counted in the same way. */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
56824543ef51SXin LI 
56834543ef51SXin LI START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
56844543ef51SXin LI   if (g_chunkSize != 0) {
56854543ef51SXin LI     // this test does not use SINGLE_BYTES, because it depends on very precise
56864543ef51SXin LI     // buffer fills.
56874543ef51SXin LI     return;
56884543ef51SXin LI   }
56894543ef51SXin LI   if (! g_reparseDeferralEnabledDefault) {
56904543ef51SXin LI     return; // this test is irrelevant when the deferral heuristic is disabled.
56914543ef51SXin LI   }
56924543ef51SXin LI 
56934543ef51SXin LI   const int document_length = 65536;
56944543ef51SXin LI   char *const document = (char *)malloc(document_length);
56954543ef51SXin LI 
56964543ef51SXin LI   const XML_Memory_Handling_Suite memfuncs = {
56974543ef51SXin LI       counting_malloc,
56984543ef51SXin LI       counting_realloc,
56994543ef51SXin LI       free,
57004543ef51SXin LI   };
57014543ef51SXin LI 
57024543ef51SXin LI   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
57034543ef51SXin LI   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
57044543ef51SXin LI   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
57054543ef51SXin LI 
57064543ef51SXin LI   for (const int *leading = leading_list; *leading >= 0; leading++) {
57074543ef51SXin LI     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
57084543ef51SXin LI       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
57094543ef51SXin LI         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
57104543ef51SXin LI                     *fillsize);
57114543ef51SXin LI         // start by checking that the test looks reasonably valid
57124543ef51SXin LI         assert_true(*leading + *bigtoken <= document_length);
57134543ef51SXin LI 
57144543ef51SXin LI         // put 'x' everywhere; some will be overwritten by elements.
57154543ef51SXin LI         memset(document, 'x', document_length);
57164543ef51SXin LI         // maybe add an initial tag
57174543ef51SXin LI         if (*leading) {
57184543ef51SXin LI           assert_true(*leading >= 3); // or the test case is invalid
57194543ef51SXin LI           memcpy(document, "<a>", 3);
57204543ef51SXin LI         }
57214543ef51SXin LI         // add the large token
57224543ef51SXin LI         document[*leading + 0] = '<';
57234543ef51SXin LI         document[*leading + 1] = 'b';
57244543ef51SXin LI         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
57254543ef51SXin LI         document[*leading + *bigtoken - 1] = '>';
57264543ef51SXin LI 
57274543ef51SXin LI         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
57284543ef51SXin LI         const int expected_elem_total = 1 + (*leading ? 1 : 0);
57294543ef51SXin LI 
57304543ef51SXin LI         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
57314543ef51SXin LI         assert_true(parser != NULL);
57324543ef51SXin LI 
57334543ef51SXin LI         CharData storage;
57344543ef51SXin LI         CharData_Init(&storage);
57354543ef51SXin LI         XML_SetUserData(parser, &storage);
57364543ef51SXin LI         XML_SetStartElementHandler(parser, start_element_event_handler);
57374543ef51SXin LI 
57384543ef51SXin LI         g_biggestAlloc = 0;
57394543ef51SXin LI         g_totalAlloc = 0;
57404543ef51SXin LI         int offset = 0;
57414543ef51SXin LI         // fill data until the big token is covered (but not necessarily parsed)
57424543ef51SXin LI         while (offset < *leading + *bigtoken) {
57434543ef51SXin LI           assert_true(offset + *fillsize <= document_length);
57444543ef51SXin LI           const enum XML_Status status
57454543ef51SXin LI               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
57464543ef51SXin LI           if (status != XML_STATUS_OK) {
57474543ef51SXin LI             xml_failure(parser);
57484543ef51SXin LI           }
57494543ef51SXin LI           offset += *fillsize;
57504543ef51SXin LI         }
57514543ef51SXin LI         // Now, check that we've had a buffer allocation that could fit the
57524543ef51SXin LI         // context bytes and our big token. In order to detect a special case,
57534543ef51SXin LI         // we need to know how many bytes of our big token were included in the
57544543ef51SXin LI         // first push that contained _any_ bytes of the big token:
57554543ef51SXin LI         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
57564543ef51SXin LI         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
57574543ef51SXin LI           // Special case: we aren't saving any context, and the whole big token
57584543ef51SXin LI           // was covered by a single fill, so Expat may have parsed directly
57594543ef51SXin LI           // from our input pointer, without allocating an internal buffer.
57604543ef51SXin LI         } else if (*leading < XML_CONTEXT_BYTES) {
57614543ef51SXin LI           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
57624543ef51SXin LI         } else {
57634543ef51SXin LI           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
57644543ef51SXin LI         }
57654543ef51SXin LI         // fill data until the big token is actually parsed
57664543ef51SXin LI         while (storage.count < expected_elem_total) {
57674543ef51SXin LI           const size_t alloc_before = g_totalAlloc;
57684543ef51SXin LI           assert_true(offset + *fillsize <= document_length);
57694543ef51SXin LI           const enum XML_Status status
57704543ef51SXin LI               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
57714543ef51SXin LI           if (status != XML_STATUS_OK) {
57724543ef51SXin LI             xml_failure(parser);
57734543ef51SXin LI           }
57744543ef51SXin LI           offset += *fillsize;
57754543ef51SXin LI           // since all the bytes of the big token are already in the buffer,
57764543ef51SXin LI           // the bufsize ceiling should make us finish its parsing without any
57774543ef51SXin LI           // further buffer allocations. We assume that there will be no other
57784543ef51SXin LI           // large allocations in this test.
57794543ef51SXin LI           assert_true(g_totalAlloc - alloc_before < 4096);
57804543ef51SXin LI         }
57814543ef51SXin LI         // test-the-test: was our alloc even called?
57824543ef51SXin LI         assert_true(g_totalAlloc > 0);
57834543ef51SXin LI         // test-the-test: there shouldn't be any extra start elements
57844543ef51SXin LI         assert_true(storage.count == expected_elem_total);
57854543ef51SXin LI 
57864543ef51SXin LI         XML_ParserFree(parser);
57874543ef51SXin LI       }
57884543ef51SXin LI     }
57894543ef51SXin LI   }
57904543ef51SXin LI   free(document);
57914543ef51SXin LI }
57924543ef51SXin LI END_TEST
57934543ef51SXin LI 
57944543ef51SXin LI START_TEST(test_varying_buffer_fills) {
57954543ef51SXin LI   const int KiB = 1024;
57964543ef51SXin LI   const int MiB = 1024 * KiB;
57974543ef51SXin LI   const int document_length = 16 * MiB;
57984543ef51SXin LI   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
57994543ef51SXin LI 
58004543ef51SXin LI   if (g_chunkSize != 0) {
58014543ef51SXin LI     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
58024543ef51SXin LI   }
58034543ef51SXin LI 
58044543ef51SXin LI   char *const document = (char *)malloc(document_length);
58054543ef51SXin LI   assert_true(document != NULL);
58064543ef51SXin LI   memset(document, 'x', document_length);
58074543ef51SXin LI   document[0] = '<';
58084543ef51SXin LI   document[1] = 't';
58094543ef51SXin LI   memset(&document[2], ' ', big - 2); // a very spacy token
58104543ef51SXin LI   document[big - 1] = '>';
58114543ef51SXin LI 
58124543ef51SXin LI   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
58134543ef51SXin LI   // When reparse deferral is enabled, the final (negated) value is the expected
58144543ef51SXin LI   // maximum number of bytes scanned in parse attempts.
58154543ef51SXin LI   const int testcases[][30] = {
58164543ef51SXin LI       {8 * MiB, -8 * MiB},
58174543ef51SXin LI       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
58184543ef51SXin LI       // zero-size fills shouldn't trigger the bypass
58194543ef51SXin LI       {4 * MiB, 0, 4 * MiB, -12 * MiB},
58204543ef51SXin LI       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
58214543ef51SXin LI       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
58224543ef51SXin LI       // try to hit the buffer ceiling only once (at the end)
58234543ef51SXin LI       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
58244543ef51SXin LI       // try to hit the same buffer ceiling multiple times
58254543ef51SXin LI       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
58264543ef51SXin LI 
58274543ef51SXin LI       // try to hit every ceiling, by always landing 1K shy of the buffer size
58284543ef51SXin LI       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
58294543ef51SXin LI        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
58304543ef51SXin LI 
58314543ef51SXin LI       // try to avoid every ceiling, by always landing 1B past the buffer size
58324543ef51SXin LI       // the normal 2x heuristic threshold still forces parse attempts.
58334543ef51SXin LI       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
58344543ef51SXin LI        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
58354543ef51SXin LI        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
58364543ef51SXin LI        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
58374543ef51SXin LI        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
58384543ef51SXin LI        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
58394543ef51SXin LI        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
58404543ef51SXin LI        -(10 * MiB + 682 * KiB + 7)},
58414543ef51SXin LI       // try to avoid every ceiling again, except on our last fill.
58424543ef51SXin LI       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
58434543ef51SXin LI        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
58444543ef51SXin LI        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
58454543ef51SXin LI        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
58464543ef51SXin LI        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
58474543ef51SXin LI        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
58484543ef51SXin LI        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
58494543ef51SXin LI        -(10 * MiB + 682 * KiB + 6)},
58504543ef51SXin LI 
58514543ef51SXin LI       // try to hit ceilings on the way multiple times
58524543ef51SXin LI       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
58534543ef51SXin LI        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
58544543ef51SXin LI        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
58554543ef51SXin LI        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
58564543ef51SXin LI        // we'll make a parse attempt at every parse call
58574543ef51SXin LI        -(45 * MiB + 12)},
58584543ef51SXin LI   };
58594543ef51SXin LI   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
58604543ef51SXin LI   for (int test_i = 0; test_i < testcount; test_i++) {
58614543ef51SXin LI     const int *fillsize = testcases[test_i];
58624543ef51SXin LI     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
58634543ef51SXin LI                 fillsize[2], fillsize[3]);
58644543ef51SXin LI     XML_Parser parser = XML_ParserCreate(NULL);
58654543ef51SXin LI     assert_true(parser != NULL);
58664543ef51SXin LI 
58674543ef51SXin LI     CharData storage;
58684543ef51SXin LI     CharData_Init(&storage);
58694543ef51SXin LI     XML_SetUserData(parser, &storage);
58704543ef51SXin LI     XML_SetStartElementHandler(parser, start_element_event_handler);
58714543ef51SXin LI 
5872ffd294a1SEnji Cooper     g_bytesScanned = 0;
58734543ef51SXin LI     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
58744543ef51SXin LI     int offset = 0;
58754543ef51SXin LI     while (*fillsize >= 0) {
58764543ef51SXin LI       assert_true(offset + *fillsize <= document_length); // or test is invalid
58774543ef51SXin LI       const enum XML_Status status
58784543ef51SXin LI           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
58794543ef51SXin LI       if (status != XML_STATUS_OK) {
58804543ef51SXin LI         xml_failure(parser);
58814543ef51SXin LI       }
58824543ef51SXin LI       offset += *fillsize;
58834543ef51SXin LI       fillsize++;
58844543ef51SXin LI       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
58854543ef51SXin LI       worstcase_bytes += offset; // we might've tried to parse all pending bytes
58864543ef51SXin LI     }
58874543ef51SXin LI     assert_true(storage.count == 1); // the big token should've been parsed
5888ffd294a1SEnji Cooper     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
58894543ef51SXin LI     if (g_reparseDeferralEnabledDefault) {
58904543ef51SXin LI       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5891ffd294a1SEnji Cooper       const unsigned max_bytes_scanned = -*fillsize;
5892ffd294a1SEnji Cooper       if (g_bytesScanned > max_bytes_scanned) {
58934543ef51SXin LI         fprintf(stderr,
5894ffd294a1SEnji Cooper                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5895ffd294a1SEnji Cooper                 g_bytesScanned, max_bytes_scanned);
58964543ef51SXin LI         fail("too many bytes scanned in parse attempts");
58974543ef51SXin LI       }
58984543ef51SXin LI     }
5899ffd294a1SEnji Cooper     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
59004543ef51SXin LI 
59014543ef51SXin LI     XML_ParserFree(parser);
59024543ef51SXin LI   }
59034543ef51SXin LI   free(document);
59044543ef51SXin LI }
59054543ef51SXin LI END_TEST
59064543ef51SXin LI 
59074543ef51SXin LI void
59084543ef51SXin LI make_basic_test_case(Suite *s) {
59094543ef51SXin LI   TCase *tc_basic = tcase_create("basic tests");
59104543ef51SXin LI 
59114543ef51SXin LI   suite_add_tcase(s, tc_basic);
59124543ef51SXin LI   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
59134543ef51SXin LI 
59144543ef51SXin LI   tcase_add_test(tc_basic, test_nul_byte);
59154543ef51SXin LI   tcase_add_test(tc_basic, test_u0000_char);
59164543ef51SXin LI   tcase_add_test(tc_basic, test_siphash_self);
59174543ef51SXin LI   tcase_add_test(tc_basic, test_siphash_spec);
59184543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf8);
59194543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf16_be);
59204543ef51SXin LI   tcase_add_test(tc_basic, test_bom_utf16_le);
59214543ef51SXin LI   tcase_add_test(tc_basic, test_nobom_utf16_le);
59224543ef51SXin LI   tcase_add_test(tc_basic, test_hash_collision);
59234543ef51SXin LI   tcase_add_test(tc_basic, test_illegal_utf8);
59244543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_auto_align);
59254543ef51SXin LI   tcase_add_test(tc_basic, test_utf16);
59264543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
59274543ef51SXin LI   tcase_add_test(tc_basic, test_not_utf16);
59284543ef51SXin LI   tcase_add_test(tc_basic, test_bad_encoding);
59294543ef51SXin LI   tcase_add_test(tc_basic, test_latin1_umlauts);
59304543ef51SXin LI   tcase_add_test(tc_basic, test_long_utf8_character);
59314543ef51SXin LI   tcase_add_test(tc_basic, test_long_latin1_attribute);
59324543ef51SXin LI   tcase_add_test(tc_basic, test_long_ascii_attribute);
59334543ef51SXin LI   /* Regression test for SF bug #491986. */
59344543ef51SXin LI   tcase_add_test(tc_basic, test_danish_latin1);
59354543ef51SXin LI   /* Regression test for SF bug #514281. */
59364543ef51SXin LI   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
59374543ef51SXin LI   tcase_add_test(tc_basic, test_french_charref_decimal);
59384543ef51SXin LI   tcase_add_test(tc_basic, test_french_latin1);
59394543ef51SXin LI   tcase_add_test(tc_basic, test_french_utf8);
59404543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_false_rejection);
59414543ef51SXin LI   tcase_add_test(tc_basic, test_line_number_after_parse);
59424543ef51SXin LI   tcase_add_test(tc_basic, test_column_number_after_parse);
59434543ef51SXin LI   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
59444543ef51SXin LI   tcase_add_test(tc_basic, test_line_number_after_error);
59454543ef51SXin LI   tcase_add_test(tc_basic, test_column_number_after_error);
59464543ef51SXin LI   tcase_add_test(tc_basic, test_really_long_lines);
59474543ef51SXin LI   tcase_add_test(tc_basic, test_really_long_encoded_lines);
59484543ef51SXin LI   tcase_add_test(tc_basic, test_end_element_events);
59494543ef51SXin LI   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
59504543ef51SXin LI   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
59514543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_misplaced);
59524543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_invalid);
59534543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
59544543ef51SXin LI   tcase_add_test(tc_basic, test_xmldecl_missing_value);
59554543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
59564543ef51SXin LI   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
59574543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
59584543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
59594543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
59604543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
59614543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
59624543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
59634543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
59644543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
59654543ef51SXin LI   tcase_add_test(tc_basic,
59664543ef51SXin LI                  test_wfc_undeclared_entity_with_external_subset_standalone);
59674543ef51SXin LI   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
59684543ef51SXin LI   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
59694543ef51SXin LI   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
59704543ef51SXin LI   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
59714543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
59724543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
59734543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
59744543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_attr_handling);
59754543ef51SXin LI   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
59764543ef51SXin LI   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
59774543ef51SXin LI   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
59784543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
59794543ef51SXin LI   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
59804543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_ascii);
59814543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_utf16);
59824543ef51SXin LI   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
59834543ef51SXin LI   tcase_add_test(tc_basic, test_long_cdata_utf16);
59844543ef51SXin LI   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
59854543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
59864543ef51SXin LI   tcase_add_test(tc_basic, test_bad_cdata);
59874543ef51SXin LI   tcase_add_test(tc_basic, test_bad_cdata_utf16);
59884543ef51SXin LI   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
59894543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
59904543ef51SXin LI   tcase_add_test(tc_basic, test_memory_allocation);
59914543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
59924543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_elements);
59934543ef51SXin LI   tcase_add_test(tc_basic, test_dtd_elements_nesting);
59944543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
59954543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
59964543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
59974543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
59984543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
59994543ef51SXin LI                                 test_foreign_dtd_without_external_subset);
60004543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
60014543ef51SXin LI   tcase_add_test(tc_basic, test_set_base);
60024543ef51SXin LI   tcase_add_test(tc_basic, test_attributes);
60034543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
60044543ef51SXin LI   tcase_add_test(tc_basic, test_resume_invalid_parse);
60054543ef51SXin LI   tcase_add_test(tc_basic, test_resume_resuspended);
60064543ef51SXin LI   tcase_add_test(tc_basic, test_cdata_default);
60074543ef51SXin LI   tcase_add_test(tc_basic, test_subordinate_reset);
60084543ef51SXin LI   tcase_add_test(tc_basic, test_subordinate_suspend);
60094543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
60104543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
60114543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
60124543ef51SXin LI                                 test_ext_entity_invalid_suspended_parse);
60134543ef51SXin LI   tcase_add_test(tc_basic, test_explicit_encoding);
60144543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_cr);
60154543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
60164543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_rsqb);
60174543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
60184543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
60194543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
60204543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
60214543ef51SXin LI   tcase_add_test(tc_basic, test_empty_parse);
6022ffd294a1SEnji Cooper   tcase_add_test(tc_basic, test_negative_len_parse);
6023ffd294a1SEnji Cooper   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
60244543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_1);
60254543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_2);
60264543ef51SXin LI #if XML_CONTEXT_BYTES > 0
60274543ef51SXin LI   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
60284543ef51SXin LI #endif
60294543ef51SXin LI   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
60304543ef51SXin LI   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
60314543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_end);
60324543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_error);
60334543ef51SXin LI   tcase_add_test(tc_basic, test_byte_info_at_cdata);
60344543ef51SXin LI   tcase_add_test(tc_basic, test_predefined_entities);
60354543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
60364543ef51SXin LI   tcase_add_test(tc_basic, test_not_predefined_entities);
60374543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
60384543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
60394543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
60404543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
60414543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
60424543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
60434543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
60444543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
60454543ef51SXin LI   tcase_add_test(tc_basic, test_bad_public_doctype);
60464543ef51SXin LI   tcase_add_test(tc_basic, test_attribute_enum_value);
60474543ef51SXin LI   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
60484543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
60494543ef51SXin LI   tcase_add_test(tc_basic, test_public_notation_no_sysid);
60504543ef51SXin LI   tcase_add_test(tc_basic, test_nested_groups);
60514543ef51SXin LI   tcase_add_test(tc_basic, test_group_choice);
60524543ef51SXin LI   tcase_add_test(tc_basic, test_standalone_parameter_entity);
60534543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
60544543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
60554543ef51SXin LI                                 test_recursive_external_parameter_entity);
6056ffd294a1SEnji Cooper   tcase_add_test__ifdef_xml_dtd(tc_basic,
6057ffd294a1SEnji Cooper                                 test_recursive_external_parameter_entity_2);
60584543ef51SXin LI   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
60594543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_xdecl);
60604543ef51SXin LI   tcase_add_test(tc_basic, test_abort_epilog);
60614543ef51SXin LI   tcase_add_test(tc_basic, test_abort_epilog_2);
60624543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_epilog);
60634543ef51SXin LI   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
60644543ef51SXin LI   tcase_add_test(tc_basic, test_unfinished_epilog);
60654543ef51SXin LI   tcase_add_test(tc_basic, test_partial_char_in_epilog);
60664543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
60674543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
60684543ef51SXin LI                                 test_suspend_resume_internal_entity_issue_629);
60694543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
60704543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
60714543ef51SXin LI   tcase_add_test(tc_basic, test_restart_on_error);
60724543ef51SXin LI   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
60734543ef51SXin LI   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
60744543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
60754543ef51SXin LI   tcase_add_test(tc_basic, test_standalone_internal_entity);
60764543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_external_entity);
60774543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
60784543ef51SXin LI   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
60794543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
60804543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
60814543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
60824543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
60834543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
60844543ef51SXin LI   tcase_add_test(tc_basic, test_pi_handled_in_default);
60854543ef51SXin LI   tcase_add_test(tc_basic, test_comment_handled_in_default);
60864543ef51SXin LI   tcase_add_test(tc_basic, test_pi_yml);
60874543ef51SXin LI   tcase_add_test(tc_basic, test_pi_xnl);
60884543ef51SXin LI   tcase_add_test(tc_basic, test_pi_xmm);
60894543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_pi);
60904543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_be_pi);
60914543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_be_comment);
60924543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_le_comment);
60934543ef51SXin LI   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
60944543ef51SXin LI   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
60954543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_success);
60964543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
60974543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
60984543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
60994543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
61004543ef51SXin LI   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
61014543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
61024543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
61034543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
61044543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
61054543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
61064543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
61074543ef51SXin LI   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
61084543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
61094543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
61104543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
61114543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
61124543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
61134543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
61144543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
61154543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
61164543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
61174543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
61184543ef51SXin LI   tcase_add_test(tc_basic, test_utf8_in_start_tags);
61194543ef51SXin LI   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
61204543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_attribute);
61214543ef51SXin LI   tcase_add_test(tc_basic, test_utf16_second_attr);
61224543ef51SXin LI   tcase_add_test(tc_basic, test_attr_after_solidus);
61234543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
61244543ef51SXin LI   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
61254543ef51SXin LI   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
61264543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype);
61274543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_utf8);
61284543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_utf16);
61294543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_plus);
61304543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_star);
61314543ef51SXin LI   tcase_add_test(tc_basic, test_bad_doctype_query);
61324543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
61334543ef51SXin LI   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
61344543ef51SXin LI   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
61354543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
61364543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
61374543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype);
61384543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype_2);
61394543ef51SXin LI   tcase_add_test(tc_basic, test_short_doctype_3);
61404543ef51SXin LI   tcase_add_test(tc_basic, test_long_doctype);
61414543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity);
61424543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_2);
61434543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_3);
61444543ef51SXin LI   tcase_add_test(tc_basic, test_bad_entity_4);
61454543ef51SXin LI   tcase_add_test(tc_basic, test_bad_notation);
61464543ef51SXin LI   tcase_add_test(tc_basic, test_default_doctype_handler);
61474543ef51SXin LI   tcase_add_test(tc_basic, test_empty_element_abort);
61484543ef51SXin LI   tcase_add_test__ifdef_xml_dtd(tc_basic,
61494543ef51SXin LI                                 test_pool_integrity_with_unfinished_attr);
61504543ef51SXin LI   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6151ffd294a1SEnji Cooper   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
61524543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral);
61534543ef51SXin LI   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
61544543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
61554543ef51SXin LI   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
61564543ef51SXin LI   tcase_add_test(tc_basic, test_set_bad_reparse_option);
61574543ef51SXin LI   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
61584543ef51SXin LI   tcase_add_test(tc_basic, test_varying_buffer_fills);
61594543ef51SXin LI }
6160