xref: /freebsd/contrib/expat/tests/common.c (revision fe9278888fd4414abe2d922e469cf608005f4c65)
14543ef51SXin LI /* Commonly used functions for the Expat test suite
24543ef51SXin LI                             __  __            _
34543ef51SXin LI                          ___\ \/ /_ __   __ _| |_
44543ef51SXin LI                         / _ \\  /| '_ \ / _` | __|
54543ef51SXin LI                        |  __//  \| |_) | (_| | |_
64543ef51SXin LI                         \___/_/\_\ .__/ \__,_|\__|
74543ef51SXin LI                                  |_| XML parser
84543ef51SXin LI 
94543ef51SXin LI    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
104543ef51SXin LI    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
114543ef51SXin LI    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
124543ef51SXin LI    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13*fe927888SPhilip Paeps    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
144543ef51SXin LI    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
154543ef51SXin LI    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
164543ef51SXin LI    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
174543ef51SXin LI    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
184543ef51SXin LI    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
194543ef51SXin LI    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
204543ef51SXin LI    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
214543ef51SXin LI    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
224543ef51SXin LI    Licensed under the MIT license:
234543ef51SXin LI 
244543ef51SXin LI    Permission is  hereby granted,  free of charge,  to any  person obtaining
254543ef51SXin LI    a  copy  of  this  software   and  associated  documentation  files  (the
264543ef51SXin LI    "Software"),  to  deal in  the  Software  without restriction,  including
274543ef51SXin LI    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
284543ef51SXin LI    distribute, sublicense, and/or sell copies of the Software, and to permit
294543ef51SXin LI    persons  to whom  the Software  is  furnished to  do so,  subject to  the
304543ef51SXin LI    following conditions:
314543ef51SXin LI 
324543ef51SXin LI    The above copyright  notice and this permission notice  shall be included
334543ef51SXin LI    in all copies or substantial portions of the Software.
344543ef51SXin LI 
354543ef51SXin LI    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
364543ef51SXin LI    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
374543ef51SXin LI    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
384543ef51SXin LI    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
394543ef51SXin LI    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
404543ef51SXin LI    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
414543ef51SXin LI    USE OR OTHER DEALINGS IN THE SOFTWARE.
424543ef51SXin LI */
434543ef51SXin LI 
444543ef51SXin LI #include <assert.h>
45*fe927888SPhilip Paeps #include <errno.h>
46*fe927888SPhilip Paeps #include <stdint.h> // for SIZE_MAX
474543ef51SXin LI #include <stdio.h>
484543ef51SXin LI #include <string.h>
494543ef51SXin LI 
504543ef51SXin LI #include "expat_config.h"
514543ef51SXin LI #include "expat.h"
524543ef51SXin LI #include "internal.h"
534543ef51SXin LI #include "chardata.h"
544543ef51SXin LI #include "minicheck.h"
554543ef51SXin LI #include "common.h"
56908f215eSXin LI #include "handlers.h"
574543ef51SXin LI 
584543ef51SXin LI /* Common test data */
594543ef51SXin LI 
/* Document declared as ISO-8859-1 whose single <s> element holds
 * 20 rows x 60 digits = 1200 bytes of character data.  The helper macros
 * exist only to spell out the repetition and are removed again right after
 * the definition.
 */
#define LONG_CHAR_DATA_ROW                                                     \
  "012345678901234567890123456789012345678901234567890123456789"
#define LONG_CHAR_DATA_5_ROWS                                                  \
  LONG_CHAR_DATA_ROW LONG_CHAR_DATA_ROW LONG_CHAR_DATA_ROW                     \
  LONG_CHAR_DATA_ROW LONG_CHAR_DATA_ROW
const char *long_character_data_text
    = "<?xml version='1.0' encoding='iso-8859-1'?><s>" /* prologue */
      LONG_CHAR_DATA_5_ROWS                            /* rows  1..5  */
      LONG_CHAR_DATA_5_ROWS                            /* rows  6..10 */
      LONG_CHAR_DATA_5_ROWS                            /* rows 11..15 */
      LONG_CHAR_DATA_5_ROWS                            /* rows 16..20 */
      "</s>";
#undef LONG_CHAR_DATA_5_ROWS
#undef LONG_CHAR_DATA_ROW
834543ef51SXin LI 
/* <s> element whose content is one CDATA section holding
 * 20 rows x 60 digits = 1200 bytes.  The helper macros exist only to spell
 * out the repetition and are removed again right after the definition.
 */
#define LONG_CDATA_ROW                                                         \
  "012345678901234567890123456789012345678901234567890123456789"
#define LONG_CDATA_5_ROWS                                                      \
  LONG_CDATA_ROW LONG_CDATA_ROW LONG_CDATA_ROW LONG_CDATA_ROW LONG_CDATA_ROW
const char *long_cdata_text
    = "<s><![CDATA["    /* opening tag and CDATA start */
      LONG_CDATA_5_ROWS /* rows  1..5  */
      LONG_CDATA_5_ROWS /* rows  6..10 */
      LONG_CDATA_5_ROWS /* rows 11..15 */
      LONG_CDATA_5_ROWS /* rows 16..20 */
      "]]></s>";
#undef LONG_CDATA_5_ROWS
#undef LONG_CDATA_ROW
1074543ef51SXin LI 
/* Having an element name longer than 1024 characters exercises some
 * of the pool allocation code in the parser that otherwise does not
 * get executed.  The hexadecimal count in the trailing comments is the
 * cumulative number of source characters of the literal up to that point
 * (the final "\n" escape counts as two source characters there, so the
 * string itself is one byte shorter than the last count).
 */
#define GET_BUFFER_HEX_ROW "123456789abcdef0123456789abcdef0123456789abcdef0"
#define GET_BUFFER_6_HEX_ROWS                                                  \
  GET_BUFFER_HEX_ROW GET_BUFFER_HEX_ROW GET_BUFFER_HEX_ROW                     \
  GET_BUFFER_HEX_ROW GET_BUFFER_HEX_ROW GET_BUFFER_HEX_ROW
const char *get_buffer_test_text
    = "<documentwitharidiculouslylongelementnametotease"  /* 0x030 */
      "aparticularcorneroftheallocationinXML_GetBuffers"  /* 0x060 */
      "othatwecanimprovethecoverageyetagain012345678901"  /* 0x090 */
      GET_BUFFER_6_HEX_ROWS                               /* 0x1b0 */
      GET_BUFFER_6_HEX_ROWS                               /* 0x2d0 */
      GET_BUFFER_6_HEX_ROWS                               /* 0x3f0 */
      "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */
#undef GET_BUFFER_6_HEX_ROWS
#undef GET_BUFFER_HEX_ROW
1364543ef51SXin LI 
1374543ef51SXin LI /* Test control globals */
1384543ef51SXin LI 
1394543ef51SXin LI /* Used as the "resumable" parameter to XML_StopParser by some tests */
1404543ef51SXin LI XML_Bool g_resumable = XML_FALSE;
1414543ef51SXin LI 
1424543ef51SXin LI /* Used to control abort checks in some tests */
1434543ef51SXin LI XML_Bool g_abortable = XML_FALSE;
1444543ef51SXin LI 
1454543ef51SXin LI /* Used to control _XML_Parse_SINGLE_BYTES() chunk size */
1464543ef51SXin LI int g_chunkSize = 1;
1474543ef51SXin LI 
1484543ef51SXin LI /* Common test functions */
1494543ef51SXin LI 
1504543ef51SXin LI void
tcase_add_test__ifdef_xml_dtd(TCase * tc,tcase_test_function test)1514543ef51SXin LI tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) {
1524543ef51SXin LI #ifdef XML_DTD
1534543ef51SXin LI   tcase_add_test(tc, test);
1544543ef51SXin LI #else
1554543ef51SXin LI   UNUSED_P(tc);
1564543ef51SXin LI   UNUSED_P(test);
1574543ef51SXin LI #endif
1584543ef51SXin LI }
1594543ef51SXin LI 
1604543ef51SXin LI void
tcase_add_test__if_xml_ge(TCase * tc,tcase_test_function test)1614543ef51SXin LI tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test) {
1624543ef51SXin LI #if XML_GE == 1
1634543ef51SXin LI   tcase_add_test(tc, test);
1644543ef51SXin LI #else
1654543ef51SXin LI   UNUSED_P(tc);
1664543ef51SXin LI   UNUSED_P(test);
1674543ef51SXin LI #endif
1684543ef51SXin LI }
1694543ef51SXin LI 
1704543ef51SXin LI void
basic_teardown(void)1714543ef51SXin LI basic_teardown(void) {
1724543ef51SXin LI   if (g_parser != NULL) {
1734543ef51SXin LI     XML_ParserFree(g_parser);
1744543ef51SXin LI     g_parser = NULL;
1754543ef51SXin LI   }
1764543ef51SXin LI }
1774543ef51SXin LI 
1784543ef51SXin LI /* Generate a failure using the parser state to create an error message;
1794543ef51SXin LI    this should be used when the parser reports an error we weren't
1804543ef51SXin LI    expecting.
1814543ef51SXin LI */
1824543ef51SXin LI void
_xml_failure(XML_Parser parser,const char * file,int line)1834543ef51SXin LI _xml_failure(XML_Parser parser, const char *file, int line) {
1844543ef51SXin LI   char buffer[1024];
1854543ef51SXin LI   enum XML_Error err = XML_GetErrorCode(parser);
1864543ef51SXin LI   snprintf(buffer, sizeof(buffer),
1874543ef51SXin LI            "    %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD
1884543ef51SXin LI            "u, offset %" XML_FMT_INT_MOD "u)\n    reported from %s, line %d\n",
1894543ef51SXin LI            err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser),
1904543ef51SXin LI            XML_GetCurrentColumnNumber(parser), file, line);
1914543ef51SXin LI   _fail(file, line, buffer);
1924543ef51SXin LI }
1934543ef51SXin LI 
1944543ef51SXin LI enum XML_Status
_XML_Parse_SINGLE_BYTES(XML_Parser parser,const char * s,int len,int isFinal)1954543ef51SXin LI _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len,
1964543ef51SXin LI                         int isFinal) {
1974543ef51SXin LI   // This ensures that tests have to run pathological parse cases
1984543ef51SXin LI   // (e.g. when `s` is NULL) against plain XML_Parse rather than
1994543ef51SXin LI   // chunking _XML_Parse_SINGLE_BYTES.
2004543ef51SXin LI   assert((parser != NULL) && (s != NULL) && (len >= 0));
2014543ef51SXin LI   const int chunksize = g_chunkSize;
2024543ef51SXin LI   if (chunksize > 0) {
2034543ef51SXin LI     // parse in chunks of `chunksize` bytes as long as not exhausting
2044543ef51SXin LI     for (; len > chunksize; len -= chunksize, s += chunksize) {
2054543ef51SXin LI       enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE);
2064543ef51SXin LI       if (res != XML_STATUS_OK) {
207*fe927888SPhilip Paeps         if ((res == XML_STATUS_SUSPENDED) && (len > chunksize)) {
208*fe927888SPhilip Paeps           fail("Use of function _XML_Parse_SINGLE_BYTES with a chunk size "
209*fe927888SPhilip Paeps                "greater than 0 (from g_chunkSize) does not work well with "
210*fe927888SPhilip Paeps                "suspension. Please consider use of plain XML_Parse at this "
211*fe927888SPhilip Paeps                "place in your test, instead.");
212*fe927888SPhilip Paeps         }
2134543ef51SXin LI         return res;
2144543ef51SXin LI       }
2154543ef51SXin LI     }
2164543ef51SXin LI   }
2174543ef51SXin LI   // parse the final chunk, the size of which will be <= chunksize
2184543ef51SXin LI   return XML_Parse(parser, s, len, isFinal);
2194543ef51SXin LI }
2204543ef51SXin LI 
2214543ef51SXin LI void
_expect_failure(const char * text,enum XML_Error errorCode,const char * errorMessage,const char * file,int lineno)2224543ef51SXin LI _expect_failure(const char *text, enum XML_Error errorCode,
2234543ef51SXin LI                 const char *errorMessage, const char *file, int lineno) {
2244543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2254543ef51SXin LI       == XML_STATUS_OK)
2264543ef51SXin LI     /* Hackish use of _fail() macro, but lets us report
2274543ef51SXin LI        the right filename and line number. */
2284543ef51SXin LI     _fail(file, lineno, errorMessage);
2294543ef51SXin LI   if (XML_GetErrorCode(g_parser) != errorCode)
2304543ef51SXin LI     _xml_failure(g_parser, file, lineno);
2314543ef51SXin LI }
2324543ef51SXin LI 
2334543ef51SXin LI void
_run_character_check(const char * text,const XML_Char * expected,const char * file,int line)2344543ef51SXin LI _run_character_check(const char *text, const XML_Char *expected,
2354543ef51SXin LI                      const char *file, int line) {
2364543ef51SXin LI   CharData storage;
2374543ef51SXin LI 
2384543ef51SXin LI   CharData_Init(&storage);
2394543ef51SXin LI   XML_SetUserData(g_parser, &storage);
2404543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
2414543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2424543ef51SXin LI       == XML_STATUS_ERROR)
2434543ef51SXin LI     _xml_failure(g_parser, file, line);
2444543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
2454543ef51SXin LI }
2464543ef51SXin LI 
2474543ef51SXin LI void
_run_attribute_check(const char * text,const XML_Char * expected,const char * file,int line)2484543ef51SXin LI _run_attribute_check(const char *text, const XML_Char *expected,
2494543ef51SXin LI                      const char *file, int line) {
2504543ef51SXin LI   CharData storage;
2514543ef51SXin LI 
2524543ef51SXin LI   CharData_Init(&storage);
2534543ef51SXin LI   XML_SetUserData(g_parser, &storage);
2544543ef51SXin LI   XML_SetStartElementHandler(g_parser, accumulate_attribute);
2554543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2564543ef51SXin LI       == XML_STATUS_ERROR)
2574543ef51SXin LI     _xml_failure(g_parser, file, line);
2584543ef51SXin LI   CharData_CheckXMLChars(&storage, expected);
2594543ef51SXin LI }
2604543ef51SXin LI 
2614543ef51SXin LI void
_run_ext_character_check(const char * text,ExtTest * test_data,const XML_Char * expected,const char * file,int line)2624543ef51SXin LI _run_ext_character_check(const char *text, ExtTest *test_data,
2634543ef51SXin LI                          const XML_Char *expected, const char *file, int line) {
2644543ef51SXin LI   CharData *const storage = (CharData *)malloc(sizeof(CharData));
2654543ef51SXin LI 
2664543ef51SXin LI   CharData_Init(storage);
2674543ef51SXin LI   test_data->storage = storage;
2684543ef51SXin LI   XML_SetUserData(g_parser, test_data);
2694543ef51SXin LI   XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters);
2704543ef51SXin LI   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2714543ef51SXin LI       == XML_STATUS_ERROR)
2724543ef51SXin LI     _xml_failure(g_parser, file, line);
2734543ef51SXin LI   CharData_CheckXMLChars(storage, expected);
2744543ef51SXin LI 
2754543ef51SXin LI   free(storage);
2764543ef51SXin LI }
2774543ef51SXin LI 
/* Budgets for the failing allocators below: the number of calls that
 * duff_allocator() / duff_reallocator() will still pass through to the real
 * allocator before they start returning NULL.  The *_ALWAYS_SUCCEED value
 * switches the countdown off.
 */
#define ALLOC_ALWAYS_SUCCEED (-1)
#define REALLOC_ALWAYS_SUCCEED (-1)

int g_allocation_count = ALLOC_ALWAYS_SUCCEED;
int g_reallocation_count = REALLOC_ALWAYS_SUCCEED;
2854543ef51SXin LI 
2864543ef51SXin LI /* Crocked allocator for allocation failure tests */
2874543ef51SXin LI void *
duff_allocator(size_t size)2884543ef51SXin LI duff_allocator(size_t size) {
2894543ef51SXin LI   if (g_allocation_count == 0)
2904543ef51SXin LI     return NULL;
2914543ef51SXin LI   if (g_allocation_count != ALLOC_ALWAYS_SUCCEED)
2924543ef51SXin LI     g_allocation_count--;
2934543ef51SXin LI   return malloc(size);
2944543ef51SXin LI }
2954543ef51SXin LI 
2964543ef51SXin LI /* Crocked reallocator for allocation failure tests */
2974543ef51SXin LI void *
duff_reallocator(void * ptr,size_t size)2984543ef51SXin LI duff_reallocator(void *ptr, size_t size) {
2994543ef51SXin LI   if (g_reallocation_count == 0)
3004543ef51SXin LI     return NULL;
3014543ef51SXin LI   if (g_reallocation_count != REALLOC_ALWAYS_SUCCEED)
3024543ef51SXin LI     g_reallocation_count--;
3034543ef51SXin LI   return realloc(ptr, size);
3044543ef51SXin LI }
305*fe927888SPhilip Paeps 
// Portable remake of strndup(3) for C99.
//
// Returns a newly allocated, NUL-terminated copy of at most `n` leading
// bytes of the string `s`; copying stops early at the terminator of `s`, so
// no byte past the end of the source string is ever read.  The caller owns
// the result and releases it with free().
//
// On success errno is set to 0.  On failure NULL is returned and errno is
// set to EINVAL (`s` is NULL or `n` is SIZE_MAX) or ENOMEM (allocation
// failed).
char *
portable_strndup(const char *s, size_t n) {
  if ((s == NULL) || (n == SIZE_MAX)) {
    errno = EINVAL;
    return NULL;
  }

  // Bounded length scan (strnlen(3) is POSIX, not C99): never looks at more
  // than `n` bytes and never past the terminator.
  size_t len = 0;
  while ((len < n) && (s[len] != '\0')) {
    len++;
  }

  // len <= n < SIZE_MAX, so `len + 1` cannot wrap around.  The cast is kept
  // in case this file is also compiled as C++.
  char *const buffer = (char *)malloc(len + 1);
  if (buffer == NULL) {
    errno = ENOMEM;
    return NULL;
  }

  errno = 0;

  memcpy(buffer, s, len);

  buffer[len] = '\0';

  return buffer;
}
328