1 /* Commonly used functions for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com>
23 Licensed under the MIT license:
24
25 Permission is hereby granted, free of charge, to any person obtaining
26 a copy of this software and associated documentation files (the
27 "Software"), to deal in the Software without restriction, including
28 without limitation the rights to use, copy, modify, merge, publish,
29 distribute, sublicense, and/or sell copies of the Software, and to permit
30 persons to whom the Software is furnished to do so, subject to the
31 following conditions:
32
33 The above copyright notice and this permission notice shall be included
34 in all copies or substantial portions of the Software.
35
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
41 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42 USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
45 #include <assert.h>
46 #include <errno.h>
47 #include <stdint.h> // for SIZE_MAX
48 #include <stdio.h>
49 #include <string.h>
50
51 #include "expat_config.h"
52 #include "expat.h"
53 #include "internal.h"
54 #include "chardata.h"
55 #include "minicheck.h"
56 #include "common.h"
57 #include "handlers.h"
58
59 /* Common test data */
60
/* A complete document, declared as ISO-8859-1, whose single <s> element
 * contains 1200 bytes of character data (20 rows of 60 digits).
 */
const char *long_character_data_text
    = "<?xml version='1.0' encoding='iso-8859-1'?><s>"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "</s>";
84
/* A complete document whose single <s> element contains one CDATA section
 * holding 1200 bytes of text (20 rows of 60 digits).
 */
const char *long_cdata_text
    = "<s><![CDATA["
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "012345678901234567890123456789012345678901234567890123456789"
      "]]></s>";
108
/* Having an element name longer than 1024 characters exercises some
 * of the pool allocation code in the parser that otherwise does not
 * get executed.  The count at the end of the line is the number of
 * characters (bytes) in the element name by that point.
 */
const char *get_buffer_test_text
    = "<documentwitharidiculouslylongelementnametotease"  /* 0x030 */
      "aparticularcorneroftheallocationinXML_GetBuffers"  /* 0x060 */
      "othatwecanimprovethecoverageyetagain012345678901"  /* 0x090 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x0c0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x0f0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x120 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x150 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x180 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x1b0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x1e0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x210 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x240 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x270 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x2a0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x2d0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x300 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x330 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x360 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x390 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x3c0 */
      "123456789abcdef0123456789abcdef0123456789abcdef0"  /* 0x3f0 */
      "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */
137
138 /* Test control globals */
139
/* Used as the "resumable" parameter to XML_StopParser by some tests */
XML_Bool g_resumable = XML_FALSE;

/* Used to control abort checks in some tests */
XML_Bool g_abortable = XML_FALSE;

/* Chunk size, in bytes, used by _XML_Parse_SINGLE_BYTES() when splitting
   its input; a value of zero or less makes it hand the whole input to
   XML_Parse in a single call */
int g_chunkSize = 1;
148
149 /* Common test functions */
150
151 void
tcase_add_test__ifdef_xml_dtd(TCase * tc,tcase_test_function test)152 tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) {
153 #ifdef XML_DTD
154 tcase_add_test(tc, test);
155 #else
156 UNUSED_P(tc);
157 UNUSED_P(test);
158 #endif
159 }
160
161 void
tcase_add_test__if_xml_ge(TCase * tc,tcase_test_function test)162 tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test) {
163 #if XML_GE == 1
164 tcase_add_test(tc, test);
165 #else
166 UNUSED_P(tc);
167 UNUSED_P(test);
168 #endif
169 }
170
171 void
basic_teardown(void)172 basic_teardown(void) {
173 if (g_parser != NULL) {
174 XML_ParserFree(g_parser);
175 g_parser = NULL;
176 }
177 }
178
/* Generate a failure using the parser state to create an error message;
   this should be used when the parser reports an error we weren't
   expecting.

   parser: the parser whose current error code, line and column are reported
   file:   source file name to attribute the failure to
   line:   source line number to attribute the failure to

   The message is formatted into a fixed 1024-byte buffer (snprintf bounds
   the write) and then handed to _fail() together with `file` and `line`.
*/
void
_xml_failure(XML_Parser parser, const char *file, int line) {
  char buffer[1024];
  enum XML_Error err = XML_GetErrorCode(parser);
  /* Layout: numeric code, error string, parser line/column, then the
     reporting location. */
  snprintf(buffer, sizeof(buffer),
           " %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD
           "u, offset %" XML_FMT_INT_MOD "u)\n reported from %s, line %d\n",
           err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser),
           XML_GetCurrentColumnNumber(parser), file, line);
  _fail(file, line, buffer);
}
194
195 enum XML_Status
_XML_Parse_SINGLE_BYTES(XML_Parser parser,const char * s,int len,int isFinal)196 _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len,
197 int isFinal) {
198 // This ensures that tests have to run pathological parse cases
199 // (e.g. when `s` is NULL) against plain XML_Parse rather than
200 // chunking _XML_Parse_SINGLE_BYTES.
201 assert((parser != NULL) && (s != NULL) && (len >= 0));
202 const int chunksize = g_chunkSize;
203 if (chunksize > 0) {
204 // parse in chunks of `chunksize` bytes as long as not exhausting
205 for (; len > chunksize; len -= chunksize, s += chunksize) {
206 enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE);
207 if (res != XML_STATUS_OK) {
208 if ((res == XML_STATUS_SUSPENDED) && (len > chunksize)) {
209 fail("Use of function _XML_Parse_SINGLE_BYTES with a chunk size "
210 "greater than 0 (from g_chunkSize) does not work well with "
211 "suspension. Please consider use of plain XML_Parse at this "
212 "place in your test, instead.");
213 }
214 return res;
215 }
216 }
217 }
218 // parse the final chunk, the size of which will be <= chunksize
219 return XML_Parse(parser, s, len, isFinal);
220 }
221
222 void
_expect_failure(const char * text,enum XML_Error errorCode,const char * errorMessage,const char * file,int lineno)223 _expect_failure(const char *text, enum XML_Error errorCode,
224 const char *errorMessage, const char *file, int lineno) {
225 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
226 == XML_STATUS_OK)
227 /* Hackish use of _fail() macro, but lets us report
228 the right filename and line number. */
229 _fail(file, lineno, errorMessage);
230 if (XML_GetErrorCode(g_parser) != errorCode)
231 _xml_failure(g_parser, file, lineno);
232 }
233
234 void
_run_character_check(const char * text,const XML_Char * expected,const char * file,int line)235 _run_character_check(const char *text, const XML_Char *expected,
236 const char *file, int line) {
237 CharData storage;
238
239 CharData_Init(&storage);
240 XML_SetUserData(g_parser, &storage);
241 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
242 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
243 == XML_STATUS_ERROR)
244 _xml_failure(g_parser, file, line);
245 CharData_CheckXMLChars(&storage, expected);
246 }
247
248 void
_run_attribute_check(const char * text,const XML_Char * expected,const char * file,int line)249 _run_attribute_check(const char *text, const XML_Char *expected,
250 const char *file, int line) {
251 CharData storage;
252
253 CharData_Init(&storage);
254 XML_SetUserData(g_parser, &storage);
255 XML_SetStartElementHandler(g_parser, accumulate_attribute);
256 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
257 == XML_STATUS_ERROR)
258 _xml_failure(g_parser, file, line);
259 CharData_CheckXMLChars(&storage, expected);
260 }
261
262 void
_run_ext_character_check(const char * text,ExtTest * test_data,const XML_Char * expected,const char * file,int line)263 _run_ext_character_check(const char *text, ExtTest *test_data,
264 const XML_Char *expected, const char *file, int line) {
265 CharData *const storage = malloc(sizeof(CharData));
266
267 CharData_Init(storage);
268 test_data->storage = storage;
269 XML_SetUserData(g_parser, test_data);
270 XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters);
271 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
272 == XML_STATUS_ERROR)
273 _xml_failure(g_parser, file, line);
274 CharData_CheckXMLChars(storage, expected);
275
276 free(storage);
277 }
278
/* Control variables for the failing allocators below: the number of further
 * calls that duff_allocator() (g_allocation_count) and duff_reallocator()
 * (g_reallocation_count) will let succeed before they start returning NULL.
 * The *_ALWAYS_SUCCEED value (-1) switches the countdown off. */
#define ALLOC_ALWAYS_SUCCEED (-1)
#define REALLOC_ALWAYS_SUCCEED (-1)

int g_allocation_count = ALLOC_ALWAYS_SUCCEED;
int g_reallocation_count = REALLOC_ALWAYS_SUCCEED;
286
287 /* Crocked allocator for allocation failure tests */
288 void *
duff_allocator(size_t size)289 duff_allocator(size_t size) {
290 if (g_allocation_count == 0)
291 return NULL;
292 if (g_allocation_count != ALLOC_ALWAYS_SUCCEED)
293 g_allocation_count--;
294 return malloc(size);
295 }
296
297 /* Crocked reallocator for allocation failure tests */
298 void *
duff_reallocator(void * ptr,size_t size)299 duff_reallocator(void *ptr, size_t size) {
300 if (g_reallocation_count == 0)
301 return NULL;
302 if (g_reallocation_count != REALLOC_ALWAYS_SUCCEED)
303 g_reallocation_count--;
304 return realloc(ptr, size);
305 }
306
// Portable remake of strnlen(3) for C99: the length of `s`, looking at no
// more than `maxlen` bytes.  Returns `maxlen` when no NUL terminator is
// found among the first `maxlen` bytes.
static size_t
portable_strnlen(const char *s, size_t maxlen) {
  const char *const terminator = (const char *)memchr(s, '\0', maxlen);

  if (terminator != NULL) {
    return (size_t)(terminator - s);
  }
  return maxlen;
}
313
// Portable remake of strndup(3) for C99: returns a freshly malloc'ed,
// NUL-terminated copy of at most `n` leading bytes of `s`.  The caller owns
// the result and releases it with free().
//
// Returns NULL with errno set to EINVAL when `s` is NULL or `n` is SIZE_MAX,
// and NULL with errno set to ENOMEM when the allocation fails.  On success
// errno is set to 0.
char *
portable_strndup(const char *s, size_t n) {
  if (s == NULL) {
    errno = EINVAL;
    return NULL;
  }
  if (n == SIZE_MAX) {
    errno = EINVAL;
    return NULL;
  }

  // Never copy past the terminator of a string shorter than `n`.
  const size_t length = portable_strnlen(s, n);

  char *const copy = malloc(length + 1);
  if (copy == NULL) {
    errno = ENOMEM;
    return NULL;
  }

  memcpy(copy, s, length);
  copy[length] = '\0';

  errno = 0;
  return copy;
}
338