1 /* Tests in the "miscellaneous" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15 Copyright (c) 2017 Joe Orton <jorton@redhat.com>
16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
20 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
21 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43
44 #if defined(NDEBUG)
45 # undef NDEBUG /* because test suite relies on assert(...) at the moment */
46 #endif
47
48 #include <assert.h>
49 #include <string.h>
50
51 #include "expat_config.h"
52
53 #include "expat.h"
54 #include "internal.h"
55 #include "minicheck.h"
56 #include "memcheck.h"
57 #include "common.h"
58 #include "ascii.h" /* for ASCII_xxx */
59 #include "handlers.h"
60 #include "misc_tests.h"
61
62 /* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser)63 START_TEST(test_misc_alloc_create_parser) {
64 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
65 unsigned int i;
66 const unsigned int max_alloc_count = 10;
67
68 /* Something this simple shouldn't need more than 10 allocations */
69 for (i = 0; i < max_alloc_count; i++) {
70 g_allocation_count = i;
71 g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
72 if (g_parser != NULL)
73 break;
74 }
75 if (i == 0)
76 fail("Parser unexpectedly ignored failing allocator");
77 else if (i == max_alloc_count)
78 fail("Parser not created with max allocation count");
79 }
80 END_TEST
81
82 /* Test memory allocation failures for a parser with an encoding */
START_TEST(test_misc_alloc_create_parser_with_encoding)83 START_TEST(test_misc_alloc_create_parser_with_encoding) {
84 XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
85 unsigned int i;
86 const unsigned int max_alloc_count = 10;
87
88 /* Try several levels of allocation */
89 for (i = 0; i < max_alloc_count; i++) {
90 g_allocation_count = i;
91 g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
92 if (g_parser != NULL)
93 break;
94 }
95 if (i == 0)
96 fail("Parser ignored failing allocator");
97 else if (i == max_alloc_count)
98 fail("Parser not created with max allocation count");
99 }
100 END_TEST
101
102 /* Test that freeing a NULL parser doesn't cause an explosion.
103 * (Not actually tested anywhere else)
104 */
START_TEST(test_misc_null_parser)105 START_TEST(test_misc_null_parser) {
106 XML_ParserFree(NULL);
107 }
108 END_TEST
109
/* EXPAT_TESTS_UBSAN is 1 when the compiler reports, through __has_feature,
 * that the undefined behavior sanitizer is active.  It is 0 otherwise,
 * including for compilers that do not provide __has_feature at all.
 */
#define EXPAT_TESTS_UBSAN 0
#if defined(__has_feature)
#  if __has_feature(undefined_behavior_sanitizer)
#    undef EXPAT_TESTS_UBSAN
#    define EXPAT_TESTS_UBSAN 1
#  endif
#endif
119
120 /* Test that XML_ErrorString rejects out-of-range codes */
START_TEST(test_misc_error_string)121 START_TEST(test_misc_error_string) {
122 #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
123 union {
124 enum XML_Error xml_error;
125 int integer;
126 } trickery;
127
128 assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
129
130 trickery.integer = -1;
131 if (XML_ErrorString(trickery.xml_error) != NULL)
132 fail("Negative error code not rejected");
133
134 trickery.integer = 100;
135 if (XML_ErrorString(trickery.xml_error) != NULL)
136 fail("Large error code not rejected");
137 #endif
138 }
139 END_TEST
140
141 /* Test the version information is consistent */
142
143 /* Since we are working in XML_LChars (potentially 16-bits), we
144 * can't use the standard C library functions for character
145 * manipulation and have to roll our own.
146 */
147 static int
parse_version(const XML_LChar * version_text,XML_Expat_Version * version_struct)148 parse_version(const XML_LChar *version_text,
149 XML_Expat_Version *version_struct) {
150 if (! version_text)
151 return XML_FALSE;
152
153 while (*version_text != 0x00) {
154 if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
155 break;
156 version_text++;
157 }
158 if (*version_text == 0x00)
159 return XML_FALSE;
160
161 /* version_struct->major = strtoul(version_text, 10, &version_text) */
162 version_struct->major = 0;
163 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
164 version_struct->major
165 = 10 * version_struct->major + (*version_text++ - ASCII_0);
166 }
167 if (*version_text++ != ASCII_PERIOD)
168 return XML_FALSE;
169
170 /* Now for the minor version number */
171 version_struct->minor = 0;
172 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
173 version_struct->minor
174 = 10 * version_struct->minor + (*version_text++ - ASCII_0);
175 }
176 if (*version_text++ != ASCII_PERIOD)
177 return XML_FALSE;
178
179 /* Finally the micro version number */
180 version_struct->micro = 0;
181 while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
182 version_struct->micro
183 = 10 * version_struct->micro + (*version_text++ - ASCII_0);
184 }
185 if (*version_text != 0x00)
186 return XML_FALSE;
187 return XML_TRUE;
188 }
189
190 static int
versions_equal(const XML_Expat_Version * first,const XML_Expat_Version * second)191 versions_equal(const XML_Expat_Version *first,
192 const XML_Expat_Version *second) {
193 return (first->major == second->major && first->minor == second->minor
194 && first->micro == second->micro);
195 }
196
START_TEST(test_misc_version)197 START_TEST(test_misc_version) {
198 XML_Expat_Version read_version = XML_ExpatVersionInfo();
199 /* Silence compiler warning with the following assignment */
200 XML_Expat_Version parsed_version = {0, 0, 0};
201 const XML_LChar *version_text = XML_ExpatVersion();
202
203 if (version_text == NULL)
204 fail("Could not obtain version text");
205 assert(version_text != NULL);
206 if (! parse_version(version_text, &parsed_version))
207 fail("Unable to parse version text");
208 if (! versions_equal(&read_version, &parsed_version))
209 fail("Version mismatch");
210
211 if (xcstrcmp(version_text, XCS("expat_2.6.3"))) /* needs bump on releases */
212 fail("XML_*_VERSION in expat.h out of sync?\n");
213 }
214 END_TEST
215
216 /* Test feature information */
START_TEST(test_misc_features)217 START_TEST(test_misc_features) {
218 const XML_Feature *features = XML_GetFeatureList();
219
220 /* Prevent problems with double-freeing parsers */
221 g_parser = NULL;
222 if (features == NULL) {
223 fail("Failed to get feature information");
224 } else {
225 /* Loop through the features checking what we can */
226 while (features->feature != XML_FEATURE_END) {
227 switch (features->feature) {
228 case XML_FEATURE_SIZEOF_XML_CHAR:
229 if (features->value != sizeof(XML_Char))
230 fail("Incorrect size of XML_Char");
231 break;
232 case XML_FEATURE_SIZEOF_XML_LCHAR:
233 if (features->value != sizeof(XML_LChar))
234 fail("Incorrect size of XML_LChar");
235 break;
236 default:
237 break;
238 }
239 features++;
240 }
241 }
242 }
243 END_TEST
244
245 /* Regression test for GitHub Issue #17: memory leak parsing attribute
246 * values with mixed bound and unbound namespaces.
247 */
START_TEST(test_misc_attribute_leak)248 START_TEST(test_misc_attribute_leak) {
249 const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
250 XML_Memory_Handling_Suite memsuite
251 = {tracking_malloc, tracking_realloc, tracking_free};
252
253 g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
254 expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
255 XML_ParserFree(g_parser);
256 /* Prevent the teardown trying to double free */
257 g_parser = NULL;
258
259 if (! tracking_report())
260 fail("Memory leak found");
261 }
262 END_TEST
263
264 /* Test parser created for UTF-16LE is successful */
START_TEST(test_misc_utf16le)265 START_TEST(test_misc_utf16le) {
266 const char text[] =
267 /* <?xml version='1.0'?><q>Hi</q> */
268 "<\0?\0x\0m\0l\0 \0"
269 "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
270 "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
271 const XML_Char *expected = XCS("Hi");
272 CharData storage;
273
274 g_parser = XML_ParserCreate(XCS("UTF-16LE"));
275 if (g_parser == NULL)
276 fail("Parser not created");
277
278 CharData_Init(&storage);
279 XML_SetUserData(g_parser, &storage);
280 XML_SetCharacterDataHandler(g_parser, accumulate_characters);
281 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
282 == XML_STATUS_ERROR)
283 xml_failure(g_parser);
284 CharData_CheckXMLChars(&storage, expected);
285 }
286 END_TEST
287
START_TEST(test_misc_stop_during_end_handler_issue_240_1)288 START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
289 XML_Parser parser;
290 DataIssue240 *mydata;
291 enum XML_Status result;
292 const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
293
294 parser = XML_ParserCreate(NULL);
295 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
296 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
297 mydata->parser = parser;
298 mydata->deep = 0;
299 XML_SetUserData(parser, mydata);
300
301 result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
302 XML_ParserFree(parser);
303 free(mydata);
304 if (result != XML_STATUS_ERROR)
305 fail("Stopping the parser did not work as expected");
306 }
307 END_TEST
308
START_TEST(test_misc_stop_during_end_handler_issue_240_2)309 START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
310 XML_Parser parser;
311 DataIssue240 *mydata;
312 enum XML_Status result;
313 const char *const doc2 = "<doc><elem/></doc>";
314
315 parser = XML_ParserCreate(NULL);
316 XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
317 mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
318 mydata->parser = parser;
319 mydata->deep = 0;
320 XML_SetUserData(parser, mydata);
321
322 result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
323 XML_ParserFree(parser);
324 free(mydata);
325 if (result != XML_STATUS_ERROR)
326 fail("Stopping the parser did not work as expected");
327 }
328 END_TEST
329
START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317)330 START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
331 const char *const inputOne = "<!DOCTYPE d [\n"
332 "<!ENTITY % e ']><d/>'>\n"
333 "\n"
334 "%e;";
335 const char *const inputTwo = "<!DOCTYPE d [\n"
336 "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&e1;'>\n"
337 "\n"
338 "%e2;";
339 const char *const inputThree = "<!DOCTYPE d [\n"
340 "<!ENTITY % e ']><d'>\n"
341 "\n"
342 "%e;";
343 const char *const inputIssue317 = "<!DOCTYPE doc [\n"
344 "<!ENTITY % foo ']>\n"
345 "<doc>Hell<oc (#PCDATA)*>'>\n"
346 "%foo;\n"
347 "]>\n"
348 "<doc>Hello, world</dVc>";
349
350 const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
351 size_t inputIndex = 0;
352
353 for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
354 set_subtest("%s", inputs[inputIndex]);
355 XML_Parser parser;
356 enum XML_Status parseResult;
357 int setParamEntityResult;
358 XML_Size lineNumber;
359 XML_Size columnNumber;
360 const char *const input = inputs[inputIndex];
361
362 parser = XML_ParserCreate(NULL);
363 setParamEntityResult
364 = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
365 if (setParamEntityResult != 1)
366 fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
367
368 parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
369 if (parseResult != XML_STATUS_ERROR) {
370 parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
371 if (parseResult != XML_STATUS_ERROR) {
372 fail("Parsing was expected to fail but succeeded.");
373 }
374 }
375
376 if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
377 fail("Error code does not match XML_ERROR_INVALID_TOKEN");
378
379 lineNumber = XML_GetCurrentLineNumber(parser);
380 if (lineNumber != 4)
381 fail("XML_GetCurrentLineNumber does not work as expected.");
382
383 columnNumber = XML_GetCurrentColumnNumber(parser);
384 if (columnNumber != 0)
385 fail("XML_GetCurrentColumnNumber does not work as expected.");
386
387 XML_ParserFree(parser);
388 }
389 }
390 END_TEST
391
START_TEST(test_misc_tag_mismatch_reset_leak)392 START_TEST(test_misc_tag_mismatch_reset_leak) {
393 #ifdef XML_NS
394 const char *const text = "<open xmlns='https://namespace1.test'></close>";
395 XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
396
397 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
398 != XML_STATUS_ERROR)
399 fail("Call to parse was expected to fail");
400 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
401 fail("Call to parse was expected to fail from a closing tag mismatch");
402
403 XML_ParserReset(parser, NULL);
404
405 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
406 != XML_STATUS_ERROR)
407 fail("Call to parse was expected to fail");
408 if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
409 fail("Call to parse was expected to fail from a closing tag mismatch");
410
411 XML_ParserFree(parser);
412 #endif
413 }
414 END_TEST
415
START_TEST(test_misc_create_external_entity_parser_with_null_context)416 START_TEST(test_misc_create_external_entity_parser_with_null_context) {
417 // With XML_DTD undefined, the only supported case of external entities
418 // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
419 // was causing a segfault through a null pointer dereference in function
420 // setContext, previously.
421 XML_Parser parser = XML_ParserCreate(NULL);
422 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
423 #ifdef XML_DTD
424 assert_true(ext_parser != NULL);
425 XML_ParserFree(ext_parser);
426 #else
427 assert_true(ext_parser == NULL);
428 #endif /* XML_DTD */
429 XML_ParserFree(parser);
430 }
431 END_TEST
432
START_TEST(test_misc_general_entities_support)433 START_TEST(test_misc_general_entities_support) {
434 const char *const doc
435 = "<!DOCTYPE r [\n"
436 "<!ENTITY e1 'v1'>\n"
437 "<!ENTITY e2 SYSTEM 'v2'>\n"
438 "]>\n"
439 "<r a1='[&e1;]'>[&e1;][&e2;][&'><"]</r>";
440
441 CharData storage;
442 CharData_Init(&storage);
443
444 XML_Parser parser = XML_ParserCreate(NULL);
445 XML_SetUserData(parser, &storage);
446 XML_SetStartElementHandler(parser, accumulate_start_element);
447 XML_SetExternalEntityRefHandler(parser,
448 external_entity_failer__if_not_xml_ge);
449 XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
450 XML_SetCharacterDataHandler(parser, accumulate_char_data);
451
452 if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
453 != XML_STATUS_OK) {
454 xml_failure(parser);
455 }
456
457 XML_ParserFree(parser);
458
459 CharData_CheckXMLChars(&storage,
460 /* clang-format off */
461 #if XML_GE == 1
462 XCS("e1=v1\n")
463 XCS("e2=(null)\n")
464 XCS("(r(a1=[v1]))\n")
465 XCS("[v1][][&'><\"]")
466 #else
467 XCS("e1=&e1;\n")
468 XCS("e2=(null)\n")
469 XCS("(r(a1=[&e1;]))\n")
470 XCS("[&e1;][&e2;][&'><\"]")
471 #endif
472 );
473 /* clang-format on */
474 }
475 END_TEST
476
477 static void XMLCALL
resumable_stopping_character_handler(void * userData,const XML_Char * s,int len)478 resumable_stopping_character_handler(void *userData, const XML_Char *s,
479 int len) {
480 UNUSED_P(s);
481 UNUSED_P(len);
482 XML_Parser parser = (XML_Parser)userData;
483 XML_StopParser(parser, XML_TRUE);
484 }
485
486 // NOTE: This test needs active LeakSanitizer to be of actual use
START_TEST(test_misc_char_handler_stop_without_leak)487 START_TEST(test_misc_char_handler_stop_without_leak) {
488 const char *const data
489 = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
490 XML_Parser parser = XML_ParserCreate(NULL);
491 assert_true(parser != NULL);
492 XML_SetUserData(parser, parser);
493 XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
494 _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
495 XML_ParserFree(parser);
496 }
497 END_TEST
498
499 void
make_miscellaneous_test_case(Suite * s)500 make_miscellaneous_test_case(Suite *s) {
501 TCase *tc_misc = tcase_create("miscellaneous tests");
502
503 suite_add_tcase(s, tc_misc);
504 tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
505
506 tcase_add_test(tc_misc, test_misc_alloc_create_parser);
507 tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
508 tcase_add_test(tc_misc, test_misc_null_parser);
509 tcase_add_test(tc_misc, test_misc_error_string);
510 tcase_add_test(tc_misc, test_misc_version);
511 tcase_add_test(tc_misc, test_misc_features);
512 tcase_add_test(tc_misc, test_misc_attribute_leak);
513 tcase_add_test(tc_misc, test_misc_utf16le);
514 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
515 tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
516 tcase_add_test__ifdef_xml_dtd(
517 tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
518 tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
519 tcase_add_test(tc_misc,
520 test_misc_create_external_entity_parser_with_null_context);
521 tcase_add_test(tc_misc, test_misc_general_entities_support);
522 tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
523 }
524