1 /* Tests in the "accounting" test case for the Expat test suite 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10 Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12 Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16 Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19 Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> 22 Licensed under the MIT license: 23 24 Permission is hereby granted, free of charge, to any person obtaining 25 a copy of this software and associated documentation files (the 26 "Software"), to deal in the Software without restriction, including 27 without limitation the rights to use, copy, modify, merge, publish, 28 distribute, sublicense, and/or sell copies of the Software, and to permit 29 persons to whom the Software is furnished to do so, subject to the 30 following conditions: 31 32 The above copyright notice and this permission notice shall be included 33 in all copies or substantial portions of the Software. 34 35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 USE OR OTHER DEALINGS IN THE SOFTWARE. 42 */ 43 44 #include <math.h> /* NAN, INFINITY */ 45 #include <stdio.h> 46 #include <string.h> 47 48 #include "expat_config.h" 49 50 #include "expat.h" 51 #include "internal.h" 52 #include "common.h" 53 #include "minicheck.h" 54 #include "chardata.h" 55 #include "handlers.h" 56 #include "acc_tests.h" 57 58 #if XML_GE == 1 59 START_TEST(test_accounting_precision) { 60 struct AccountingTestCase cases[] = { 61 {"<e/>", NULL, NULL, 0}, 62 {"<e></e>", NULL, NULL, 0}, 63 64 /* Attributes */ 65 {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0}, 66 {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0}, 67 {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0}, 68 {"<e k=\"&'><"\" />", NULL, NULL, 69 sizeof(XML_Char) * 5 /* number of predefined entities */}, 70 {"<e1 xmlns='https://example.org/'>\n" 71 " <e2 xmlns=''/>\n" 72 "</e1>", 73 NULL, NULL, 0}, 74 75 /* Text */ 76 {"<e>text</e>", NULL, NULL, 0}, 77 {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0}, 78 {"<e>&'><"</e>", NULL, NULL, 79 sizeof(XML_Char) * 5 /* number of predefined entities */}, 80 {"<e>A)</e>", NULL, NULL, 0}, 81 82 /* Prolog */ 83 {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0}, 84 85 /* Whitespace */ 86 {" <e1> <e2> </e2> </e1> ", NULL, NULL, 0}, 87 {"<e1 ><e2 /></e1 >", NULL, NULL, 0}, 88 {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0}, 89 90 /* Comments */ 91 {"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0}, 92 93 /* Processing instructions */ 94 {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>", 95 NULL, NULL, 0}, 96 {"<?pi0?><?pi1 ?><?pi2 ?><r/><?pi4?>", NULL, NULL, 0}, 97 # ifdef XML_DTD 98 {"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>", 99 "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>", 100 0}, 101 # endif /* XML_DTD */ 102 103 /* CDATA */ 104 {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0}, 105 /* The following is the essence of this OSS-Fuzz finding: 106 https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34302 107 https://oss-fuzz.com/testcase-detail/4860575394955264 108 */ 109 {"<!DOCTYPE r [\n" 110 "<!ENTITY e \"111<![CDATA[2 <= 2]]>333\">\n" 111 "]>\n" 112 "<r>&e;</r>\n", 113 NULL, NULL, sizeof(XML_Char) * strlen("111<![CDATA[2 <= 2]]>333")}, 114 115 # ifdef XML_DTD 116 /* Conditional sections */ 117 {"<!DOCTYPE r [\n" 118 "<!ENTITY % draft 'INCLUDE'>\n" 119 "<!ENTITY % final 'IGNORE'>\n" 120 "<!ENTITY % import SYSTEM \"first.ent\">\n" 121 "%import;\n" 122 "]>\n" 123 "<r/>\n", 124 "<![%draft;[<!--1-->]]>\n" 125 "<![%final;[<!--22-->]]>", 126 NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE"))}, 127 # endif /* XML_DTD */ 128 129 /* General entities */ 130 {"<!DOCTYPE root [\n" 131 "<!ENTITY nine \"123456789\">\n" 132 "]>\n" 133 "<root>&nine;</root>", 134 NULL, NULL, sizeof(XML_Char) * strlen("123456789")}, 135 {"<!DOCTYPE root [\n" 136 "<!ENTITY nine \"123456789\">\n" 137 "]>\n" 138 "<root k1=\"&nine;\"/>", 139 NULL, NULL, sizeof(XML_Char) * strlen("123456789")}, 140 {"<!DOCTYPE root [\n" 141 "<!ENTITY nine \"123456789\">\n" 142 "<!ENTITY nine2 \"&nine;&nine;\">\n" 143 "]>\n" 144 "<root>&nine2;&nine2;&nine2;</root>", 145 NULL, NULL, 146 sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */ 147 * (strlen("&nine;") + strlen("123456789"))}, 148 {"<!DOCTYPE r [\n" 149 " <!ENTITY five SYSTEM 'first.ent'>\n" 150 "]>\n" 151 "<r>&five;</r>", 152 "12345", NULL, 0}, 153 {"<!DOCTYPE r [\n" 154 " <!ENTITY five SYSTEM 'first.ent'>\n" 155 "]>\n" 156 "<r>&five;</r>", 157 "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0}, 158 159 # ifdef XML_DTD 160 /* Parameter entities */ 161 {"<!DOCTYPE r [\n" 162 "<!ENTITY % comment \"<!---->\">\n" 163 "%comment;\n" 164 "]>\n" 165 "<r/>", 166 NULL, NULL, sizeof(XML_Char) * strlen("<!---->")}, 167 {"<!DOCTYPE r [\n" 168 "<!ENTITY % ninedef \"<!ENTITY nine "123456789">\">\n" 169 "%ninedef;\n" 170 "]>\n" 171 "<r>&nine;</r>", 172 NULL, NULL, 173 sizeof(XML_Char) 174 * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789"))}, 175 {"<!DOCTYPE r [\n" 176 "<!ENTITY % comment \"<!--1-->\">\n" 177 "<!ENTITY % comment2 \"%comment;<!--22-->%comment;\">\n" 178 "%comment2;\n" 179 "]>\n" 180 "<r/>\n", 181 NULL, NULL, 182 sizeof(XML_Char) 183 * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->"))}, 184 {"<!DOCTYPE r [\n" 185 " <!ENTITY % five \"12345\">\n" 186 " <!ENTITY % five2def \"<!ENTITY five2 "[%five;][%five;]]]]">\">\n" 187 " %five2def;\n" 188 "]>\n" 189 "<r>&five2;</r>", 190 NULL, NULL, /* from "%five2def;": */ 191 sizeof(XML_Char) 192 * (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">") 193 + 2 /* calls to "%five;" */ * strlen("12345") 194 + /* from "&five2;": */ strlen("[12345][12345]]]]"))}, 195 {"<!DOCTYPE r SYSTEM \"first.ent\">\n" 196 "<r/>", 197 "<!ENTITY % comment '<!--1-->'>\n" 198 "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n" 199 "%comment2;", 200 NULL, 201 sizeof(XML_Char) 202 * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->") 203 + 2 /* calls to "%comment;" */ * strlen("<!---->"))}, 204 {"<!DOCTYPE r SYSTEM 'first.ent'>\n" 205 "<r/>", 206 "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n" 207 "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n" 208 "%e2;\n", 209 "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->")}, 210 { 211 "<!DOCTYPE r SYSTEM 'first.ent'>\n" 212 "<r/>", 213 "<!ENTITY % e1 SYSTEM 'second.ent'>\n" 214 "<!ENTITY % e2 '%e1;'>", 215 "<?xml version='1.0' encoding='utf-8'?>\n" 216 "hello\n" 217 "xml" /* without trailing newline! */, 218 0, 219 }, 220 { 221 "<!DOCTYPE r SYSTEM 'first.ent'>\n" 222 "<r/>", 223 "<!ENTITY % e1 SYSTEM 'second.ent'>\n" 224 "<!ENTITY % e2 '%e1;'>", 225 "<?xml version='1.0' encoding='utf-8'?>\n" 226 "hello\n" 227 "xml\n" /* with trailing newline! */, 228 0, 229 }, 230 {"<!DOCTYPE doc SYSTEM 'first.ent'>\n" 231 "<doc></doc>\n", 232 "<!ELEMENT doc EMPTY>\n" 233 "<!ENTITY % e1 SYSTEM 'second.ent'>\n" 234 "<!ENTITY % e2 '%e1;'>\n" 235 "%e1;\n", 236 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */, 237 strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>")}, 238 # endif /* XML_DTD */ 239 }; 240 241 const size_t countCases = sizeof(cases) / sizeof(cases[0]); 242 size_t u = 0; 243 for (; u < countCases; u++) { 244 const unsigned long long expectedCountBytesDirect 245 = strlen(cases[u].primaryText); 246 const unsigned long long expectedCountBytesIndirect 247 = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0) 248 + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText) 249 : 0) 250 + cases[u].expectedCountBytesIndirectExtra; 251 252 XML_Parser parser = XML_ParserCreate(NULL); 253 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 254 if (cases[u].firstExternalText) { 255 XML_SetExternalEntityRefHandler(parser, 256 accounting_external_entity_ref_handler); 257 XML_SetUserData(parser, (void *)&cases[u]); 258 } 259 260 enum XML_Status status 261 = _XML_Parse_SINGLE_BYTES(parser, cases[u].primaryText, 262 (int)strlen(cases[u].primaryText), XML_TRUE); 263 if (status != XML_STATUS_OK) { 264 _xml_failure(parser, __FILE__, __LINE__); 265 } 266 267 const unsigned long long actualCountBytesDirect 268 = testingAccountingGetCountBytesDirect(parser); 269 const unsigned long long actualCountBytesIndirect 270 = testingAccountingGetCountBytesIndirect(parser); 271 272 XML_ParserFree(parser); 273 274 if (actualCountBytesDirect != expectedCountBytesDirect) { 275 fprintf( 276 stderr, 277 "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL( 278 "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n", 279 u + 1, countCases, expectedCountBytesDirect, actualCountBytesDirect); 280 fail("Count of direct bytes is off"); 281 } 282 283 if (actualCountBytesIndirect != expectedCountBytesIndirect) { 284 fprintf( 285 stderr, 286 "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL( 287 "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n", 288 u + 1, countCases, expectedCountBytesIndirect, 289 actualCountBytesIndirect); 290 fail("Count of indirect bytes is off"); 291 } 292 } 293 } 294 END_TEST 295 296 START_TEST(test_billion_laughs_attack_protection_api) { 297 XML_Parser parserWithoutParent = XML_ParserCreate(NULL); 298 XML_Parser parserWithParent = XML_ExternalEntityParserCreate( 299 parserWithoutParent, XCS("entity123"), NULL); 300 if (parserWithoutParent == NULL) 301 fail("parserWithoutParent is NULL"); 302 if (parserWithParent == NULL) 303 fail("parserWithParent is NULL"); 304 305 // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases 306 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f) 307 == XML_TRUE) 308 fail("Call with NULL parser is NOT supposed to succeed"); 309 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent, 310 123.0f) 311 == XML_TRUE) 312 fail("Call with non-root parser is NOT supposed to succeed"); 313 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 314 parserWithoutParent, NAN) 315 == XML_TRUE) 316 fail("Call with NaN limit is NOT supposed to succeed"); 317 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 318 parserWithoutParent, -1.0f) 319 == XML_TRUE) 320 fail("Call with negative limit is NOT supposed to succeed"); 321 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 322 parserWithoutParent, 0.9f) 323 == XML_TRUE) 324 fail("Call with positive limit <1.0 is NOT supposed to succeed"); 325 326 // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases 327 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 328 parserWithoutParent, 1.0f) 329 == XML_FALSE) 330 fail("Call with positive limit >=1.0 is supposed to succeed"); 331 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 332 parserWithoutParent, 123456.789f) 333 == XML_FALSE) 334 fail("Call with positive limit >=1.0 is supposed to succeed"); 335 if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( 336 parserWithoutParent, INFINITY) 337 == XML_FALSE) 338 fail("Call with positive limit >=1.0 is supposed to succeed"); 339 340 // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases 341 if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123) 342 == XML_TRUE) 343 fail("Call with NULL parser is NOT supposed to succeed"); 344 if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent, 345 123) 346 == XML_TRUE) 347 fail("Call with non-root parser is NOT supposed to succeed"); 348 349 // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases 350 if (XML_SetBillionLaughsAttackProtectionActivationThreshold( 351 parserWithoutParent, 123) 352 == XML_FALSE) 353 fail("Call with non-NULL parentless parser is supposed to succeed"); 354 355 XML_ParserFree(parserWithParent); 356 XML_ParserFree(parserWithoutParent); 357 } 358 END_TEST 359 360 START_TEST(test_helper_unsigned_char_to_printable) { 361 // Smoke test 362 unsigned char uc = 0; 363 for (; uc < (unsigned char)-1; uc++) { 364 set_subtest("char %u", (unsigned)uc); 365 const char *const printable = unsignedCharToPrintable(uc); 366 if (printable == NULL) 367 fail("unsignedCharToPrintable returned NULL"); 368 else if (strlen(printable) < (size_t)1) 369 fail("unsignedCharToPrintable returned empty string"); 370 } 371 372 // Two concrete samples 373 set_subtest("char 'A'"); 374 if (strcmp(unsignedCharToPrintable('A'), "A") != 0) 375 fail("unsignedCharToPrintable result mistaken"); 376 set_subtest("char '\\'"); 377 if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0) 378 fail("unsignedCharToPrintable result mistaken"); 379 } 380 END_TEST 381 382 START_TEST(test_amplification_isolated_external_parser) { 383 // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>" 384 // (22) that is used in function accountingGetCurrentAmplification in 385 // xmlparse.c. 386 // 1.........1.........1.........1.........1..4 => 44 387 const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>"; 388 const int docLen = (int)sizeof(doc) - 1; 389 const float maximumToleratedAmplification = 2.0f; 390 391 struct TestCase { 392 int offsetOfThreshold; 393 enum XML_Status expectedStatus; 394 }; 395 396 struct TestCase cases[] = { 397 {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR}, 398 {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK}, 399 }; 400 401 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 402 const int offsetOfThreshold = cases[i].offsetOfThreshold; 403 const enum XML_Status expectedStatus = cases[i].expectedStatus; 404 const unsigned long long activationThresholdBytes 405 = docLen + offsetOfThreshold; 406 407 set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold, 408 expectedStatus); 409 410 XML_Parser parser = XML_ParserCreate(NULL); 411 assert_true(parser != NULL); 412 413 assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification( 414 parser, maximumToleratedAmplification) 415 == XML_TRUE); 416 assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold( 417 parser, activationThresholdBytes) 418 == XML_TRUE); 419 420 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 421 assert_true(ext_parser != NULL); 422 423 const enum XML_Status actualStatus 424 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE); 425 426 assert_true(actualStatus == expectedStatus); 427 if (actualStatus != XML_STATUS_OK) { 428 assert_true(XML_GetErrorCode(ext_parser) 429 == XML_ERROR_AMPLIFICATION_LIMIT_BREACH); 430 } 431 432 XML_ParserFree(ext_parser); 433 XML_ParserFree(parser); 434 } 435 } 436 END_TEST 437 438 #endif // XML_GE == 1 439 440 void 441 make_accounting_test_case(Suite *s) { 442 #if XML_GE == 1 443 TCase *tc_accounting = tcase_create("accounting tests"); 444 445 suite_add_tcase(s, tc_accounting); 446 447 tcase_add_test(tc_accounting, test_accounting_precision); 448 tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); 449 tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); 450 tcase_add_test__ifdef_xml_dtd(tc_accounting, 451 test_amplification_isolated_external_parser); 452 #else 453 UNUSED_P(s); 454 #endif /* XML_GE == 1 */ 455 } 456