xref: /freebsd/contrib/expat/tests/acc_tests.c (revision ac77b2621508c6a50ab01d07fe8d43795d908f05)
1 /* Tests in the "accounting" test case for the Expat test suite
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21    Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22    Licensed under the MIT license:
23 
24    Permission is  hereby granted,  free of charge,  to any  person obtaining
25    a  copy  of  this  software   and  associated  documentation  files  (the
26    "Software"),  to  deal in  the  Software  without restriction,  including
27    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28    distribute, sublicense, and/or sell copies of the Software, and to permit
29    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30    following conditions:
31 
32    The above copyright  notice and this permission notice  shall be included
33    in all copies or substantial portions of the Software.
34 
35    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41    USE OR OTHER DEALINGS IN THE SOFTWARE.
42 */
43 
44 #include <math.h> /* NAN, INFINITY */
45 #include <stdio.h>
46 #include <string.h>
47 
48 #include "expat_config.h"
49 
50 #include "expat.h"
51 #include "internal.h"
52 #include "common.h"
53 #include "minicheck.h"
54 #include "chardata.h"
55 #include "handlers.h"
56 #include "acc_tests.h"
57 
58 #if XML_GE == 1
59 START_TEST(test_accounting_precision) {
60   struct AccountingTestCase cases[] = {
61       {"<e/>", NULL, NULL, 0},
62       {"<e></e>", NULL, NULL, 0},
63 
64       /* Attributes */
65       {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0},
66       {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0},
67       {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0},
68       {"<e k=\"&amp;&apos;&gt;&lt;&quot;\" />", NULL, NULL,
69        sizeof(XML_Char) * 5 /* number of predefined entities */},
70       {"<e1 xmlns='https://example.org/'>\n"
71        "  <e2 xmlns=''/>\n"
72        "</e1>",
73        NULL, NULL, 0},
74 
75       /* Text */
76       {"<e>text</e>", NULL, NULL, 0},
77       {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0},
78       {"<e>&amp;&apos;&gt;&lt;&quot;</e>", NULL, NULL,
79        sizeof(XML_Char) * 5 /* number of predefined entities */},
80       {"<e>&#65;&#41;</e>", NULL, NULL, 0},
81 
82       /* Prolog */
83       {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0},
84 
85       /* Whitespace */
86       {"  <e1>  <e2>  </e2>  </e1>  ", NULL, NULL, 0},
87       {"<e1  ><e2  /></e1  >", NULL, NULL, 0},
88       {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0},
89 
90       /* Comments */
91       {"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0},
92 
93       /* Processing instructions */
94       {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>",
95        NULL, NULL, 0},
96       {"<?pi0?><?pi1 ?><?pi2  ?><r/><?pi4?>", NULL, NULL, 0},
97 #  ifdef XML_DTD
98       {"<?pi0?><?pi1 ?><?pi2  ?><!DOCTYPE r SYSTEM 'first.ent'><r/>",
99        "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>",
100        0},
101 #  endif /* XML_DTD */
102 
103       /* CDATA */
104       {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0},
105       /* The following is the essence of this OSS-Fuzz finding:
106          https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34302
107          https://oss-fuzz.com/testcase-detail/4860575394955264
108       */
109       {"<!DOCTYPE r [\n"
110        "<!ENTITY e \"111<![CDATA[2 <= 2]]>333\">\n"
111        "]>\n"
112        "<r>&e;</r>\n",
113        NULL, NULL, sizeof(XML_Char) * strlen("111<![CDATA[2 <= 2]]>333")},
114 
115 #  ifdef XML_DTD
116       /* Conditional sections */
117       {"<!DOCTYPE r [\n"
118        "<!ENTITY % draft 'INCLUDE'>\n"
119        "<!ENTITY % final 'IGNORE'>\n"
120        "<!ENTITY % import SYSTEM \"first.ent\">\n"
121        "%import;\n"
122        "]>\n"
123        "<r/>\n",
124        "<![%draft;[<!--1-->]]>\n"
125        "<![%final;[<!--22-->]]>",
126        NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE"))},
127 #  endif /* XML_DTD */
128 
129       /* General entities */
130       {"<!DOCTYPE root [\n"
131        "<!ENTITY nine \"123456789\">\n"
132        "]>\n"
133        "<root>&nine;</root>",
134        NULL, NULL, sizeof(XML_Char) * strlen("123456789")},
135       {"<!DOCTYPE root [\n"
136        "<!ENTITY nine \"123456789\">\n"
137        "]>\n"
138        "<root k1=\"&nine;\"/>",
139        NULL, NULL, sizeof(XML_Char) * strlen("123456789")},
140       {"<!DOCTYPE root [\n"
141        "<!ENTITY nine \"123456789\">\n"
142        "<!ENTITY nine2 \"&nine;&nine;\">\n"
143        "]>\n"
144        "<root>&nine2;&nine2;&nine2;</root>",
145        NULL, NULL,
146        sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */
147            * (strlen("&nine;") + strlen("123456789"))},
148       {"<!DOCTYPE r [\n"
149        "  <!ENTITY five SYSTEM 'first.ent'>\n"
150        "]>\n"
151        "<r>&five;</r>",
152        "12345", NULL, 0},
153       {"<!DOCTYPE r [\n"
154        "  <!ENTITY five SYSTEM 'first.ent'>\n"
155        "]>\n"
156        "<r>&five;</r>",
157        "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0},
158 
159 #  ifdef XML_DTD
160       /* Parameter entities */
161       {"<!DOCTYPE r [\n"
162        "<!ENTITY % comment \"<!---->\">\n"
163        "%comment;\n"
164        "]>\n"
165        "<r/>",
166        NULL, NULL, sizeof(XML_Char) * strlen("<!---->")},
167       {"<!DOCTYPE r [\n"
168        "<!ENTITY % ninedef \"&#60;!ENTITY nine &#34;123456789&#34;&#62;\">\n"
169        "%ninedef;\n"
170        "]>\n"
171        "<r>&nine;</r>",
172        NULL, NULL,
173        sizeof(XML_Char)
174            * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789"))},
175       {"<!DOCTYPE r [\n"
176        "<!ENTITY % comment \"<!--1-->\">\n"
177        "<!ENTITY % comment2 \"&#37;comment;<!--22-->&#37;comment;\">\n"
178        "%comment2;\n"
179        "]>\n"
180        "<r/>\n",
181        NULL, NULL,
182        sizeof(XML_Char)
183            * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->"))},
184       {"<!DOCTYPE r [\n"
185        "  <!ENTITY % five \"12345\">\n"
186        "  <!ENTITY % five2def \"&#60;!ENTITY five2 &#34;[&#37;five;][&#37;five;]]]]&#34;&#62;\">\n"
187        "  %five2def;\n"
188        "]>\n"
189        "<r>&five2;</r>",
190        NULL, NULL, /* from "%five2def;": */
191        sizeof(XML_Char)
192            * (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">")
193               + 2 /* calls to "%five;" */ * strlen("12345")
194               + /* from "&five2;": */ strlen("[12345][12345]]]]"))},
195       {"<!DOCTYPE r SYSTEM \"first.ent\">\n"
196        "<r/>",
197        "<!ENTITY % comment '<!--1-->'>\n"
198        "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n"
199        "%comment2;",
200        NULL,
201        sizeof(XML_Char)
202            * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->")
203               + 2 /* calls to "%comment;" */ * strlen("<!---->"))},
204       {"<!DOCTYPE r SYSTEM 'first.ent'>\n"
205        "<r/>",
206        "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n"
207        "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n"
208        "%e2;\n",
209        "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->")},
210       {
211           "<!DOCTYPE r SYSTEM 'first.ent'>\n"
212           "<r/>",
213           "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
214           "<!ENTITY % e2 '%e1;'>",
215           "<?xml version='1.0' encoding='utf-8'?>\n"
216           "hello\n"
217           "xml" /* without trailing newline! */,
218           0,
219       },
220       {
221           "<!DOCTYPE r SYSTEM 'first.ent'>\n"
222           "<r/>",
223           "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
224           "<!ENTITY % e2 '%e1;'>",
225           "<?xml version='1.0' encoding='utf-8'?>\n"
226           "hello\n"
227           "xml\n" /* with trailing newline! */,
228           0,
229       },
230       {"<!DOCTYPE doc SYSTEM 'first.ent'>\n"
231        "<doc></doc>\n",
232        "<!ELEMENT doc EMPTY>\n"
233        "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
234        "<!ENTITY % e2 '%e1;'>\n"
235        "%e1;\n",
236        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */,
237        strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>")},
238 #  endif /* XML_DTD */
239   };
240 
241   const size_t countCases = sizeof(cases) / sizeof(cases[0]);
242   size_t u = 0;
243   for (; u < countCases; u++) {
244     const unsigned long long expectedCountBytesDirect
245         = strlen(cases[u].primaryText);
246     const unsigned long long expectedCountBytesIndirect
247         = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0)
248           + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText)
249                                          : 0)
250           + cases[u].expectedCountBytesIndirectExtra;
251 
252     XML_Parser parser = XML_ParserCreate(NULL);
253     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
254     if (cases[u].firstExternalText) {
255       XML_SetExternalEntityRefHandler(parser,
256                                       accounting_external_entity_ref_handler);
257       XML_SetUserData(parser, (void *)&cases[u]);
258     }
259 
260     enum XML_Status status
261         = _XML_Parse_SINGLE_BYTES(parser, cases[u].primaryText,
262                                   (int)strlen(cases[u].primaryText), XML_TRUE);
263     if (status != XML_STATUS_OK) {
264       _xml_failure(parser, __FILE__, __LINE__);
265     }
266 
267     const unsigned long long actualCountBytesDirect
268         = testingAccountingGetCountBytesDirect(parser);
269     const unsigned long long actualCountBytesIndirect
270         = testingAccountingGetCountBytesIndirect(parser);
271 
272     XML_ParserFree(parser);
273 
274     if (actualCountBytesDirect != expectedCountBytesDirect) {
275       fprintf(
276           stderr,
277           "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
278               "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n",
279           u + 1, countCases, expectedCountBytesDirect, actualCountBytesDirect);
280       fail("Count of direct bytes is off");
281     }
282 
283     if (actualCountBytesIndirect != expectedCountBytesIndirect) {
284       fprintf(
285           stderr,
286           "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
287               "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n",
288           u + 1, countCases, expectedCountBytesIndirect,
289           actualCountBytesIndirect);
290       fail("Count of indirect bytes is off");
291     }
292   }
293 }
294 END_TEST
295 
296 START_TEST(test_billion_laughs_attack_protection_api) {
297   XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
298   XML_Parser parserWithParent = XML_ExternalEntityParserCreate(
299       parserWithoutParent, XCS("entity123"), NULL);
300   if (parserWithoutParent == NULL)
301     fail("parserWithoutParent is NULL");
302   if (parserWithParent == NULL)
303     fail("parserWithParent is NULL");
304 
305   // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases
306   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f)
307       == XML_TRUE)
308     fail("Call with NULL parser is NOT supposed to succeed");
309   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent,
310                                                                123.0f)
311       == XML_TRUE)
312     fail("Call with non-root parser is NOT supposed to succeed");
313   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
314           parserWithoutParent, NAN)
315       == XML_TRUE)
316     fail("Call with NaN limit is NOT supposed to succeed");
317   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
318           parserWithoutParent, -1.0f)
319       == XML_TRUE)
320     fail("Call with negative limit is NOT supposed to succeed");
321   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
322           parserWithoutParent, 0.9f)
323       == XML_TRUE)
324     fail("Call with positive limit <1.0 is NOT supposed to succeed");
325 
326   // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases
327   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
328           parserWithoutParent, 1.0f)
329       == XML_FALSE)
330     fail("Call with positive limit >=1.0 is supposed to succeed");
331   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
332           parserWithoutParent, 123456.789f)
333       == XML_FALSE)
334     fail("Call with positive limit >=1.0 is supposed to succeed");
335   if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
336           parserWithoutParent, INFINITY)
337       == XML_FALSE)
338     fail("Call with positive limit >=1.0 is supposed to succeed");
339 
340   // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases
341   if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123)
342       == XML_TRUE)
343     fail("Call with NULL parser is NOT supposed to succeed");
344   if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent,
345                                                               123)
346       == XML_TRUE)
347     fail("Call with non-root parser is NOT supposed to succeed");
348 
349   // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases
350   if (XML_SetBillionLaughsAttackProtectionActivationThreshold(
351           parserWithoutParent, 123)
352       == XML_FALSE)
353     fail("Call with non-NULL parentless parser is supposed to succeed");
354 
355   XML_ParserFree(parserWithParent);
356   XML_ParserFree(parserWithoutParent);
357 }
358 END_TEST
359 
360 START_TEST(test_helper_unsigned_char_to_printable) {
361   // Smoke test
362   unsigned char uc = 0;
363   for (; uc < (unsigned char)-1; uc++) {
364     set_subtest("char %u", (unsigned)uc);
365     const char *const printable = unsignedCharToPrintable(uc);
366     if (printable == NULL)
367       fail("unsignedCharToPrintable returned NULL");
368     else if (strlen(printable) < (size_t)1)
369       fail("unsignedCharToPrintable returned empty string");
370   }
371 
372   // Two concrete samples
373   set_subtest("char 'A'");
374   if (strcmp(unsignedCharToPrintable('A'), "A") != 0)
375     fail("unsignedCharToPrintable result mistaken");
376   set_subtest("char '\\'");
377   if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0)
378     fail("unsignedCharToPrintable result mistaken");
379 }
380 END_TEST
381 
382 START_TEST(test_amplification_isolated_external_parser) {
383   // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>"
384   // (22) that is used in function accountingGetCurrentAmplification in
385   // xmlparse.c.
386   //                  1.........1.........1.........1.........1..4 => 44
387   const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>";
388   const int docLen = (int)sizeof(doc) - 1;
389   const float maximumToleratedAmplification = 2.0f;
390 
391   struct TestCase {
392     int offsetOfThreshold;
393     enum XML_Status expectedStatus;
394   };
395 
396   struct TestCase cases[] = {
397       {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR},
398       {+1, XML_STATUS_OK},    {+2, XML_STATUS_OK},
399   };
400 
401   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
402     const int offsetOfThreshold = cases[i].offsetOfThreshold;
403     const enum XML_Status expectedStatus = cases[i].expectedStatus;
404     const unsigned long long activationThresholdBytes
405         = docLen + offsetOfThreshold;
406 
407     set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold,
408                 expectedStatus);
409 
410     XML_Parser parser = XML_ParserCreate(NULL);
411     assert_true(parser != NULL);
412 
413     assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification(
414                     parser, maximumToleratedAmplification)
415                 == XML_TRUE);
416     assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold(
417                     parser, activationThresholdBytes)
418                 == XML_TRUE);
419 
420     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
421     assert_true(ext_parser != NULL);
422 
423     const enum XML_Status actualStatus
424         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE);
425 
426     assert_true(actualStatus == expectedStatus);
427     if (actualStatus != XML_STATUS_OK) {
428       assert_true(XML_GetErrorCode(ext_parser)
429                   == XML_ERROR_AMPLIFICATION_LIMIT_BREACH);
430     }
431 
432     XML_ParserFree(ext_parser);
433     XML_ParserFree(parser);
434   }
435 }
436 END_TEST
437 
438 #endif // XML_GE == 1
439 
440 void
441 make_accounting_test_case(Suite *s) {
442 #if XML_GE == 1
443   TCase *tc_accounting = tcase_create("accounting tests");
444 
445   suite_add_tcase(s, tc_accounting);
446 
447   tcase_add_test(tc_accounting, test_accounting_precision);
448   tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
449   tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
450   tcase_add_test__ifdef_xml_dtd(tc_accounting,
451                                 test_amplification_isolated_external_parser);
452 #else
453   UNUSED_P(s);
454 #endif /* XML_GE == 1 */
455 }
456