xref: /freebsd/contrib/expat/fuzz/xml_lpm_fuzzer.cpp (revision fe9278888fd4414abe2d922e469cf608005f4c65)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2022 Mark Brand <markbrand@google.com>
10    Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org>
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #if defined(NDEBUG)
34 #  undef NDEBUG // because checks below rely on assert(...)
35 #endif
36 
37 #include <assert.h>
38 #include <stdint.h>
39 #include <vector>
40 
41 #include "expat.h"
42 #include "xml_lpm_fuzzer.pb.h"
43 #include "src/libfuzzer/libfuzzer_macro.h"
44 
// Encoding name passed to Expat whenever a parser is created or reset; set by
// SetEncoding(). A null pointer corresponds to Encoding::NONE.
static const char *g_encoding{nullptr};
// Text served by ExternalEntityRefHandler, or null while the current test
// case has not supplied an external entity.
static const char *g_external_entity{nullptr};
// Length of the text g_external_entity points at.
static size_t g_external_entity_size{0};
48 
49 void
SetEncoding(const xml_lpm_fuzzer::Encoding & e)50 SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
51   switch (e) {
52   case xml_lpm_fuzzer::Encoding::UTF8:
53     g_encoding = "UTF-8";
54     break;
55 
56   case xml_lpm_fuzzer::Encoding::UTF16:
57     g_encoding = "UTF-16";
58     break;
59 
60   case xml_lpm_fuzzer::Encoding::ISO88591:
61     g_encoding = "ISO-8859-1";
62     break;
63 
64   case xml_lpm_fuzzer::Encoding::ASCII:
65     g_encoding = "US-ASCII";
66     break;
67 
68   case xml_lpm_fuzzer::Encoding::NONE:
69     g_encoding = NULL;
70     break;
71 
72   default:
73     g_encoding = "UNKNOWN";
74     break;
75   }
76 }
77 
// Number of allocation requests (MallocHook and ReallocHook combined) seen so
// far. The fuzzer entry point sets it back to zero for every test case.
static int g_allocation_count = 0;
// Ordinals of the allocation requests that must fail. The counter is
// incremented before it is compared, so the first request has ordinal 1.
static std::vector<int> g_fail_allocations = {};

// Counts one more allocation request and reports whether that request is
// listed in g_fail_allocations. MallocHook and ReallocHook share this helper
// so that the failure-injection rule exists in exactly one place.
static bool
CountAllocationAndCheckFailure() {
  g_allocation_count += 1;
  for (const int index : g_fail_allocations) {
    if (index == g_allocation_count) {
      return true;
    }
  }
  return false;
}

// Allocation hook: returns NULL when the current request is scheduled to
// fail, otherwise forwards to malloc().
void *
MallocHook(size_t size) {
  if (CountAllocationAndCheckFailure()) {
    return NULL;
  }
  return malloc(size);
}

// Reallocation hook: returns NULL when the current request is scheduled to
// fail, leaving `ptr` untouched (the same outcome a failed realloc() has);
// otherwise forwards to realloc().
void *
ReallocHook(void *ptr, size_t size) {
  if (CountAllocationAndCheckFailure()) {
    return NULL;
  }
  return realloc(ptr, size);
}
102 
// Deallocation hook: passes the block straight to free(). Releases are
// neither counted nor subject to failure injection.
void
FreeHook(void *ptr) {
  free(ptr);
}
107 
108 XML_Memory_Handling_Suite memory_handling_suite
109     = {MallocHook, ReallocHook, FreeHook};
110 
111 void InitializeParser(XML_Parser parser);
112 
113 // We want a parse function that supports resumption, so that we can cover the
114 // suspend/resume code.
115 enum XML_Status
Parse(XML_Parser parser,const char * input,int input_len,int is_final)116 Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
117   enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
118   while (status == XML_STATUS_SUSPENDED) {
119     status = XML_ResumeParser(parser);
120   }
121   return status;
122 }
123 
124 // When the fuzzer is compiled with instrumentation such as ASan, then the
125 // accesses in TouchString will fault if they access invalid memory (ie. detect
126 // either a use-after-free or buffer-overflow). By calling TouchString in each
127 // of the callbacks, we can check that the arguments meet the API specifications
128 // in terms of length/null-termination. no_optimize is used to ensure that the
129 // compiler has to emit actual memory reads, instead of removing them.
130 static volatile size_t no_optimize = 0;
131 static void
TouchString(const XML_Char * ptr,int len=-1)132 TouchString(const XML_Char *ptr, int len = -1) {
133   if (! ptr) {
134     return;
135   }
136 
137   if (len == -1) {
138     for (XML_Char value = *ptr++; value; value = *ptr++) {
139       no_optimize += value;
140     }
141   } else {
142     for (int i = 0; i < len; ++i) {
143       no_optimize += ptr[i];
144     }
145   }
146 }
147 
148 static void
TouchNodeAndRecurse(XML_Content * content)149 TouchNodeAndRecurse(XML_Content *content) {
150   switch (content->type) {
151   case XML_CTYPE_EMPTY:
152   case XML_CTYPE_ANY:
153     assert(content->quant == XML_CQUANT_NONE);
154     assert(content->name == NULL);
155     assert(content->numchildren == 0);
156     assert(content->children == NULL);
157     break;
158 
159   case XML_CTYPE_MIXED:
160     assert(content->quant == XML_CQUANT_NONE
161            || content->quant == XML_CQUANT_REP);
162     assert(content->name == NULL);
163     for (unsigned int i = 0; i < content->numchildren; ++i) {
164       assert(content->children[i].type == XML_CTYPE_NAME);
165       assert(content->children[i].quant == XML_CQUANT_NONE);
166       assert(content->children[i].numchildren == 0);
167       assert(content->children[i].children == NULL);
168       TouchString(content->children[i].name);
169     }
170     break;
171 
172   case XML_CTYPE_NAME:
173     assert((content->quant == XML_CQUANT_NONE)
174            || (content->quant == XML_CQUANT_OPT)
175            || (content->quant == XML_CQUANT_REP)
176            || (content->quant == XML_CQUANT_PLUS));
177     assert(content->numchildren == 0);
178     assert(content->children == NULL);
179     TouchString(content->name);
180     break;
181 
182   case XML_CTYPE_CHOICE:
183   case XML_CTYPE_SEQ:
184     assert((content->quant == XML_CQUANT_NONE)
185            || (content->quant == XML_CQUANT_OPT)
186            || (content->quant == XML_CQUANT_REP)
187            || (content->quant == XML_CQUANT_PLUS));
188     assert(content->name == NULL);
189     for (unsigned int i = 0; i < content->numchildren; ++i) {
190       TouchNodeAndRecurse(&content->children[i]);
191     }
192     break;
193 
194   default:
195     assert(false);
196   }
197 }
198 
199 static void XMLCALL
ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)200 ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
201   TouchString(name);
202   TouchNodeAndRecurse(model);
203   XML_FreeContentModel((XML_Parser)userData, model);
204 }
205 
206 static void XMLCALL
AttlistDeclHandler(void * userData,const XML_Char * elname,const XML_Char * attname,const XML_Char * atttype,const XML_Char * dflt,int isrequired)207 AttlistDeclHandler(void *userData, const XML_Char *elname,
208                    const XML_Char *attname, const XML_Char *atttype,
209                    const XML_Char *dflt, int isrequired) {
210   (void)userData;
211   TouchString(elname);
212   TouchString(attname);
213   TouchString(atttype);
214   TouchString(dflt);
215   (void)isrequired;
216 }
217 
218 static void XMLCALL
XmlDeclHandler(void * userData,const XML_Char * version,const XML_Char * encoding,int standalone)219 XmlDeclHandler(void *userData, const XML_Char *version,
220                const XML_Char *encoding, int standalone) {
221   (void)userData;
222   TouchString(version);
223   TouchString(encoding);
224   (void)standalone;
225 }
226 
227 static void XMLCALL
StartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)228 StartElementHandler(void *userData, const XML_Char *name,
229                     const XML_Char **atts) {
230   (void)userData;
231   TouchString(name);
232   for (size_t i = 0; atts[i] != NULL; ++i) {
233     TouchString(atts[i]);
234   }
235 }
236 
237 static void XMLCALL
EndElementHandler(void * userData,const XML_Char * name)238 EndElementHandler(void *userData, const XML_Char *name) {
239   (void)userData;
240   TouchString(name);
241 }
242 
243 static void XMLCALL
CharacterDataHandler(void * userData,const XML_Char * s,int len)244 CharacterDataHandler(void *userData, const XML_Char *s, int len) {
245   (void)userData;
246   TouchString(s, len);
247 }
248 
249 static void XMLCALL
ProcessingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)250 ProcessingInstructionHandler(void *userData, const XML_Char *target,
251                              const XML_Char *data) {
252   (void)userData;
253   TouchString(target);
254   TouchString(data);
255 }
256 
257 static void XMLCALL
CommentHandler(void * userData,const XML_Char * data)258 CommentHandler(void *userData, const XML_Char *data) {
259   TouchString(data);
260   // Use the comment handler to trigger parser suspend, so that we can get
261   // coverage of that code.
262   XML_StopParser((XML_Parser)userData, XML_TRUE);
263 }
264 
265 static void XMLCALL
StartCdataSectionHandler(void * userData)266 StartCdataSectionHandler(void *userData) {
267   (void)userData;
268 }
269 
270 static void XMLCALL
EndCdataSectionHandler(void * userData)271 EndCdataSectionHandler(void *userData) {
272   (void)userData;
273 }
274 
275 static void XMLCALL
DefaultHandler(void * userData,const XML_Char * s,int len)276 DefaultHandler(void *userData, const XML_Char *s, int len) {
277   (void)userData;
278   TouchString(s, len);
279 }
280 
281 static void XMLCALL
StartDoctypeDeclHandler(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)282 StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
283                         const XML_Char *sysid, const XML_Char *pubid,
284                         int has_internal_subset) {
285   (void)userData;
286   TouchString(doctypeName);
287   TouchString(sysid);
288   TouchString(pubid);
289   (void)has_internal_subset;
290 }
291 
292 static void XMLCALL
EndDoctypeDeclHandler(void * userData)293 EndDoctypeDeclHandler(void *userData) {
294   (void)userData;
295 }
296 
297 static void XMLCALL
EntityDeclHandler(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)298 EntityDeclHandler(void *userData, const XML_Char *entityName,
299                   int is_parameter_entity, const XML_Char *value,
300                   int value_length, const XML_Char *base,
301                   const XML_Char *systemId, const XML_Char *publicId,
302                   const XML_Char *notationName) {
303   (void)userData;
304   TouchString(entityName);
305   (void)is_parameter_entity;
306   TouchString(value, value_length);
307   TouchString(base);
308   TouchString(systemId);
309   TouchString(publicId);
310   TouchString(notationName);
311 }
312 
313 static void XMLCALL
NotationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)314 NotationDeclHandler(void *userData, const XML_Char *notationName,
315                     const XML_Char *base, const XML_Char *systemId,
316                     const XML_Char *publicId) {
317   (void)userData;
318   TouchString(notationName);
319   TouchString(base);
320   TouchString(systemId);
321   TouchString(publicId);
322 }
323 
324 static void XMLCALL
StartNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)325 StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
326                           const XML_Char *uri) {
327   (void)userData;
328   TouchString(prefix);
329   TouchString(uri);
330 }
331 
332 static void XMLCALL
EndNamespaceDeclHandler(void * userData,const XML_Char * prefix)333 EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
334   (void)userData;
335   TouchString(prefix);
336 }
337 
338 static int XMLCALL
NotStandaloneHandler(void * userData)339 NotStandaloneHandler(void *userData) {
340   (void)userData;
341   return XML_STATUS_OK;
342 }
343 
344 static int XMLCALL
ExternalEntityRefHandler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)345 ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
346                          const XML_Char *base, const XML_Char *systemId,
347                          const XML_Char *publicId) {
348   int rc = XML_STATUS_ERROR;
349   TouchString(context);
350   TouchString(base);
351   TouchString(systemId);
352   TouchString(publicId);
353 
354   if (g_external_entity) {
355     XML_Parser ext_parser
356         = XML_ExternalEntityParserCreate(parser, context, g_encoding);
357     rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
358     XML_ParserFree(ext_parser);
359   }
360 
361   return rc;
362 }
363 
364 static void XMLCALL
SkippedEntityHandler(void * userData,const XML_Char * entityName,int is_parameter_entity)365 SkippedEntityHandler(void *userData, const XML_Char *entityName,
366                      int is_parameter_entity) {
367   (void)userData;
368   TouchString(entityName);
369   (void)is_parameter_entity;
370 }
371 
372 static int XMLCALL
UnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)373 UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
374                        XML_Encoding *info) {
375   (void)encodingHandlerData;
376   TouchString(name);
377   (void)info;
378   return XML_STATUS_ERROR;
379 }
380 
381 void
InitializeParser(XML_Parser parser)382 InitializeParser(XML_Parser parser) {
383   XML_SetUserData(parser, (void *)parser);
384   XML_SetHashSalt(parser, 0x41414141);
385   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
386 
387   XML_SetElementDeclHandler(parser, ElementDeclHandler);
388   XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
389   XML_SetXmlDeclHandler(parser, XmlDeclHandler);
390   XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
391   XML_SetCharacterDataHandler(parser, CharacterDataHandler);
392   XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
393   XML_SetCommentHandler(parser, CommentHandler);
394   XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
395                              EndCdataSectionHandler);
396   // XML_SetDefaultHandler disables entity expansion
397   XML_SetDefaultHandlerExpand(parser, DefaultHandler);
398   XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
399                             EndDoctypeDeclHandler);
400   // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
401   //       and there isn't any significant code change between the two.
402   XML_SetEntityDeclHandler(parser, EntityDeclHandler);
403   XML_SetNotationDeclHandler(parser, NotationDeclHandler);
404   XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
405                               EndNamespaceDeclHandler);
406   XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
407   XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
408   XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
409   XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
410 }
411 
DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
  // Fuzzer entry point: replays the actions of one test case against a
  // single parser that allocates through the failure-injecting hooks.

  // Forget the external entity text of the previous test case.
  g_external_entity = nullptr;

  const int action_count = testcase.actions_size();
  if (action_count == 0) {
    return;
  }

  // Restart allocation counting and load the failure schedule.
  g_allocation_count = 0;
  g_fail_allocations.clear();
  const int failure_count = testcase.fail_allocations_size();
  for (int k = 0; k < failure_count; ++k) {
    g_fail_allocations.push_back(testcase.fail_allocations(k));
  }

  SetEncoding(testcase.encoding());
  XML_Parser parser
      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
  InitializeParser(parser);

  // InitializeParser is called again after every reset so that the settings
  // and handlers are reinstalled.
  const auto reset_parser = [parser]() {
    XML_ParserReset(parser, g_encoding);
    InitializeParser(parser);
  };

  for (int k = 0; k < action_count; ++k) {
    const auto &action = testcase.actions(k);
    switch (action.action_case()) {
    case xml_lpm_fuzzer::Action::kChunk: {
      // Non-final chunk; a parse error forces a reset.
      const enum XML_Status status
          = Parse(parser, action.chunk().data(), action.chunk().size(), 0);
      if (status == XML_STATUS_ERROR) {
        reset_parser();
      }
      break;
    }

    case xml_lpm_fuzzer::Action::kLastChunk:
      // Final chunk; the parser is reset whatever the outcome.
      Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1);
      reset_parser();
      break;

    case xml_lpm_fuzzer::Action::kReset:
      reset_parser();
      break;

    case xml_lpm_fuzzer::Action::kExternalEntity:
      // Only pointer and length are recorded; the text itself stays inside
      // the test case message.
      g_external_entity = action.external_entity().data();
      g_external_entity_size = action.external_entity().size();
      break;

    default:
      break;
    }
  }

  XML_ParserFree(parser);
}
465