xref: /freebsd/contrib/expat/fuzz/xml_lpm_fuzzer.cpp (revision 627b778d9e6b603a44a010d22d823ca7c392b363)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 2022 Mark Brand <markbrand@google.com>
10    Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org>
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #if defined(NDEBUG)
34 #  undef NDEBUG // because checks below rely on assert(...)
35 #endif
36 
37 #include <assert.h>
38 #include <stdint.h>
39 #include <vector>
40 
41 #include "expat.h"
42 #include "xml_lpm_fuzzer.pb.h"
43 #include "src/libfuzzer/libfuzzer_macro.h"
44 
// Encoding name passed to the parser create/reset calls in this file;
// written by SetEncoding (may be null).
static const char *g_encoding = nullptr;

// Buffer parsed by ExternalEntityRefHandler and its length in bytes. Both are
// assigned by the kExternalEntity action of the fuzz entry point; the pointer
// is cleared again at the start of every fuzz iteration.
static const char *g_external_entity = nullptr;
static size_t g_external_entity_size = 0;
48 
49 void
SetEncoding(const xml_lpm_fuzzer::Encoding & e)50 SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
51   switch (e) {
52   case xml_lpm_fuzzer::Encoding::UTF8:
53     g_encoding = "UTF-8";
54     break;
55 
56   case xml_lpm_fuzzer::Encoding::UTF16:
57     g_encoding = "UTF-16";
58     break;
59 
60   case xml_lpm_fuzzer::Encoding::ISO88591:
61     g_encoding = "ISO-8859-1";
62     break;
63 
64   case xml_lpm_fuzzer::Encoding::ASCII:
65     g_encoding = "US-ASCII";
66     break;
67 
68   case xml_lpm_fuzzer::Encoding::NONE:
69     g_encoding = NULL;
70     break;
71 
72   default:
73     g_encoding = "UNKNOWN";
74     break;
75   }
76 }
77 
// Number of allocation attempts (malloc or realloc) seen so far in the current
// fuzz iteration; the fuzz entry point resets it to zero.
static int g_allocation_count = 0;
// 1-based ordinals of the allocation attempts that must fail; filled from the
// testcase by the fuzz entry point.
static std::vector<int> g_fail_allocations = {};

// Records one more allocation attempt and reports whether the testcase listed
// that attempt's ordinal in g_fail_allocations. Shared by both hooks so the
// counting and matching logic lives in exactly one place.
static bool
ShouldFailThisAllocation() {
  g_allocation_count += 1;
  for (const int index : g_fail_allocations) {
    if (index == g_allocation_count) {
      return true;
    }
  }
  return false;
}

// malloc replacement for the parser: returns null for attempts selected for
// failure, otherwise forwards to malloc().
void *
MallocHook(size_t size) {
  if (ShouldFailThisAllocation()) {
    return nullptr;
  }
  return malloc(size);
}

// realloc replacement for the parser: returns null for attempts selected for
// failure (leaving ptr untouched, as a failing realloc() would), otherwise
// forwards to realloc().
void *
ReallocHook(void *ptr, size_t size) {
  if (ShouldFailThisAllocation()) {
    return nullptr;
  }
  return realloc(ptr, size);
}
102 
// free replacement for the parser's memory suite. It forwards directly to
// free(); no counting or failure injection happens on this path.
void
FreeHook(void *ptr) {
  free(ptr);
}
107 
108 XML_Memory_Handling_Suite memory_handling_suite
109     = {MallocHook, ReallocHook, FreeHook};
110 
111 void InitializeParser(XML_Parser parser);
112 
113 // We want a parse function that supports resumption, so that we can cover the
114 // suspend/resume code.
115 enum XML_Status
Parse(XML_Parser parser,const char * input,int input_len,int is_final)116 Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
117   enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
118   while (status == XML_STATUS_SUSPENDED) {
119     status = XML_ResumeParser(parser);
120   }
121   return status;
122 }
123 
124 // When the fuzzer is compiled with instrumentation such as ASan, then the
125 // accesses in TouchString will fault if they access invalid memory (ie. detect
126 // either a use-after-free or buffer-overflow). By calling TouchString in each
127 // of the callbacks, we can check that the arguments meet the API specifications
128 // in terms of length/null-termination. no_optimize is used to ensure that the
129 // compiler has to emit actual memory reads, instead of removing them.
130 static volatile size_t no_optimize = 0;
131 static void
TouchString(const XML_Char * ptr,int len=-1)132 TouchString(const XML_Char *ptr, int len = -1) {
133   if (! ptr) {
134     return;
135   }
136 
137   if (len == -1) {
138     for (XML_Char value = *ptr++; value; value = *ptr++) {
139       no_optimize += value;
140     }
141   } else {
142     for (int i = 0; i < len; ++i) {
143       no_optimize += ptr[i];
144     }
145   }
146 }
147 
148 static void
TouchNodeAndRecurse(XML_Content * content)149 TouchNodeAndRecurse(XML_Content *content) {
150   switch (content->type) {
151   case XML_CTYPE_EMPTY:
152   case XML_CTYPE_ANY:
153     assert(content->quant == XML_CQUANT_NONE);
154     assert(content->name == NULL);
155     assert(content->numchildren == 0);
156     assert(content->children == NULL);
157     break;
158 
159   case XML_CTYPE_MIXED:
160     assert(content->quant == XML_CQUANT_NONE
161            || content->quant == XML_CQUANT_REP);
162     assert(content->name == NULL);
163     for (unsigned int i = 0; i < content->numchildren; ++i) {
164       assert(content->children[i].type == XML_CTYPE_NAME);
165       assert(content->children[i].quant == XML_CQUANT_NONE);
166       assert(content->children[i].numchildren == 0);
167       assert(content->children[i].children == NULL);
168       TouchString(content->children[i].name);
169     }
170     break;
171 
172   case XML_CTYPE_NAME:
173     assert((content->quant == XML_CQUANT_NONE)
174            || (content->quant == XML_CQUANT_OPT)
175            || (content->quant == XML_CQUANT_REP)
176            || (content->quant == XML_CQUANT_PLUS));
177     assert(content->numchildren == 0);
178     assert(content->children == NULL);
179     TouchString(content->name);
180     break;
181 
182   case XML_CTYPE_CHOICE:
183   case XML_CTYPE_SEQ:
184     assert((content->quant == XML_CQUANT_NONE)
185            || (content->quant == XML_CQUANT_OPT)
186            || (content->quant == XML_CQUANT_REP)
187            || (content->quant == XML_CQUANT_PLUS));
188     assert(content->name == NULL);
189     for (unsigned int i = 0; i < content->numchildren; ++i) {
190       TouchNodeAndRecurse(&content->children[i]);
191     }
192     break;
193 
194   default:
195     assert(false);
196   }
197 }
198 
199 static void XMLCALL
ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)200 ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
201   TouchString(name);
202   TouchNodeAndRecurse(model);
203   XML_FreeContentModel((XML_Parser)userData, model);
204 }
205 
206 static void XMLCALL
AttlistDeclHandler(void * userData,const XML_Char * elname,const XML_Char * attname,const XML_Char * atttype,const XML_Char * dflt,int isrequired)207 AttlistDeclHandler(void *userData, const XML_Char *elname,
208                    const XML_Char *attname, const XML_Char *atttype,
209                    const XML_Char *dflt, int isrequired) {
210   (void)userData;
211   TouchString(elname);
212   TouchString(attname);
213   TouchString(atttype);
214   TouchString(dflt);
215   (void)isrequired;
216 }
217 
218 static void XMLCALL
XmlDeclHandler(void * userData,const XML_Char * version,const XML_Char * encoding,int standalone)219 XmlDeclHandler(void *userData, const XML_Char *version,
220                const XML_Char *encoding, int standalone) {
221   (void)userData;
222   TouchString(version);
223   TouchString(encoding);
224   (void)standalone;
225 }
226 
227 static void XMLCALL
StartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)228 StartElementHandler(void *userData, const XML_Char *name,
229                     const XML_Char **atts) {
230   (void)userData;
231   TouchString(name);
232   for (size_t i = 0; atts[i] != NULL; ++i) {
233     TouchString(atts[i]);
234   }
235 }
236 
237 static void XMLCALL
EndElementHandler(void * userData,const XML_Char * name)238 EndElementHandler(void *userData, const XML_Char *name) {
239   (void)userData;
240   TouchString(name);
241 }
242 
243 static void XMLCALL
CharacterDataHandler(void * userData,const XML_Char * s,int len)244 CharacterDataHandler(void *userData, const XML_Char *s, int len) {
245   (void)userData;
246   TouchString(s, len);
247 }
248 
249 static void XMLCALL
ProcessingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)250 ProcessingInstructionHandler(void *userData, const XML_Char *target,
251                              const XML_Char *data) {
252   (void)userData;
253   TouchString(target);
254   TouchString(data);
255 }
256 
257 static void XMLCALL
CommentHandler(void * userData,const XML_Char * data)258 CommentHandler(void *userData, const XML_Char *data) {
259   TouchString(data);
260   // Use the comment handler to trigger parser suspend, so that we can get
261   // coverage of that code.
262   XML_StopParser((XML_Parser)userData, XML_TRUE);
263 }
264 
265 static void XMLCALL
StartCdataSectionHandler(void * userData)266 StartCdataSectionHandler(void *userData) {
267   (void)userData;
268 }
269 
270 static void XMLCALL
EndCdataSectionHandler(void * userData)271 EndCdataSectionHandler(void *userData) {
272   (void)userData;
273 }
274 
275 static void XMLCALL
DefaultHandler(void * userData,const XML_Char * s,int len)276 DefaultHandler(void *userData, const XML_Char *s, int len) {
277   (void)userData;
278   TouchString(s, len);
279 }
280 
281 static void XMLCALL
StartDoctypeDeclHandler(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)282 StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
283                         const XML_Char *sysid, const XML_Char *pubid,
284                         int has_internal_subset) {
285   (void)userData;
286   TouchString(doctypeName);
287   TouchString(sysid);
288   TouchString(pubid);
289   (void)has_internal_subset;
290 }
291 
292 static void XMLCALL
EndDoctypeDeclHandler(void * userData)293 EndDoctypeDeclHandler(void *userData) {
294   (void)userData;
295 }
296 
297 static void XMLCALL
EntityDeclHandler(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)298 EntityDeclHandler(void *userData, const XML_Char *entityName,
299                   int is_parameter_entity, const XML_Char *value,
300                   int value_length, const XML_Char *base,
301                   const XML_Char *systemId, const XML_Char *publicId,
302                   const XML_Char *notationName) {
303   (void)userData;
304   TouchString(entityName);
305   (void)is_parameter_entity;
306   TouchString(value, value_length);
307   TouchString(base);
308   TouchString(systemId);
309   TouchString(publicId);
310   TouchString(notationName);
311 }
312 
313 static void XMLCALL
NotationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)314 NotationDeclHandler(void *userData, const XML_Char *notationName,
315                     const XML_Char *base, const XML_Char *systemId,
316                     const XML_Char *publicId) {
317   (void)userData;
318   TouchString(notationName);
319   TouchString(base);
320   TouchString(systemId);
321   TouchString(publicId);
322 }
323 
324 static void XMLCALL
StartNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)325 StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
326                           const XML_Char *uri) {
327   (void)userData;
328   TouchString(prefix);
329   TouchString(uri);
330 }
331 
332 static void XMLCALL
EndNamespaceDeclHandler(void * userData,const XML_Char * prefix)333 EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
334   (void)userData;
335   TouchString(prefix);
336 }
337 
338 static int XMLCALL
NotStandaloneHandler(void * userData)339 NotStandaloneHandler(void *userData) {
340   (void)userData;
341   return XML_STATUS_OK;
342 }
343 
344 static int XMLCALL
ExternalEntityRefHandler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)345 ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
346                          const XML_Char *base, const XML_Char *systemId,
347                          const XML_Char *publicId) {
348   int rc = XML_STATUS_ERROR;
349   TouchString(context);
350   TouchString(base);
351   TouchString(systemId);
352   TouchString(publicId);
353 
354   if (g_external_entity) {
355     XML_Parser ext_parser
356         = XML_ExternalEntityParserCreate(parser, context, g_encoding);
357     if (ext_parser != NULL) {
358       rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
359       XML_ParserFree(ext_parser);
360     }
361   }
362 
363   return rc;
364 }
365 
366 static void XMLCALL
SkippedEntityHandler(void * userData,const XML_Char * entityName,int is_parameter_entity)367 SkippedEntityHandler(void *userData, const XML_Char *entityName,
368                      int is_parameter_entity) {
369   (void)userData;
370   TouchString(entityName);
371   (void)is_parameter_entity;
372 }
373 
374 static int XMLCALL
UnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)375 UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
376                        XML_Encoding *info) {
377   (void)encodingHandlerData;
378   TouchString(name);
379   (void)info;
380   return XML_STATUS_ERROR;
381 }
382 
383 void
InitializeParser(XML_Parser parser)384 InitializeParser(XML_Parser parser) {
385   XML_SetUserData(parser, (void *)parser);
386   XML_SetHashSalt(parser, 0x41414141);
387   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
388 
389   XML_SetElementDeclHandler(parser, ElementDeclHandler);
390   XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
391   XML_SetXmlDeclHandler(parser, XmlDeclHandler);
392   XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
393   XML_SetCharacterDataHandler(parser, CharacterDataHandler);
394   XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
395   XML_SetCommentHandler(parser, CommentHandler);
396   XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
397                              EndCdataSectionHandler);
398   // XML_SetDefaultHandler disables entity expansion
399   XML_SetDefaultHandlerExpand(parser, DefaultHandler);
400   XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
401                             EndDoctypeDeclHandler);
402   // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
403   //       and there isn't any significant code change between the two.
404   XML_SetEntityDeclHandler(parser, EntityDeclHandler);
405   XML_SetNotationDeclHandler(parser, NotationDeclHandler);
406   XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
407                               EndNamespaceDeclHandler);
408   XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
409   XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
410   XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
411   XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
412 }
413 
// Fuzz entry point. One testcase supplies an encoding, a list of allocation
// ordinals that must fail, and a sequence of actions (parse a chunk, parse a
// final chunk, reset the parser, or set the external entity buffer). A single
// parser is created with the failure-injecting memory suite and "|" as the
// namespace separator, driven through all actions, and freed at the end.
// Testcases without actions are skipped.
DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
  // Never carry an external entity buffer over from a previous iteration.
  g_external_entity = nullptr;

  const int action_count = testcase.actions_size();
  if (action_count == 0) {
    return;
  }

  // Restart allocation counting and load this testcase's failure ordinals.
  g_allocation_count = 0;
  g_fail_allocations.clear();
  const int failure_count = testcase.fail_allocations_size();
  for (int n = 0; n < failure_count; ++n) {
    g_fail_allocations.push_back(testcase.fail_allocations(n));
  }

  SetEncoding(testcase.encoding());
  // NOTE(review): the returned handle is used without a null check, although
  // the injected allocation failures can presumably make creation fail --
  // confirm that every call below tolerates a null parser.
  XML_Parser parser
      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
  InitializeParser(parser);

  // Resetting a parser drops its configuration, so each reset is followed by
  // a fresh InitializeParser call.
  const auto restart_parser = [parser]() {
    XML_ParserReset(parser, g_encoding);
    InitializeParser(parser);
  };

  for (int n = 0; n < action_count; ++n) {
    const auto &action = testcase.actions(n);
    switch (action.action_case()) {
    case xml_lpm_fuzzer::Action::kChunk: {
      const auto &chunk = action.chunk();
      const enum XML_Status outcome
          = Parse(parser, chunk.data(), chunk.size(), 0);
      if (outcome == XML_STATUS_ERROR) {
        // Force a reset after parse error.
        restart_parser();
      }
      break;
    }

    case xml_lpm_fuzzer::Action::kLastChunk: {
      const auto &chunk = action.last_chunk();
      Parse(parser, chunk.data(), chunk.size(), 1);
      // A final chunk ends the document, so always start over afterwards.
      restart_parser();
      break;
    }

    case xml_lpm_fuzzer::Action::kReset:
      restart_parser();
      break;

    case xml_lpm_fuzzer::Action::kExternalEntity: {
      // Points into the testcase, which outlives every parse in this call.
      const auto &entity = action.external_entity();
      g_external_entity = entity.data();
      g_external_entity_size = entity.size();
      break;
    }

    default:
      break;
    }
  }

  XML_ParserFree(parser);
}
467