1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2022 Mark Brand <markbrand@google.com>
10 Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org>
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #if defined(NDEBUG)
34 # undef NDEBUG // because checks below rely on assert(...)
35 #endif
36
37 #include <assert.h>
38 #include <stdint.h>
39 #include <vector>
40
41 #include "expat.h"
42 #include "xml_lpm_fuzzer.pb.h"
43 #include "src/libfuzzer/libfuzzer_macro.h"
44
// Encoding name handed to the parser creation/reset calls; written by
// SetEncoding. A null pointer means no encoding is passed.
static const char *g_encoding{nullptr};

// Bytes (and their count) that ExternalEntityRefHandler feeds to the parser it
// creates for an external entity. A null pointer means none was configured.
static const char *g_external_entity{nullptr};
static size_t g_external_entity_size{0};
48
49 void
SetEncoding(const xml_lpm_fuzzer::Encoding & e)50 SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
51 switch (e) {
52 case xml_lpm_fuzzer::Encoding::UTF8:
53 g_encoding = "UTF-8";
54 break;
55
56 case xml_lpm_fuzzer::Encoding::UTF16:
57 g_encoding = "UTF-16";
58 break;
59
60 case xml_lpm_fuzzer::Encoding::ISO88591:
61 g_encoding = "ISO-8859-1";
62 break;
63
64 case xml_lpm_fuzzer::Encoding::ASCII:
65 g_encoding = "US-ASCII";
66 break;
67
68 case xml_lpm_fuzzer::Encoding::NONE:
69 g_encoding = NULL;
70 break;
71
72 default:
73 g_encoding = "UNKNOWN";
74 break;
75 }
76 }
77
// Number of allocation requests (MallocHook and ReallocHook calls) seen so far.
static int g_allocation_count = 0;
// Ordinals of the allocation requests that must fail. The counter is
// incremented before it is compared, so the first request has ordinal 1.
static std::vector<int> g_fail_allocations = {};

// Counts one more allocation request and reports whether that request is
// listed in g_fail_allocations. Shared by MallocHook and ReallocHook so both
// apply exactly the same failure-injection rule.
static bool
ShouldFailNextAllocation() {
  g_allocation_count += 1;
  for (const int index : g_fail_allocations) {
    if (index == g_allocation_count) {
      return true;
    }
  }
  return false;
}

// malloc replacement: returns a null pointer for requests selected for
// failure, otherwise forwards to malloc.
void *
MallocHook(size_t size) {
  if (ShouldFailNextAllocation()) {
    return nullptr;
  }
  return malloc(size);
}

// realloc replacement: returns a null pointer for requests selected for
// failure (leaving ptr untouched), otherwise forwards to realloc.
void *
ReallocHook(void *ptr, size_t size) {
  if (ShouldFailNextAllocation()) {
    return nullptr;
  }
  return realloc(ptr, size);
}

// free replacement: forwards to free without touching the counter.
void
FreeHook(void *ptr) {
  free(ptr);
}
107
108 XML_Memory_Handling_Suite memory_handling_suite
109 = {MallocHook, ReallocHook, FreeHook};
110
111 void InitializeParser(XML_Parser parser);
112
113 // We want a parse function that supports resumption, so that we can cover the
114 // suspend/resume code.
115 enum XML_Status
Parse(XML_Parser parser,const char * input,int input_len,int is_final)116 Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
117 enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
118 while (status == XML_STATUS_SUSPENDED) {
119 status = XML_ResumeParser(parser);
120 }
121 return status;
122 }
123
124 // When the fuzzer is compiled with instrumentation such as ASan, then the
125 // accesses in TouchString will fault if they access invalid memory (ie. detect
126 // either a use-after-free or buffer-overflow). By calling TouchString in each
127 // of the callbacks, we can check that the arguments meet the API specifications
128 // in terms of length/null-termination. no_optimize is used to ensure that the
129 // compiler has to emit actual memory reads, instead of removing them.
130 static volatile size_t no_optimize = 0;
131 static void
TouchString(const XML_Char * ptr,int len=-1)132 TouchString(const XML_Char *ptr, int len = -1) {
133 if (! ptr) {
134 return;
135 }
136
137 if (len == -1) {
138 for (XML_Char value = *ptr++; value; value = *ptr++) {
139 no_optimize += value;
140 }
141 } else {
142 for (int i = 0; i < len; ++i) {
143 no_optimize += ptr[i];
144 }
145 }
146 }
147
148 static void
TouchNodeAndRecurse(XML_Content * content)149 TouchNodeAndRecurse(XML_Content *content) {
150 switch (content->type) {
151 case XML_CTYPE_EMPTY:
152 case XML_CTYPE_ANY:
153 assert(content->quant == XML_CQUANT_NONE);
154 assert(content->name == NULL);
155 assert(content->numchildren == 0);
156 assert(content->children == NULL);
157 break;
158
159 case XML_CTYPE_MIXED:
160 assert(content->quant == XML_CQUANT_NONE
161 || content->quant == XML_CQUANT_REP);
162 assert(content->name == NULL);
163 for (unsigned int i = 0; i < content->numchildren; ++i) {
164 assert(content->children[i].type == XML_CTYPE_NAME);
165 assert(content->children[i].quant == XML_CQUANT_NONE);
166 assert(content->children[i].numchildren == 0);
167 assert(content->children[i].children == NULL);
168 TouchString(content->children[i].name);
169 }
170 break;
171
172 case XML_CTYPE_NAME:
173 assert((content->quant == XML_CQUANT_NONE)
174 || (content->quant == XML_CQUANT_OPT)
175 || (content->quant == XML_CQUANT_REP)
176 || (content->quant == XML_CQUANT_PLUS));
177 assert(content->numchildren == 0);
178 assert(content->children == NULL);
179 TouchString(content->name);
180 break;
181
182 case XML_CTYPE_CHOICE:
183 case XML_CTYPE_SEQ:
184 assert((content->quant == XML_CQUANT_NONE)
185 || (content->quant == XML_CQUANT_OPT)
186 || (content->quant == XML_CQUANT_REP)
187 || (content->quant == XML_CQUANT_PLUS));
188 assert(content->name == NULL);
189 for (unsigned int i = 0; i < content->numchildren; ++i) {
190 TouchNodeAndRecurse(&content->children[i]);
191 }
192 break;
193
194 default:
195 assert(false);
196 }
197 }
198
199 static void XMLCALL
ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)200 ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
201 TouchString(name);
202 TouchNodeAndRecurse(model);
203 XML_FreeContentModel((XML_Parser)userData, model);
204 }
205
206 static void XMLCALL
AttlistDeclHandler(void * userData,const XML_Char * elname,const XML_Char * attname,const XML_Char * atttype,const XML_Char * dflt,int isrequired)207 AttlistDeclHandler(void *userData, const XML_Char *elname,
208 const XML_Char *attname, const XML_Char *atttype,
209 const XML_Char *dflt, int isrequired) {
210 (void)userData;
211 TouchString(elname);
212 TouchString(attname);
213 TouchString(atttype);
214 TouchString(dflt);
215 (void)isrequired;
216 }
217
218 static void XMLCALL
XmlDeclHandler(void * userData,const XML_Char * version,const XML_Char * encoding,int standalone)219 XmlDeclHandler(void *userData, const XML_Char *version,
220 const XML_Char *encoding, int standalone) {
221 (void)userData;
222 TouchString(version);
223 TouchString(encoding);
224 (void)standalone;
225 }
226
227 static void XMLCALL
StartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)228 StartElementHandler(void *userData, const XML_Char *name,
229 const XML_Char **atts) {
230 (void)userData;
231 TouchString(name);
232 for (size_t i = 0; atts[i] != NULL; ++i) {
233 TouchString(atts[i]);
234 }
235 }
236
237 static void XMLCALL
EndElementHandler(void * userData,const XML_Char * name)238 EndElementHandler(void *userData, const XML_Char *name) {
239 (void)userData;
240 TouchString(name);
241 }
242
243 static void XMLCALL
CharacterDataHandler(void * userData,const XML_Char * s,int len)244 CharacterDataHandler(void *userData, const XML_Char *s, int len) {
245 (void)userData;
246 TouchString(s, len);
247 }
248
249 static void XMLCALL
ProcessingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)250 ProcessingInstructionHandler(void *userData, const XML_Char *target,
251 const XML_Char *data) {
252 (void)userData;
253 TouchString(target);
254 TouchString(data);
255 }
256
257 static void XMLCALL
CommentHandler(void * userData,const XML_Char * data)258 CommentHandler(void *userData, const XML_Char *data) {
259 TouchString(data);
260 // Use the comment handler to trigger parser suspend, so that we can get
261 // coverage of that code.
262 XML_StopParser((XML_Parser)userData, XML_TRUE);
263 }
264
265 static void XMLCALL
StartCdataSectionHandler(void * userData)266 StartCdataSectionHandler(void *userData) {
267 (void)userData;
268 }
269
270 static void XMLCALL
EndCdataSectionHandler(void * userData)271 EndCdataSectionHandler(void *userData) {
272 (void)userData;
273 }
274
275 static void XMLCALL
DefaultHandler(void * userData,const XML_Char * s,int len)276 DefaultHandler(void *userData, const XML_Char *s, int len) {
277 (void)userData;
278 TouchString(s, len);
279 }
280
281 static void XMLCALL
StartDoctypeDeclHandler(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)282 StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
283 const XML_Char *sysid, const XML_Char *pubid,
284 int has_internal_subset) {
285 (void)userData;
286 TouchString(doctypeName);
287 TouchString(sysid);
288 TouchString(pubid);
289 (void)has_internal_subset;
290 }
291
292 static void XMLCALL
EndDoctypeDeclHandler(void * userData)293 EndDoctypeDeclHandler(void *userData) {
294 (void)userData;
295 }
296
297 static void XMLCALL
EntityDeclHandler(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)298 EntityDeclHandler(void *userData, const XML_Char *entityName,
299 int is_parameter_entity, const XML_Char *value,
300 int value_length, const XML_Char *base,
301 const XML_Char *systemId, const XML_Char *publicId,
302 const XML_Char *notationName) {
303 (void)userData;
304 TouchString(entityName);
305 (void)is_parameter_entity;
306 TouchString(value, value_length);
307 TouchString(base);
308 TouchString(systemId);
309 TouchString(publicId);
310 TouchString(notationName);
311 }
312
313 static void XMLCALL
NotationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)314 NotationDeclHandler(void *userData, const XML_Char *notationName,
315 const XML_Char *base, const XML_Char *systemId,
316 const XML_Char *publicId) {
317 (void)userData;
318 TouchString(notationName);
319 TouchString(base);
320 TouchString(systemId);
321 TouchString(publicId);
322 }
323
324 static void XMLCALL
StartNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)325 StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
326 const XML_Char *uri) {
327 (void)userData;
328 TouchString(prefix);
329 TouchString(uri);
330 }
331
332 static void XMLCALL
EndNamespaceDeclHandler(void * userData,const XML_Char * prefix)333 EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
334 (void)userData;
335 TouchString(prefix);
336 }
337
338 static int XMLCALL
NotStandaloneHandler(void * userData)339 NotStandaloneHandler(void *userData) {
340 (void)userData;
341 return XML_STATUS_OK;
342 }
343
344 static int XMLCALL
ExternalEntityRefHandler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)345 ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
346 const XML_Char *base, const XML_Char *systemId,
347 const XML_Char *publicId) {
348 int rc = XML_STATUS_ERROR;
349 TouchString(context);
350 TouchString(base);
351 TouchString(systemId);
352 TouchString(publicId);
353
354 if (g_external_entity) {
355 XML_Parser ext_parser
356 = XML_ExternalEntityParserCreate(parser, context, g_encoding);
357 rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
358 XML_ParserFree(ext_parser);
359 }
360
361 return rc;
362 }
363
364 static void XMLCALL
SkippedEntityHandler(void * userData,const XML_Char * entityName,int is_parameter_entity)365 SkippedEntityHandler(void *userData, const XML_Char *entityName,
366 int is_parameter_entity) {
367 (void)userData;
368 TouchString(entityName);
369 (void)is_parameter_entity;
370 }
371
372 static int XMLCALL
UnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)373 UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
374 XML_Encoding *info) {
375 (void)encodingHandlerData;
376 TouchString(name);
377 (void)info;
378 return XML_STATUS_ERROR;
379 }
380
381 void
InitializeParser(XML_Parser parser)382 InitializeParser(XML_Parser parser) {
383 XML_SetUserData(parser, (void *)parser);
384 XML_SetHashSalt(parser, 0x41414141);
385 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
386
387 XML_SetElementDeclHandler(parser, ElementDeclHandler);
388 XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
389 XML_SetXmlDeclHandler(parser, XmlDeclHandler);
390 XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
391 XML_SetCharacterDataHandler(parser, CharacterDataHandler);
392 XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
393 XML_SetCommentHandler(parser, CommentHandler);
394 XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
395 EndCdataSectionHandler);
396 // XML_SetDefaultHandler disables entity expansion
397 XML_SetDefaultHandlerExpand(parser, DefaultHandler);
398 XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
399 EndDoctypeDeclHandler);
400 // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
401 // and there isn't any significant code change between the two.
402 XML_SetEntityDeclHandler(parser, EntityDeclHandler);
403 XML_SetNotationDeclHandler(parser, NotationDeclHandler);
404 XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
405 EndNamespaceDeclHandler);
406 XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
407 XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
408 XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
409 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
410 }
411
// Fuzzer entry point. Replays one test case against a single parser:
//   - resets the allocation failure schedule from the test case,
//   - creates a parser with the configured encoding and the hooked allocator,
//   - executes the actions in order (feed a chunk, feed a final chunk, reset
//     the parser, or set the external entity content),
//   - frees the parser.
// Test cases without actions are skipped.
DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
  // Drop the external entity of the previous test case; the pointer refers to
  // data taken from that test case's actions.
  g_external_entity = nullptr;

  if (! testcase.actions_size()) {
    return;
  }

  g_allocation_count = 0;
  g_fail_allocations.clear();
  for (int i = 0; i < testcase.fail_allocations_size(); ++i) {
    g_fail_allocations.push_back(testcase.fail_allocations(i));
  }

  SetEncoding(testcase.encoding());
  XML_Parser parser
      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
  if (parser == nullptr) {
    // Creation allocates through the failure-injecting hooks, so it can be
    // made to fail by the test case; there is nothing to drive in that case.
    return;
  }
  InitializeParser(parser);

  // Returns the parser to its initial state and re-installs the handlers.
  const auto reset_parser = [parser]() {
    XML_ParserReset(parser, g_encoding);
    InitializeParser(parser);
  };

  for (int i = 0; i < testcase.actions_size(); ++i) {
    const auto &action = testcase.actions(i);
    switch (action.action_case()) {
    case xml_lpm_fuzzer::Action::kChunk: {
      const auto &chunk = action.chunk();
      // Parse takes an int length; make the narrowing explicit.
      if (XML_STATUS_ERROR
          == Parse(parser, chunk.data(), static_cast<int>(chunk.size()), 0)) {
        // Force a reset after parse error.
        reset_parser();
      }
      break;
    }

    case xml_lpm_fuzzer::Action::kLastChunk: {
      const auto &last_chunk = action.last_chunk();
      // The document ends here whatever the outcome, so always start over.
      Parse(parser, last_chunk.data(), static_cast<int>(last_chunk.size()), 1);
      reset_parser();
      break;
    }

    case xml_lpm_fuzzer::Action::kReset:
      reset_parser();
      break;

    case xml_lpm_fuzzer::Action::kExternalEntity:
      // Only record the content; ExternalEntityRefHandler consumes it.
      g_external_entity = action.external_entity().data();
      g_external_entity_size = action.external_entity().size();
      break;

    default:
      break;
    }
  }

  XML_ParserFree(parser);
}
465