1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 2022 Mark Brand <markbrand@google.com> 10 Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org> 11 Licensed under the MIT license: 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to permit 18 persons to whom the Software is furnished to do so, subject to the 19 following conditions: 20 21 The above copyright notice and this permission notice shall be included 22 in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 USE OR OTHER DEALINGS IN THE SOFTWARE. 31 */ 32 33 #if defined(NDEBUG) 34 # undef NDEBUG // because checks below rely on assert(...) 35 #endif 36 37 #include <assert.h> 38 #include <stdint.h> 39 #include <vector> 40 41 #include "expat.h" 42 #include "xml_lpm_fuzzer.pb.h" 43 #include "src/libfuzzer/libfuzzer_macro.h" 44 45 static const char *g_encoding = nullptr; 46 static const char *g_external_entity = nullptr; 47 static size_t g_external_entity_size = 0; 48 49 void 50 SetEncoding(const xml_lpm_fuzzer::Encoding &e) { 51 switch (e) { 52 case xml_lpm_fuzzer::Encoding::UTF8: 53 g_encoding = "UTF-8"; 54 break; 55 56 case xml_lpm_fuzzer::Encoding::UTF16: 57 g_encoding = "UTF-16"; 58 break; 59 60 case xml_lpm_fuzzer::Encoding::ISO88591: 61 g_encoding = "ISO-8859-1"; 62 break; 63 64 case xml_lpm_fuzzer::Encoding::ASCII: 65 g_encoding = "US-ASCII"; 66 break; 67 68 case xml_lpm_fuzzer::Encoding::NONE: 69 g_encoding = NULL; 70 break; 71 72 default: 73 g_encoding = "UNKNOWN"; 74 break; 75 } 76 } 77 78 static int g_allocation_count = 0; 79 static std::vector<int> g_fail_allocations = {}; 80 81 void * 82 MallocHook(size_t size) { 83 g_allocation_count += 1; 84 for (auto index : g_fail_allocations) { 85 if (index == g_allocation_count) { 86 return NULL; 87 } 88 } 89 return malloc(size); 90 } 91 92 void * 93 ReallocHook(void *ptr, size_t size) { 94 g_allocation_count += 1; 95 for (auto index : g_fail_allocations) { 96 if (index == g_allocation_count) { 97 return NULL; 98 } 99 } 100 return realloc(ptr, size); 101 } 102 103 void 104 FreeHook(void *ptr) { 105 free(ptr); 106 } 107 108 XML_Memory_Handling_Suite memory_handling_suite 109 = {MallocHook, ReallocHook, FreeHook}; 110 111 void InitializeParser(XML_Parser parser); 112 113 // We want a parse function that supports resumption, so that we can cover the 114 // suspend/resume code. 115 enum XML_Status 116 Parse(XML_Parser parser, const char *input, int input_len, int is_final) { 117 enum XML_Status status = XML_Parse(parser, input, input_len, is_final); 118 while (status == XML_STATUS_SUSPENDED) { 119 status = XML_ResumeParser(parser); 120 } 121 return status; 122 } 123 124 // When the fuzzer is compiled with instrumentation such as ASan, then the 125 // accesses in TouchString will fault if they access invalid memory (ie. detect 126 // either a use-after-free or buffer-overflow). By calling TouchString in each 127 // of the callbacks, we can check that the arguments meet the API specifications 128 // in terms of length/null-termination. no_optimize is used to ensure that the 129 // compiler has to emit actual memory reads, instead of removing them. 130 static volatile size_t no_optimize = 0; 131 static void 132 TouchString(const XML_Char *ptr, int len = -1) { 133 if (! ptr) { 134 return; 135 } 136 137 if (len == -1) { 138 for (XML_Char value = *ptr++; value; value = *ptr++) { 139 no_optimize += value; 140 } 141 } else { 142 for (int i = 0; i < len; ++i) { 143 no_optimize += ptr[i]; 144 } 145 } 146 } 147 148 static void 149 TouchNodeAndRecurse(XML_Content *content) { 150 switch (content->type) { 151 case XML_CTYPE_EMPTY: 152 case XML_CTYPE_ANY: 153 assert(content->quant == XML_CQUANT_NONE); 154 assert(content->name == NULL); 155 assert(content->numchildren == 0); 156 assert(content->children == NULL); 157 break; 158 159 case XML_CTYPE_MIXED: 160 assert(content->quant == XML_CQUANT_NONE 161 || content->quant == XML_CQUANT_REP); 162 assert(content->name == NULL); 163 for (unsigned int i = 0; i < content->numchildren; ++i) { 164 assert(content->children[i].type == XML_CTYPE_NAME); 165 assert(content->children[i].quant == XML_CQUANT_NONE); 166 assert(content->children[i].numchildren == 0); 167 assert(content->children[i].children == NULL); 168 TouchString(content->children[i].name); 169 } 170 break; 171 172 case XML_CTYPE_NAME: 173 assert((content->quant == XML_CQUANT_NONE) 174 || (content->quant == XML_CQUANT_OPT) 175 || (content->quant == XML_CQUANT_REP) 176 || (content->quant == XML_CQUANT_PLUS)); 177 assert(content->numchildren == 0); 178 assert(content->children == NULL); 179 TouchString(content->name); 180 break; 181 182 case XML_CTYPE_CHOICE: 183 case XML_CTYPE_SEQ: 184 assert((content->quant == XML_CQUANT_NONE) 185 || (content->quant == XML_CQUANT_OPT) 186 || (content->quant == XML_CQUANT_REP) 187 || (content->quant == XML_CQUANT_PLUS)); 188 assert(content->name == NULL); 189 for (unsigned int i = 0; i < content->numchildren; ++i) { 190 TouchNodeAndRecurse(&content->children[i]); 191 } 192 break; 193 194 default: 195 assert(false); 196 } 197 } 198 199 static void XMLCALL 200 ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) { 201 TouchString(name); 202 TouchNodeAndRecurse(model); 203 XML_FreeContentModel((XML_Parser)userData, model); 204 } 205 206 static void XMLCALL 207 AttlistDeclHandler(void *userData, const XML_Char *elname, 208 const XML_Char *attname, const XML_Char *atttype, 209 const XML_Char *dflt, int isrequired) { 210 (void)userData; 211 TouchString(elname); 212 TouchString(attname); 213 TouchString(atttype); 214 TouchString(dflt); 215 (void)isrequired; 216 } 217 218 static void XMLCALL 219 XmlDeclHandler(void *userData, const XML_Char *version, 220 const XML_Char *encoding, int standalone) { 221 (void)userData; 222 TouchString(version); 223 TouchString(encoding); 224 (void)standalone; 225 } 226 227 static void XMLCALL 228 StartElementHandler(void *userData, const XML_Char *name, 229 const XML_Char **atts) { 230 (void)userData; 231 TouchString(name); 232 for (size_t i = 0; atts[i] != NULL; ++i) { 233 TouchString(atts[i]); 234 } 235 } 236 237 static void XMLCALL 238 EndElementHandler(void *userData, const XML_Char *name) { 239 (void)userData; 240 TouchString(name); 241 } 242 243 static void XMLCALL 244 CharacterDataHandler(void *userData, const XML_Char *s, int len) { 245 (void)userData; 246 TouchString(s, len); 247 } 248 249 static void XMLCALL 250 ProcessingInstructionHandler(void *userData, const XML_Char *target, 251 const XML_Char *data) { 252 (void)userData; 253 TouchString(target); 254 TouchString(data); 255 } 256 257 static void XMLCALL 258 CommentHandler(void *userData, const XML_Char *data) { 259 TouchString(data); 260 // Use the comment handler to trigger parser suspend, so that we can get 261 // coverage of that code. 262 XML_StopParser((XML_Parser)userData, XML_TRUE); 263 } 264 265 static void XMLCALL 266 StartCdataSectionHandler(void *userData) { 267 (void)userData; 268 } 269 270 static void XMLCALL 271 EndCdataSectionHandler(void *userData) { 272 (void)userData; 273 } 274 275 static void XMLCALL 276 DefaultHandler(void *userData, const XML_Char *s, int len) { 277 (void)userData; 278 TouchString(s, len); 279 } 280 281 static void XMLCALL 282 StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName, 283 const XML_Char *sysid, const XML_Char *pubid, 284 int has_internal_subset) { 285 (void)userData; 286 TouchString(doctypeName); 287 TouchString(sysid); 288 TouchString(pubid); 289 (void)has_internal_subset; 290 } 291 292 static void XMLCALL 293 EndDoctypeDeclHandler(void *userData) { 294 (void)userData; 295 } 296 297 static void XMLCALL 298 EntityDeclHandler(void *userData, const XML_Char *entityName, 299 int is_parameter_entity, const XML_Char *value, 300 int value_length, const XML_Char *base, 301 const XML_Char *systemId, const XML_Char *publicId, 302 const XML_Char *notationName) { 303 (void)userData; 304 TouchString(entityName); 305 (void)is_parameter_entity; 306 TouchString(value, value_length); 307 TouchString(base); 308 TouchString(systemId); 309 TouchString(publicId); 310 TouchString(notationName); 311 } 312 313 static void XMLCALL 314 NotationDeclHandler(void *userData, const XML_Char *notationName, 315 const XML_Char *base, const XML_Char *systemId, 316 const XML_Char *publicId) { 317 (void)userData; 318 TouchString(notationName); 319 TouchString(base); 320 TouchString(systemId); 321 TouchString(publicId); 322 } 323 324 static void XMLCALL 325 StartNamespaceDeclHandler(void *userData, const XML_Char *prefix, 326 const XML_Char *uri) { 327 (void)userData; 328 TouchString(prefix); 329 TouchString(uri); 330 } 331 332 static void XMLCALL 333 EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) { 334 (void)userData; 335 TouchString(prefix); 336 } 337 338 static int XMLCALL 339 NotStandaloneHandler(void *userData) { 340 (void)userData; 341 return XML_STATUS_OK; 342 } 343 344 static int XMLCALL 345 ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context, 346 const XML_Char *base, const XML_Char *systemId, 347 const XML_Char *publicId) { 348 int rc = XML_STATUS_ERROR; 349 TouchString(context); 350 TouchString(base); 351 TouchString(systemId); 352 TouchString(publicId); 353 354 if (g_external_entity) { 355 XML_Parser ext_parser 356 = XML_ExternalEntityParserCreate(parser, context, g_encoding); 357 rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1); 358 XML_ParserFree(ext_parser); 359 } 360 361 return rc; 362 } 363 364 static void XMLCALL 365 SkippedEntityHandler(void *userData, const XML_Char *entityName, 366 int is_parameter_entity) { 367 (void)userData; 368 TouchString(entityName); 369 (void)is_parameter_entity; 370 } 371 372 static int XMLCALL 373 UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name, 374 XML_Encoding *info) { 375 (void)encodingHandlerData; 376 TouchString(name); 377 (void)info; 378 return XML_STATUS_ERROR; 379 } 380 381 void 382 InitializeParser(XML_Parser parser) { 383 XML_SetUserData(parser, (void *)parser); 384 XML_SetHashSalt(parser, 0x41414141); 385 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 386 387 XML_SetElementDeclHandler(parser, ElementDeclHandler); 388 XML_SetAttlistDeclHandler(parser, AttlistDeclHandler); 389 XML_SetXmlDeclHandler(parser, XmlDeclHandler); 390 XML_SetElementHandler(parser, StartElementHandler, EndElementHandler); 391 XML_SetCharacterDataHandler(parser, CharacterDataHandler); 392 XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler); 393 XML_SetCommentHandler(parser, CommentHandler); 394 XML_SetCdataSectionHandler(parser, StartCdataSectionHandler, 395 EndCdataSectionHandler); 396 // XML_SetDefaultHandler disables entity expansion 397 XML_SetDefaultHandlerExpand(parser, DefaultHandler); 398 XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler, 399 EndDoctypeDeclHandler); 400 // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler, 401 // and there isn't any significant code change between the two. 402 XML_SetEntityDeclHandler(parser, EntityDeclHandler); 403 XML_SetNotationDeclHandler(parser, NotationDeclHandler); 404 XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler, 405 EndNamespaceDeclHandler); 406 XML_SetNotStandaloneHandler(parser, NotStandaloneHandler); 407 XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler); 408 XML_SetSkippedEntityHandler(parser, SkippedEntityHandler); 409 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser); 410 } 411 412 DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) { 413 g_external_entity = nullptr; 414 415 if (! testcase.actions_size()) { 416 return; 417 } 418 419 g_allocation_count = 0; 420 g_fail_allocations.clear(); 421 for (int i = 0; i < testcase.fail_allocations_size(); ++i) { 422 g_fail_allocations.push_back(testcase.fail_allocations(i)); 423 } 424 425 SetEncoding(testcase.encoding()); 426 XML_Parser parser 427 = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|"); 428 InitializeParser(parser); 429 430 for (int i = 0; i < testcase.actions_size(); ++i) { 431 const auto &action = testcase.actions(i); 432 switch (action.action_case()) { 433 case xml_lpm_fuzzer::Action::kChunk: 434 if (XML_STATUS_ERROR 435 == Parse(parser, action.chunk().data(), action.chunk().size(), 0)) { 436 // Force a reset after parse error. 437 XML_ParserReset(parser, g_encoding); 438 InitializeParser(parser); 439 } 440 break; 441 442 case xml_lpm_fuzzer::Action::kLastChunk: 443 Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1); 444 XML_ParserReset(parser, g_encoding); 445 InitializeParser(parser); 446 break; 447 448 case xml_lpm_fuzzer::Action::kReset: 449 XML_ParserReset(parser, g_encoding); 450 InitializeParser(parser); 451 break; 452 453 case xml_lpm_fuzzer::Action::kExternalEntity: 454 g_external_entity = action.external_entity().data(); 455 g_external_entity_size = action.external_entity().size(); 456 break; 457 458 default: 459 break; 460 } 461 } 462 463 XML_ParserFree(parser); 464 } 465