/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2022 Mark Brand <markbrand@google.com>
   Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#if defined(NDEBUG)
#  undef NDEBUG // because checks below rely on assert(...)
#endif

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#include <algorithm>
#include <vector>

#include "expat.h"
#include "xml_lpm_fuzzer.pb.h"
#include "src/libfuzzer/libfuzzer_macro.h"

// Encoding name passed to expat whenever a parser is created or reset. It is
// chosen by SetEncoding(); nullptr means no encoding name is passed.
static const char *g_encoding{nullptr};
// Payload (and its length in bytes) that ExternalEntityRefHandler parses when
// an external entity is referenced; nullptr until a testcase action sets it.
static const char *g_external_entity{nullptr};
static size_t g_external_entity_size{0};

49*fe927888SPhilip Paeps void
SetEncoding(const xml_lpm_fuzzer::Encoding & e)50*fe927888SPhilip Paeps SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
51*fe927888SPhilip Paeps switch (e) {
52*fe927888SPhilip Paeps case xml_lpm_fuzzer::Encoding::UTF8:
53*fe927888SPhilip Paeps g_encoding = "UTF-8";
54*fe927888SPhilip Paeps break;
55*fe927888SPhilip Paeps
56*fe927888SPhilip Paeps case xml_lpm_fuzzer::Encoding::UTF16:
57*fe927888SPhilip Paeps g_encoding = "UTF-16";
58*fe927888SPhilip Paeps break;
59*fe927888SPhilip Paeps
60*fe927888SPhilip Paeps case xml_lpm_fuzzer::Encoding::ISO88591:
61*fe927888SPhilip Paeps g_encoding = "ISO-8859-1";
62*fe927888SPhilip Paeps break;
63*fe927888SPhilip Paeps
64*fe927888SPhilip Paeps case xml_lpm_fuzzer::Encoding::ASCII:
65*fe927888SPhilip Paeps g_encoding = "US-ASCII";
66*fe927888SPhilip Paeps break;
67*fe927888SPhilip Paeps
68*fe927888SPhilip Paeps case xml_lpm_fuzzer::Encoding::NONE:
69*fe927888SPhilip Paeps g_encoding = NULL;
70*fe927888SPhilip Paeps break;
71*fe927888SPhilip Paeps
72*fe927888SPhilip Paeps default:
73*fe927888SPhilip Paeps g_encoding = "UNKNOWN";
74*fe927888SPhilip Paeps break;
75*fe927888SPhilip Paeps }
76*fe927888SPhilip Paeps }
77*fe927888SPhilip Paeps
// Number of MallocHook/ReallocHook calls seen so far; the fuzzer entry point
// resets it to zero for every testcase.
static int g_allocation_count = 0;
// 1-based ordinals of the allocation requests that must be reported as failed.
static std::vector<int> g_fail_allocations = {};

// Accounts for one more allocation request and reports whether its ordinal was
// selected for failure injection. Shared by MallocHook and ReallocHook so that
// both count and fail requests in exactly the same way.
static bool
ShouldFailThisAllocation() {
  g_allocation_count += 1;
  return std::find(g_fail_allocations.begin(), g_fail_allocations.end(),
                   g_allocation_count)
         != g_fail_allocations.end();
}

// malloc replacement handed to expat: returns a null pointer for requests
// selected in g_fail_allocations and forwards to malloc otherwise.
void *
MallocHook(size_t size) {
  if (ShouldFailThisAllocation()) {
    return nullptr;
  }
  return malloc(size);
}

// realloc replacement handed to expat: returns a null pointer for requests
// selected in g_fail_allocations (ptr is not touched in that case) and
// forwards to realloc otherwise.
void *
ReallocHook(void *ptr, size_t size) {
  if (ShouldFailThisAllocation()) {
    return nullptr;
  }
  return realloc(ptr, size);
}

// free replacement handed to expat. Releases are forwarded unconditionally and
// do not take part in the allocation counting.
void
FreeHook(void *ptr) {
  ::free(ptr);
}

108*fe927888SPhilip Paeps XML_Memory_Handling_Suite memory_handling_suite
109*fe927888SPhilip Paeps = {MallocHook, ReallocHook, FreeHook};
110*fe927888SPhilip Paeps
111*fe927888SPhilip Paeps void InitializeParser(XML_Parser parser);
112*fe927888SPhilip Paeps
113*fe927888SPhilip Paeps // We want a parse function that supports resumption, so that we can cover the
114*fe927888SPhilip Paeps // suspend/resume code.
115*fe927888SPhilip Paeps enum XML_Status
Parse(XML_Parser parser,const char * input,int input_len,int is_final)116*fe927888SPhilip Paeps Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
117*fe927888SPhilip Paeps enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
118*fe927888SPhilip Paeps while (status == XML_STATUS_SUSPENDED) {
119*fe927888SPhilip Paeps status = XML_ResumeParser(parser);
120*fe927888SPhilip Paeps }
121*fe927888SPhilip Paeps return status;
122*fe927888SPhilip Paeps }
123*fe927888SPhilip Paeps
124*fe927888SPhilip Paeps // When the fuzzer is compiled with instrumentation such as ASan, then the
125*fe927888SPhilip Paeps // accesses in TouchString will fault if they access invalid memory (ie. detect
126*fe927888SPhilip Paeps // either a use-after-free or buffer-overflow). By calling TouchString in each
127*fe927888SPhilip Paeps // of the callbacks, we can check that the arguments meet the API specifications
128*fe927888SPhilip Paeps // in terms of length/null-termination. no_optimize is used to ensure that the
129*fe927888SPhilip Paeps // compiler has to emit actual memory reads, instead of removing them.
130*fe927888SPhilip Paeps static volatile size_t no_optimize = 0;
131*fe927888SPhilip Paeps static void
TouchString(const XML_Char * ptr,int len=-1)132*fe927888SPhilip Paeps TouchString(const XML_Char *ptr, int len = -1) {
133*fe927888SPhilip Paeps if (! ptr) {
134*fe927888SPhilip Paeps return;
135*fe927888SPhilip Paeps }
136*fe927888SPhilip Paeps
137*fe927888SPhilip Paeps if (len == -1) {
138*fe927888SPhilip Paeps for (XML_Char value = *ptr++; value; value = *ptr++) {
139*fe927888SPhilip Paeps no_optimize += value;
140*fe927888SPhilip Paeps }
141*fe927888SPhilip Paeps } else {
142*fe927888SPhilip Paeps for (int i = 0; i < len; ++i) {
143*fe927888SPhilip Paeps no_optimize += ptr[i];
144*fe927888SPhilip Paeps }
145*fe927888SPhilip Paeps }
146*fe927888SPhilip Paeps }
147*fe927888SPhilip Paeps
148*fe927888SPhilip Paeps static void
TouchNodeAndRecurse(XML_Content * content)149*fe927888SPhilip Paeps TouchNodeAndRecurse(XML_Content *content) {
150*fe927888SPhilip Paeps switch (content->type) {
151*fe927888SPhilip Paeps case XML_CTYPE_EMPTY:
152*fe927888SPhilip Paeps case XML_CTYPE_ANY:
153*fe927888SPhilip Paeps assert(content->quant == XML_CQUANT_NONE);
154*fe927888SPhilip Paeps assert(content->name == NULL);
155*fe927888SPhilip Paeps assert(content->numchildren == 0);
156*fe927888SPhilip Paeps assert(content->children == NULL);
157*fe927888SPhilip Paeps break;
158*fe927888SPhilip Paeps
159*fe927888SPhilip Paeps case XML_CTYPE_MIXED:
160*fe927888SPhilip Paeps assert(content->quant == XML_CQUANT_NONE
161*fe927888SPhilip Paeps || content->quant == XML_CQUANT_REP);
162*fe927888SPhilip Paeps assert(content->name == NULL);
163*fe927888SPhilip Paeps for (unsigned int i = 0; i < content->numchildren; ++i) {
164*fe927888SPhilip Paeps assert(content->children[i].type == XML_CTYPE_NAME);
165*fe927888SPhilip Paeps assert(content->children[i].quant == XML_CQUANT_NONE);
166*fe927888SPhilip Paeps assert(content->children[i].numchildren == 0);
167*fe927888SPhilip Paeps assert(content->children[i].children == NULL);
168*fe927888SPhilip Paeps TouchString(content->children[i].name);
169*fe927888SPhilip Paeps }
170*fe927888SPhilip Paeps break;
171*fe927888SPhilip Paeps
172*fe927888SPhilip Paeps case XML_CTYPE_NAME:
173*fe927888SPhilip Paeps assert((content->quant == XML_CQUANT_NONE)
174*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_OPT)
175*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_REP)
176*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_PLUS));
177*fe927888SPhilip Paeps assert(content->numchildren == 0);
178*fe927888SPhilip Paeps assert(content->children == NULL);
179*fe927888SPhilip Paeps TouchString(content->name);
180*fe927888SPhilip Paeps break;
181*fe927888SPhilip Paeps
182*fe927888SPhilip Paeps case XML_CTYPE_CHOICE:
183*fe927888SPhilip Paeps case XML_CTYPE_SEQ:
184*fe927888SPhilip Paeps assert((content->quant == XML_CQUANT_NONE)
185*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_OPT)
186*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_REP)
187*fe927888SPhilip Paeps || (content->quant == XML_CQUANT_PLUS));
188*fe927888SPhilip Paeps assert(content->name == NULL);
189*fe927888SPhilip Paeps for (unsigned int i = 0; i < content->numchildren; ++i) {
190*fe927888SPhilip Paeps TouchNodeAndRecurse(&content->children[i]);
191*fe927888SPhilip Paeps }
192*fe927888SPhilip Paeps break;
193*fe927888SPhilip Paeps
194*fe927888SPhilip Paeps default:
195*fe927888SPhilip Paeps assert(false);
196*fe927888SPhilip Paeps }
197*fe927888SPhilip Paeps }
198*fe927888SPhilip Paeps
199*fe927888SPhilip Paeps static void XMLCALL
ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)200*fe927888SPhilip Paeps ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
201*fe927888SPhilip Paeps TouchString(name);
202*fe927888SPhilip Paeps TouchNodeAndRecurse(model);
203*fe927888SPhilip Paeps XML_FreeContentModel((XML_Parser)userData, model);
204*fe927888SPhilip Paeps }
205*fe927888SPhilip Paeps
206*fe927888SPhilip Paeps static void XMLCALL
AttlistDeclHandler(void * userData,const XML_Char * elname,const XML_Char * attname,const XML_Char * atttype,const XML_Char * dflt,int isrequired)207*fe927888SPhilip Paeps AttlistDeclHandler(void *userData, const XML_Char *elname,
208*fe927888SPhilip Paeps const XML_Char *attname, const XML_Char *atttype,
209*fe927888SPhilip Paeps const XML_Char *dflt, int isrequired) {
210*fe927888SPhilip Paeps (void)userData;
211*fe927888SPhilip Paeps TouchString(elname);
212*fe927888SPhilip Paeps TouchString(attname);
213*fe927888SPhilip Paeps TouchString(atttype);
214*fe927888SPhilip Paeps TouchString(dflt);
215*fe927888SPhilip Paeps (void)isrequired;
216*fe927888SPhilip Paeps }
217*fe927888SPhilip Paeps
218*fe927888SPhilip Paeps static void XMLCALL
XmlDeclHandler(void * userData,const XML_Char * version,const XML_Char * encoding,int standalone)219*fe927888SPhilip Paeps XmlDeclHandler(void *userData, const XML_Char *version,
220*fe927888SPhilip Paeps const XML_Char *encoding, int standalone) {
221*fe927888SPhilip Paeps (void)userData;
222*fe927888SPhilip Paeps TouchString(version);
223*fe927888SPhilip Paeps TouchString(encoding);
224*fe927888SPhilip Paeps (void)standalone;
225*fe927888SPhilip Paeps }
226*fe927888SPhilip Paeps
227*fe927888SPhilip Paeps static void XMLCALL
StartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)228*fe927888SPhilip Paeps StartElementHandler(void *userData, const XML_Char *name,
229*fe927888SPhilip Paeps const XML_Char **atts) {
230*fe927888SPhilip Paeps (void)userData;
231*fe927888SPhilip Paeps TouchString(name);
232*fe927888SPhilip Paeps for (size_t i = 0; atts[i] != NULL; ++i) {
233*fe927888SPhilip Paeps TouchString(atts[i]);
234*fe927888SPhilip Paeps }
235*fe927888SPhilip Paeps }
236*fe927888SPhilip Paeps
237*fe927888SPhilip Paeps static void XMLCALL
EndElementHandler(void * userData,const XML_Char * name)238*fe927888SPhilip Paeps EndElementHandler(void *userData, const XML_Char *name) {
239*fe927888SPhilip Paeps (void)userData;
240*fe927888SPhilip Paeps TouchString(name);
241*fe927888SPhilip Paeps }
242*fe927888SPhilip Paeps
243*fe927888SPhilip Paeps static void XMLCALL
CharacterDataHandler(void * userData,const XML_Char * s,int len)244*fe927888SPhilip Paeps CharacterDataHandler(void *userData, const XML_Char *s, int len) {
245*fe927888SPhilip Paeps (void)userData;
246*fe927888SPhilip Paeps TouchString(s, len);
247*fe927888SPhilip Paeps }
248*fe927888SPhilip Paeps
249*fe927888SPhilip Paeps static void XMLCALL
ProcessingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)250*fe927888SPhilip Paeps ProcessingInstructionHandler(void *userData, const XML_Char *target,
251*fe927888SPhilip Paeps const XML_Char *data) {
252*fe927888SPhilip Paeps (void)userData;
253*fe927888SPhilip Paeps TouchString(target);
254*fe927888SPhilip Paeps TouchString(data);
255*fe927888SPhilip Paeps }
256*fe927888SPhilip Paeps
257*fe927888SPhilip Paeps static void XMLCALL
CommentHandler(void * userData,const XML_Char * data)258*fe927888SPhilip Paeps CommentHandler(void *userData, const XML_Char *data) {
259*fe927888SPhilip Paeps TouchString(data);
260*fe927888SPhilip Paeps // Use the comment handler to trigger parser suspend, so that we can get
261*fe927888SPhilip Paeps // coverage of that code.
262*fe927888SPhilip Paeps XML_StopParser((XML_Parser)userData, XML_TRUE);
263*fe927888SPhilip Paeps }
264*fe927888SPhilip Paeps
265*fe927888SPhilip Paeps static void XMLCALL
StartCdataSectionHandler(void * userData)266*fe927888SPhilip Paeps StartCdataSectionHandler(void *userData) {
267*fe927888SPhilip Paeps (void)userData;
268*fe927888SPhilip Paeps }
269*fe927888SPhilip Paeps
270*fe927888SPhilip Paeps static void XMLCALL
EndCdataSectionHandler(void * userData)271*fe927888SPhilip Paeps EndCdataSectionHandler(void *userData) {
272*fe927888SPhilip Paeps (void)userData;
273*fe927888SPhilip Paeps }
274*fe927888SPhilip Paeps
275*fe927888SPhilip Paeps static void XMLCALL
DefaultHandler(void * userData,const XML_Char * s,int len)276*fe927888SPhilip Paeps DefaultHandler(void *userData, const XML_Char *s, int len) {
277*fe927888SPhilip Paeps (void)userData;
278*fe927888SPhilip Paeps TouchString(s, len);
279*fe927888SPhilip Paeps }
280*fe927888SPhilip Paeps
281*fe927888SPhilip Paeps static void XMLCALL
StartDoctypeDeclHandler(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)282*fe927888SPhilip Paeps StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
283*fe927888SPhilip Paeps const XML_Char *sysid, const XML_Char *pubid,
284*fe927888SPhilip Paeps int has_internal_subset) {
285*fe927888SPhilip Paeps (void)userData;
286*fe927888SPhilip Paeps TouchString(doctypeName);
287*fe927888SPhilip Paeps TouchString(sysid);
288*fe927888SPhilip Paeps TouchString(pubid);
289*fe927888SPhilip Paeps (void)has_internal_subset;
290*fe927888SPhilip Paeps }
291*fe927888SPhilip Paeps
292*fe927888SPhilip Paeps static void XMLCALL
EndDoctypeDeclHandler(void * userData)293*fe927888SPhilip Paeps EndDoctypeDeclHandler(void *userData) {
294*fe927888SPhilip Paeps (void)userData;
295*fe927888SPhilip Paeps }
296*fe927888SPhilip Paeps
297*fe927888SPhilip Paeps static void XMLCALL
EntityDeclHandler(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)298*fe927888SPhilip Paeps EntityDeclHandler(void *userData, const XML_Char *entityName,
299*fe927888SPhilip Paeps int is_parameter_entity, const XML_Char *value,
300*fe927888SPhilip Paeps int value_length, const XML_Char *base,
301*fe927888SPhilip Paeps const XML_Char *systemId, const XML_Char *publicId,
302*fe927888SPhilip Paeps const XML_Char *notationName) {
303*fe927888SPhilip Paeps (void)userData;
304*fe927888SPhilip Paeps TouchString(entityName);
305*fe927888SPhilip Paeps (void)is_parameter_entity;
306*fe927888SPhilip Paeps TouchString(value, value_length);
307*fe927888SPhilip Paeps TouchString(base);
308*fe927888SPhilip Paeps TouchString(systemId);
309*fe927888SPhilip Paeps TouchString(publicId);
310*fe927888SPhilip Paeps TouchString(notationName);
311*fe927888SPhilip Paeps }
312*fe927888SPhilip Paeps
313*fe927888SPhilip Paeps static void XMLCALL
NotationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)314*fe927888SPhilip Paeps NotationDeclHandler(void *userData, const XML_Char *notationName,
315*fe927888SPhilip Paeps const XML_Char *base, const XML_Char *systemId,
316*fe927888SPhilip Paeps const XML_Char *publicId) {
317*fe927888SPhilip Paeps (void)userData;
318*fe927888SPhilip Paeps TouchString(notationName);
319*fe927888SPhilip Paeps TouchString(base);
320*fe927888SPhilip Paeps TouchString(systemId);
321*fe927888SPhilip Paeps TouchString(publicId);
322*fe927888SPhilip Paeps }
323*fe927888SPhilip Paeps
324*fe927888SPhilip Paeps static void XMLCALL
StartNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)325*fe927888SPhilip Paeps StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
326*fe927888SPhilip Paeps const XML_Char *uri) {
327*fe927888SPhilip Paeps (void)userData;
328*fe927888SPhilip Paeps TouchString(prefix);
329*fe927888SPhilip Paeps TouchString(uri);
330*fe927888SPhilip Paeps }
331*fe927888SPhilip Paeps
332*fe927888SPhilip Paeps static void XMLCALL
EndNamespaceDeclHandler(void * userData,const XML_Char * prefix)333*fe927888SPhilip Paeps EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
334*fe927888SPhilip Paeps (void)userData;
335*fe927888SPhilip Paeps TouchString(prefix);
336*fe927888SPhilip Paeps }
337*fe927888SPhilip Paeps
338*fe927888SPhilip Paeps static int XMLCALL
NotStandaloneHandler(void * userData)339*fe927888SPhilip Paeps NotStandaloneHandler(void *userData) {
340*fe927888SPhilip Paeps (void)userData;
341*fe927888SPhilip Paeps return XML_STATUS_OK;
342*fe927888SPhilip Paeps }
343*fe927888SPhilip Paeps
344*fe927888SPhilip Paeps static int XMLCALL
ExternalEntityRefHandler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)345*fe927888SPhilip Paeps ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
346*fe927888SPhilip Paeps const XML_Char *base, const XML_Char *systemId,
347*fe927888SPhilip Paeps const XML_Char *publicId) {
348*fe927888SPhilip Paeps int rc = XML_STATUS_ERROR;
349*fe927888SPhilip Paeps TouchString(context);
350*fe927888SPhilip Paeps TouchString(base);
351*fe927888SPhilip Paeps TouchString(systemId);
352*fe927888SPhilip Paeps TouchString(publicId);
353*fe927888SPhilip Paeps
354*fe927888SPhilip Paeps if (g_external_entity) {
355*fe927888SPhilip Paeps XML_Parser ext_parser
356*fe927888SPhilip Paeps = XML_ExternalEntityParserCreate(parser, context, g_encoding);
357*fe927888SPhilip Paeps rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
358*fe927888SPhilip Paeps XML_ParserFree(ext_parser);
359*fe927888SPhilip Paeps }
360*fe927888SPhilip Paeps
361*fe927888SPhilip Paeps return rc;
362*fe927888SPhilip Paeps }
363*fe927888SPhilip Paeps
364*fe927888SPhilip Paeps static void XMLCALL
SkippedEntityHandler(void * userData,const XML_Char * entityName,int is_parameter_entity)365*fe927888SPhilip Paeps SkippedEntityHandler(void *userData, const XML_Char *entityName,
366*fe927888SPhilip Paeps int is_parameter_entity) {
367*fe927888SPhilip Paeps (void)userData;
368*fe927888SPhilip Paeps TouchString(entityName);
369*fe927888SPhilip Paeps (void)is_parameter_entity;
370*fe927888SPhilip Paeps }
371*fe927888SPhilip Paeps
372*fe927888SPhilip Paeps static int XMLCALL
UnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)373*fe927888SPhilip Paeps UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
374*fe927888SPhilip Paeps XML_Encoding *info) {
375*fe927888SPhilip Paeps (void)encodingHandlerData;
376*fe927888SPhilip Paeps TouchString(name);
377*fe927888SPhilip Paeps (void)info;
378*fe927888SPhilip Paeps return XML_STATUS_ERROR;
379*fe927888SPhilip Paeps }
380*fe927888SPhilip Paeps
381*fe927888SPhilip Paeps void
InitializeParser(XML_Parser parser)382*fe927888SPhilip Paeps InitializeParser(XML_Parser parser) {
383*fe927888SPhilip Paeps XML_SetUserData(parser, (void *)parser);
384*fe927888SPhilip Paeps XML_SetHashSalt(parser, 0x41414141);
385*fe927888SPhilip Paeps XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
386*fe927888SPhilip Paeps
387*fe927888SPhilip Paeps XML_SetElementDeclHandler(parser, ElementDeclHandler);
388*fe927888SPhilip Paeps XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
389*fe927888SPhilip Paeps XML_SetXmlDeclHandler(parser, XmlDeclHandler);
390*fe927888SPhilip Paeps XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
391*fe927888SPhilip Paeps XML_SetCharacterDataHandler(parser, CharacterDataHandler);
392*fe927888SPhilip Paeps XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
393*fe927888SPhilip Paeps XML_SetCommentHandler(parser, CommentHandler);
394*fe927888SPhilip Paeps XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
395*fe927888SPhilip Paeps EndCdataSectionHandler);
396*fe927888SPhilip Paeps // XML_SetDefaultHandler disables entity expansion
397*fe927888SPhilip Paeps XML_SetDefaultHandlerExpand(parser, DefaultHandler);
398*fe927888SPhilip Paeps XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
399*fe927888SPhilip Paeps EndDoctypeDeclHandler);
400*fe927888SPhilip Paeps // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
401*fe927888SPhilip Paeps // and there isn't any significant code change between the two.
402*fe927888SPhilip Paeps XML_SetEntityDeclHandler(parser, EntityDeclHandler);
403*fe927888SPhilip Paeps XML_SetNotationDeclHandler(parser, NotationDeclHandler);
404*fe927888SPhilip Paeps XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
405*fe927888SPhilip Paeps EndNamespaceDeclHandler);
406*fe927888SPhilip Paeps XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
407*fe927888SPhilip Paeps XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
408*fe927888SPhilip Paeps XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
409*fe927888SPhilip Paeps XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
410*fe927888SPhilip Paeps }
411*fe927888SPhilip Paeps
// Fuzzer entry point. Resets the per-testcase globals, creates one parser that
// allocates through the failure-injecting hooks and then replays the
// testcase's actions in order:
//   - chunk:           parse as non-final input; reset the parser on error
//   - last chunk:      parse as final input, then always reset the parser
//   - reset:           reset the parser
//   - external entity: remember the payload for ExternalEntityRefHandler
// A testcase without actions is ignored.
DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
  g_external_entity = nullptr;

  if (testcase.actions_size() == 0) {
    return;
  }

  g_allocation_count = 0;
  g_fail_allocations.clear();
  for (int k = 0; k < testcase.fail_allocations_size(); ++k) {
    g_fail_allocations.push_back(testcase.fail_allocations(k));
  }

  SetEncoding(testcase.encoding());
  XML_Parser parser
      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
  InitializeParser(parser);

  // A reset drops the parser's configuration, so it is always followed by a
  // fresh InitializeParser.
  const auto start_over = [&parser]() {
    XML_ParserReset(parser, g_encoding);
    InitializeParser(parser);
  };

  for (int k = 0; k < testcase.actions_size(); ++k) {
    const auto &action = testcase.actions(k);
    switch (action.action_case()) {
    case xml_lpm_fuzzer::Action::kChunk: {
      const enum XML_Status outcome
          = Parse(parser, action.chunk().data(), action.chunk().size(), 0);
      if (outcome == XML_STATUS_ERROR) {
        // Force a reset after parse error.
        start_over();
      }
      break;
    }

    case xml_lpm_fuzzer::Action::kLastChunk:
      Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1);
      start_over();
      break;

    case xml_lpm_fuzzer::Action::kReset:
      start_over();
      break;

    case xml_lpm_fuzzer::Action::kExternalEntity:
      g_external_entity = action.external_entity().data();
      g_external_entity_size = action.external_entity().size();
      break;

    default:
      break;
    }
  }

  XML_ParserFree(parser);
}