xref: /freebsd/contrib/expat/lib/xmlparse.c (revision fe9278888fd4414abe2d922e469cf608005f4c65)
1 /* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
36    Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
37    Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
38    Copyright (c) 2022      Jann Horn <jannh@google.com>
39    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
40    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
41    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42    Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43    Copyright (c) 2024      Hanno Böck <hanno@gentoo.org>
44    Licensed under the MIT license:
45 
46    Permission is  hereby granted,  free of charge,  to any  person obtaining
47    a  copy  of  this  software   and  associated  documentation  files  (the
48    "Software"),  to  deal in  the  Software  without restriction,  including
49    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
50    distribute, sublicense, and/or sell copies of the Software, and to permit
51    persons  to whom  the Software  is  furnished to  do so,  subject to  the
52    following conditions:
53 
54    The above copyright  notice and this permission notice  shall be included
55    in all copies or substantial portions of the Software.
56 
57    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
58    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
59    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
60    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
61    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
62    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
63    USE OR OTHER DEALINGS IN THE SOFTWARE.
64 */
65 
66 #define XML_BUILDING_EXPAT 1
67 
68 #include "expat_config.h"
69 
70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
71 #  error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
72 #endif
73 
74 #if defined(XML_DTD) && XML_GE == 0
75 #  error Either undefine XML_DTD or define XML_GE to 1.
76 #endif
77 
78 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2)           \
79     || (XML_CONTEXT_BYTES + 0 < 0)
80 #  error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
81 #endif
82 
83 #if defined(HAVE_SYSCALL_GETRANDOM)
84 #  if ! defined(_GNU_SOURCE)
85 #    define _GNU_SOURCE 1 /* syscall prototype */
86 #  endif
87 #endif
88 
89 #ifdef _WIN32
90 /* force stdlib to define rand_s() */
91 #  if ! defined(_CRT_RAND_S)
92 #    define _CRT_RAND_S
93 #  endif
94 #endif
95 
96 #include <stdbool.h>
97 #include <stddef.h>
98 #include <string.h> /* memset(), memcpy() */
99 #include <assert.h>
100 #include <limits.h> /* UINT_MAX */
101 #include <stdio.h>  /* fprintf */
102 #include <stdlib.h> /* getenv, rand_s */
103 #include <stdint.h> /* uintptr_t */
104 #include <math.h>   /* isnan */
105 
106 #ifdef _WIN32
107 #  define getpid GetCurrentProcessId
108 #else
109 #  include <sys/time.h>  /* gettimeofday() */
110 #  include <sys/types.h> /* getpid() */
111 #  include <unistd.h>    /* getpid() */
112 #  include <fcntl.h>     /* O_RDONLY */
113 #  include <errno.h>
114 #endif
115 
116 #ifdef _WIN32
117 #  include "winconfig.h"
118 #endif
119 
120 #include "ascii.h"
121 #include "expat.h"
122 #include "siphash.h"
123 
124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
125 #  if defined(HAVE_GETRANDOM)
126 #    include <sys/random.h> /* getrandom */
127 #  else
128 #    include <unistd.h>      /* syscall */
129 #    include <sys/syscall.h> /* SYS_getrandom */
130 #  endif
131 #  if ! defined(GRND_NONBLOCK)
132 #    define GRND_NONBLOCK 0x0001
133 #  endif /* ! defined(GRND_NONBLOCK) */
134 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
135 
136 #if defined(HAVE_LIBBSD)                                                       \
137     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
138 #  include <bsd/stdlib.h>
139 #endif
140 
141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
142 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
143 #endif
144 
145 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
146     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
147     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
148     && ! defined(XML_POOR_ENTROPY)
149 #  error You do not have support for any sources of high quality entropy \
150     enabled.  For end user security, that is probably not what you want. \
151     \
152     Your options include: \
153       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
154       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
155       * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
156       * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
157       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
158       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
159       * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
160       * Windows >=Vista (rand_s): _WIN32. \
161     \
162     If insist on not using any of these, bypass this error by defining \
163     XML_POOR_ENTROPY; you have been warned. \
164     \
165     If you have reasons to patch this detection code away or need changes \
166     to the build system, please open a bug.  Thank you!
167 #endif
168 
/* Bind the encoding-neutral Xml* names used in this file to their UTF-16
   variants when XML_UNICODE is defined and to their UTF-8 variants
   otherwise.  ICHAR is unsigned short in the former case and char in the
   latter. */
169 #ifdef XML_UNICODE
170 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
171 #  define XmlConvert XmlUtf16Convert
172 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
173 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
174 #  define XmlEncode XmlUtf16Encode
/* True when enc is not flagged isUtf16, or when the address s is odd
   (lowest bit set). */
175 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
176 typedef unsigned short ICHAR;
177 #else
178 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
179 #  define XmlConvert XmlUtf8Convert
180 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
181 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
182 #  define XmlEncode XmlUtf8Encode
/* True when enc is not flagged isUtf8; s is not examined. */
183 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
184 typedef char ICHAR;
185 #endif
186 
/* When XML_NS is not defined, the namespace-aware (...NS) names are aliased
   to their plain counterparts, so the rest of the file can use the NS names
   unconditionally. */
187 #ifndef XML_NS
188 
189 #  define XmlInitEncodingNS XmlInitEncoding
190 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
191 #  undef XmlGetInternalEncodingNS
192 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
193 #  define XmlParseXmlDeclNS XmlParseXmlDecl
194 
195 #endif
196 
/* XML_T(x) and XML_L(x) adapt a literal x to the configured character type:
     - XML_UNICODE + XML_UNICODE_WCHAR_T: XML_T casts to const wchar_t and
       XML_L pastes an L prefix onto x;
     - XML_UNICODE only: XML_T casts to const unsigned short, XML_L is x;
     - otherwise: both expand to x unchanged.
   NOTE(review): the cast forms are not wrapped in parentheses, so the cast
   binds only to the leading operand if x is a compound expression. */
197 #ifdef XML_UNICODE
198 
199 #  ifdef XML_UNICODE_WCHAR_T
200 #    define XML_T(x) (const wchar_t) x
201 #    define XML_L(x) L##x
202 #  else
203 #    define XML_T(x) (const unsigned short)x
204 #    define XML_L(x) x
205 #  endif
206 
207 #else
208 
209 #  define XML_T(x) x
210 #  define XML_L(x) x
211 
212 #endif
213 
214 /* Round up n to be a multiple of sz, where sz is a power of 2.
   sz appears twice in the expansion, so it should be free of side
   effects. */
215 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
216 
217 /* Do safe (NULL-aware) pointer arithmetic: yields p - q when both p and q
   are non-NULL and 0 otherwise.  Both arguments may be evaluated twice. */
218 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
219 
/* Yields the smaller of a and b (b when they compare equal).  The selected
   argument is evaluated twice, so avoid arguments with side effects. */
220 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
221 
222 #include "internal.h"
223 #include "xmltok.h"
224 #include "xmlrole.h"
225 
/* Hash table key: a pointer to constant XML_Char data. */
226 typedef const XML_Char *KEY;
227 
/* Minimal hash table entry consisting only of its key.
   NOTE(review): several structs below (PREFIX, ENTITY, ELEMENT_TYPE, ...)
   start with a name pointer as well, presumably so that they can be stored
   in a HASH_TABLE through a NAMED pointer -- confirm against lookup(). */
228 typedef struct {
229   KEY name;
230 } NAMED;
231 
/* Hash table of NAMED pointers.
   v    - slot array
   size - table size; the probing comment further down states that it is a
          power of 2 (mask = size - 1)
   power, used - NOTE(review): presumably log2(size) and the number of
          occupied slots -- confirm against lookup()
   mem  - memory handling suite associated with the table */
232 typedef struct {
233   NAMED **v;
234   unsigned char power;
235   size_t size;
236   size_t used;
237   const XML_Memory_Handling_Suite *mem;
238 } HASH_TABLE;
239 
240 static size_t keylen(KEY s);
241 
242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
243 
244 /* For probing (after a collision) we need a step size relatively prime
245    to the hash table size, which is a power of 2. We use double-hashing,
246    since we can calculate a second hash value cheaply by taking those bits
247    of the first hash value that were discarded (masked out) when the table
248    index was calculated: index = hash & mask, where mask = table->size - 1.
249    We limit the maximum step size to table->size / 4 (mask >> 2) and make
250    it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH extracts the discarded bits and limits them with mask >> 2;
   PROBE_STEP sets the lowest bit of that value and narrows the result to
   unsigned char.
251 */
252 #define SECOND_HASH(hash, mask, power)                                         \
253   ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
254 #define PROBE_STEP(hash, mask, power)                                          \
255   ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
256 
/* Cursor used with hashTableIterInit()/hashTableIterNext().
   NOTE(review): p and end presumably delimit the not-yet-visited part of a
   HASH_TABLE's slot array -- confirm against those functions, which are
   defined outside this view. */
257 typedef struct {
258   NAMED **p;
259   NAMED **end;
260 } HASH_TABLE_ITER;
261 
/* Initial sizes and start values for parser-internal buffers and tables.
   NOTE(review): units (bytes vs. XML_Chars vs. elements) are determined at
   the use sites, which are outside this view -- check there before changing
   any value. */
262 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
263 #define INIT_DATA_BUF_SIZE 1024
264 #define INIT_ATTS_SIZE 16
265 #define INIT_ATTS_VERSION 0xFFFFFFFF
266 #define INIT_BLOCK_SIZE 1024
267 #define INIT_BUFFER_SIZE 1024
268 
/* NOTE(review): presumably extra room reserved whenever a buffer is expanded
   -- confirm at the use sites. */
269 #define EXPAND_SPARE 24
270 
/* Association of a PREFIX with a URI.
   A binding sits on two singly linked chains, one through nextTagBinding
   and one through prevPrefixBinding.
   NOTE(review): going by the names, the first chain groups the bindings
   belonging to one tag and the second leads to the binding that the same
   prefix had before; uriLen / uriAlloc are presumably the used and the
   allocated length of uri -- confirm against addBinding(). */
271 typedef struct binding {
272   struct prefix *prefix;
273   struct binding *nextTagBinding;
274   struct binding *prevPrefixBinding;
275   const struct attribute_id *attId;
276   XML_Char *uri;
277   int uriLen;
278   int uriAlloc;
279 } BINDING;
280 
/* A namespace prefix, identified by name, together with a pointer to a
   BINDING (presumably the one currently in effect -- confirm). */
281 typedef struct prefix {
282   const XML_Char *name;
283   BINDING *binding;
284 } PREFIX;
285 
/* Element name as stored in TAG.name: the full string plus pointers to its
   local part and prefix, each with an int length field.
   NOTE(review): whether the lengths count XML_Chars or bytes, and whether
   they include a terminator, is not visible here -- confirm at the use
   sites. */
286 typedef struct {
287   const XML_Char *str;
288   const XML_Char *localPart;
289   const XML_Char *prefix;
290   int strLen;
291   int uriLen;
292   int prefixLen;
293 } TAG_NAME;
294 
295 /* TAG represents an open element.
296    The name of the element is stored in both the document and API
297    encodings.  The memory buffer 'buf' is a separately-allocated
298    memory area which stores the name.  During the XML_Parse()/
299    XML_ParseBuffer() when the element is open, the memory for the 'raw'
300    version of the name (in the document encoding) is shared with the
301    document buffer.  If the element is open across calls to
302    XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
303    contain the 'raw' name as well.
304 
305    A parser reuses these structures, maintaining a list of allocated
306    TAG objects in a free list.
307 */
308 typedef struct tag {
309   struct tag *parent;  /* parent of this element */
310   const char *rawName; /* tagName in the original encoding */
/* length of rawName; NOTE(review): presumably in bytes, since rawName is a
   char pointer -- confirm */
311   int rawNameLength;
312   TAG_NAME name; /* tagName in the API encoding */
313   char *buf;     /* buffer for name components */
314   char *bufEnd;  /* end of the buffer */
/* NOTE(review): presumably the namespace bindings introduced by this
   element, chained through BINDING.nextTagBinding -- confirm */
315   BINDING *bindings;
316 } TAG;
317 
/* Record kept for one entity.  Fields without a comment below (name,
   textPtr, systemId, base, publicId, notation, open, is_param) are not
   explained by the visible code; their meaning is suggested by their names
   only. */
318 typedef struct {
319   const XML_Char *name;
320   const XML_Char *textPtr;
321   int textLen;   /* length in XML_Chars */
322   int processed; /* # of processed bytes - when suspended */
323   const XML_Char *systemId;
324   const XML_Char *base;
325   const XML_Char *publicId;
326   const XML_Char *notation;
327   XML_Bool open;
328   XML_Bool hasMore; /* true if entity has not been completely processed */
329   /* An entity can be open while being already completely processed (hasMore ==
330     XML_FALSE). The reason is the delayed closing of entities until their inner
331     entities are processed and closed */
332   XML_Bool is_param;
333   XML_Bool is_internal; /* true if declared in internal subset outside PE */
334 } ENTITY;
335 
/* One node of the temporary structure ("scaffold") kept in DTD.scaffold
   while a content model is being built.
   NOTE(review): firstchild, lastchild and nextsib are ints rather than
   pointers, so they are presumably indices into the scaffold array --
   confirm against nextScaffoldPart() and build_model(). */
336 typedef struct {
337   enum XML_Content_Type type;
338   enum XML_Content_Quant quant;
339   const XML_Char *name;
340   int firstchild;
341   int lastchild;
342   int childcnt;
343   int nextsib;
344 } CONTENT_SCAFFOLD;
345 
/* NOTE(review): presumably the initial element count of the scaffold array
   -- confirm at the allocation site. */
346 #define INIT_SCAFFOLD_ELEMENTS 32
347 
/* One chunk of string pool storage, linked through next.
   NOTE(review): s is declared with a single element but is presumably
   over-allocated to hold size XML_Chars (pre-C99 trailing-array idiom) --
   confirm against poolGrow(). */
348 typedef struct block {
349   struct block *next;
350   int size;
351   XML_Char s[1];
352 } BLOCK;
353 
/* String pool.  The pool* macros defined later in this file operate on
   start, ptr and end: start marks the beginning of the string being built,
   ptr the next write position, and reaching end triggers poolGrow().
   blocks / freeBlocks are lists of BLOCKs (presumably in-use and reusable
   ones -- confirm); mem is the associated memory handling suite. */
354 typedef struct {
355   BLOCK *blocks;
356   BLOCK *freeBlocks;
357   const XML_Char *end;
358   XML_Char *ptr;
359   XML_Char *start;
360   const XML_Memory_Handling_Suite *mem;
361 } STRING_POOL;
362 
363 /* The XML_Char before the name is used to determine whether
364    an attribute has been specified.  (That is, name points one XML_Char
   past the start of its storage.)
   NOTE(review): maybeTokenized and xmlns are not explained by the visible
   code; by name, xmlns presumably marks namespace-declaration attributes --
   confirm against getAttributeId(). */
365 typedef struct attribute_id {
366   XML_Char *name;
367   PREFIX *prefix;
368   XML_Bool maybeTokenized;
369   XML_Bool xmlns;
370 } ATTRIBUTE_ID;
371 
/* Entry of ELEMENT_TYPE.defaultAtts: an attribute id, its CDATA flag and a
   value pointer (presumably the declared default, possibly NULL -- confirm
   against defineAttribute()). */
372 typedef struct {
373   const ATTRIBUTE_ID *id;
374   XML_Bool isCdata;
375   const XML_Char *value;
376 } DEFAULT_ATTRIBUTE;
377 
/* Element type of XML_ParserStruct.m_nsAtts.
   NOTE(review): presumably one slot of a hash set used to detect duplicate
   expanded attribute names, with version compared against m_nsAttsVersion
   to tell live from stale slots -- confirm against storeAtts(). */
378 typedef struct {
379   unsigned long version;
380   unsigned long hash;
381   const XML_Char *uriName;
382 } NS_ATT;
383 
/* Per-element-type record: name, prefix, an attribute id (idAtt) and an
   array of DEFAULT_ATTRIBUTEs.
   NOTE(review): nDefaultAtts / allocDefaultAtts are presumably the used and
   the allocated element counts of defaultAtts -- confirm against
   defineAttribute(). */
384 typedef struct {
385   const XML_Char *name;
386   PREFIX *prefix;
387   const ATTRIBUTE_ID *idAtt;
388   int nDefaultAtts;
389   int allocDefaultAtts;
390   DEFAULT_ATTRIBUTE *defaultAtts;
391 } ELEMENT_TYPE;
392 
/* Document type information held by a parser (see XML_ParserStruct.m_dtd):
   hash tables for general entities, element types, attribute ids and
   prefixes (plus parameter entities when XML_DTD is defined), two string
   pools, a few status flags and the scaffolding used while a content model
   is being built. */
393 typedef struct {
394   HASH_TABLE generalEntities;
395   HASH_TABLE elementTypes;
396   HASH_TABLE attributeIds;
397   HASH_TABLE prefixes;
398   STRING_POOL pool;
399   STRING_POOL entityValuePool;
400   /* false once a parameter entity reference has been skipped */
401   XML_Bool keepProcessing;
402   /* true once an internal or external PE reference has been encountered;
403      this includes the reference to an external subset */
404   XML_Bool hasParamEntityRefs;
405   XML_Bool standalone;
406 #ifdef XML_DTD
407   /* indicates if external PE has been read */
408   XML_Bool paramEntityRead;
409   HASH_TABLE paramEntities;
410 #endif /* XML_DTD */
411   PREFIX defaultPrefix;
412   /* === scaffolding for building content model === */
/* NOTE(review): scaffSize / scaffCount are presumably the allocated and the
   used element counts of scaffold, and scaffIndex / scaffLevel a stack of
   scaffold indices with its depth -- confirm against nextScaffoldPart(). */
413   XML_Bool in_eldecl;
414   CONTENT_SCAFFOLD *scaffold;
415   unsigned contentStringLen;
416   unsigned scaffSize;
417   unsigned scaffCount;
418   int scaffLevel;
419   int *scaffIndex;
420 } DTD;
421 
/* Kind tag stored in OPEN_INTERNAL_ENTITY.type and passed to
   processEntity().
   NOTE(review): the three values presumably correspond to the three
   open/free list pairs in XML_ParserStruct (m_openInternalEntities,
   m_openAttributeEntities, m_openValueEntities) -- confirm. */
422 enum EntityType {
423   ENTITY_INTERNAL,
424   ENTITY_ATTRIBUTE,
425   ENTITY_VALUE,
426 };
427 
/* Node of a singly linked list (via next) describing an entity that is
   being processed; XML_ParserStruct keeps "open" and "free" lists of these.
   NOTE(review): internalEventPtr / internalEventEndPtr presumably play the
   role of m_eventPtr / m_eventEndPtr while inside the entity text, and
   startTagLevel presumably records m_tagLevel at the time of opening --
   confirm against processEntity(). */
428 typedef struct open_internal_entity {
429   const char *internalEventPtr;
430   const char *internalEventEndPtr;
431   struct open_internal_entity *next;
432   ENTITY *entity;
433   int startTagLevel;
434   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
435   enum EntityType type;
436 } OPEN_INTERNAL_ENTITY;
437 
/* Tells the byte accounting how the bytes of a token are to be classified;
   passed as the 'account' argument of the do*() / store*() functions
   declared below. */
438 enum XML_Account {
439   XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
440   XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
441                                    expansion */
442   XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
443 };
444 
/* Bookkeeping types that exist only when general entities are enabled
   (XML_GE == 1). */
445 #if XML_GE == 1
446 typedef unsigned long long XmlBigCount;
/* Byte counters and limits used by the accounting*() functions declared
   below.
   NOTE(review): countBytesDirect / countBytesIndirect presumably accumulate
   bytes classified as XML_ACCOUNT_DIRECT / XML_ACCOUNT_ENTITY_EXPANSION --
   confirm against accountingDiffTolerated(). */
447 typedef struct accounting {
448   XmlBigCount countBytesDirect;
449   XmlBigCount countBytesIndirect;
450   unsigned long debugLevel;
451   float maximumAmplificationFactor; // >=1.0
452   unsigned long long activationThresholdBytes;
453 } ACCOUNTING;
454 
/* Counters used by the entityTracking*() functions declared below
   (entities opened so far, current and maximum nesting depth, per the field
   names). */
455 typedef struct entity_stats {
456   unsigned int countEverOpened;
457   unsigned int currentDepth;
458   unsigned int maximumDepthSeen;
459   unsigned long debugLevel;
460 } ENTITY_STATS;
461 #endif /* XML_GE == 1 */
462 
/* Function type shared by all *Processor functions declared below;
   XML_ParserStruct.m_processor points to one of them.  A processor receives
   the parser, two pointers into the input (start, end) and an out-parameter
   endPtr, and returns an XML_Error code.
   NOTE(review): endPtr presumably receives the position up to which input
   was consumed -- confirm in the processor implementations. */
463 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
464                                          const char *end, const char **endPtr);
465 
466 static Processor prologProcessor;
467 static Processor prologInitProcessor;
468 static Processor contentProcessor;
469 static Processor cdataSectionProcessor;
470 #ifdef XML_DTD
471 static Processor ignoreSectionProcessor;
472 static Processor externalParEntProcessor;
473 static Processor externalParEntInitProcessor;
474 static Processor entityValueProcessor;
475 static Processor entityValueInitProcessor;
476 #endif /* XML_DTD */
477 static Processor epilogProcessor;
478 static Processor errorProcessor;
479 static Processor externalEntityInitProcessor;
480 static Processor externalEntityInitProcessor2;
481 static Processor externalEntityInitProcessor3;
482 static Processor externalEntityContentProcessor;
483 static Processor internalEntityProcessor;
484 
485 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
486                                             const XML_Char *encodingName);
487 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
488                                      const char *s, const char *next);
489 static enum XML_Error initializeEncoding(XML_Parser parser);
490 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
491                                const char *s, const char *end, int tok,
492                                const char *next, const char **nextPtr,
493                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
494                                enum XML_Account account);
495 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
496                                     XML_Bool betweenDecl, enum EntityType type);
497 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
498                                 const ENCODING *enc, const char *start,
499                                 const char *end, const char **endPtr,
500                                 XML_Bool haveMore, enum XML_Account account);
501 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
502                                      const char **startPtr, const char *end,
503                                      const char **nextPtr, XML_Bool haveMore,
504                                      enum XML_Account account);
505 #ifdef XML_DTD
506 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
507                                       const char **startPtr, const char *end,
508                                       const char **nextPtr, XML_Bool haveMore);
509 #endif /* XML_DTD */
510 
511 static void freeBindings(XML_Parser parser, BINDING *bindings);
512 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
513                                 const char *attStr, TAG_NAME *tagNamePtr,
514                                 BINDING **bindingsPtr,
515                                 enum XML_Account account);
516 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
517                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
518                                  BINDING **bindingsPtr);
519 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
520                            XML_Bool isCdata, XML_Bool isId,
521                            const XML_Char *value, XML_Parser parser);
522 static enum XML_Error storeAttributeValue(XML_Parser parser,
523                                           const ENCODING *enc, XML_Bool isCdata,
524                                           const char *ptr, const char *end,
525                                           STRING_POOL *pool,
526                                           enum XML_Account account);
527 static enum XML_Error
528 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
529                      const char *ptr, const char *end, STRING_POOL *pool,
530                      enum XML_Account account, const char **nextPtr);
531 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
532                                     const char *start, const char *end);
533 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
534 #if XML_GE == 1
535 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
536                                        const char *start, const char *end,
537                                        enum XML_Account account,
538                                        const char **nextPtr);
539 static enum XML_Error callStoreEntityValue(XML_Parser parser,
540                                            const ENCODING *enc,
541                                            const char *start, const char *end,
542                                            enum XML_Account account);
543 #else
544 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
545 #endif
546 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
547                                        const char *start, const char *end);
548 static int reportComment(XML_Parser parser, const ENCODING *enc,
549                          const char *start, const char *end);
550 static void reportDefault(XML_Parser parser, const ENCODING *enc,
551                           const char *start, const char *end);
552 
553 static const XML_Char *getContext(XML_Parser parser);
554 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
555 
556 static void FASTCALL normalizePublicId(XML_Char *s);
557 
558 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
559 /* do not call if m_parentParser != NULL */
560 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
561 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
562                        const XML_Memory_Handling_Suite *ms);
563 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
564                    const XML_Memory_Handling_Suite *ms);
565 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
566                            STRING_POOL *newPool, const HASH_TABLE *oldTable);
567 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
568                      size_t createSize);
569 static void FASTCALL hashTableInit(HASH_TABLE *table,
570                                    const XML_Memory_Handling_Suite *ms);
571 static void FASTCALL hashTableClear(HASH_TABLE *table);
572 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
573 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
574                                        const HASH_TABLE *table);
575 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
576 
577 static void FASTCALL poolInit(STRING_POOL *pool,
578                               const XML_Memory_Handling_Suite *ms);
579 static void FASTCALL poolClear(STRING_POOL *pool);
580 static void FASTCALL poolDestroy(STRING_POOL *pool);
581 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
582                             const char *ptr, const char *end);
583 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
584                                  const char *ptr, const char *end);
585 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
586 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
587                                                const XML_Char *s);
588 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
589                                        int n);
590 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
591                                                  const XML_Char *s);
592 
593 static int FASTCALL nextScaffoldPart(XML_Parser parser);
594 static XML_Content *build_model(XML_Parser parser);
595 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
596                                     const char *ptr, const char *end);
597 
598 static XML_Char *copyString(const XML_Char *s,
599                             const XML_Memory_Handling_Suite *memsuite);
600 
601 static unsigned long generate_hash_secret_salt(XML_Parser parser);
602 static XML_Bool startParsing(XML_Parser parser);
603 
604 static XML_Parser parserCreate(const XML_Char *encodingName,
605                                const XML_Memory_Handling_Suite *memsuite,
606                                const XML_Char *nameSep, DTD *dtd);
607 
608 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
609 
610 #if XML_GE == 1
611 static float accountingGetCurrentAmplification(XML_Parser rootParser);
612 static void accountingReportStats(XML_Parser originParser, const char *epilog);
613 static void accountingOnAbort(XML_Parser originParser);
614 static void accountingReportDiff(XML_Parser rootParser,
615                                  unsigned int levelsAwayFromRootParser,
616                                  const char *before, const char *after,
617                                  ptrdiff_t bytesMore, int source_line,
618                                  enum XML_Account account);
619 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
620                                         const char *before, const char *after,
621                                         int source_line,
622                                         enum XML_Account account);
623 
624 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
625                                       const char *action, int sourceLine);
626 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
627                                  int sourceLine);
628 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
629                                   int sourceLine);
630 
631 static XML_Parser getRootParserOf(XML_Parser parser,
632                                   unsigned int *outLevelDiff);
633 #endif /* XML_GE == 1 */
634 
635 static unsigned long getDebugLevel(const char *variableName,
636                                    unsigned long defaultDebugLevel);
637 
/* Accessors and small operations on a STRING_POOL.  Every macro takes a
   pointer to the pool; the argument is parenthesized in each expansion so
   that expressions such as &pool can be passed.  The pool argument may be
   evaluated more than once, so it must be free of side effects.

   poolStart      - start of the string currently being built
   poolLength     - number of XML_Chars between start and ptr
   poolChop       - remove the last character by decrementing ptr
   poolLastChar   - the character just before ptr
   poolDiscard    - reset ptr to start
   poolFinish     - set start to ptr
   poolAppendChar - store c at ptr and advance ptr; when ptr has reached
                    end, poolGrow() is called first.  Evaluates to 1 on
                    success and to 0 when poolGrow() fails.  */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
648 
/* g_reparseDeferralEnabledDefault is initialised to XML_TRUE.  It is const
   unless XML_TESTING is defined; in testing builds it is writable and an
   additional counter, g_bytesScanned, is defined. */
649 #if ! defined(XML_TESTING)
650 const
651 #endif
652     XML_Bool g_reparseDeferralEnabledDefault
653     = XML_TRUE; // write ONLY in runtests.c
654 #if defined(XML_TESTING)
655 unsigned int g_bytesScanned = 0; // used for testing only
656 #endif
657 
/* Complete state of one parser.  The MALLOC/REALLOC/FREE macros below reach
   m_mem through a 'parser' pointer, so this is presumably the struct that
   XML_Parser points to (the typedef itself is outside this view).
   Roughly in declaration order it holds: user data, the parse buffer and
   its bookkeeping, the handler callbacks, encoding state, the active
   Processor, event/position pointers, lists of open and free
   OPEN_INTERNAL_ENTITY nodes, scratch fields for the declaration being
   parsed, the DTD, tag and binding stacks with their free lists, attribute
   storage, string pools and -- when XML_GE == 1 -- accounting data. */
658 struct XML_ParserStruct {
659   /* The first member must be m_userData so that the XML_GetUserData
660      macro works. */
661   void *m_userData;
662   void *m_handlerArg;
663 
664   // How the four parse buffer pointers below relate in time and space:
665   //
666   //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
667   //   |           |              |               |
668   //   <--parsed-->|              |               |
669   //               <---parsing--->|               |
670   //                              <--unoccupied-->|
671   //   <---------total-malloced/realloced-------->|
672 
673   char *m_buffer; // malloc/realloc base pointer of parse buffer
674   const XML_Memory_Handling_Suite m_mem;
675   const char *m_bufferPtr; // first character to be parsed
676   char *m_bufferEnd;       // past last character to be parsed
677   const char *m_bufferLim; // allocated end of m_buffer
678 
679   XML_Index m_parseEndByteIndex;
680   const char *m_parseEndPtr;
681   size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
682   XML_Bool m_reparseDeferralEnabled;
683   int m_lastBufferRequestSize;
684   XML_Char *m_dataBuf;
685   XML_Char *m_dataBufEnd;
/* Handler callbacks, one member per handler type. */
686   XML_StartElementHandler m_startElementHandler;
687   XML_EndElementHandler m_endElementHandler;
688   XML_CharacterDataHandler m_characterDataHandler;
689   XML_ProcessingInstructionHandler m_processingInstructionHandler;
690   XML_CommentHandler m_commentHandler;
691   XML_StartCdataSectionHandler m_startCdataSectionHandler;
692   XML_EndCdataSectionHandler m_endCdataSectionHandler;
693   XML_DefaultHandler m_defaultHandler;
694   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
695   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
696   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
697   XML_NotationDeclHandler m_notationDeclHandler;
698   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
699   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
700   XML_NotStandaloneHandler m_notStandaloneHandler;
701   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
702   XML_Parser m_externalEntityRefHandlerArg;
703   XML_SkippedEntityHandler m_skippedEntityHandler;
704   XML_UnknownEncodingHandler m_unknownEncodingHandler;
705   XML_ElementDeclHandler m_elementDeclHandler;
706   XML_AttlistDeclHandler m_attlistDeclHandler;
707   XML_EntityDeclHandler m_entityDeclHandler;
708   XML_XmlDeclHandler m_xmlDeclHandler;
/* Encoding state. */
709   const ENCODING *m_encoding;
710   INIT_ENCODING m_initEncoding;
711   const ENCODING *m_internalEncoding;
712   const XML_Char *m_protocolEncodingName;
713   XML_Bool m_ns;
714   XML_Bool m_ns_triplets;
715   void *m_unknownEncodingMem;
716   void *m_unknownEncodingData;
717   void *m_unknownEncodingHandlerData;
718   void(XMLCALL *m_unknownEncodingRelease)(void *);
719   PROLOG_STATE m_prologState;
/* Processor function currently in charge (see the Processor typedef). */
720   Processor *m_processor;
721   enum XML_Error m_errorCode;
722   const char *m_eventPtr;
723   const char *m_eventEndPtr;
724   const char *m_positionPtr;
/* Three pairs of OPEN_INTERNAL_ENTITY lists; the names pair each "open"
   list with a "free" list (presumably one pair per enum EntityType value
   -- confirm). */
725   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
726   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
727   OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
728   OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
729   OPEN_INTERNAL_ENTITY *m_openValueEntities;
730   OPEN_INTERNAL_ENTITY *m_freeValueEntities;
731   XML_Bool m_defaultExpandInternalEntities;
732   int m_tagLevel;
/* Scratch fields for the doctype / declaration being parsed (by name). */
733   ENTITY *m_declEntity;
734   const XML_Char *m_doctypeName;
735   const XML_Char *m_doctypeSysid;
736   const XML_Char *m_doctypePubid;
737   const XML_Char *m_declAttributeType;
738   const XML_Char *m_declNotationName;
739   const XML_Char *m_declNotationPublicId;
740   ELEMENT_TYPE *m_declElementType;
741   ATTRIBUTE_ID *m_declAttributeId;
742   XML_Bool m_declAttributeIsCdata;
743   XML_Bool m_declAttributeIsId;
744   DTD *m_dtd;
745   const XML_Char *m_curBase;
/* Open-element stack and free list (see the TAG comment), followed by
   binding lists. */
746   TAG *m_tagStack;
747   TAG *m_freeTagList;
748   BINDING *m_inheritedBindings;
749   BINDING *m_freeBindingList;
/* Attribute storage. */
750   int m_attsSize;
751   int m_nSpecifiedAtts;
752   int m_idAttIndex;
753   ATTRIBUTE *m_atts;
754   NS_ATT *m_nsAtts;
755   unsigned long m_nsAttsVersion;
756   unsigned char m_nsAttsPower;
757 #ifdef XML_ATTR_INFO
758   XML_AttrInfo *m_attInfo;
759 #endif
760   POSITION m_position;
761   STRING_POOL m_tempPool;
762   STRING_POOL m_temp2Pool;
763   char *m_groupConnector;
764   unsigned int m_groupSize;
765   XML_Char m_namespaceSeparator;
/* NOTE(review): the comment on dtdReset() says it must not be called when
   m_parentParser != NULL, so non-NULL presumably marks a child (external
   entity) parser -- confirm. */
766   XML_Parser m_parentParser;
767   XML_ParsingStatus m_parsingStatus;
768 #ifdef XML_DTD
769   XML_Bool m_isParamEntity;
770   XML_Bool m_useForeignDTD;
771   enum XML_ParamEntityParsing m_paramEntityParsing;
772 #endif
773   unsigned long m_hash_secret_salt;
774 #if XML_GE == 1
775   ACCOUNTING m_accounting;
776   ENTITY_STATS m_entity_stats;
777 #endif
778   XML_Bool m_reenter;
779 };
780 
/* Allocation helpers that dispatch through the memory-handling suite stored
   in the parser's m_mem member.

   The parser argument is parenthesized inside the expansion so that any
   pointer-valued expression (not only a plain identifier) can be passed
   without operator-precedence surprises. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
784 
785 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)786 XML_ParserCreate(const XML_Char *encodingName) {
787   return XML_ParserCreate_MM(encodingName, NULL, NULL);
788 }
789 
790 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)791 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
792   XML_Char tmp[2] = {nsSep, 0};
793   return XML_ParserCreate_MM(encodingName, NULL, tmp);
794 }
795 
796 // "xml=http://www.w3.org/XML/1998/namespace"
797 static const XML_Char implicitContext[]
798     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
799        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
800        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
801        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
802        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
803        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
804        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
805        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
806        '\0'};
807 
808 /* To avoid warnings about unused functions: */
809 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
810 
811 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
812 
/* Obtain entropy on Linux 3.17+ through getrandom() in non-blocking mode.
 *
 * Fills target with exactly count random bytes.
 *
 * Returns 1 once all count bytes have been written (a count of 0 succeeds
 * trivially) and 0 on failure.  The call is retried only when it actually
 * failed with EINTR; errno is deliberately not inspected after a call that
 * made progress, since errno is only meaningful after a failed call.  A
 * short result simply continues filling the remainder of the buffer.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  const unsigned int getrandomFlags = GRND_NONBLOCK;
  size_t bytesWrittenTotal = 0;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds both getrandom()'s and syscall()'s return value */
    const long bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* No progress at all, or a real error: give up so that the caller
         can fall back to another entropy source. */
      return 0;
    }
    /* else: interrupted by a signal before anything was written; retry */
  }

  return 1;
}
840 
841 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
842 
843 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
844 
/* Extract entropy from /dev/urandom.
 *
 * Fills target with exactly count bytes read from the device.
 *
 * Returns 1 once all count bytes have been read (a count of 0 succeeds
 * trivially provided the device can be opened) and 0 if the device cannot
 * be opened or reading fails.  Short reads are continued; read() is retried
 * only when it actually failed with EINTR, because errno carries no
 * information after a call that did not fail.  A read of 0 bytes (end of
 * file) is treated as failure so that the loop cannot spin forever.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 1;
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      success = 0;
      break;
    }
    /* else: interrupted by a signal before anything was read; retry */
  }

  close(fd);
  return success;
}
872 
873 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
874 
875 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
876 
877 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
878 
/* Fills target with count bytes obtained from arc4random().

   Each 32-bit result is consumed least-significant byte first; a fresh
   value is drawn whenever the previous one is used up.  Bytes of the last
   value that are not needed are discarded. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t chunk = count - filled;
    size_t byteIdx;

    if (chunk > sizeof(word))
      chunk = sizeof(word);

    for (byteIdx = 0; byteIdx < chunk; byteIdx++)
      out[filled + byteIdx] = (uint8_t)(word >> (byteIdx * 8));

    filled += chunk;
  }
}
894 
895 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
896 
897 #ifdef _WIN32
898 
899 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
900    as it didn't declare it in its header prior to version 5.3.0 of its
901    runtime package (mingwrt, containing stdlib.h).  The upstream fix
902    was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
903 #  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
904       && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
905 __declspec(dllimport) int rand_s(unsigned int *);
906 #  endif
907 
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target with count bytes, consuming every rand_s() result
 * least-significant byte first.  Returns 1 on success and 0 as soon as
 * rand_s() reports an error (target may then be partially filled).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t chunk = count - filled;
    size_t byteIdx;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    if (chunk > sizeof(word))
      chunk = sizeof(word);

    for (byteIdx = 0; byteIdx < chunk; byteIdx++)
      out[filled + byteIdx] = (uint8_t)(word >> (byteIdx * 8));

    filled += chunk;
  }
  return 1; /* success */
}
931 
932 #endif /* _WIN32 */
933 
934 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
935 
/* Derives a small amount of entropy from the current time; used only by the
   low-quality fallback path of generate_hash_secret_salt().

   Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microseconds field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The assert vanishes under NDEBUG; the cast keeps rc "used" then. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
958 
959 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
960 
/* Pass-through helper: returns entropy unchanged.

   When getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields 1 or more, the value
   and the name of the provider that produced it (label) are additionally
   printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
969 
970 static unsigned long
generate_hash_secret_salt(XML_Parser parser)971 generate_hash_secret_salt(XML_Parser parser) {
972   unsigned long entropy;
973   (void)parser;
974 
975   /* "Failproof" high quality providers: */
976 #if defined(HAVE_ARC4RANDOM_BUF)
977   arc4random_buf(&entropy, sizeof(entropy));
978   return ENTROPY_DEBUG("arc4random_buf", entropy);
979 #elif defined(HAVE_ARC4RANDOM)
980   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
981   return ENTROPY_DEBUG("arc4random", entropy);
982 #else
983   /* Try high quality providers first .. */
984 #  ifdef _WIN32
985   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
986     return ENTROPY_DEBUG("rand_s", entropy);
987   }
988 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
989   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
990     return ENTROPY_DEBUG("getrandom", entropy);
991   }
992 #  endif
993 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
994   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
995     return ENTROPY_DEBUG("/dev/urandom", entropy);
996   }
997 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
998   /* .. and self-made low quality for backup: */
999 
1000   /* Process ID is 0 bits entropy if attacker has local access */
1001   entropy = gather_time_entropy() ^ getpid();
1002 
1003   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1004   if (sizeof(unsigned long) == 4) {
1005     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
1006   } else {
1007     return ENTROPY_DEBUG("fallback(8)",
1008                          entropy * (unsigned long)2305843009213693951ULL);
1009   }
1010 #endif
1011 }
1012 
1013 static unsigned long
get_hash_secret_salt(XML_Parser parser)1014 get_hash_secret_salt(XML_Parser parser) {
1015   if (parser->m_parentParser != NULL)
1016     return get_hash_secret_salt(parser->m_parentParser);
1017   return parser->m_hash_secret_salt;
1018 }
1019 
1020 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1021 callProcessor(XML_Parser parser, const char *start, const char *end,
1022               const char **endPtr) {
1023   const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1024 
1025   if (parser->m_reparseDeferralEnabled
1026       && ! parser->m_parsingStatus.finalBuffer) {
1027     // Heuristic: don't try to parse a partial token again until the amount of
1028     // available data has increased significantly.
1029     const size_t had_before = parser->m_partialTokenBytesBefore;
1030     // ...but *do* try anyway if we're close to causing a reallocation.
1031     size_t available_buffer
1032         = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1033 #if XML_CONTEXT_BYTES > 0
1034     available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1035 #endif
1036     available_buffer
1037         += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1038     // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1039     const bool enough
1040         = (have_now >= 2 * had_before)
1041           || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1042 
1043     if (! enough) {
1044       *endPtr = start; // callers may expect this to be set
1045       return XML_ERROR_NONE;
1046     }
1047   }
1048 #if defined(XML_TESTING)
1049   g_bytesScanned += (unsigned)have_now;
1050 #endif
1051   // Run in a loop to eliminate dangerous recursion depths
1052   enum XML_Error ret;
1053   *endPtr = start;
1054   while (1) {
1055     // Use endPtr as the new start in each iteration, since it will
1056     // be set to the next start point by m_processor.
1057     ret = parser->m_processor(parser, *endPtr, end, endPtr);
1058 
1059     // Make parsing status (and in particular XML_SUSPENDED) take
1060     // precedence over re-enter flag when they disagree
1061     if (parser->m_parsingStatus.parsing != XML_PARSING) {
1062       parser->m_reenter = XML_FALSE;
1063     }
1064 
1065     if (! parser->m_reenter) {
1066       break;
1067     }
1068 
1069     parser->m_reenter = XML_FALSE;
1070     if (ret != XML_ERROR_NONE)
1071       return ret;
1072   }
1073 
1074   if (ret == XML_ERROR_NONE) {
1075     // if we consumed nothing, remember what we had on this parse attempt.
1076     if (*endPtr == start) {
1077       parser->m_partialTokenBytesBefore = have_now;
1078     } else {
1079       parser->m_partialTokenBytesBefore = 0;
1080     }
1081   }
1082   return ret;
1083 }
1084 
1085 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1086 startParsing(XML_Parser parser) {
1087   /* hash functions must be initialized before setContext() is called */
1088   if (parser->m_hash_secret_salt == 0)
1089     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1090   if (parser->m_ns) {
1091     /* implicit context only set for root parser, since child
1092        parsers (i.e. external entity parsers) will inherit it
1093     */
1094     return setContext(parser, implicitContext);
1095   }
1096   return XML_TRUE;
1097 }
1098 
1099 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1100 XML_ParserCreate_MM(const XML_Char *encodingName,
1101                     const XML_Memory_Handling_Suite *memsuite,
1102                     const XML_Char *nameSep) {
1103   return parserCreate(encodingName, memsuite, nameSep, NULL);
1104 }
1105 
1106 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1107 parserCreate(const XML_Char *encodingName,
1108              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1109              DTD *dtd) {
1110   XML_Parser parser;
1111 
1112   if (memsuite) {
1113     XML_Memory_Handling_Suite *mtemp;
1114     parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1115     if (parser != NULL) {
1116       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1117       mtemp->malloc_fcn = memsuite->malloc_fcn;
1118       mtemp->realloc_fcn = memsuite->realloc_fcn;
1119       mtemp->free_fcn = memsuite->free_fcn;
1120     }
1121   } else {
1122     XML_Memory_Handling_Suite *mtemp;
1123     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1124     if (parser != NULL) {
1125       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1126       mtemp->malloc_fcn = malloc;
1127       mtemp->realloc_fcn = realloc;
1128       mtemp->free_fcn = free;
1129     }
1130   }
1131 
1132   if (! parser)
1133     return parser;
1134 
1135   parser->m_buffer = NULL;
1136   parser->m_bufferLim = NULL;
1137 
1138   parser->m_attsSize = INIT_ATTS_SIZE;
1139   parser->m_atts
1140       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1141   if (parser->m_atts == NULL) {
1142     FREE(parser, parser);
1143     return NULL;
1144   }
1145 #ifdef XML_ATTR_INFO
1146   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1147       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1148   if (parser->m_attInfo == NULL) {
1149     FREE(parser, parser->m_atts);
1150     FREE(parser, parser);
1151     return NULL;
1152   }
1153 #endif
1154   parser->m_dataBuf
1155       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1156   if (parser->m_dataBuf == NULL) {
1157     FREE(parser, parser->m_atts);
1158 #ifdef XML_ATTR_INFO
1159     FREE(parser, parser->m_attInfo);
1160 #endif
1161     FREE(parser, parser);
1162     return NULL;
1163   }
1164   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1165 
1166   if (dtd)
1167     parser->m_dtd = dtd;
1168   else {
1169     parser->m_dtd = dtdCreate(&parser->m_mem);
1170     if (parser->m_dtd == NULL) {
1171       FREE(parser, parser->m_dataBuf);
1172       FREE(parser, parser->m_atts);
1173 #ifdef XML_ATTR_INFO
1174       FREE(parser, parser->m_attInfo);
1175 #endif
1176       FREE(parser, parser);
1177       return NULL;
1178     }
1179   }
1180 
1181   parser->m_freeBindingList = NULL;
1182   parser->m_freeTagList = NULL;
1183   parser->m_freeInternalEntities = NULL;
1184   parser->m_freeAttributeEntities = NULL;
1185   parser->m_freeValueEntities = NULL;
1186 
1187   parser->m_groupSize = 0;
1188   parser->m_groupConnector = NULL;
1189 
1190   parser->m_unknownEncodingHandler = NULL;
1191   parser->m_unknownEncodingHandlerData = NULL;
1192 
1193   parser->m_namespaceSeparator = ASCII_EXCL;
1194   parser->m_ns = XML_FALSE;
1195   parser->m_ns_triplets = XML_FALSE;
1196 
1197   parser->m_nsAtts = NULL;
1198   parser->m_nsAttsVersion = 0;
1199   parser->m_nsAttsPower = 0;
1200 
1201   parser->m_protocolEncodingName = NULL;
1202 
1203   poolInit(&parser->m_tempPool, &(parser->m_mem));
1204   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1205   parserInit(parser, encodingName);
1206 
1207   if (encodingName && ! parser->m_protocolEncodingName) {
1208     if (dtd) {
1209       // We need to stop the upcoming call to XML_ParserFree from happily
1210       // destroying parser->m_dtd because the DTD is shared with the parent
1211       // parser and the only guard that keeps XML_ParserFree from destroying
1212       // parser->m_dtd is parser->m_isParamEntity but it will be set to
1213       // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1214       parser->m_dtd = NULL;
1215     }
1216     XML_ParserFree(parser);
1217     return NULL;
1218   }
1219 
1220   if (nameSep) {
1221     parser->m_ns = XML_TRUE;
1222     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1223     parser->m_namespaceSeparator = *nameSep;
1224   } else {
1225     parser->m_internalEncoding = XmlGetInternalEncoding();
1226   }
1227 
1228   return parser;
1229 }
1230 
1231 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1232 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1233   parser->m_processor = prologInitProcessor;
1234   XmlPrologStateInit(&parser->m_prologState);
1235   if (encodingName != NULL) {
1236     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1237   }
1238   parser->m_curBase = NULL;
1239   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1240   parser->m_userData = NULL;
1241   parser->m_handlerArg = NULL;
1242   parser->m_startElementHandler = NULL;
1243   parser->m_endElementHandler = NULL;
1244   parser->m_characterDataHandler = NULL;
1245   parser->m_processingInstructionHandler = NULL;
1246   parser->m_commentHandler = NULL;
1247   parser->m_startCdataSectionHandler = NULL;
1248   parser->m_endCdataSectionHandler = NULL;
1249   parser->m_defaultHandler = NULL;
1250   parser->m_startDoctypeDeclHandler = NULL;
1251   parser->m_endDoctypeDeclHandler = NULL;
1252   parser->m_unparsedEntityDeclHandler = NULL;
1253   parser->m_notationDeclHandler = NULL;
1254   parser->m_startNamespaceDeclHandler = NULL;
1255   parser->m_endNamespaceDeclHandler = NULL;
1256   parser->m_notStandaloneHandler = NULL;
1257   parser->m_externalEntityRefHandler = NULL;
1258   parser->m_externalEntityRefHandlerArg = parser;
1259   parser->m_skippedEntityHandler = NULL;
1260   parser->m_elementDeclHandler = NULL;
1261   parser->m_attlistDeclHandler = NULL;
1262   parser->m_entityDeclHandler = NULL;
1263   parser->m_xmlDeclHandler = NULL;
1264   parser->m_bufferPtr = parser->m_buffer;
1265   parser->m_bufferEnd = parser->m_buffer;
1266   parser->m_parseEndByteIndex = 0;
1267   parser->m_parseEndPtr = NULL;
1268   parser->m_partialTokenBytesBefore = 0;
1269   parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1270   parser->m_lastBufferRequestSize = 0;
1271   parser->m_declElementType = NULL;
1272   parser->m_declAttributeId = NULL;
1273   parser->m_declEntity = NULL;
1274   parser->m_doctypeName = NULL;
1275   parser->m_doctypeSysid = NULL;
1276   parser->m_doctypePubid = NULL;
1277   parser->m_declAttributeType = NULL;
1278   parser->m_declNotationName = NULL;
1279   parser->m_declNotationPublicId = NULL;
1280   parser->m_declAttributeIsCdata = XML_FALSE;
1281   parser->m_declAttributeIsId = XML_FALSE;
1282   memset(&parser->m_position, 0, sizeof(POSITION));
1283   parser->m_errorCode = XML_ERROR_NONE;
1284   parser->m_eventPtr = NULL;
1285   parser->m_eventEndPtr = NULL;
1286   parser->m_positionPtr = NULL;
1287   parser->m_openInternalEntities = NULL;
1288   parser->m_openAttributeEntities = NULL;
1289   parser->m_openValueEntities = NULL;
1290   parser->m_defaultExpandInternalEntities = XML_TRUE;
1291   parser->m_tagLevel = 0;
1292   parser->m_tagStack = NULL;
1293   parser->m_inheritedBindings = NULL;
1294   parser->m_nSpecifiedAtts = 0;
1295   parser->m_unknownEncodingMem = NULL;
1296   parser->m_unknownEncodingRelease = NULL;
1297   parser->m_unknownEncodingData = NULL;
1298   parser->m_parentParser = NULL;
1299   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1300   // Reentry can only be triggered inside m_processor calls
1301   parser->m_reenter = XML_FALSE;
1302 #ifdef XML_DTD
1303   parser->m_isParamEntity = XML_FALSE;
1304   parser->m_useForeignDTD = XML_FALSE;
1305   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1306 #endif
1307   parser->m_hash_secret_salt = 0;
1308 
1309 #if XML_GE == 1
1310   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1311   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1312   parser->m_accounting.maximumAmplificationFactor
1313       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1314   parser->m_accounting.activationThresholdBytes
1315       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1316 
1317   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1318   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1319 #endif
1320 }
1321 
1322 /* moves list of bindings to m_freeBindingList */
1323 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1324 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1325   while (bindings) {
1326     BINDING *b = bindings;
1327     bindings = bindings->nextTagBinding;
1328     b->nextTagBinding = parser->m_freeBindingList;
1329     parser->m_freeBindingList = b;
1330   }
1331 }
1332 
1333 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1334 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1335   TAG *tStk;
1336   OPEN_INTERNAL_ENTITY *openEntityList;
1337 
1338   if (parser == NULL)
1339     return XML_FALSE;
1340 
1341   if (parser->m_parentParser)
1342     return XML_FALSE;
1343   /* move m_tagStack to m_freeTagList */
1344   tStk = parser->m_tagStack;
1345   while (tStk) {
1346     TAG *tag = tStk;
1347     tStk = tStk->parent;
1348     tag->parent = parser->m_freeTagList;
1349     moveToFreeBindingList(parser, tag->bindings);
1350     tag->bindings = NULL;
1351     parser->m_freeTagList = tag;
1352   }
1353   /* move m_openInternalEntities to m_freeInternalEntities */
1354   openEntityList = parser->m_openInternalEntities;
1355   while (openEntityList) {
1356     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1357     openEntityList = openEntity->next;
1358     openEntity->next = parser->m_freeInternalEntities;
1359     parser->m_freeInternalEntities = openEntity;
1360   }
1361   /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1362    * for attributes) */
1363   openEntityList = parser->m_openAttributeEntities;
1364   while (openEntityList) {
1365     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1366     openEntityList = openEntity->next;
1367     openEntity->next = parser->m_freeAttributeEntities;
1368     parser->m_freeAttributeEntities = openEntity;
1369   }
1370   /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1371    * for value entities) */
1372   openEntityList = parser->m_openValueEntities;
1373   while (openEntityList) {
1374     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1375     openEntityList = openEntity->next;
1376     openEntity->next = parser->m_freeValueEntities;
1377     parser->m_freeValueEntities = openEntity;
1378   }
1379   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1380   FREE(parser, parser->m_unknownEncodingMem);
1381   if (parser->m_unknownEncodingRelease)
1382     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1383   poolClear(&parser->m_tempPool);
1384   poolClear(&parser->m_temp2Pool);
1385   FREE(parser, (void *)parser->m_protocolEncodingName);
1386   parser->m_protocolEncodingName = NULL;
1387   parserInit(parser, encodingName);
1388   dtdReset(parser->m_dtd, &parser->m_mem);
1389   return XML_TRUE;
1390 }
1391 
1392 static XML_Bool
parserBusy(XML_Parser parser)1393 parserBusy(XML_Parser parser) {
1394   switch (parser->m_parsingStatus.parsing) {
1395   case XML_PARSING:
1396   case XML_SUSPENDED:
1397     return XML_TRUE;
1398   case XML_INITIALIZED:
1399   case XML_FINISHED:
1400   default:
1401     return XML_FALSE;
1402   }
1403 }
1404 
1405 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1406 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1407   if (parser == NULL)
1408     return XML_STATUS_ERROR;
1409   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1410      XXX There's no way for the caller to determine which of the
1411      XXX possible error cases caused the XML_STATUS_ERROR return.
1412   */
1413   if (parserBusy(parser))
1414     return XML_STATUS_ERROR;
1415 
1416   /* Get rid of any previous encoding name */
1417   FREE(parser, (void *)parser->m_protocolEncodingName);
1418 
1419   if (encodingName == NULL)
1420     /* No new encoding name */
1421     parser->m_protocolEncodingName = NULL;
1422   else {
1423     /* Copy the new encoding name into allocated memory */
1424     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1425     if (! parser->m_protocolEncodingName)
1426       return XML_STATUS_ERROR;
1427   }
1428   return XML_STATUS_OK;
1429 }
1430 
1431 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1432 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1433                                const XML_Char *encodingName) {
1434   XML_Parser parser = oldParser;
1435   DTD *newDtd = NULL;
1436   DTD *oldDtd;
1437   XML_StartElementHandler oldStartElementHandler;
1438   XML_EndElementHandler oldEndElementHandler;
1439   XML_CharacterDataHandler oldCharacterDataHandler;
1440   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1441   XML_CommentHandler oldCommentHandler;
1442   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1443   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1444   XML_DefaultHandler oldDefaultHandler;
1445   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1446   XML_NotationDeclHandler oldNotationDeclHandler;
1447   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1448   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1449   XML_NotStandaloneHandler oldNotStandaloneHandler;
1450   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1451   XML_SkippedEntityHandler oldSkippedEntityHandler;
1452   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1453   XML_ElementDeclHandler oldElementDeclHandler;
1454   XML_AttlistDeclHandler oldAttlistDeclHandler;
1455   XML_EntityDeclHandler oldEntityDeclHandler;
1456   XML_XmlDeclHandler oldXmlDeclHandler;
1457   ELEMENT_TYPE *oldDeclElementType;
1458 
1459   void *oldUserData;
1460   void *oldHandlerArg;
1461   XML_Bool oldDefaultExpandInternalEntities;
1462   XML_Parser oldExternalEntityRefHandlerArg;
1463 #ifdef XML_DTD
1464   enum XML_ParamEntityParsing oldParamEntityParsing;
1465   int oldInEntityValue;
1466 #endif
1467   XML_Bool oldns_triplets;
1468   /* Note that the new parser shares the same hash secret as the old
1469      parser, so that dtdCopy and copyEntityTable can lookup values
1470      from hash tables associated with either parser without us having
1471      to worry which hash secrets each table has.
1472   */
1473   unsigned long oldhash_secret_salt;
1474   XML_Bool oldReparseDeferralEnabled;
1475 
1476   /* Validate the oldParser parameter before we pull everything out of it */
1477   if (oldParser == NULL)
1478     return NULL;
1479 
1480   /* Stash the original parser contents on the stack */
1481   oldDtd = parser->m_dtd;
1482   oldStartElementHandler = parser->m_startElementHandler;
1483   oldEndElementHandler = parser->m_endElementHandler;
1484   oldCharacterDataHandler = parser->m_characterDataHandler;
1485   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1486   oldCommentHandler = parser->m_commentHandler;
1487   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1488   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1489   oldDefaultHandler = parser->m_defaultHandler;
1490   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1491   oldNotationDeclHandler = parser->m_notationDeclHandler;
1492   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1493   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1494   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1495   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1496   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1497   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1498   oldElementDeclHandler = parser->m_elementDeclHandler;
1499   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1500   oldEntityDeclHandler = parser->m_entityDeclHandler;
1501   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1502   oldDeclElementType = parser->m_declElementType;
1503 
1504   oldUserData = parser->m_userData;
1505   oldHandlerArg = parser->m_handlerArg;
1506   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1507   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1508 #ifdef XML_DTD
1509   oldParamEntityParsing = parser->m_paramEntityParsing;
1510   oldInEntityValue = parser->m_prologState.inEntityValue;
1511 #endif
1512   oldns_triplets = parser->m_ns_triplets;
1513   /* Note that the new parser shares the same hash secret as the old
1514      parser, so that dtdCopy and copyEntityTable can lookup values
1515      from hash tables associated with either parser without us having
1516      to worry which hash secrets each table has.
1517   */
1518   oldhash_secret_salt = parser->m_hash_secret_salt;
1519   oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1520 
1521 #ifdef XML_DTD
1522   if (! context)
1523     newDtd = oldDtd;
1524 #endif /* XML_DTD */
1525 
1526   /* Note that the magical uses of the pre-processor to make field
1527      access look more like C++ require that `parser' be overwritten
1528      here.  This makes this function more painful to follow than it
1529      would be otherwise.
1530   */
1531   if (parser->m_ns) {
1532     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1533     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1534   } else {
1535     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1536   }
1537 
1538   if (! parser)
1539     return NULL;
1540 
1541   parser->m_startElementHandler = oldStartElementHandler;
1542   parser->m_endElementHandler = oldEndElementHandler;
1543   parser->m_characterDataHandler = oldCharacterDataHandler;
1544   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1545   parser->m_commentHandler = oldCommentHandler;
1546   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1547   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1548   parser->m_defaultHandler = oldDefaultHandler;
1549   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1550   parser->m_notationDeclHandler = oldNotationDeclHandler;
1551   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1552   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1553   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1554   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1555   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1556   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1557   parser->m_elementDeclHandler = oldElementDeclHandler;
1558   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1559   parser->m_entityDeclHandler = oldEntityDeclHandler;
1560   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1561   parser->m_declElementType = oldDeclElementType;
1562   parser->m_userData = oldUserData;
1563   if (oldUserData == oldHandlerArg)
1564     parser->m_handlerArg = parser->m_userData;
1565   else
1566     parser->m_handlerArg = parser;
1567   if (oldExternalEntityRefHandlerArg != oldParser)
1568     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1569   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1570   parser->m_ns_triplets = oldns_triplets;
1571   parser->m_hash_secret_salt = oldhash_secret_salt;
1572   parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1573   parser->m_parentParser = oldParser;
1574 #ifdef XML_DTD
1575   parser->m_paramEntityParsing = oldParamEntityParsing;
1576   parser->m_prologState.inEntityValue = oldInEntityValue;
1577   if (context) {
1578 #endif /* XML_DTD */
1579     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1580         || ! setContext(parser, context)) {
1581       XML_ParserFree(parser);
1582       return NULL;
1583     }
1584     parser->m_processor = externalEntityInitProcessor;
1585 #ifdef XML_DTD
1586   } else {
1587     /* The DTD instance referenced by parser->m_dtd is shared between the
1588        document's root parser and external PE parsers, therefore one does not
1589        need to call setContext. In addition, one also *must* not call
1590        setContext, because this would overwrite existing prefix->binding
1591        pointers in parser->m_dtd with ones that get destroyed with the external
1592        PE parser. This would leave those prefixes with dangling pointers.
1593     */
1594     parser->m_isParamEntity = XML_TRUE;
1595     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1596     parser->m_processor = externalParEntInitProcessor;
1597   }
1598 #endif /* XML_DTD */
1599   return parser;
1600 }
1601 
1602 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1603 destroyBindings(BINDING *bindings, XML_Parser parser) {
1604   for (;;) {
1605     BINDING *b = bindings;
1606     if (! b)
1607       break;
1608     bindings = b->nextTagBinding;
1609     FREE(parser, b->uri);
1610     FREE(parser, b);
1611   }
1612 }
1613 
1614 void XMLCALL
XML_ParserFree(XML_Parser parser)1615 XML_ParserFree(XML_Parser parser) {
1616   TAG *tagList;
1617   OPEN_INTERNAL_ENTITY *entityList;
1618   if (parser == NULL)
1619     return;
1620   /* free m_tagStack and m_freeTagList */
1621   tagList = parser->m_tagStack;
1622   for (;;) {
1623     TAG *p;
1624     if (tagList == NULL) {
1625       if (parser->m_freeTagList == NULL)
1626         break;
1627       tagList = parser->m_freeTagList;
1628       parser->m_freeTagList = NULL;
1629     }
1630     p = tagList;
1631     tagList = tagList->parent;
1632     FREE(parser, p->buf);
1633     destroyBindings(p->bindings, parser);
1634     FREE(parser, p);
1635   }
1636   /* free m_openInternalEntities and m_freeInternalEntities */
1637   entityList = parser->m_openInternalEntities;
1638   for (;;) {
1639     OPEN_INTERNAL_ENTITY *openEntity;
1640     if (entityList == NULL) {
1641       if (parser->m_freeInternalEntities == NULL)
1642         break;
1643       entityList = parser->m_freeInternalEntities;
1644       parser->m_freeInternalEntities = NULL;
1645     }
1646     openEntity = entityList;
1647     entityList = entityList->next;
1648     FREE(parser, openEntity);
1649   }
1650   /* free m_openAttributeEntities and m_freeAttributeEntities */
1651   entityList = parser->m_openAttributeEntities;
1652   for (;;) {
1653     OPEN_INTERNAL_ENTITY *openEntity;
1654     if (entityList == NULL) {
1655       if (parser->m_freeAttributeEntities == NULL)
1656         break;
1657       entityList = parser->m_freeAttributeEntities;
1658       parser->m_freeAttributeEntities = NULL;
1659     }
1660     openEntity = entityList;
1661     entityList = entityList->next;
1662     FREE(parser, openEntity);
1663   }
1664   /* free m_openValueEntities and m_freeValueEntities */
1665   entityList = parser->m_openValueEntities;
1666   for (;;) {
1667     OPEN_INTERNAL_ENTITY *openEntity;
1668     if (entityList == NULL) {
1669       if (parser->m_freeValueEntities == NULL)
1670         break;
1671       entityList = parser->m_freeValueEntities;
1672       parser->m_freeValueEntities = NULL;
1673     }
1674     openEntity = entityList;
1675     entityList = entityList->next;
1676     FREE(parser, openEntity);
1677   }
1678   destroyBindings(parser->m_freeBindingList, parser);
1679   destroyBindings(parser->m_inheritedBindings, parser);
1680   poolDestroy(&parser->m_tempPool);
1681   poolDestroy(&parser->m_temp2Pool);
1682   FREE(parser, (void *)parser->m_protocolEncodingName);
1683 #ifdef XML_DTD
1684   /* external parameter entity parsers share the DTD structure
1685      parser->m_dtd with the root parser, so we must not destroy it
1686   */
1687   if (! parser->m_isParamEntity && parser->m_dtd)
1688 #else
1689   if (parser->m_dtd)
1690 #endif /* XML_DTD */
1691     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1692                &parser->m_mem);
1693   FREE(parser, (void *)parser->m_atts);
1694 #ifdef XML_ATTR_INFO
1695   FREE(parser, (void *)parser->m_attInfo);
1696 #endif
1697   FREE(parser, parser->m_groupConnector);
1698   FREE(parser, parser->m_buffer);
1699   FREE(parser, parser->m_dataBuf);
1700   FREE(parser, parser->m_nsAtts);
1701   FREE(parser, parser->m_unknownEncodingMem);
1702   if (parser->m_unknownEncodingRelease)
1703     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1704   FREE(parser, parser);
1705 }
1706 
1707 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1708 XML_UseParserAsHandlerArg(XML_Parser parser) {
1709   if (parser != NULL)
1710     parser->m_handlerArg = parser;
1711 }
1712 
1713 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1714 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1715   if (parser == NULL)
1716     return XML_ERROR_INVALID_ARGUMENT;
1717 #ifdef XML_DTD
1718   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1719   if (parserBusy(parser))
1720     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1721   parser->m_useForeignDTD = useDTD;
1722   return XML_ERROR_NONE;
1723 #else
1724   UNUSED_P(useDTD);
1725   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1726 #endif
1727 }
1728 
1729 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1730 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1731   if (parser == NULL)
1732     return;
1733   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1734   if (parserBusy(parser))
1735     return;
1736   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1737 }
1738 
1739 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1740 XML_SetUserData(XML_Parser parser, void *p) {
1741   if (parser == NULL)
1742     return;
1743   if (parser->m_handlerArg == parser->m_userData)
1744     parser->m_handlerArg = parser->m_userData = p;
1745   else
1746     parser->m_userData = p;
1747 }
1748 
1749 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1750 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1751   if (parser == NULL)
1752     return XML_STATUS_ERROR;
1753   if (p) {
1754     p = poolCopyString(&parser->m_dtd->pool, p);
1755     if (! p)
1756       return XML_STATUS_ERROR;
1757     parser->m_curBase = p;
1758   } else
1759     parser->m_curBase = NULL;
1760   return XML_STATUS_OK;
1761 }
1762 
1763 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1764 XML_GetBase(XML_Parser parser) {
1765   if (parser == NULL)
1766     return NULL;
1767   return parser->m_curBase;
1768 }
1769 
1770 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1771 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1772   if (parser == NULL)
1773     return -1;
1774   return parser->m_nSpecifiedAtts;
1775 }
1776 
1777 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1778 XML_GetIdAttributeIndex(XML_Parser parser) {
1779   if (parser == NULL)
1780     return -1;
1781   return parser->m_idAttIndex;
1782 }
1783 
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser. Only available when built with XML_ATTR_INFO. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1792 
1793 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1794 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1795                       XML_EndElementHandler end) {
1796   if (parser == NULL)
1797     return;
1798   parser->m_startElementHandler = start;
1799   parser->m_endElementHandler = end;
1800 }
1801 
1802 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1803 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1804   if (parser != NULL)
1805     parser->m_startElementHandler = start;
1806 }
1807 
1808 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1809 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1810   if (parser != NULL)
1811     parser->m_endElementHandler = end;
1812 }
1813 
1814 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1815 XML_SetCharacterDataHandler(XML_Parser parser,
1816                             XML_CharacterDataHandler handler) {
1817   if (parser != NULL)
1818     parser->m_characterDataHandler = handler;
1819 }
1820 
1821 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1822 XML_SetProcessingInstructionHandler(XML_Parser parser,
1823                                     XML_ProcessingInstructionHandler handler) {
1824   if (parser != NULL)
1825     parser->m_processingInstructionHandler = handler;
1826 }
1827 
1828 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1829 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1830   if (parser != NULL)
1831     parser->m_commentHandler = handler;
1832 }
1833 
1834 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1835 XML_SetCdataSectionHandler(XML_Parser parser,
1836                            XML_StartCdataSectionHandler start,
1837                            XML_EndCdataSectionHandler end) {
1838   if (parser == NULL)
1839     return;
1840   parser->m_startCdataSectionHandler = start;
1841   parser->m_endCdataSectionHandler = end;
1842 }
1843 
1844 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1845 XML_SetStartCdataSectionHandler(XML_Parser parser,
1846                                 XML_StartCdataSectionHandler start) {
1847   if (parser != NULL)
1848     parser->m_startCdataSectionHandler = start;
1849 }
1850 
1851 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1852 XML_SetEndCdataSectionHandler(XML_Parser parser,
1853                               XML_EndCdataSectionHandler end) {
1854   if (parser != NULL)
1855     parser->m_endCdataSectionHandler = end;
1856 }
1857 
1858 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1859 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1860   if (parser == NULL)
1861     return;
1862   parser->m_defaultHandler = handler;
1863   parser->m_defaultExpandInternalEntities = XML_FALSE;
1864 }
1865 
1866 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1867 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1868   if (parser == NULL)
1869     return;
1870   parser->m_defaultHandler = handler;
1871   parser->m_defaultExpandInternalEntities = XML_TRUE;
1872 }
1873 
1874 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1875 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1876                           XML_EndDoctypeDeclHandler end) {
1877   if (parser == NULL)
1878     return;
1879   parser->m_startDoctypeDeclHandler = start;
1880   parser->m_endDoctypeDeclHandler = end;
1881 }
1882 
1883 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1884 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1885                                XML_StartDoctypeDeclHandler start) {
1886   if (parser != NULL)
1887     parser->m_startDoctypeDeclHandler = start;
1888 }
1889 
1890 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1891 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1892   if (parser != NULL)
1893     parser->m_endDoctypeDeclHandler = end;
1894 }
1895 
1896 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1897 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1898                                  XML_UnparsedEntityDeclHandler handler) {
1899   if (parser != NULL)
1900     parser->m_unparsedEntityDeclHandler = handler;
1901 }
1902 
1903 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1904 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1905   if (parser != NULL)
1906     parser->m_notationDeclHandler = handler;
1907 }
1908 
1909 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1910 XML_SetNamespaceDeclHandler(XML_Parser parser,
1911                             XML_StartNamespaceDeclHandler start,
1912                             XML_EndNamespaceDeclHandler end) {
1913   if (parser == NULL)
1914     return;
1915   parser->m_startNamespaceDeclHandler = start;
1916   parser->m_endNamespaceDeclHandler = end;
1917 }
1918 
1919 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1920 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1921                                  XML_StartNamespaceDeclHandler start) {
1922   if (parser != NULL)
1923     parser->m_startNamespaceDeclHandler = start;
1924 }
1925 
1926 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1927 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1928                                XML_EndNamespaceDeclHandler end) {
1929   if (parser != NULL)
1930     parser->m_endNamespaceDeclHandler = end;
1931 }
1932 
1933 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1934 XML_SetNotStandaloneHandler(XML_Parser parser,
1935                             XML_NotStandaloneHandler handler) {
1936   if (parser != NULL)
1937     parser->m_notStandaloneHandler = handler;
1938 }
1939 
1940 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1941 XML_SetExternalEntityRefHandler(XML_Parser parser,
1942                                 XML_ExternalEntityRefHandler handler) {
1943   if (parser != NULL)
1944     parser->m_externalEntityRefHandler = handler;
1945 }
1946 
1947 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1948 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1949   if (parser == NULL)
1950     return;
1951   if (arg)
1952     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1953   else
1954     parser->m_externalEntityRefHandlerArg = parser;
1955 }
1956 
1957 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1958 XML_SetSkippedEntityHandler(XML_Parser parser,
1959                             XML_SkippedEntityHandler handler) {
1960   if (parser != NULL)
1961     parser->m_skippedEntityHandler = handler;
1962 }
1963 
1964 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1965 XML_SetUnknownEncodingHandler(XML_Parser parser,
1966                               XML_UnknownEncodingHandler handler, void *data) {
1967   if (parser == NULL)
1968     return;
1969   parser->m_unknownEncodingHandler = handler;
1970   parser->m_unknownEncodingHandlerData = data;
1971 }
1972 
1973 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1974 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1975   if (parser != NULL)
1976     parser->m_elementDeclHandler = eldecl;
1977 }
1978 
1979 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1980 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1981   if (parser != NULL)
1982     parser->m_attlistDeclHandler = attdecl;
1983 }
1984 
1985 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1986 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1987   if (parser != NULL)
1988     parser->m_entityDeclHandler = handler;
1989 }
1990 
1991 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1992 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1993   if (parser != NULL)
1994     parser->m_xmlDeclHandler = handler;
1995 }
1996 
1997 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1998 XML_SetParamEntityParsing(XML_Parser parser,
1999                           enum XML_ParamEntityParsing peParsing) {
2000   if (parser == NULL)
2001     return 0;
2002   /* block after XML_Parse()/XML_ParseBuffer() has been called */
2003   if (parserBusy(parser))
2004     return 0;
2005 #ifdef XML_DTD
2006   parser->m_paramEntityParsing = peParsing;
2007   return 1;
2008 #else
2009   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2010 #endif
2011 }
2012 
2013 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)2014 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2015   if (parser == NULL)
2016     return 0;
2017   if (parser->m_parentParser)
2018     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
2019   /* block after XML_Parse()/XML_ParseBuffer() has been called */
2020   if (parserBusy(parser))
2021     return 0;
2022   parser->m_hash_secret_salt = hash_salt;
2023   return 1;
2024 }
2025 
2026 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)2027 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2028   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2029     if (parser != NULL)
2030       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2031     return XML_STATUS_ERROR;
2032   }
2033   switch (parser->m_parsingStatus.parsing) {
2034   case XML_SUSPENDED:
2035     parser->m_errorCode = XML_ERROR_SUSPENDED;
2036     return XML_STATUS_ERROR;
2037   case XML_FINISHED:
2038     parser->m_errorCode = XML_ERROR_FINISHED;
2039     return XML_STATUS_ERROR;
2040   case XML_INITIALIZED:
2041     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2042       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2043       return XML_STATUS_ERROR;
2044     }
2045     /* fall through */
2046   default:
2047     parser->m_parsingStatus.parsing = XML_PARSING;
2048   }
2049 
2050 #if XML_CONTEXT_BYTES == 0
2051   if (parser->m_bufferPtr == parser->m_bufferEnd) {
2052     const char *end;
2053     int nLeftOver;
2054     enum XML_Status result;
2055     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2056     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2057       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2058       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2059       parser->m_processor = errorProcessor;
2060       return XML_STATUS_ERROR;
2061     }
2062     // though this isn't a buffer request, we assume that `len` is the app's
2063     // preferred buffer fill size, and therefore save it here.
2064     parser->m_lastBufferRequestSize = len;
2065     parser->m_parseEndByteIndex += len;
2066     parser->m_positionPtr = s;
2067     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2068 
2069     parser->m_errorCode
2070         = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2071 
2072     if (parser->m_errorCode != XML_ERROR_NONE) {
2073       parser->m_eventEndPtr = parser->m_eventPtr;
2074       parser->m_processor = errorProcessor;
2075       return XML_STATUS_ERROR;
2076     } else {
2077       switch (parser->m_parsingStatus.parsing) {
2078       case XML_SUSPENDED:
2079         result = XML_STATUS_SUSPENDED;
2080         break;
2081       case XML_INITIALIZED:
2082       case XML_PARSING:
2083         if (isFinal) {
2084           parser->m_parsingStatus.parsing = XML_FINISHED;
2085           return XML_STATUS_OK;
2086         }
2087       /* fall through */
2088       default:
2089         result = XML_STATUS_OK;
2090       }
2091     }
2092 
2093     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2094                       &parser->m_position);
2095     nLeftOver = s + len - end;
2096     if (nLeftOver) {
2097       // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2098       // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2099       const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2100       parser->m_parsingStatus.parsing = XML_PARSING;
2101       void *const temp = XML_GetBuffer(parser, nLeftOver);
2102       parser->m_parsingStatus.parsing = originalStatus;
2103       // GetBuffer may have overwritten this, but we want to remember what the
2104       // app requested, not how many bytes were left over after parsing.
2105       parser->m_lastBufferRequestSize = len;
2106       if (temp == NULL) {
2107         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2108         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2109         parser->m_processor = errorProcessor;
2110         return XML_STATUS_ERROR;
2111       }
2112       // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2113       // don't have any data to preserve, and can copy straight into the start
2114       // of the buffer rather than the GetBuffer return pointer (which may be
2115       // pointing further into the allocated buffer).
2116       memcpy(parser->m_buffer, end, nLeftOver);
2117     }
2118     parser->m_bufferPtr = parser->m_buffer;
2119     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2120     parser->m_positionPtr = parser->m_bufferPtr;
2121     parser->m_parseEndPtr = parser->m_bufferEnd;
2122     parser->m_eventPtr = parser->m_bufferPtr;
2123     parser->m_eventEndPtr = parser->m_bufferPtr;
2124     return result;
2125   }
2126 #endif /* XML_CONTEXT_BYTES == 0 */
2127   void *buff = XML_GetBuffer(parser, len);
2128   if (buff == NULL)
2129     return XML_STATUS_ERROR;
2130   if (len > 0) {
2131     assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2132     memcpy(buff, s, len);
2133   }
2134   return XML_ParseBuffer(parser, len, isFinal);
2135 }
2136 
2137 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2138 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2139   const char *start;
2140   enum XML_Status result = XML_STATUS_OK;
2141 
2142   if (parser == NULL)
2143     return XML_STATUS_ERROR;
2144 
2145   if (len < 0) {
2146     parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2147     return XML_STATUS_ERROR;
2148   }
2149 
2150   switch (parser->m_parsingStatus.parsing) {
2151   case XML_SUSPENDED:
2152     parser->m_errorCode = XML_ERROR_SUSPENDED;
2153     return XML_STATUS_ERROR;
2154   case XML_FINISHED:
2155     parser->m_errorCode = XML_ERROR_FINISHED;
2156     return XML_STATUS_ERROR;
2157   case XML_INITIALIZED:
2158     /* Has someone called XML_GetBuffer successfully before? */
2159     if (! parser->m_bufferPtr) {
2160       parser->m_errorCode = XML_ERROR_NO_BUFFER;
2161       return XML_STATUS_ERROR;
2162     }
2163 
2164     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2165       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2166       return XML_STATUS_ERROR;
2167     }
2168     /* fall through */
2169   default:
2170     parser->m_parsingStatus.parsing = XML_PARSING;
2171   }
2172 
2173   start = parser->m_bufferPtr;
2174   parser->m_positionPtr = start;
2175   parser->m_bufferEnd += len;
2176   parser->m_parseEndPtr = parser->m_bufferEnd;
2177   parser->m_parseEndByteIndex += len;
2178   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2179 
2180   parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2181                                       &parser->m_bufferPtr);
2182 
2183   if (parser->m_errorCode != XML_ERROR_NONE) {
2184     parser->m_eventEndPtr = parser->m_eventPtr;
2185     parser->m_processor = errorProcessor;
2186     return XML_STATUS_ERROR;
2187   } else {
2188     switch (parser->m_parsingStatus.parsing) {
2189     case XML_SUSPENDED:
2190       result = XML_STATUS_SUSPENDED;
2191       break;
2192     case XML_INITIALIZED:
2193     case XML_PARSING:
2194       if (isFinal) {
2195         parser->m_parsingStatus.parsing = XML_FINISHED;
2196         return result;
2197       }
2198     default:; /* should not happen */
2199     }
2200   }
2201 
2202   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2203                     parser->m_bufferPtr, &parser->m_position);
2204   parser->m_positionPtr = parser->m_bufferPtr;
2205   return result;
2206 }
2207 
2208 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2209 XML_GetBuffer(XML_Parser parser, int len) {
2210   if (parser == NULL)
2211     return NULL;
2212   if (len < 0) {
2213     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2214     return NULL;
2215   }
2216   switch (parser->m_parsingStatus.parsing) {
2217   case XML_SUSPENDED:
2218     parser->m_errorCode = XML_ERROR_SUSPENDED;
2219     return NULL;
2220   case XML_FINISHED:
2221     parser->m_errorCode = XML_ERROR_FINISHED;
2222     return NULL;
2223   default:;
2224   }
2225 
2226   // whether or not the request succeeds, `len` seems to be the app's preferred
2227   // buffer fill size; remember it.
2228   parser->m_lastBufferRequestSize = len;
2229   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2230       || parser->m_buffer == NULL) {
2231 #if XML_CONTEXT_BYTES > 0
2232     int keep;
2233 #endif /* XML_CONTEXT_BYTES > 0 */
2234     /* Do not invoke signed arithmetic overflow: */
2235     int neededSize = (int)((unsigned)len
2236                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2237                                parser->m_bufferEnd, parser->m_bufferPtr));
2238     if (neededSize < 0) {
2239       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2240       return NULL;
2241     }
2242 #if XML_CONTEXT_BYTES > 0
2243     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2244     if (keep > XML_CONTEXT_BYTES)
2245       keep = XML_CONTEXT_BYTES;
2246     /* Detect and prevent integer overflow */
2247     if (keep > INT_MAX - neededSize) {
2248       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2249       return NULL;
2250     }
2251     neededSize += keep;
2252 #endif /* XML_CONTEXT_BYTES > 0 */
2253     if (parser->m_buffer && parser->m_bufferPtr
2254         && neededSize
2255                <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2256 #if XML_CONTEXT_BYTES > 0
2257       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2258         int offset
2259             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2260               - keep;
2261         /* The buffer pointers cannot be NULL here; we have at least some bytes
2262          * in the buffer */
2263         memmove(parser->m_buffer, &parser->m_buffer[offset],
2264                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2265         parser->m_bufferEnd -= offset;
2266         parser->m_bufferPtr -= offset;
2267       }
2268 #else
2269       memmove(parser->m_buffer, parser->m_bufferPtr,
2270               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2271       parser->m_bufferEnd
2272           = parser->m_buffer
2273             + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2274       parser->m_bufferPtr = parser->m_buffer;
2275 #endif /* XML_CONTEXT_BYTES > 0 */
2276     } else {
2277       char *newBuf;
2278       int bufferSize
2279           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2280       if (bufferSize == 0)
2281         bufferSize = INIT_BUFFER_SIZE;
2282       do {
2283         /* Do not invoke signed arithmetic overflow: */
2284         bufferSize = (int)(2U * (unsigned)bufferSize);
2285       } while (bufferSize < neededSize && bufferSize > 0);
2286       if (bufferSize <= 0) {
2287         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2288         return NULL;
2289       }
2290       newBuf = (char *)MALLOC(parser, bufferSize);
2291       if (newBuf == 0) {
2292         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2293         return NULL;
2294       }
2295       parser->m_bufferLim = newBuf + bufferSize;
2296 #if XML_CONTEXT_BYTES > 0
2297       if (parser->m_bufferPtr) {
2298         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2299                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2300                    + keep);
2301         FREE(parser, parser->m_buffer);
2302         parser->m_buffer = newBuf;
2303         parser->m_bufferEnd
2304             = parser->m_buffer
2305               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2306               + keep;
2307         parser->m_bufferPtr = parser->m_buffer + keep;
2308       } else {
2309         /* This must be a brand new buffer with no data in it yet */
2310         parser->m_bufferEnd = newBuf;
2311         parser->m_bufferPtr = parser->m_buffer = newBuf;
2312       }
2313 #else
2314       if (parser->m_bufferPtr) {
2315         memcpy(newBuf, parser->m_bufferPtr,
2316                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2317         FREE(parser, parser->m_buffer);
2318         parser->m_bufferEnd
2319             = newBuf
2320               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2321       } else {
2322         /* This must be a brand new buffer with no data in it yet */
2323         parser->m_bufferEnd = newBuf;
2324       }
2325       parser->m_bufferPtr = parser->m_buffer = newBuf;
2326 #endif /* XML_CONTEXT_BYTES > 0 */
2327     }
2328     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2329     parser->m_positionPtr = NULL;
2330   }
2331   return parser->m_bufferEnd;
2332 }
2333 
2334 static void
triggerReenter(XML_Parser parser)2335 triggerReenter(XML_Parser parser) {
2336   parser->m_reenter = XML_TRUE;
2337 }
2338 
2339 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2340 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2341   if (parser == NULL)
2342     return XML_STATUS_ERROR;
2343   switch (parser->m_parsingStatus.parsing) {
2344   case XML_INITIALIZED:
2345     parser->m_errorCode = XML_ERROR_NOT_STARTED;
2346     return XML_STATUS_ERROR;
2347   case XML_SUSPENDED:
2348     if (resumable) {
2349       parser->m_errorCode = XML_ERROR_SUSPENDED;
2350       return XML_STATUS_ERROR;
2351     }
2352     parser->m_parsingStatus.parsing = XML_FINISHED;
2353     break;
2354   case XML_FINISHED:
2355     parser->m_errorCode = XML_ERROR_FINISHED;
2356     return XML_STATUS_ERROR;
2357   case XML_PARSING:
2358     if (resumable) {
2359 #ifdef XML_DTD
2360       if (parser->m_isParamEntity) {
2361         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2362         return XML_STATUS_ERROR;
2363       }
2364 #endif
2365       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2366     } else
2367       parser->m_parsingStatus.parsing = XML_FINISHED;
2368     break;
2369   default:
2370     assert(0);
2371   }
2372   return XML_STATUS_OK;
2373 }
2374 
2375 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2376 XML_ResumeParser(XML_Parser parser) {
2377   enum XML_Status result = XML_STATUS_OK;
2378 
2379   if (parser == NULL)
2380     return XML_STATUS_ERROR;
2381   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2382     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2383     return XML_STATUS_ERROR;
2384   }
2385   parser->m_parsingStatus.parsing = XML_PARSING;
2386 
2387   parser->m_errorCode = callProcessor(
2388       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2389 
2390   if (parser->m_errorCode != XML_ERROR_NONE) {
2391     parser->m_eventEndPtr = parser->m_eventPtr;
2392     parser->m_processor = errorProcessor;
2393     return XML_STATUS_ERROR;
2394   } else {
2395     switch (parser->m_parsingStatus.parsing) {
2396     case XML_SUSPENDED:
2397       result = XML_STATUS_SUSPENDED;
2398       break;
2399     case XML_INITIALIZED:
2400     case XML_PARSING:
2401       if (parser->m_parsingStatus.finalBuffer) {
2402         parser->m_parsingStatus.parsing = XML_FINISHED;
2403         return result;
2404       }
2405     default:;
2406     }
2407   }
2408 
2409   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2410                     parser->m_bufferPtr, &parser->m_position);
2411   parser->m_positionPtr = parser->m_bufferPtr;
2412   return result;
2413 }
2414 
2415 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2416 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2417   if (parser == NULL)
2418     return;
2419   assert(status != NULL);
2420   *status = parser->m_parsingStatus;
2421 }
2422 
2423 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2424 XML_GetErrorCode(XML_Parser parser) {
2425   if (parser == NULL)
2426     return XML_ERROR_INVALID_ARGUMENT;
2427   return parser->m_errorCode;
2428 }
2429 
2430 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2431 XML_GetCurrentByteIndex(XML_Parser parser) {
2432   if (parser == NULL)
2433     return -1;
2434   if (parser->m_eventPtr)
2435     return (XML_Index)(parser->m_parseEndByteIndex
2436                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2437   return -1;
2438 }
2439 
2440 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2441 XML_GetCurrentByteCount(XML_Parser parser) {
2442   if (parser == NULL)
2443     return 0;
2444   if (parser->m_eventEndPtr && parser->m_eventPtr)
2445     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2446   return 0;
2447 }
2448 
2449 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2450 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2451 #if XML_CONTEXT_BYTES > 0
2452   if (parser == NULL)
2453     return NULL;
2454   if (parser->m_eventPtr && parser->m_buffer) {
2455     if (offset != NULL)
2456       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2457     if (size != NULL)
2458       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2459     return parser->m_buffer;
2460   }
2461 #else
2462   (void)parser;
2463   (void)offset;
2464   (void)size;
2465 #endif /* XML_CONTEXT_BYTES > 0 */
2466   return (const char *)0;
2467 }
2468 
2469 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2470 XML_GetCurrentLineNumber(XML_Parser parser) {
2471   if (parser == NULL)
2472     return 0;
2473   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2474     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2475                       parser->m_eventPtr, &parser->m_position);
2476     parser->m_positionPtr = parser->m_eventPtr;
2477   }
2478   return parser->m_position.lineNumber + 1;
2479 }
2480 
2481 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2482 XML_GetCurrentColumnNumber(XML_Parser parser) {
2483   if (parser == NULL)
2484     return 0;
2485   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2486     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2487                       parser->m_eventPtr, &parser->m_position);
2488     parser->m_positionPtr = parser->m_eventPtr;
2489   }
2490   return parser->m_position.columnNumber;
2491 }
2492 
2493 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2494 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2495   if (parser != NULL)
2496     FREE(parser, model);
2497 }
2498 
2499 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2500 XML_MemMalloc(XML_Parser parser, size_t size) {
2501   if (parser == NULL)
2502     return NULL;
2503   return MALLOC(parser, size);
2504 }
2505 
2506 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2507 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2508   if (parser == NULL)
2509     return NULL;
2510   return REALLOC(parser, ptr, size);
2511 }
2512 
2513 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2514 XML_MemFree(XML_Parser parser, void *ptr) {
2515   if (parser != NULL)
2516     FREE(parser, ptr);
2517 }
2518 
2519 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2520 XML_DefaultCurrent(XML_Parser parser) {
2521   if (parser == NULL)
2522     return;
2523   if (parser->m_defaultHandler) {
2524     if (parser->m_openInternalEntities)
2525       reportDefault(parser, parser->m_internalEncoding,
2526                     parser->m_openInternalEntities->internalEventPtr,
2527                     parser->m_openInternalEntities->internalEventEndPtr);
2528     else
2529       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2530                     parser->m_eventEndPtr);
2531   }
2532 }
2533 
2534 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2535 XML_ErrorString(enum XML_Error code) {
2536   switch (code) {
2537   case XML_ERROR_NONE:
2538     return NULL;
2539   case XML_ERROR_NO_MEMORY:
2540     return XML_L("out of memory");
2541   case XML_ERROR_SYNTAX:
2542     return XML_L("syntax error");
2543   case XML_ERROR_NO_ELEMENTS:
2544     return XML_L("no element found");
2545   case XML_ERROR_INVALID_TOKEN:
2546     return XML_L("not well-formed (invalid token)");
2547   case XML_ERROR_UNCLOSED_TOKEN:
2548     return XML_L("unclosed token");
2549   case XML_ERROR_PARTIAL_CHAR:
2550     return XML_L("partial character");
2551   case XML_ERROR_TAG_MISMATCH:
2552     return XML_L("mismatched tag");
2553   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2554     return XML_L("duplicate attribute");
2555   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2556     return XML_L("junk after document element");
2557   case XML_ERROR_PARAM_ENTITY_REF:
2558     return XML_L("illegal parameter entity reference");
2559   case XML_ERROR_UNDEFINED_ENTITY:
2560     return XML_L("undefined entity");
2561   case XML_ERROR_RECURSIVE_ENTITY_REF:
2562     return XML_L("recursive entity reference");
2563   case XML_ERROR_ASYNC_ENTITY:
2564     return XML_L("asynchronous entity");
2565   case XML_ERROR_BAD_CHAR_REF:
2566     return XML_L("reference to invalid character number");
2567   case XML_ERROR_BINARY_ENTITY_REF:
2568     return XML_L("reference to binary entity");
2569   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2570     return XML_L("reference to external entity in attribute");
2571   case XML_ERROR_MISPLACED_XML_PI:
2572     return XML_L("XML or text declaration not at start of entity");
2573   case XML_ERROR_UNKNOWN_ENCODING:
2574     return XML_L("unknown encoding");
2575   case XML_ERROR_INCORRECT_ENCODING:
2576     return XML_L("encoding specified in XML declaration is incorrect");
2577   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2578     return XML_L("unclosed CDATA section");
2579   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2580     return XML_L("error in processing external entity reference");
2581   case XML_ERROR_NOT_STANDALONE:
2582     return XML_L("document is not standalone");
2583   case XML_ERROR_UNEXPECTED_STATE:
2584     return XML_L("unexpected parser state - please send a bug report");
2585   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2586     return XML_L("entity declared in parameter entity");
2587   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2588     return XML_L("requested feature requires XML_DTD support in Expat");
2589   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2590     return XML_L("cannot change setting once parsing has begun");
2591   /* Added in 1.95.7. */
2592   case XML_ERROR_UNBOUND_PREFIX:
2593     return XML_L("unbound prefix");
2594   /* Added in 1.95.8. */
2595   case XML_ERROR_UNDECLARING_PREFIX:
2596     return XML_L("must not undeclare prefix");
2597   case XML_ERROR_INCOMPLETE_PE:
2598     return XML_L("incomplete markup in parameter entity");
2599   case XML_ERROR_XML_DECL:
2600     return XML_L("XML declaration not well-formed");
2601   case XML_ERROR_TEXT_DECL:
2602     return XML_L("text declaration not well-formed");
2603   case XML_ERROR_PUBLICID:
2604     return XML_L("illegal character(s) in public id");
2605   case XML_ERROR_SUSPENDED:
2606     return XML_L("parser suspended");
2607   case XML_ERROR_NOT_SUSPENDED:
2608     return XML_L("parser not suspended");
2609   case XML_ERROR_ABORTED:
2610     return XML_L("parsing aborted");
2611   case XML_ERROR_FINISHED:
2612     return XML_L("parsing finished");
2613   case XML_ERROR_SUSPEND_PE:
2614     return XML_L("cannot suspend in external parameter entity");
2615   /* Added in 2.0.0. */
2616   case XML_ERROR_RESERVED_PREFIX_XML:
2617     return XML_L(
2618         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2619   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2620     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2621   case XML_ERROR_RESERVED_NAMESPACE_URI:
2622     return XML_L(
2623         "prefix must not be bound to one of the reserved namespace names");
2624   /* Added in 2.2.5. */
2625   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2626     return XML_L("invalid argument");
2627     /* Added in 2.3.0. */
2628   case XML_ERROR_NO_BUFFER:
2629     return XML_L(
2630         "a successful prior call to function XML_GetBuffer is required");
2631   /* Added in 2.4.0. */
2632   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2633     return XML_L(
2634         "limit on input amplification factor (from DTD and entities) breached");
2635   /* Added in 2.6.4. */
2636   case XML_ERROR_NOT_STARTED:
2637     return XML_L("parser not started");
2638   }
2639   return NULL;
2640 }
2641 
2642 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2643 XML_ExpatVersion(void) {
2644   /* V1 is used to string-ize the version number. However, it would
2645      string-ize the actual version macro *names* unless we get them
2646      substituted before being passed to V1. CPP is defined to expand
2647      a macro, then rescan for more expansions. Thus, we use V2 to expand
2648      the version macros, then CPP will expand the resulting V1() macro
2649      with the correct numerals. */
2650   /* ### I'm assuming cpp is portable in this respect... */
2651 
2652 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2653 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2654 
2655   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2656 
2657 #undef V1
2658 #undef V2
2659 }
2660 
2661 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2662 XML_ExpatVersionInfo(void) {
2663   XML_Expat_Version version;
2664 
2665   version.major = XML_MAJOR_VERSION;
2666   version.minor = XML_MINOR_VERSION;
2667   version.micro = XML_MICRO_VERSION;
2668 
2669   return version;
2670 }
2671 
2672 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2673 XML_GetFeatureList(void) {
2674   static const XML_Feature features[] = {
2675       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2676        sizeof(XML_Char)},
2677       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2678        sizeof(XML_LChar)},
2679 #ifdef XML_UNICODE
2680       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2681 #endif
2682 #ifdef XML_UNICODE_WCHAR_T
2683       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2684 #endif
2685 #ifdef XML_DTD
2686       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2687 #endif
2688 #if XML_CONTEXT_BYTES > 0
2689       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2690        XML_CONTEXT_BYTES},
2691 #endif
2692 #ifdef XML_MIN_SIZE
2693       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2694 #endif
2695 #ifdef XML_NS
2696       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2697 #endif
2698 #ifdef XML_LARGE_SIZE
2699       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2700 #endif
2701 #ifdef XML_ATTR_INFO
2702       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2703 #endif
2704 #if XML_GE == 1
2705       /* Added in Expat 2.4.0 for XML_DTD defined and
2706        * added in Expat 2.6.0 for XML_GE == 1. */
2707       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2708        XML_L("XML_BLAP_MAX_AMP"),
2709        (long int)
2710            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2711       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2712        XML_L("XML_BLAP_ACT_THRES"),
2713        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2714       /* Added in Expat 2.6.0. */
2715       {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2716 #endif
2717       {XML_FEATURE_END, NULL, 0}};
2718 
2719   return features;
2720 }
2721 
2722 #if XML_GE == 1
2723 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2724 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2725     XML_Parser parser, float maximumAmplificationFactor) {
2726   if ((parser == NULL) || (parser->m_parentParser != NULL)
2727       || isnan(maximumAmplificationFactor)
2728       || (maximumAmplificationFactor < 1.0f)) {
2729     return XML_FALSE;
2730   }
2731   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2732   return XML_TRUE;
2733 }
2734 
2735 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2736 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2737     XML_Parser parser, unsigned long long activationThresholdBytes) {
2738   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2739     return XML_FALSE;
2740   }
2741   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2742   return XML_TRUE;
2743 }
2744 #endif /* XML_GE == 1 */
2745 
2746 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2747 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2748   if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2749     parser->m_reparseDeferralEnabled = enabled;
2750     return XML_TRUE;
2751   }
2752   return XML_FALSE;
2753 }
2754 
2755 /* Initially tag->rawName always points into the parse buffer;
2756    for those TAG instances opened while the current parse buffer was
2757    processed, and not yet closed, we need to store tag->rawName in a more
2758    permanent location, since the parse buffer is about to be discarded.
2759 */
2760 static XML_Bool
storeRawNames(XML_Parser parser)2761 storeRawNames(XML_Parser parser) {
2762   TAG *tag = parser->m_tagStack;
2763   while (tag) {
2764     int bufSize;
2765     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2766     size_t rawNameLen;
2767     char *rawNameBuf = tag->buf + nameLen;
2768     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2769        at the first entry that has already been copied; everything
2770        below it in the stack is already been accounted for in a
2771        previous call to this function.
2772     */
2773     if (tag->rawName == rawNameBuf)
2774       break;
2775     /* For reuse purposes we need to ensure that the
2776        size of tag->buf is a multiple of sizeof(XML_Char).
2777     */
2778     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2779     /* Detect and prevent integer overflow. */
2780     if (rawNameLen > (size_t)INT_MAX - nameLen)
2781       return XML_FALSE;
2782     bufSize = nameLen + (int)rawNameLen;
2783     if (bufSize > tag->bufEnd - tag->buf) {
2784       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2785       if (temp == NULL)
2786         return XML_FALSE;
2787       /* if tag->name.str points to tag->buf (only when namespace
2788          processing is off) then we have to update it
2789       */
2790       if (tag->name.str == (XML_Char *)tag->buf)
2791         tag->name.str = (XML_Char *)temp;
2792       /* if tag->name.localPart is set (when namespace processing is on)
2793          then update it as well, since it will always point into tag->buf
2794       */
2795       if (tag->name.localPart)
2796         tag->name.localPart
2797             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2798       tag->buf = temp;
2799       tag->bufEnd = temp + bufSize;
2800       rawNameBuf = temp + nameLen;
2801     }
2802     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2803     tag->rawName = rawNameBuf;
2804     tag = tag->parent;
2805   }
2806   return XML_TRUE;
2807 }
2808 
2809 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2810 contentProcessor(XML_Parser parser, const char *start, const char *end,
2811                  const char **endPtr) {
2812   enum XML_Error result = doContent(
2813       parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
2814       endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2815       XML_ACCOUNT_DIRECT);
2816   if (result == XML_ERROR_NONE) {
2817     if (! storeRawNames(parser))
2818       return XML_ERROR_NO_MEMORY;
2819   }
2820   return result;
2821 }
2822 
2823 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2824 externalEntityInitProcessor(XML_Parser parser, const char *start,
2825                             const char *end, const char **endPtr) {
2826   enum XML_Error result = initializeEncoding(parser);
2827   if (result != XML_ERROR_NONE)
2828     return result;
2829   parser->m_processor = externalEntityInitProcessor2;
2830   return externalEntityInitProcessor2(parser, start, end, endPtr);
2831 }
2832 
2833 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2834 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2835                              const char *end, const char **endPtr) {
2836   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2837   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2838   switch (tok) {
2839   case XML_TOK_BOM:
2840 #if XML_GE == 1
2841     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2842                                   XML_ACCOUNT_DIRECT)) {
2843       accountingOnAbort(parser);
2844       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845     }
2846 #endif /* XML_GE == 1 */
2847 
2848     /* If we are at the end of the buffer, this would cause the next stage,
2849        i.e. externalEntityInitProcessor3, to pass control directly to
2850        doContent (by detecting XML_TOK_NONE) without processing any xml text
2851        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2852     */
2853     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2854       *endPtr = next;
2855       return XML_ERROR_NONE;
2856     }
2857     start = next;
2858     break;
2859   case XML_TOK_PARTIAL:
2860     if (! parser->m_parsingStatus.finalBuffer) {
2861       *endPtr = start;
2862       return XML_ERROR_NONE;
2863     }
2864     parser->m_eventPtr = start;
2865     return XML_ERROR_UNCLOSED_TOKEN;
2866   case XML_TOK_PARTIAL_CHAR:
2867     if (! parser->m_parsingStatus.finalBuffer) {
2868       *endPtr = start;
2869       return XML_ERROR_NONE;
2870     }
2871     parser->m_eventPtr = start;
2872     return XML_ERROR_PARTIAL_CHAR;
2873   }
2874   parser->m_processor = externalEntityInitProcessor3;
2875   return externalEntityInitProcessor3(parser, start, end, endPtr);
2876 }
2877 
2878 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2879 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2880                              const char *end, const char **endPtr) {
2881   int tok;
2882   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2883   parser->m_eventPtr = start;
2884   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2885   /* Note: These bytes are accounted later in:
2886            - processXmlDecl
2887            - externalEntityContentProcessor
2888   */
2889   parser->m_eventEndPtr = next;
2890 
2891   switch (tok) {
2892   case XML_TOK_XML_DECL: {
2893     enum XML_Error result;
2894     result = processXmlDecl(parser, 1, start, next);
2895     if (result != XML_ERROR_NONE)
2896       return result;
2897     switch (parser->m_parsingStatus.parsing) {
2898     case XML_SUSPENDED:
2899       *endPtr = next;
2900       return XML_ERROR_NONE;
2901     case XML_FINISHED:
2902       return XML_ERROR_ABORTED;
2903     case XML_PARSING:
2904       if (parser->m_reenter) {
2905         return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
2906       }
2907       /* Fall through */
2908     default:
2909       start = next;
2910     }
2911   } break;
2912   case XML_TOK_PARTIAL:
2913     if (! parser->m_parsingStatus.finalBuffer) {
2914       *endPtr = start;
2915       return XML_ERROR_NONE;
2916     }
2917     return XML_ERROR_UNCLOSED_TOKEN;
2918   case XML_TOK_PARTIAL_CHAR:
2919     if (! parser->m_parsingStatus.finalBuffer) {
2920       *endPtr = start;
2921       return XML_ERROR_NONE;
2922     }
2923     return XML_ERROR_PARTIAL_CHAR;
2924   }
2925   parser->m_processor = externalEntityContentProcessor;
2926   parser->m_tagLevel = 1;
2927   return externalEntityContentProcessor(parser, start, end, endPtr);
2928 }
2929 
2930 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2931 externalEntityContentProcessor(XML_Parser parser, const char *start,
2932                                const char *end, const char **endPtr) {
2933   enum XML_Error result
2934       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2935                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2936                   XML_ACCOUNT_ENTITY_EXPANSION);
2937   if (result == XML_ERROR_NONE) {
2938     if (! storeRawNames(parser))
2939       return XML_ERROR_NO_MEMORY;
2940   }
2941   return result;
2942 }
2943 
2944 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2945 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2946           const char *s, const char *end, const char **nextPtr,
2947           XML_Bool haveMore, enum XML_Account account) {
2948   /* save one level of indirection */
2949   DTD *const dtd = parser->m_dtd;
2950 
2951   const char **eventPP;
2952   const char **eventEndPP;
2953   if (enc == parser->m_encoding) {
2954     eventPP = &parser->m_eventPtr;
2955     eventEndPP = &parser->m_eventEndPtr;
2956   } else {
2957     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2958     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2959   }
2960   *eventPP = s;
2961 
2962   for (;;) {
2963     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2964     int tok = XmlContentTok(enc, s, end, &next);
2965 #if XML_GE == 1
2966     const char *accountAfter
2967         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2968               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2969               : next;
2970     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2971                                   account)) {
2972       accountingOnAbort(parser);
2973       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2974     }
2975 #endif
2976     *eventEndPP = next;
2977     switch (tok) {
2978     case XML_TOK_TRAILING_CR:
2979       if (haveMore) {
2980         *nextPtr = s;
2981         return XML_ERROR_NONE;
2982       }
2983       *eventEndPP = end;
2984       if (parser->m_characterDataHandler) {
2985         XML_Char c = 0xA;
2986         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2987       } else if (parser->m_defaultHandler)
2988         reportDefault(parser, enc, s, end);
2989       /* We are at the end of the final buffer, should we check for
2990          XML_SUSPENDED, XML_FINISHED?
2991       */
2992       if (startTagLevel == 0)
2993         return XML_ERROR_NO_ELEMENTS;
2994       if (parser->m_tagLevel != startTagLevel)
2995         return XML_ERROR_ASYNC_ENTITY;
2996       *nextPtr = end;
2997       return XML_ERROR_NONE;
2998     case XML_TOK_NONE:
2999       if (haveMore) {
3000         *nextPtr = s;
3001         return XML_ERROR_NONE;
3002       }
3003       if (startTagLevel > 0) {
3004         if (parser->m_tagLevel != startTagLevel)
3005           return XML_ERROR_ASYNC_ENTITY;
3006         *nextPtr = s;
3007         return XML_ERROR_NONE;
3008       }
3009       return XML_ERROR_NO_ELEMENTS;
3010     case XML_TOK_INVALID:
3011       *eventPP = next;
3012       return XML_ERROR_INVALID_TOKEN;
3013     case XML_TOK_PARTIAL:
3014       if (haveMore) {
3015         *nextPtr = s;
3016         return XML_ERROR_NONE;
3017       }
3018       return XML_ERROR_UNCLOSED_TOKEN;
3019     case XML_TOK_PARTIAL_CHAR:
3020       if (haveMore) {
3021         *nextPtr = s;
3022         return XML_ERROR_NONE;
3023       }
3024       return XML_ERROR_PARTIAL_CHAR;
3025     case XML_TOK_ENTITY_REF: {
3026       const XML_Char *name;
3027       ENTITY *entity;
3028       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3029           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3030       if (ch) {
3031 #if XML_GE == 1
3032         /* NOTE: We are replacing 4-6 characters original input for 1 character
3033          *       so there is no amplification and hence recording without
3034          *       protection. */
3035         accountingDiffTolerated(parser, tok, (char *)&ch,
3036                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3037                                 XML_ACCOUNT_ENTITY_EXPANSION);
3038 #endif /* XML_GE == 1 */
3039         if (parser->m_characterDataHandler)
3040           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3041         else if (parser->m_defaultHandler)
3042           reportDefault(parser, enc, s, next);
3043         break;
3044       }
3045       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3046                              next - enc->minBytesPerChar);
3047       if (! name)
3048         return XML_ERROR_NO_MEMORY;
3049       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3050       poolDiscard(&dtd->pool);
3051       /* First, determine if a check for an existing declaration is needed;
3052          if yes, check that the entity exists, and that it is internal,
3053          otherwise call the skipped entity or default handler.
3054       */
3055       if (! dtd->hasParamEntityRefs || dtd->standalone) {
3056         if (! entity)
3057           return XML_ERROR_UNDEFINED_ENTITY;
3058         else if (! entity->is_internal)
3059           return XML_ERROR_ENTITY_DECLARED_IN_PE;
3060       } else if (! entity) {
3061         if (parser->m_skippedEntityHandler)
3062           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3063         else if (parser->m_defaultHandler)
3064           reportDefault(parser, enc, s, next);
3065         break;
3066       }
3067       if (entity->open)
3068         return XML_ERROR_RECURSIVE_ENTITY_REF;
3069       if (entity->notation)
3070         return XML_ERROR_BINARY_ENTITY_REF;
3071       if (entity->textPtr) {
3072         enum XML_Error result;
3073         if (! parser->m_defaultExpandInternalEntities) {
3074           if (parser->m_skippedEntityHandler)
3075             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3076                                            0);
3077           else if (parser->m_defaultHandler)
3078             reportDefault(parser, enc, s, next);
3079           break;
3080         }
3081         result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3082         if (result != XML_ERROR_NONE)
3083           return result;
3084       } else if (parser->m_externalEntityRefHandler) {
3085         const XML_Char *context;
3086         entity->open = XML_TRUE;
3087         context = getContext(parser);
3088         entity->open = XML_FALSE;
3089         if (! context)
3090           return XML_ERROR_NO_MEMORY;
3091         if (! parser->m_externalEntityRefHandler(
3092                 parser->m_externalEntityRefHandlerArg, context, entity->base,
3093                 entity->systemId, entity->publicId))
3094           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3095         poolDiscard(&parser->m_tempPool);
3096       } else if (parser->m_defaultHandler)
3097         reportDefault(parser, enc, s, next);
3098       break;
3099     }
3100     case XML_TOK_START_TAG_NO_ATTS:
3101       /* fall through */
3102     case XML_TOK_START_TAG_WITH_ATTS: {
3103       TAG *tag;
3104       enum XML_Error result;
3105       XML_Char *toPtr;
3106       if (parser->m_freeTagList) {
3107         tag = parser->m_freeTagList;
3108         parser->m_freeTagList = parser->m_freeTagList->parent;
3109       } else {
3110         tag = (TAG *)MALLOC(parser, sizeof(TAG));
3111         if (! tag)
3112           return XML_ERROR_NO_MEMORY;
3113         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
3114         if (! tag->buf) {
3115           FREE(parser, tag);
3116           return XML_ERROR_NO_MEMORY;
3117         }
3118         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3119       }
3120       tag->bindings = NULL;
3121       tag->parent = parser->m_tagStack;
3122       parser->m_tagStack = tag;
3123       tag->name.localPart = NULL;
3124       tag->name.prefix = NULL;
3125       tag->rawName = s + enc->minBytesPerChar;
3126       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3127       ++parser->m_tagLevel;
3128       {
3129         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3130         const char *fromPtr = tag->rawName;
3131         toPtr = (XML_Char *)tag->buf;
3132         for (;;) {
3133           int bufSize;
3134           int convLen;
3135           const enum XML_Convert_Result convert_res
3136               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3137                            (ICHAR *)tag->bufEnd - 1);
3138           convLen = (int)(toPtr - (XML_Char *)tag->buf);
3139           if ((fromPtr >= rawNameEnd)
3140               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3141             tag->name.strLen = convLen;
3142             break;
3143           }
3144           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3145           {
3146             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3147             if (temp == NULL)
3148               return XML_ERROR_NO_MEMORY;
3149             tag->buf = temp;
3150             tag->bufEnd = temp + bufSize;
3151             toPtr = (XML_Char *)temp + convLen;
3152           }
3153         }
3154       }
3155       tag->name.str = (XML_Char *)tag->buf;
3156       *toPtr = XML_T('\0');
3157       result
3158           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3159       if (result)
3160         return result;
3161       if (parser->m_startElementHandler)
3162         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3163                                       (const XML_Char **)parser->m_atts);
3164       else if (parser->m_defaultHandler)
3165         reportDefault(parser, enc, s, next);
3166       poolClear(&parser->m_tempPool);
3167       break;
3168     }
3169     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3170       /* fall through */
3171     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3172       const char *rawName = s + enc->minBytesPerChar;
3173       enum XML_Error result;
3174       BINDING *bindings = NULL;
3175       XML_Bool noElmHandlers = XML_TRUE;
3176       TAG_NAME name;
3177       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3178                                  rawName + XmlNameLength(enc, rawName));
3179       if (! name.str)
3180         return XML_ERROR_NO_MEMORY;
3181       poolFinish(&parser->m_tempPool);
3182       result = storeAtts(parser, enc, s, &name, &bindings,
3183                          XML_ACCOUNT_NONE /* token spans whole start tag */);
3184       if (result != XML_ERROR_NONE) {
3185         freeBindings(parser, bindings);
3186         return result;
3187       }
3188       poolFinish(&parser->m_tempPool);
3189       if (parser->m_startElementHandler) {
3190         parser->m_startElementHandler(parser->m_handlerArg, name.str,
3191                                       (const XML_Char **)parser->m_atts);
3192         noElmHandlers = XML_FALSE;
3193       }
3194       if (parser->m_endElementHandler) {
3195         if (parser->m_startElementHandler)
3196           *eventPP = *eventEndPP;
3197         parser->m_endElementHandler(parser->m_handlerArg, name.str);
3198         noElmHandlers = XML_FALSE;
3199       }
3200       if (noElmHandlers && parser->m_defaultHandler)
3201         reportDefault(parser, enc, s, next);
3202       poolClear(&parser->m_tempPool);
3203       freeBindings(parser, bindings);
3204     }
3205       if ((parser->m_tagLevel == 0)
3206           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3207         if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3208             || (parser->m_parsingStatus.parsing == XML_PARSING
3209                 && parser->m_reenter))
3210           parser->m_processor = epilogProcessor;
3211         else
3212           return epilogProcessor(parser, next, end, nextPtr);
3213       }
3214       break;
3215     case XML_TOK_END_TAG:
3216       if (parser->m_tagLevel == startTagLevel)
3217         return XML_ERROR_ASYNC_ENTITY;
3218       else {
3219         int len;
3220         const char *rawName;
3221         TAG *tag = parser->m_tagStack;
3222         rawName = s + enc->minBytesPerChar * 2;
3223         len = XmlNameLength(enc, rawName);
3224         if (len != tag->rawNameLength
3225             || memcmp(tag->rawName, rawName, len) != 0) {
3226           *eventPP = rawName;
3227           return XML_ERROR_TAG_MISMATCH;
3228         }
3229         parser->m_tagStack = tag->parent;
3230         tag->parent = parser->m_freeTagList;
3231         parser->m_freeTagList = tag;
3232         --parser->m_tagLevel;
3233         if (parser->m_endElementHandler) {
3234           const XML_Char *localPart;
3235           const XML_Char *prefix;
3236           XML_Char *uri;
3237           localPart = tag->name.localPart;
3238           if (parser->m_ns && localPart) {
3239             /* localPart and prefix may have been overwritten in
3240                tag->name.str, since this points to the binding->uri
3241                buffer which gets reused; so we have to add them again
3242             */
3243             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3244             /* don't need to check for space - already done in storeAtts() */
3245             while (*localPart)
3246               *uri++ = *localPart++;
3247             prefix = tag->name.prefix;
3248             if (parser->m_ns_triplets && prefix) {
3249               *uri++ = parser->m_namespaceSeparator;
3250               while (*prefix)
3251                 *uri++ = *prefix++;
3252             }
3253             *uri = XML_T('\0');
3254           }
3255           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3256         } else if (parser->m_defaultHandler)
3257           reportDefault(parser, enc, s, next);
3258         while (tag->bindings) {
3259           BINDING *b = tag->bindings;
3260           if (parser->m_endNamespaceDeclHandler)
3261             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3262                                               b->prefix->name);
3263           tag->bindings = tag->bindings->nextTagBinding;
3264           b->nextTagBinding = parser->m_freeBindingList;
3265           parser->m_freeBindingList = b;
3266           b->prefix->binding = b->prevPrefixBinding;
3267         }
3268         if ((parser->m_tagLevel == 0)
3269             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3270           if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3271               || (parser->m_parsingStatus.parsing == XML_PARSING
3272                   && parser->m_reenter))
3273             parser->m_processor = epilogProcessor;
3274           else
3275             return epilogProcessor(parser, next, end, nextPtr);
3276         }
3277       }
3278       break;
3279     case XML_TOK_CHAR_REF: {
3280       int n = XmlCharRefNumber(enc, s);
3281       if (n < 0)
3282         return XML_ERROR_BAD_CHAR_REF;
3283       if (parser->m_characterDataHandler) {
3284         XML_Char buf[XML_ENCODE_MAX];
3285         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3286                                        XmlEncode(n, (ICHAR *)buf));
3287       } else if (parser->m_defaultHandler)
3288         reportDefault(parser, enc, s, next);
3289     } break;
3290     case XML_TOK_XML_DECL:
3291       return XML_ERROR_MISPLACED_XML_PI;
3292     case XML_TOK_DATA_NEWLINE:
3293       if (parser->m_characterDataHandler) {
3294         XML_Char c = 0xA;
3295         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3296       } else if (parser->m_defaultHandler)
3297         reportDefault(parser, enc, s, next);
3298       break;
3299     case XML_TOK_CDATA_SECT_OPEN: {
3300       enum XML_Error result;
3301       if (parser->m_startCdataSectionHandler)
3302         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3303       /* BEGIN disabled code */
3304       /* Suppose you doing a transformation on a document that involves
3305          changing only the character data.  You set up a defaultHandler
3306          and a characterDataHandler.  The defaultHandler simply copies
3307          characters through.  The characterDataHandler does the
3308          transformation and writes the characters out escaping them as
3309          necessary.  This case will fail to work if we leave out the
3310          following two lines (because & and < inside CDATA sections will
3311          be incorrectly escaped).
3312 
3313          However, now we have a start/endCdataSectionHandler, so it seems
3314          easier to let the user deal with this.
3315       */
3316       else if ((0) && parser->m_characterDataHandler)
3317         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3318                                        0);
3319       /* END disabled code */
3320       else if (parser->m_defaultHandler)
3321         reportDefault(parser, enc, s, next);
3322       result
3323           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3324       if (result != XML_ERROR_NONE)
3325         return result;
3326       else if (! next) {
3327         parser->m_processor = cdataSectionProcessor;
3328         return result;
3329       }
3330     } break;
3331     case XML_TOK_TRAILING_RSQB:
3332       if (haveMore) {
3333         *nextPtr = s;
3334         return XML_ERROR_NONE;
3335       }
3336       if (parser->m_characterDataHandler) {
3337         if (MUST_CONVERT(enc, s)) {
3338           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3339           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3340           parser->m_characterDataHandler(
3341               parser->m_handlerArg, parser->m_dataBuf,
3342               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3343         } else
3344           parser->m_characterDataHandler(
3345               parser->m_handlerArg, (const XML_Char *)s,
3346               (int)((const XML_Char *)end - (const XML_Char *)s));
3347       } else if (parser->m_defaultHandler)
3348         reportDefault(parser, enc, s, end);
3349       /* We are at the end of the final buffer, should we check for
3350          XML_SUSPENDED, XML_FINISHED?
3351       */
3352       if (startTagLevel == 0) {
3353         *eventPP = end;
3354         return XML_ERROR_NO_ELEMENTS;
3355       }
3356       if (parser->m_tagLevel != startTagLevel) {
3357         *eventPP = end;
3358         return XML_ERROR_ASYNC_ENTITY;
3359       }
3360       *nextPtr = end;
3361       return XML_ERROR_NONE;
3362     case XML_TOK_DATA_CHARS: {
3363       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3364       if (charDataHandler) {
3365         if (MUST_CONVERT(enc, s)) {
3366           for (;;) {
3367             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3368             const enum XML_Convert_Result convert_res = XmlConvert(
3369                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3370             *eventEndPP = s;
3371             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3372                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3373             if ((convert_res == XML_CONVERT_COMPLETED)
3374                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3375               break;
3376             *eventPP = s;
3377           }
3378         } else
3379           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3380                           (int)((const XML_Char *)next - (const XML_Char *)s));
3381       } else if (parser->m_defaultHandler)
3382         reportDefault(parser, enc, s, next);
3383     } break;
3384     case XML_TOK_PI:
3385       if (! reportProcessingInstruction(parser, enc, s, next))
3386         return XML_ERROR_NO_MEMORY;
3387       break;
3388     case XML_TOK_COMMENT:
3389       if (! reportComment(parser, enc, s, next))
3390         return XML_ERROR_NO_MEMORY;
3391       break;
3392     default:
3393       /* All of the tokens produced by XmlContentTok() have their own
3394        * explicit cases, so this default is not strictly necessary.
3395        * However it is a useful safety net, so we retain the code and
3396        * simply exclude it from the coverage tests.
3397        *
3398        * LCOV_EXCL_START
3399        */
3400       if (parser->m_defaultHandler)
3401         reportDefault(parser, enc, s, next);
3402       break;
3403       /* LCOV_EXCL_STOP */
3404     }
3405     switch (parser->m_parsingStatus.parsing) {
3406     case XML_SUSPENDED:
3407       *eventPP = next;
3408       *nextPtr = next;
3409       return XML_ERROR_NONE;
3410     case XML_FINISHED:
3411       *eventPP = next;
3412       return XML_ERROR_ABORTED;
3413     case XML_PARSING:
3414       if (parser->m_reenter) {
3415         *nextPtr = next;
3416         return XML_ERROR_NONE;
3417       }
3418       /* Fall through */
3419     default:;
3420       *eventPP = s = next;
3421     }
3422   }
3423   /* not reached */
3424 }
3425 
3426 /* This function does not call free() on the allocated memory, merely
3427  * moving it to the parser's m_freeBindingList where it can be freed or
3428  * reused as appropriate.
3429  */
3430 static void
freeBindings(XML_Parser parser,BINDING * bindings)3431 freeBindings(XML_Parser parser, BINDING *bindings) {
3432   while (bindings) {
3433     BINDING *b = bindings;
3434 
3435     /* m_startNamespaceDeclHandler will have been called for this
3436      * binding in addBindings(), so call the end handler now.
3437      */
3438     if (parser->m_endNamespaceDeclHandler)
3439       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3440 
3441     bindings = bindings->nextTagBinding;
3442     b->nextTagBinding = parser->m_freeBindingList;
3443     parser->m_freeBindingList = b;
3444     b->prefix->binding = b->prevPrefixBinding;
3445   }
3446 }
3447 
3448 /* Precondition: all arguments must be non-NULL;
3449    Purpose:
3450    - normalize attributes
3451    - check attributes for well-formedness
3452    - generate namespace aware attribute names (URI, prefix)
3453    - build list of attributes for startElementHandler
3454    - default attributes
3455    - process namespace declarations (check and report them)
3456    - generate namespace aware element name (URI, prefix)
3457 */
3458 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3459 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3460           TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3461           enum XML_Account account) {
3462   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3463   ELEMENT_TYPE *elementType;
3464   int nDefaultAtts;
3465   const XML_Char **appAtts; /* the attribute list for the application */
3466   int attIndex = 0;
3467   int prefixLen;
3468   int i;
3469   int n;
3470   XML_Char *uri;
3471   int nPrefixes = 0;
3472   BINDING *binding;
3473   const XML_Char *localPart;
3474 
3475   /* lookup the element type name */
3476   elementType
3477       = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3478   if (! elementType) {
3479     const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3480     if (! name)
3481       return XML_ERROR_NO_MEMORY;
3482     elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3483                                          sizeof(ELEMENT_TYPE));
3484     if (! elementType)
3485       return XML_ERROR_NO_MEMORY;
3486     if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3487       return XML_ERROR_NO_MEMORY;
3488   }
3489   nDefaultAtts = elementType->nDefaultAtts;
3490 
3491   /* get the attributes from the tokenizer */
3492   n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3493 
3494   /* Detect and prevent integer overflow */
3495   if (n > INT_MAX - nDefaultAtts) {
3496     return XML_ERROR_NO_MEMORY;
3497   }
3498 
3499   if (n + nDefaultAtts > parser->m_attsSize) {
3500     int oldAttsSize = parser->m_attsSize;
3501     ATTRIBUTE *temp;
3502 #ifdef XML_ATTR_INFO
3503     XML_AttrInfo *temp2;
3504 #endif
3505 
3506     /* Detect and prevent integer overflow */
3507     if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3508         || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3509       return XML_ERROR_NO_MEMORY;
3510     }
3511 
3512     parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3513 
3514     /* Detect and prevent integer overflow.
3515      * The preprocessor guard addresses the "always false" warning
3516      * from -Wtype-limits on platforms where
3517      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3518 #if UINT_MAX >= SIZE_MAX
3519     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3520       parser->m_attsSize = oldAttsSize;
3521       return XML_ERROR_NO_MEMORY;
3522     }
3523 #endif
3524 
3525     temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3526                                 parser->m_attsSize * sizeof(ATTRIBUTE));
3527     if (temp == NULL) {
3528       parser->m_attsSize = oldAttsSize;
3529       return XML_ERROR_NO_MEMORY;
3530     }
3531     parser->m_atts = temp;
3532 #ifdef XML_ATTR_INFO
3533     /* Detect and prevent integer overflow.
3534      * The preprocessor guard addresses the "always false" warning
3535      * from -Wtype-limits on platforms where
3536      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3537 #  if UINT_MAX >= SIZE_MAX
3538     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3539       parser->m_attsSize = oldAttsSize;
3540       return XML_ERROR_NO_MEMORY;
3541     }
3542 #  endif
3543 
3544     temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3545                                     parser->m_attsSize * sizeof(XML_AttrInfo));
3546     if (temp2 == NULL) {
3547       parser->m_attsSize = oldAttsSize;
3548       return XML_ERROR_NO_MEMORY;
3549     }
3550     parser->m_attInfo = temp2;
3551 #endif
3552     if (n > oldAttsSize)
3553       XmlGetAttributes(enc, attStr, n, parser->m_atts);
3554   }
3555 
3556   appAtts = (const XML_Char **)parser->m_atts;
3557   for (i = 0; i < n; i++) {
3558     ATTRIBUTE *currAtt = &parser->m_atts[i];
3559 #ifdef XML_ATTR_INFO
3560     XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3561 #endif
3562     /* add the name and value to the attribute list */
3563     ATTRIBUTE_ID *attId
3564         = getAttributeId(parser, enc, currAtt->name,
3565                          currAtt->name + XmlNameLength(enc, currAtt->name));
3566     if (! attId)
3567       return XML_ERROR_NO_MEMORY;
3568 #ifdef XML_ATTR_INFO
3569     currAttInfo->nameStart
3570         = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3571     currAttInfo->nameEnd
3572         = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3573     currAttInfo->valueStart = parser->m_parseEndByteIndex
3574                               - (parser->m_parseEndPtr - currAtt->valuePtr);
3575     currAttInfo->valueEnd = parser->m_parseEndByteIndex
3576                             - (parser->m_parseEndPtr - currAtt->valueEnd);
3577 #endif
3578     /* Detect duplicate attributes by their QNames. This does not work when
3579        namespace processing is turned on and different prefixes for the same
3580        namespace are used. For this case we have a check further down.
3581     */
3582     if ((attId->name)[-1]) {
3583       if (enc == parser->m_encoding)
3584         parser->m_eventPtr = parser->m_atts[i].name;
3585       return XML_ERROR_DUPLICATE_ATTRIBUTE;
3586     }
3587     (attId->name)[-1] = 1;
3588     appAtts[attIndex++] = attId->name;
3589     if (! parser->m_atts[i].normalized) {
3590       enum XML_Error result;
3591       XML_Bool isCdata = XML_TRUE;
3592 
3593       /* figure out whether declared as other than CDATA */
3594       if (attId->maybeTokenized) {
3595         int j;
3596         for (j = 0; j < nDefaultAtts; j++) {
3597           if (attId == elementType->defaultAtts[j].id) {
3598             isCdata = elementType->defaultAtts[j].isCdata;
3599             break;
3600           }
3601         }
3602       }
3603 
3604       /* normalize the attribute value */
3605       result = storeAttributeValue(
3606           parser, enc, isCdata, parser->m_atts[i].valuePtr,
3607           parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3608       if (result)
3609         return result;
3610       appAtts[attIndex] = poolStart(&parser->m_tempPool);
3611       poolFinish(&parser->m_tempPool);
3612     } else {
3613       /* the value did not need normalizing */
3614       appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3615                                           parser->m_atts[i].valuePtr,
3616                                           parser->m_atts[i].valueEnd);
3617       if (appAtts[attIndex] == 0)
3618         return XML_ERROR_NO_MEMORY;
3619       poolFinish(&parser->m_tempPool);
3620     }
3621     /* handle prefixed attribute names */
3622     if (attId->prefix) {
3623       if (attId->xmlns) {
3624         /* deal with namespace declarations here */
3625         enum XML_Error result = addBinding(parser, attId->prefix, attId,
3626                                            appAtts[attIndex], bindingsPtr);
3627         if (result)
3628           return result;
3629         --attIndex;
3630       } else {
3631         /* deal with other prefixed names later */
3632         attIndex++;
3633         nPrefixes++;
3634         (attId->name)[-1] = 2;
3635       }
3636     } else
3637       attIndex++;
3638   }
3639 
3640   /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3641   parser->m_nSpecifiedAtts = attIndex;
3642   if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3643     for (i = 0; i < attIndex; i += 2)
3644       if (appAtts[i] == elementType->idAtt->name) {
3645         parser->m_idAttIndex = i;
3646         break;
3647       }
3648   } else
3649     parser->m_idAttIndex = -1;
3650 
3651   /* do attribute defaulting */
3652   for (i = 0; i < nDefaultAtts; i++) {
3653     const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3654     if (! (da->id->name)[-1] && da->value) {
3655       if (da->id->prefix) {
3656         if (da->id->xmlns) {
3657           enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3658                                              da->value, bindingsPtr);
3659           if (result)
3660             return result;
3661         } else {
3662           (da->id->name)[-1] = 2;
3663           nPrefixes++;
3664           appAtts[attIndex++] = da->id->name;
3665           appAtts[attIndex++] = da->value;
3666         }
3667       } else {
3668         (da->id->name)[-1] = 1;
3669         appAtts[attIndex++] = da->id->name;
3670         appAtts[attIndex++] = da->value;
3671       }
3672     }
3673   }
3674   appAtts[attIndex] = 0;
3675 
3676   /* expand prefixed attribute names, check for duplicates,
3677      and clear flags that say whether attributes were specified */
3678   i = 0;
3679   if (nPrefixes) {
3680     int j; /* hash table index */
3681     unsigned long version = parser->m_nsAttsVersion;
3682 
3683     /* Detect and prevent invalid shift */
3684     if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3685       return XML_ERROR_NO_MEMORY;
3686     }
3687 
3688     unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3689     unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3690     /* size of hash table must be at least 2 * (# of prefixed attributes) */
3691     if ((nPrefixes << 1)
3692         >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3693       NS_ATT *temp;
3694       /* hash table size must also be a power of 2 and >= 8 */
3695       while (nPrefixes >> parser->m_nsAttsPower++)
3696         ;
3697       if (parser->m_nsAttsPower < 3)
3698         parser->m_nsAttsPower = 3;
3699 
3700       /* Detect and prevent invalid shift */
3701       if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3702         /* Restore actual size of memory in m_nsAtts */
3703         parser->m_nsAttsPower = oldNsAttsPower;
3704         return XML_ERROR_NO_MEMORY;
3705       }
3706 
3707       nsAttsSize = 1u << parser->m_nsAttsPower;
3708 
3709       /* Detect and prevent integer overflow.
3710        * The preprocessor guard addresses the "always false" warning
3711        * from -Wtype-limits on platforms where
3712        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3713 #if UINT_MAX >= SIZE_MAX
3714       if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3715         /* Restore actual size of memory in m_nsAtts */
3716         parser->m_nsAttsPower = oldNsAttsPower;
3717         return XML_ERROR_NO_MEMORY;
3718       }
3719 #endif
3720 
3721       temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3722                                nsAttsSize * sizeof(NS_ATT));
3723       if (! temp) {
3724         /* Restore actual size of memory in m_nsAtts */
3725         parser->m_nsAttsPower = oldNsAttsPower;
3726         return XML_ERROR_NO_MEMORY;
3727       }
3728       parser->m_nsAtts = temp;
3729       version = 0; /* force re-initialization of m_nsAtts hash table */
3730     }
3731     /* using a version flag saves us from initializing m_nsAtts every time */
3732     if (! version) { /* initialize version flags when version wraps around */
3733       version = INIT_ATTS_VERSION;
3734       for (j = nsAttsSize; j != 0;)
3735         parser->m_nsAtts[--j].version = version;
3736     }
3737     parser->m_nsAttsVersion = --version;
3738 
3739     /* expand prefixed names and check for duplicates */
3740     for (; i < attIndex; i += 2) {
3741       const XML_Char *s = appAtts[i];
3742       if (s[-1] == 2) { /* prefixed */
3743         ATTRIBUTE_ID *id;
3744         const BINDING *b;
3745         unsigned long uriHash;
3746         struct siphash sip_state;
3747         struct sipkey sip_key;
3748 
3749         copy_salt_to_sipkey(parser, &sip_key);
3750         sip24_init(&sip_state, &sip_key);
3751 
3752         ((XML_Char *)s)[-1] = 0; /* clear flag */
3753         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3754         if (! id || ! id->prefix) {
3755           /* This code is walking through the appAtts array, dealing
3756            * with (in this case) a prefixed attribute name.  To be in
3757            * the array, the attribute must have already been bound, so
3758            * has to have passed through the hash table lookup once
3759            * already.  That implies that an entry for it already
3760            * exists, so the lookup above will return a pointer to
3761            * already allocated memory.  There is no opportunaity for
3762            * the allocator to fail, so the condition above cannot be
3763            * fulfilled.
3764            *
3765            * Since it is difficult to be certain that the above
3766            * analysis is complete, we retain the test and merely
3767            * remove the code from coverage tests.
3768            */
3769           return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3770         }
3771         b = id->prefix->binding;
3772         if (! b)
3773           return XML_ERROR_UNBOUND_PREFIX;
3774 
3775         for (j = 0; j < b->uriLen; j++) {
3776           const XML_Char c = b->uri[j];
3777           if (! poolAppendChar(&parser->m_tempPool, c))
3778             return XML_ERROR_NO_MEMORY;
3779         }
3780 
3781         sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3782 
3783         while (*s++ != XML_T(ASCII_COLON))
3784           ;
3785 
3786         sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3787 
3788         do { /* copies null terminator */
3789           if (! poolAppendChar(&parser->m_tempPool, *s))
3790             return XML_ERROR_NO_MEMORY;
3791         } while (*s++);
3792 
3793         uriHash = (unsigned long)sip24_final(&sip_state);
3794 
3795         { /* Check hash table for duplicate of expanded name (uriName).
3796              Derived from code in lookup(parser, HASH_TABLE *table, ...).
3797           */
3798           unsigned char step = 0;
3799           unsigned long mask = nsAttsSize - 1;
3800           j = uriHash & mask; /* index into hash table */
3801           while (parser->m_nsAtts[j].version == version) {
3802             /* for speed we compare stored hash values first */
3803             if (uriHash == parser->m_nsAtts[j].hash) {
3804               const XML_Char *s1 = poolStart(&parser->m_tempPool);
3805               const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3806               /* s1 is null terminated, but not s2 */
3807               for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3808                 ;
3809               if (*s1 == 0)
3810                 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3811             }
3812             if (! step)
3813               step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3814             j < step ? (j += nsAttsSize - step) : (j -= step);
3815           }
3816         }
3817 
3818         if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3819           parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3820           s = b->prefix->name;
3821           do {
3822             if (! poolAppendChar(&parser->m_tempPool, *s))
3823               return XML_ERROR_NO_MEMORY;
3824           } while (*s++);
3825         }
3826 
3827         /* store expanded name in attribute list */
3828         s = poolStart(&parser->m_tempPool);
3829         poolFinish(&parser->m_tempPool);
3830         appAtts[i] = s;
3831 
3832         /* fill empty slot with new version, uriName and hash value */
3833         parser->m_nsAtts[j].version = version;
3834         parser->m_nsAtts[j].hash = uriHash;
3835         parser->m_nsAtts[j].uriName = s;
3836 
3837         if (! --nPrefixes) {
3838           i += 2;
3839           break;
3840         }
3841       } else                     /* not prefixed */
3842         ((XML_Char *)s)[-1] = 0; /* clear flag */
3843     }
3844   }
3845   /* clear flags for the remaining attributes */
3846   for (; i < attIndex; i += 2)
3847     ((XML_Char *)(appAtts[i]))[-1] = 0;
3848   for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3849     binding->attId->name[-1] = 0;
3850 
3851   if (! parser->m_ns)
3852     return XML_ERROR_NONE;
3853 
3854   /* expand the element type name */
3855   if (elementType->prefix) {
3856     binding = elementType->prefix->binding;
3857     if (! binding)
3858       return XML_ERROR_UNBOUND_PREFIX;
3859     localPart = tagNamePtr->str;
3860     while (*localPart++ != XML_T(ASCII_COLON))
3861       ;
3862   } else if (dtd->defaultPrefix.binding) {
3863     binding = dtd->defaultPrefix.binding;
3864     localPart = tagNamePtr->str;
3865   } else
3866     return XML_ERROR_NONE;
3867   prefixLen = 0;
3868   if (parser->m_ns_triplets && binding->prefix->name) {
3869     for (; binding->prefix->name[prefixLen++];)
3870       ; /* prefixLen includes null terminator */
3871   }
3872   tagNamePtr->localPart = localPart;
3873   tagNamePtr->uriLen = binding->uriLen;
3874   tagNamePtr->prefix = binding->prefix->name;
3875   tagNamePtr->prefixLen = prefixLen;
3876   for (i = 0; localPart[i++];)
3877     ; /* i includes null terminator */
3878 
3879   /* Detect and prevent integer overflow */
3880   if (binding->uriLen > INT_MAX - prefixLen
3881       || i > INT_MAX - (binding->uriLen + prefixLen)) {
3882     return XML_ERROR_NO_MEMORY;
3883   }
3884 
3885   n = i + binding->uriLen + prefixLen;
3886   if (n > binding->uriAlloc) {
3887     TAG *p;
3888 
3889     /* Detect and prevent integer overflow */
3890     if (n > INT_MAX - EXPAND_SPARE) {
3891       return XML_ERROR_NO_MEMORY;
3892     }
3893     /* Detect and prevent integer overflow.
3894      * The preprocessor guard addresses the "always false" warning
3895      * from -Wtype-limits on platforms where
3896      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3897 #if UINT_MAX >= SIZE_MAX
3898     if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3899       return XML_ERROR_NO_MEMORY;
3900     }
3901 #endif
3902 
3903     uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3904     if (! uri)
3905       return XML_ERROR_NO_MEMORY;
3906     binding->uriAlloc = n + EXPAND_SPARE;
3907     memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3908     for (p = parser->m_tagStack; p; p = p->parent)
3909       if (p->name.str == binding->uri)
3910         p->name.str = uri;
3911     FREE(parser, binding->uri);
3912     binding->uri = uri;
3913   }
3914   /* if m_namespaceSeparator != '\0' then uri includes it already */
3915   uri = binding->uri + binding->uriLen;
3916   memcpy(uri, localPart, i * sizeof(XML_Char));
3917   /* we always have a namespace separator between localPart and prefix */
3918   if (prefixLen) {
3919     uri += i - 1;
3920     *uri = parser->m_namespaceSeparator; /* replace null terminator */
3921     memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3922   }
3923   tagNamePtr->str = binding->uri;
3924   return XML_ERROR_NONE;
3925 }
3926 
3927 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3928 is_rfc3986_uri_char(XML_Char candidate) {
3929   // For the RFC 3986 ANBF grammar see
3930   // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3931 
3932   switch (candidate) {
3933   // From rule "ALPHA" (uppercase half)
3934   case 'A':
3935   case 'B':
3936   case 'C':
3937   case 'D':
3938   case 'E':
3939   case 'F':
3940   case 'G':
3941   case 'H':
3942   case 'I':
3943   case 'J':
3944   case 'K':
3945   case 'L':
3946   case 'M':
3947   case 'N':
3948   case 'O':
3949   case 'P':
3950   case 'Q':
3951   case 'R':
3952   case 'S':
3953   case 'T':
3954   case 'U':
3955   case 'V':
3956   case 'W':
3957   case 'X':
3958   case 'Y':
3959   case 'Z':
3960 
3961   // From rule "ALPHA" (lowercase half)
3962   case 'a':
3963   case 'b':
3964   case 'c':
3965   case 'd':
3966   case 'e':
3967   case 'f':
3968   case 'g':
3969   case 'h':
3970   case 'i':
3971   case 'j':
3972   case 'k':
3973   case 'l':
3974   case 'm':
3975   case 'n':
3976   case 'o':
3977   case 'p':
3978   case 'q':
3979   case 'r':
3980   case 's':
3981   case 't':
3982   case 'u':
3983   case 'v':
3984   case 'w':
3985   case 'x':
3986   case 'y':
3987   case 'z':
3988 
3989   // From rule "DIGIT"
3990   case '0':
3991   case '1':
3992   case '2':
3993   case '3':
3994   case '4':
3995   case '5':
3996   case '6':
3997   case '7':
3998   case '8':
3999   case '9':
4000 
4001   // From rule "pct-encoded"
4002   case '%':
4003 
4004   // From rule "unreserved"
4005   case '-':
4006   case '.':
4007   case '_':
4008   case '~':
4009 
4010   // From rule "gen-delims"
4011   case ':':
4012   case '/':
4013   case '?':
4014   case '#':
4015   case '[':
4016   case ']':
4017   case '@':
4018 
4019   // From rule "sub-delims"
4020   case '!':
4021   case '$':
4022   case '&':
4023   case '\'':
4024   case '(':
4025   case ')':
4026   case '*':
4027   case '+':
4028   case ',':
4029   case ';':
4030   case '=':
4031     return XML_TRUE;
4032 
4033   default:
4034     return XML_FALSE;
4035   }
4036 }
4037 
4038 /* addBinding() overwrites the value of prefix->binding without checking.
4039    Therefore one must keep track of the old value outside of addBinding().
4040 */
4041 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)4042 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4043            const XML_Char *uri, BINDING **bindingsPtr) {
4044   // "http://www.w3.org/XML/1998/namespace"
4045   static const XML_Char xmlNamespace[]
4046       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
4047          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
4048          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
4049          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
4050          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
4051          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
4052          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
4053          ASCII_e,      '\0'};
4054   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4055   // "http://www.w3.org/2000/xmlns/"
4056   static const XML_Char xmlnsNamespace[]
4057       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
4058          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4059          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
4060          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
4061          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
4062   static const int xmlnsLen
4063       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4064 
4065   XML_Bool mustBeXML = XML_FALSE;
4066   XML_Bool isXML = XML_TRUE;
4067   XML_Bool isXMLNS = XML_TRUE;
4068 
4069   BINDING *b;
4070   int len;
4071 
4072   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4073   if (*uri == XML_T('\0') && prefix->name)
4074     return XML_ERROR_UNDECLARING_PREFIX;
4075 
4076   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4077       && prefix->name[1] == XML_T(ASCII_m)
4078       && prefix->name[2] == XML_T(ASCII_l)) {
4079     /* Not allowed to bind xmlns */
4080     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4081         && prefix->name[5] == XML_T('\0'))
4082       return XML_ERROR_RESERVED_PREFIX_XMLNS;
4083 
4084     if (prefix->name[3] == XML_T('\0'))
4085       mustBeXML = XML_TRUE;
4086   }
4087 
4088   for (len = 0; uri[len]; len++) {
4089     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4090       isXML = XML_FALSE;
4091 
4092     if (! mustBeXML && isXMLNS
4093         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4094       isXMLNS = XML_FALSE;
4095 
4096     // NOTE: While Expat does not validate namespace URIs against RFC 3986
4097     //       today (and is not REQUIRED to do so with regard to the XML 1.0
4098     //       namespaces specification) we have to at least make sure, that
4099     //       the application on top of Expat (that is likely splitting expanded
4100     //       element names ("qualified names") of form
4101     //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4102     //       in its element handler code) cannot be confused by an attacker
4103     //       putting additional namespace separator characters into namespace
4104     //       declarations.  That would be ambiguous and not to be expected.
4105     //
4106     //       While the HTML API docs of function XML_ParserCreateNS have been
4107     //       advising against use of a namespace separator character that can
4108     //       appear in a URI for >20 years now, some widespread applications
4109     //       are using URI characters (':' (colon) in particular) for a
4110     //       namespace separator, in practice.  To keep these applications
4111     //       functional, we only reject namespaces URIs containing the
4112     //       application-chosen namespace separator if the chosen separator
4113     //       is a non-URI character with regard to RFC 3986.
4114     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4115         && ! is_rfc3986_uri_char(uri[len])) {
4116       return XML_ERROR_SYNTAX;
4117     }
4118   }
4119   isXML = isXML && len == xmlLen;
4120   isXMLNS = isXMLNS && len == xmlnsLen;
4121 
4122   if (mustBeXML != isXML)
4123     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4124                      : XML_ERROR_RESERVED_NAMESPACE_URI;
4125 
4126   if (isXMLNS)
4127     return XML_ERROR_RESERVED_NAMESPACE_URI;
4128 
4129   if (parser->m_namespaceSeparator)
4130     len++;
4131   if (parser->m_freeBindingList) {
4132     b = parser->m_freeBindingList;
4133     if (len > b->uriAlloc) {
4134       /* Detect and prevent integer overflow */
4135       if (len > INT_MAX - EXPAND_SPARE) {
4136         return XML_ERROR_NO_MEMORY;
4137       }
4138 
4139       /* Detect and prevent integer overflow.
4140        * The preprocessor guard addresses the "always false" warning
4141        * from -Wtype-limits on platforms where
4142        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4143 #if UINT_MAX >= SIZE_MAX
4144       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4145         return XML_ERROR_NO_MEMORY;
4146       }
4147 #endif
4148 
4149       XML_Char *temp = (XML_Char *)REALLOC(
4150           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4151       if (temp == NULL)
4152         return XML_ERROR_NO_MEMORY;
4153       b->uri = temp;
4154       b->uriAlloc = len + EXPAND_SPARE;
4155     }
4156     parser->m_freeBindingList = b->nextTagBinding;
4157   } else {
4158     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4159     if (! b)
4160       return XML_ERROR_NO_MEMORY;
4161 
4162     /* Detect and prevent integer overflow */
4163     if (len > INT_MAX - EXPAND_SPARE) {
4164       return XML_ERROR_NO_MEMORY;
4165     }
4166     /* Detect and prevent integer overflow.
4167      * The preprocessor guard addresses the "always false" warning
4168      * from -Wtype-limits on platforms where
4169      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4170 #if UINT_MAX >= SIZE_MAX
4171     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4172       return XML_ERROR_NO_MEMORY;
4173     }
4174 #endif
4175 
4176     b->uri
4177         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4178     if (! b->uri) {
4179       FREE(parser, b);
4180       return XML_ERROR_NO_MEMORY;
4181     }
4182     b->uriAlloc = len + EXPAND_SPARE;
4183   }
4184   b->uriLen = len;
4185   memcpy(b->uri, uri, len * sizeof(XML_Char));
4186   if (parser->m_namespaceSeparator)
4187     b->uri[len - 1] = parser->m_namespaceSeparator;
4188   b->prefix = prefix;
4189   b->attId = attId;
4190   b->prevPrefixBinding = prefix->binding;
4191   /* NULL binding when default namespace undeclared */
4192   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4193     prefix->binding = NULL;
4194   else
4195     prefix->binding = b;
4196   b->nextTagBinding = *bindingsPtr;
4197   *bindingsPtr = b;
4198   /* if attId == NULL then we are not starting a namespace scope */
4199   if (attId && parser->m_startNamespaceDeclHandler)
4200     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4201                                         prefix->binding ? uri : 0);
4202   return XML_ERROR_NONE;
4203 }
4204 
4205 /* The idea here is to avoid using stack for each CDATA section when
4206    the whole file is parsed with one call.
4207 */
4208 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4209 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4210                       const char **endPtr) {
4211   enum XML_Error result = doCdataSection(
4212       parser, parser->m_encoding, &start, end, endPtr,
4213       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4214   if (result != XML_ERROR_NONE)
4215     return result;
4216   if (start) {
4217     if (parser->m_parentParser) { /* we are parsing an external entity */
4218       parser->m_processor = externalEntityContentProcessor;
4219       return externalEntityContentProcessor(parser, start, end, endPtr);
4220     } else {
4221       parser->m_processor = contentProcessor;
4222       return contentProcessor(parser, start, end, endPtr);
4223     }
4224   }
4225   return result;
4226 }
4227 
4228 /* startPtr gets set to non-null if the section is closed, and to null if
4229    the section is not yet closed.
4230 */
4231 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4232 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4233                const char *end, const char **nextPtr, XML_Bool haveMore,
4234                enum XML_Account account) {
4235   const char *s = *startPtr;
4236   const char **eventPP;
4237   const char **eventEndPP;
4238   if (enc == parser->m_encoding) {
4239     eventPP = &parser->m_eventPtr;
4240     *eventPP = s;
4241     eventEndPP = &parser->m_eventEndPtr;
4242   } else {
4243     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4244     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4245   }
4246   *eventPP = s;
4247   *startPtr = NULL;
4248 
4249   for (;;) {
4250     const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4251     int tok = XmlCdataSectionTok(enc, s, end, &next);
4252 #if XML_GE == 1
4253     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4254       accountingOnAbort(parser);
4255       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4256     }
4257 #else
4258     UNUSED_P(account);
4259 #endif
4260     *eventEndPP = next;
4261     switch (tok) {
4262     case XML_TOK_CDATA_SECT_CLOSE:
4263       if (parser->m_endCdataSectionHandler)
4264         parser->m_endCdataSectionHandler(parser->m_handlerArg);
4265       /* BEGIN disabled code */
4266       /* see comment under XML_TOK_CDATA_SECT_OPEN */
4267       else if ((0) && parser->m_characterDataHandler)
4268         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4269                                        0);
4270       /* END disabled code */
4271       else if (parser->m_defaultHandler)
4272         reportDefault(parser, enc, s, next);
4273       *startPtr = next;
4274       *nextPtr = next;
4275       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4276         return XML_ERROR_ABORTED;
4277       else
4278         return XML_ERROR_NONE;
4279     case XML_TOK_DATA_NEWLINE:
4280       if (parser->m_characterDataHandler) {
4281         XML_Char c = 0xA;
4282         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4283       } else if (parser->m_defaultHandler)
4284         reportDefault(parser, enc, s, next);
4285       break;
4286     case XML_TOK_DATA_CHARS: {
4287       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4288       if (charDataHandler) {
4289         if (MUST_CONVERT(enc, s)) {
4290           for (;;) {
4291             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4292             const enum XML_Convert_Result convert_res = XmlConvert(
4293                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4294             *eventEndPP = next;
4295             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4296                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4297             if ((convert_res == XML_CONVERT_COMPLETED)
4298                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4299               break;
4300             *eventPP = s;
4301           }
4302         } else
4303           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4304                           (int)((const XML_Char *)next - (const XML_Char *)s));
4305       } else if (parser->m_defaultHandler)
4306         reportDefault(parser, enc, s, next);
4307     } break;
4308     case XML_TOK_INVALID:
4309       *eventPP = next;
4310       return XML_ERROR_INVALID_TOKEN;
4311     case XML_TOK_PARTIAL_CHAR:
4312       if (haveMore) {
4313         *nextPtr = s;
4314         return XML_ERROR_NONE;
4315       }
4316       return XML_ERROR_PARTIAL_CHAR;
4317     case XML_TOK_PARTIAL:
4318     case XML_TOK_NONE:
4319       if (haveMore) {
4320         *nextPtr = s;
4321         return XML_ERROR_NONE;
4322       }
4323       return XML_ERROR_UNCLOSED_CDATA_SECTION;
4324     default:
4325       /* Every token returned by XmlCdataSectionTok() has its own
4326        * explicit case, so this default case will never be executed.
4327        * We retain it as a safety net and exclude it from the coverage
4328        * statistics.
4329        *
4330        * LCOV_EXCL_START
4331        */
4332       *eventPP = next;
4333       return XML_ERROR_UNEXPECTED_STATE;
4334       /* LCOV_EXCL_STOP */
4335     }
4336 
4337     switch (parser->m_parsingStatus.parsing) {
4338     case XML_SUSPENDED:
4339       *eventPP = next;
4340       *nextPtr = next;
4341       return XML_ERROR_NONE;
4342     case XML_FINISHED:
4343       *eventPP = next;
4344       return XML_ERROR_ABORTED;
4345     case XML_PARSING:
4346       if (parser->m_reenter) {
4347         return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
4348       }
4349       /* Fall through */
4350     default:;
4351       *eventPP = s = next;
4352     }
4353   }
4354   /* not reached */
4355 }
4356 
4357 #ifdef XML_DTD
4358 
4359 /* The idea here is to avoid using stack for each IGNORE section when
4360    the whole file is parsed with one call.
4361 */
4362 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4363 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4364                        const char **endPtr) {
4365   enum XML_Error result
4366       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4367                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4368   if (result != XML_ERROR_NONE)
4369     return result;
4370   if (start) {
4371     parser->m_processor = prologProcessor;
4372     return prologProcessor(parser, start, end, endPtr);
4373   }
4374   return result;
4375 }
4376 
4377 /* startPtr gets set to non-null is the section is closed, and to null
4378    if the section is not yet closed.
4379 */
4380 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4381 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4382                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4383   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4384   int tok;
4385   const char *s = *startPtr;
4386   const char **eventPP;
4387   const char **eventEndPP;
4388   if (enc == parser->m_encoding) {
4389     eventPP = &parser->m_eventPtr;
4390     *eventPP = s;
4391     eventEndPP = &parser->m_eventEndPtr;
4392   } else {
4393     /* It's not entirely clear, but it seems the following two lines
4394      * of code cannot be executed.  The only occasions on which 'enc'
4395      * is not 'encoding' are when this function is called
4396      * from the internal entity processing, and IGNORE sections are an
4397      * error in internal entities.
4398      *
4399      * Since it really isn't clear that this is true, we keep the code
4400      * and just remove it from our coverage tests.
4401      *
4402      * LCOV_EXCL_START
4403      */
4404     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4405     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4406     /* LCOV_EXCL_STOP */
4407   }
4408   *eventPP = s;
4409   *startPtr = NULL;
4410   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4411 #  if XML_GE == 1
4412   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4413                                 XML_ACCOUNT_DIRECT)) {
4414     accountingOnAbort(parser);
4415     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4416   }
4417 #  endif
4418   *eventEndPP = next;
4419   switch (tok) {
4420   case XML_TOK_IGNORE_SECT:
4421     if (parser->m_defaultHandler)
4422       reportDefault(parser, enc, s, next);
4423     *startPtr = next;
4424     *nextPtr = next;
4425     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4426       return XML_ERROR_ABORTED;
4427     else
4428       return XML_ERROR_NONE;
4429   case XML_TOK_INVALID:
4430     *eventPP = next;
4431     return XML_ERROR_INVALID_TOKEN;
4432   case XML_TOK_PARTIAL_CHAR:
4433     if (haveMore) {
4434       *nextPtr = s;
4435       return XML_ERROR_NONE;
4436     }
4437     return XML_ERROR_PARTIAL_CHAR;
4438   case XML_TOK_PARTIAL:
4439   case XML_TOK_NONE:
4440     if (haveMore) {
4441       *nextPtr = s;
4442       return XML_ERROR_NONE;
4443     }
4444     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4445   default:
4446     /* All of the tokens that XmlIgnoreSectionTok() returns have
4447      * explicit cases to handle them, so this default case is never
4448      * executed.  We keep it as a safety net anyway, and remove it
4449      * from our test coverage statistics.
4450      *
4451      * LCOV_EXCL_START
4452      */
4453     *eventPP = next;
4454     return XML_ERROR_UNEXPECTED_STATE;
4455     /* LCOV_EXCL_STOP */
4456   }
4457   /* not reached */
4458 }
4459 
4460 #endif /* XML_DTD */
4461 
4462 static enum XML_Error
initializeEncoding(XML_Parser parser)4463 initializeEncoding(XML_Parser parser) {
4464   const char *s;
4465 #ifdef XML_UNICODE
4466   char encodingBuf[128];
4467   /* See comments about `protocolEncodingName` in parserInit() */
4468   if (! parser->m_protocolEncodingName)
4469     s = NULL;
4470   else {
4471     int i;
4472     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4473       if (i == sizeof(encodingBuf) - 1
4474           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4475         encodingBuf[0] = '\0';
4476         break;
4477       }
4478       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4479     }
4480     encodingBuf[i] = '\0';
4481     s = encodingBuf;
4482   }
4483 #else
4484   s = parser->m_protocolEncodingName;
4485 #endif
4486   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4487           &parser->m_initEncoding, &parser->m_encoding, s))
4488     return XML_ERROR_NONE;
4489   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4490 }
4491 
4492 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4493 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4494                const char *next) {
4495   const char *encodingName = NULL;
4496   const XML_Char *storedEncName = NULL;
4497   const ENCODING *newEncoding = NULL;
4498   const char *version = NULL;
4499   const char *versionend = NULL;
4500   const XML_Char *storedversion = NULL;
4501   int standalone = -1;
4502 
4503 #if XML_GE == 1
4504   if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4505                                 XML_ACCOUNT_DIRECT)) {
4506     accountingOnAbort(parser);
4507     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4508   }
4509 #endif
4510 
4511   if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4512           isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4513           &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4514     if (isGeneralTextEntity)
4515       return XML_ERROR_TEXT_DECL;
4516     else
4517       return XML_ERROR_XML_DECL;
4518   }
4519   if (! isGeneralTextEntity && standalone == 1) {
4520     parser->m_dtd->standalone = XML_TRUE;
4521 #ifdef XML_DTD
4522     if (parser->m_paramEntityParsing
4523         == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4524       parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4525 #endif /* XML_DTD */
4526   }
4527   if (parser->m_xmlDeclHandler) {
4528     if (encodingName != NULL) {
4529       storedEncName = poolStoreString(
4530           &parser->m_temp2Pool, parser->m_encoding, encodingName,
4531           encodingName + XmlNameLength(parser->m_encoding, encodingName));
4532       if (! storedEncName)
4533         return XML_ERROR_NO_MEMORY;
4534       poolFinish(&parser->m_temp2Pool);
4535     }
4536     if (version) {
4537       storedversion
4538           = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4539                             versionend - parser->m_encoding->minBytesPerChar);
4540       if (! storedversion)
4541         return XML_ERROR_NO_MEMORY;
4542     }
4543     parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4544                              standalone);
4545   } else if (parser->m_defaultHandler)
4546     reportDefault(parser, parser->m_encoding, s, next);
4547   if (parser->m_protocolEncodingName == NULL) {
4548     if (newEncoding) {
4549       /* Check that the specified encoding does not conflict with what
4550        * the parser has already deduced.  Do we have the same number
4551        * of bytes in the smallest representation of a character?  If
4552        * this is UTF-16, is it the same endianness?
4553        */
4554       if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4555           || (newEncoding->minBytesPerChar == 2
4556               && newEncoding != parser->m_encoding)) {
4557         parser->m_eventPtr = encodingName;
4558         return XML_ERROR_INCORRECT_ENCODING;
4559       }
4560       parser->m_encoding = newEncoding;
4561     } else if (encodingName) {
4562       enum XML_Error result;
4563       if (! storedEncName) {
4564         storedEncName = poolStoreString(
4565             &parser->m_temp2Pool, parser->m_encoding, encodingName,
4566             encodingName + XmlNameLength(parser->m_encoding, encodingName));
4567         if (! storedEncName)
4568           return XML_ERROR_NO_MEMORY;
4569       }
4570       result = handleUnknownEncoding(parser, storedEncName);
4571       poolClear(&parser->m_temp2Pool);
4572       if (result == XML_ERROR_UNKNOWN_ENCODING)
4573         parser->m_eventPtr = encodingName;
4574       return result;
4575     }
4576   }
4577 
4578   if (storedEncName || storedversion)
4579     poolClear(&parser->m_temp2Pool);
4580 
4581   return XML_ERROR_NONE;
4582 }
4583 
4584 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4585 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4586   if (parser->m_unknownEncodingHandler) {
4587     XML_Encoding info;
4588     int i;
4589     for (i = 0; i < 256; i++)
4590       info.map[i] = -1;
4591     info.convert = NULL;
4592     info.data = NULL;
4593     info.release = NULL;
4594     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4595                                          encodingName, &info)) {
4596       ENCODING *enc;
4597       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4598       if (! parser->m_unknownEncodingMem) {
4599         if (info.release)
4600           info.release(info.data);
4601         return XML_ERROR_NO_MEMORY;
4602       }
4603       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4604           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4605       if (enc) {
4606         parser->m_unknownEncodingData = info.data;
4607         parser->m_unknownEncodingRelease = info.release;
4608         parser->m_encoding = enc;
4609         return XML_ERROR_NONE;
4610       }
4611     }
4612     if (info.release != NULL)
4613       info.release(info.data);
4614   }
4615   return XML_ERROR_UNKNOWN_ENCODING;
4616 }
4617 
4618 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4619 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4620                     const char **nextPtr) {
4621   enum XML_Error result = initializeEncoding(parser);
4622   if (result != XML_ERROR_NONE)
4623     return result;
4624   parser->m_processor = prologProcessor;
4625   return prologProcessor(parser, s, end, nextPtr);
4626 }
4627 
4628 #ifdef XML_DTD
4629 
4630 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4631 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4632                             const char **nextPtr) {
4633   enum XML_Error result = initializeEncoding(parser);
4634   if (result != XML_ERROR_NONE)
4635     return result;
4636 
4637   /* we know now that XML_Parse(Buffer) has been called,
4638      so we consider the external parameter entity read */
4639   parser->m_dtd->paramEntityRead = XML_TRUE;
4640 
4641   if (parser->m_prologState.inEntityValue) {
4642     parser->m_processor = entityValueInitProcessor;
4643     return entityValueInitProcessor(parser, s, end, nextPtr);
4644   } else {
4645     parser->m_processor = externalParEntProcessor;
4646     return externalParEntProcessor(parser, s, end, nextPtr);
4647   }
4648 }
4649 
4650 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4651 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4652                          const char **nextPtr) {
4653   int tok;
4654   const char *start = s;
4655   const char *next = start;
4656   parser->m_eventPtr = start;
4657 
4658   for (;;) {
4659     tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4660     /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4661              - storeEntityValue
4662              - processXmlDecl
4663     */
4664     parser->m_eventEndPtr = next;
4665     if (tok <= 0) {
4666       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4667         *nextPtr = s;
4668         return XML_ERROR_NONE;
4669       }
4670       switch (tok) {
4671       case XML_TOK_INVALID:
4672         return XML_ERROR_INVALID_TOKEN;
4673       case XML_TOK_PARTIAL:
4674         return XML_ERROR_UNCLOSED_TOKEN;
4675       case XML_TOK_PARTIAL_CHAR:
4676         return XML_ERROR_PARTIAL_CHAR;
4677       case XML_TOK_NONE: /* start == end */
4678       default:
4679         break;
4680       }
4681       /* found end of entity value - can store it now */
4682       return storeEntityValue(parser, parser->m_encoding, s, end,
4683                               XML_ACCOUNT_DIRECT, NULL);
4684     } else if (tok == XML_TOK_XML_DECL) {
4685       enum XML_Error result;
4686       result = processXmlDecl(parser, 0, start, next);
4687       if (result != XML_ERROR_NONE)
4688         return result;
4689       /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
4690        * that to happen, a parameter entity parsing handler must have attempted
4691        * to suspend the parser, which fails and raises an error.  The parser can
4692        * be aborted, but can't be suspended.
4693        */
4694       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4695         return XML_ERROR_ABORTED;
4696       *nextPtr = next;
4697       /* stop scanning for text declaration - we found one */
4698       parser->m_processor = entityValueProcessor;
4699       return entityValueProcessor(parser, next, end, nextPtr);
4700     }
4701     /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4702        must move s and nextPtr forward to consume the BOM.
4703 
4704        If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4705        would leave the BOM in the buffer and return. On the next call to this
4706        function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4707        is not valid to have multiple BOMs.
4708     */
4709     else if (tok == XML_TOK_BOM) {
4710 #  if XML_GE == 1
4711       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4712                                     XML_ACCOUNT_DIRECT)) {
4713         accountingOnAbort(parser);
4714         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4715       }
4716 #  endif
4717 
4718       *nextPtr = next;
4719       s = next;
4720     }
4721     /* If we get this token, we have the start of what might be a
4722        normal tag, but not a declaration (i.e. it doesn't begin with
4723        "<!").  In a DTD context, that isn't legal.
4724     */
4725     else if (tok == XML_TOK_INSTANCE_START) {
4726       *nextPtr = next;
4727       return XML_ERROR_SYNTAX;
4728     }
4729     start = next;
4730     parser->m_eventPtr = start;
4731   }
4732 }
4733 
4734 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4735 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4736                         const char **nextPtr) {
4737   const char *next = s;
4738   int tok;
4739 
4740   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4741   if (tok <= 0) {
4742     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4743       *nextPtr = s;
4744       return XML_ERROR_NONE;
4745     }
4746     switch (tok) {
4747     case XML_TOK_INVALID:
4748       return XML_ERROR_INVALID_TOKEN;
4749     case XML_TOK_PARTIAL:
4750       return XML_ERROR_UNCLOSED_TOKEN;
4751     case XML_TOK_PARTIAL_CHAR:
4752       return XML_ERROR_PARTIAL_CHAR;
4753     case XML_TOK_NONE: /* start == end */
4754     default:
4755       break;
4756     }
4757   }
4758   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4759      However, when parsing an external subset, doProlog will not accept a BOM
4760      as valid, and report a syntax error, so we have to skip the BOM, and
4761      account for the BOM bytes.
4762   */
4763   else if (tok == XML_TOK_BOM) {
4764     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4765                                   XML_ACCOUNT_DIRECT)) {
4766       accountingOnAbort(parser);
4767       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4768     }
4769 
4770     s = next;
4771     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4772   }
4773 
4774   parser->m_processor = prologProcessor;
4775   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4776                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4777                   XML_ACCOUNT_DIRECT);
4778 }
4779 
4780 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4781 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4782                      const char **nextPtr) {
4783   const char *start = s;
4784   const char *next = s;
4785   const ENCODING *enc = parser->m_encoding;
4786   int tok;
4787 
4788   for (;;) {
4789     tok = XmlPrologTok(enc, start, end, &next);
4790     /* Note: These bytes are accounted later in:
4791              - storeEntityValue
4792     */
4793     if (tok <= 0) {
4794       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4795         *nextPtr = s;
4796         return XML_ERROR_NONE;
4797       }
4798       switch (tok) {
4799       case XML_TOK_INVALID:
4800         return XML_ERROR_INVALID_TOKEN;
4801       case XML_TOK_PARTIAL:
4802         return XML_ERROR_UNCLOSED_TOKEN;
4803       case XML_TOK_PARTIAL_CHAR:
4804         return XML_ERROR_PARTIAL_CHAR;
4805       case XML_TOK_NONE: /* start == end */
4806       default:
4807         break;
4808       }
4809       /* found end of entity value - can store it now */
4810       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
4811     }
4812     start = next;
4813   }
4814 }
4815 
4816 #endif /* XML_DTD */
4817 
4818 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4819 prologProcessor(XML_Parser parser, const char *s, const char *end,
4820                 const char **nextPtr) {
4821   const char *next = s;
4822   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4823   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4824                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4825                   XML_ACCOUNT_DIRECT);
4826 }
4827 
4828 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4829 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4830          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4831          XML_Bool allowClosingDoctype, enum XML_Account account) {
4832 #ifdef XML_DTD
4833   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4834 #endif /* XML_DTD */
4835   static const XML_Char atypeCDATA[]
4836       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4837   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4838   static const XML_Char atypeIDREF[]
4839       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4840   static const XML_Char atypeIDREFS[]
4841       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4842   static const XML_Char atypeENTITY[]
4843       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4844   static const XML_Char atypeENTITIES[]
4845       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4846          ASCII_I, ASCII_E, ASCII_S, '\0'};
4847   static const XML_Char atypeNMTOKEN[]
4848       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4849   static const XML_Char atypeNMTOKENS[]
4850       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4851          ASCII_E, ASCII_N, ASCII_S, '\0'};
4852   static const XML_Char notationPrefix[]
4853       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4854          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4855   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4856   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4857 
4858 #ifndef XML_DTD
4859   UNUSED_P(account);
4860 #endif
4861 
4862   /* save one level of indirection */
4863   DTD *const dtd = parser->m_dtd;
4864 
4865   const char **eventPP;
4866   const char **eventEndPP;
4867   enum XML_Content_Quant quant;
4868 
4869   if (enc == parser->m_encoding) {
4870     eventPP = &parser->m_eventPtr;
4871     eventEndPP = &parser->m_eventEndPtr;
4872   } else {
4873     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4874     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4875   }
4876 
4877   for (;;) {
4878     int role;
4879     XML_Bool handleDefault = XML_TRUE;
4880     *eventPP = s;
4881     *eventEndPP = next;
4882     if (tok <= 0) {
4883       if (haveMore && tok != XML_TOK_INVALID) {
4884         *nextPtr = s;
4885         return XML_ERROR_NONE;
4886       }
4887       switch (tok) {
4888       case XML_TOK_INVALID:
4889         *eventPP = next;
4890         return XML_ERROR_INVALID_TOKEN;
4891       case XML_TOK_PARTIAL:
4892         return XML_ERROR_UNCLOSED_TOKEN;
4893       case XML_TOK_PARTIAL_CHAR:
4894         return XML_ERROR_PARTIAL_CHAR;
4895       case -XML_TOK_PROLOG_S:
4896         tok = -tok;
4897         break;
4898       case XML_TOK_NONE:
4899 #ifdef XML_DTD
4900         /* for internal PE NOT referenced between declarations */
4901         if (enc != parser->m_encoding
4902             && ! parser->m_openInternalEntities->betweenDecl) {
4903           *nextPtr = s;
4904           return XML_ERROR_NONE;
4905         }
4906         /* WFC: PE Between Declarations - must check that PE contains
4907            complete markup, not only for external PEs, but also for
4908            internal PEs if the reference occurs between declarations.
4909         */
4910         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4911           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4912               == XML_ROLE_ERROR)
4913             return XML_ERROR_INCOMPLETE_PE;
4914           *nextPtr = s;
4915           return XML_ERROR_NONE;
4916         }
4917 #endif /* XML_DTD */
4918         return XML_ERROR_NO_ELEMENTS;
4919       default:
4920         tok = -tok;
4921         next = end;
4922         break;
4923       }
4924     }
4925     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4926 #if XML_GE == 1
4927     switch (role) {
4928     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4929     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4930 #  ifdef XML_DTD
4931     case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4932 #  endif
4933       break;
4934     default:
4935       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4936         accountingOnAbort(parser);
4937         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4938       }
4939     }
4940 #endif
4941     switch (role) {
4942     case XML_ROLE_XML_DECL: {
4943       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4944       if (result != XML_ERROR_NONE)
4945         return result;
4946       enc = parser->m_encoding;
4947       handleDefault = XML_FALSE;
4948     } break;
4949     case XML_ROLE_DOCTYPE_NAME:
4950       if (parser->m_startDoctypeDeclHandler) {
4951         parser->m_doctypeName
4952             = poolStoreString(&parser->m_tempPool, enc, s, next);
4953         if (! parser->m_doctypeName)
4954           return XML_ERROR_NO_MEMORY;
4955         poolFinish(&parser->m_tempPool);
4956         parser->m_doctypePubid = NULL;
4957         handleDefault = XML_FALSE;
4958       }
4959       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4960       break;
4961     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4962       if (parser->m_startDoctypeDeclHandler) {
4963         parser->m_startDoctypeDeclHandler(
4964             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4965             parser->m_doctypePubid, 1);
4966         parser->m_doctypeName = NULL;
4967         poolClear(&parser->m_tempPool);
4968         handleDefault = XML_FALSE;
4969       }
4970       break;
4971 #ifdef XML_DTD
4972     case XML_ROLE_TEXT_DECL: {
4973       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4974       if (result != XML_ERROR_NONE)
4975         return result;
4976       enc = parser->m_encoding;
4977       handleDefault = XML_FALSE;
4978     } break;
4979 #endif /* XML_DTD */
4980     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4981 #ifdef XML_DTD
4982       parser->m_useForeignDTD = XML_FALSE;
4983       parser->m_declEntity = (ENTITY *)lookup(
4984           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4985       if (! parser->m_declEntity)
4986         return XML_ERROR_NO_MEMORY;
4987 #endif /* XML_DTD */
4988       dtd->hasParamEntityRefs = XML_TRUE;
4989       if (parser->m_startDoctypeDeclHandler) {
4990         XML_Char *pubId;
4991         if (! XmlIsPublicId(enc, s, next, eventPP))
4992           return XML_ERROR_PUBLICID;
4993         pubId = poolStoreString(&parser->m_tempPool, enc,
4994                                 s + enc->minBytesPerChar,
4995                                 next - enc->minBytesPerChar);
4996         if (! pubId)
4997           return XML_ERROR_NO_MEMORY;
4998         normalizePublicId(pubId);
4999         poolFinish(&parser->m_tempPool);
5000         parser->m_doctypePubid = pubId;
5001         handleDefault = XML_FALSE;
5002         goto alreadyChecked;
5003       }
5004       /* fall through */
5005     case XML_ROLE_ENTITY_PUBLIC_ID:
5006       if (! XmlIsPublicId(enc, s, next, eventPP))
5007         return XML_ERROR_PUBLICID;
5008     alreadyChecked:
5009       if (dtd->keepProcessing && parser->m_declEntity) {
5010         XML_Char *tem
5011             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5012                               next - enc->minBytesPerChar);
5013         if (! tem)
5014           return XML_ERROR_NO_MEMORY;
5015         normalizePublicId(tem);
5016         parser->m_declEntity->publicId = tem;
5017         poolFinish(&dtd->pool);
5018         /* Don't suppress the default handler if we fell through from
5019          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5020          */
5021         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5022           handleDefault = XML_FALSE;
5023       }
5024       break;
5025     case XML_ROLE_DOCTYPE_CLOSE:
5026       if (allowClosingDoctype != XML_TRUE) {
5027         /* Must not close doctype from within expanded parameter entities */
5028         return XML_ERROR_INVALID_TOKEN;
5029       }
5030 
5031       if (parser->m_doctypeName) {
5032         parser->m_startDoctypeDeclHandler(
5033             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5034             parser->m_doctypePubid, 0);
5035         poolClear(&parser->m_tempPool);
5036         handleDefault = XML_FALSE;
5037       }
5038       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5039          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5040          was not set, indicating an external subset
5041       */
5042 #ifdef XML_DTD
5043       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5044         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5045         dtd->hasParamEntityRefs = XML_TRUE;
5046         if (parser->m_paramEntityParsing
5047             && parser->m_externalEntityRefHandler) {
5048           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5049                                             externalSubsetName, sizeof(ENTITY));
5050           if (! entity) {
5051             /* The external subset name "#" will have already been
5052              * inserted into the hash table at the start of the
5053              * external entity parsing, so no allocation will happen
5054              * and lookup() cannot fail.
5055              */
5056             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5057           }
5058           if (parser->m_useForeignDTD)
5059             entity->base = parser->m_curBase;
5060           dtd->paramEntityRead = XML_FALSE;
5061           if (! parser->m_externalEntityRefHandler(
5062                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5063                   entity->systemId, entity->publicId))
5064             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5065           if (dtd->paramEntityRead) {
5066             if (! dtd->standalone && parser->m_notStandaloneHandler
5067                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5068               return XML_ERROR_NOT_STANDALONE;
5069           }
5070           /* if we didn't read the foreign DTD then this means that there
5071              is no external subset and we must reset dtd->hasParamEntityRefs
5072           */
5073           else if (! parser->m_doctypeSysid)
5074             dtd->hasParamEntityRefs = hadParamEntityRefs;
5075           /* end of DTD - no need to update dtd->keepProcessing */
5076         }
5077         parser->m_useForeignDTD = XML_FALSE;
5078       }
5079 #endif /* XML_DTD */
5080       if (parser->m_endDoctypeDeclHandler) {
5081         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5082         handleDefault = XML_FALSE;
5083       }
5084       break;
5085     case XML_ROLE_INSTANCE_START:
5086 #ifdef XML_DTD
5087       /* if there is no DOCTYPE declaration then now is the
5088          last chance to read the foreign DTD
5089       */
5090       if (parser->m_useForeignDTD) {
5091         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5092         dtd->hasParamEntityRefs = XML_TRUE;
5093         if (parser->m_paramEntityParsing
5094             && parser->m_externalEntityRefHandler) {
5095           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5096                                             externalSubsetName, sizeof(ENTITY));
5097           if (! entity)
5098             return XML_ERROR_NO_MEMORY;
5099           entity->base = parser->m_curBase;
5100           dtd->paramEntityRead = XML_FALSE;
5101           if (! parser->m_externalEntityRefHandler(
5102                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5103                   entity->systemId, entity->publicId))
5104             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5105           if (dtd->paramEntityRead) {
5106             if (! dtd->standalone && parser->m_notStandaloneHandler
5107                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5108               return XML_ERROR_NOT_STANDALONE;
5109           }
5110           /* if we didn't read the foreign DTD then this means that there
5111              is no external subset and we must reset dtd->hasParamEntityRefs
5112           */
5113           else
5114             dtd->hasParamEntityRefs = hadParamEntityRefs;
5115           /* end of DTD - no need to update dtd->keepProcessing */
5116         }
5117       }
5118 #endif /* XML_DTD */
5119       parser->m_processor = contentProcessor;
5120       return contentProcessor(parser, s, end, nextPtr);
5121     case XML_ROLE_ATTLIST_ELEMENT_NAME:
5122       parser->m_declElementType = getElementType(parser, enc, s, next);
5123       if (! parser->m_declElementType)
5124         return XML_ERROR_NO_MEMORY;
5125       goto checkAttListDeclHandler;
5126     case XML_ROLE_ATTRIBUTE_NAME:
5127       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5128       if (! parser->m_declAttributeId)
5129         return XML_ERROR_NO_MEMORY;
5130       parser->m_declAttributeIsCdata = XML_FALSE;
5131       parser->m_declAttributeType = NULL;
5132       parser->m_declAttributeIsId = XML_FALSE;
5133       goto checkAttListDeclHandler;
5134     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5135       parser->m_declAttributeIsCdata = XML_TRUE;
5136       parser->m_declAttributeType = atypeCDATA;
5137       goto checkAttListDeclHandler;
5138     case XML_ROLE_ATTRIBUTE_TYPE_ID:
5139       parser->m_declAttributeIsId = XML_TRUE;
5140       parser->m_declAttributeType = atypeID;
5141       goto checkAttListDeclHandler;
5142     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5143       parser->m_declAttributeType = atypeIDREF;
5144       goto checkAttListDeclHandler;
5145     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5146       parser->m_declAttributeType = atypeIDREFS;
5147       goto checkAttListDeclHandler;
5148     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5149       parser->m_declAttributeType = atypeENTITY;
5150       goto checkAttListDeclHandler;
5151     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5152       parser->m_declAttributeType = atypeENTITIES;
5153       goto checkAttListDeclHandler;
5154     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5155       parser->m_declAttributeType = atypeNMTOKEN;
5156       goto checkAttListDeclHandler;
5157     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5158       parser->m_declAttributeType = atypeNMTOKENS;
5159     checkAttListDeclHandler:
5160       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5161         handleDefault = XML_FALSE;
5162       break;
5163     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5164     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5165       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5166         const XML_Char *prefix;
5167         if (parser->m_declAttributeType) {
5168           prefix = enumValueSep;
5169         } else {
5170           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5171                                                               : enumValueStart);
5172         }
5173         if (! poolAppendString(&parser->m_tempPool, prefix))
5174           return XML_ERROR_NO_MEMORY;
5175         if (! poolAppend(&parser->m_tempPool, enc, s, next))
5176           return XML_ERROR_NO_MEMORY;
5177         parser->m_declAttributeType = parser->m_tempPool.start;
5178         handleDefault = XML_FALSE;
5179       }
5180       break;
5181     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5182     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5183       if (dtd->keepProcessing) {
5184         if (! defineAttribute(parser->m_declElementType,
5185                               parser->m_declAttributeId,
5186                               parser->m_declAttributeIsCdata,
5187                               parser->m_declAttributeIsId, 0, parser))
5188           return XML_ERROR_NO_MEMORY;
5189         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5190           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5191               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5192                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5193             /* Enumerated or Notation type */
5194             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5195                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5196               return XML_ERROR_NO_MEMORY;
5197             parser->m_declAttributeType = parser->m_tempPool.start;
5198             poolFinish(&parser->m_tempPool);
5199           }
5200           *eventEndPP = s;
5201           parser->m_attlistDeclHandler(
5202               parser->m_handlerArg, parser->m_declElementType->name,
5203               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5204               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5205           handleDefault = XML_FALSE;
5206         }
5207       }
5208       poolClear(&parser->m_tempPool);
5209       break;
5210     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5211     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5212       if (dtd->keepProcessing) {
5213         const XML_Char *attVal;
5214         enum XML_Error result = storeAttributeValue(
5215             parser, enc, parser->m_declAttributeIsCdata,
5216             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5217             XML_ACCOUNT_NONE);
5218         if (result)
5219           return result;
5220         attVal = poolStart(&dtd->pool);
5221         poolFinish(&dtd->pool);
5222         /* ID attributes aren't allowed to have a default */
5223         if (! defineAttribute(
5224                 parser->m_declElementType, parser->m_declAttributeId,
5225                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5226           return XML_ERROR_NO_MEMORY;
5227         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5228           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5229               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5230                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5231             /* Enumerated or Notation type */
5232             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5233                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5234               return XML_ERROR_NO_MEMORY;
5235             parser->m_declAttributeType = parser->m_tempPool.start;
5236             poolFinish(&parser->m_tempPool);
5237           }
5238           *eventEndPP = s;
5239           parser->m_attlistDeclHandler(
5240               parser->m_handlerArg, parser->m_declElementType->name,
5241               parser->m_declAttributeId->name, parser->m_declAttributeType,
5242               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5243           poolClear(&parser->m_tempPool);
5244           handleDefault = XML_FALSE;
5245         }
5246       }
5247       break;
5248     case XML_ROLE_ENTITY_VALUE:
5249       if (dtd->keepProcessing) {
5250 #if XML_GE == 1
5251         // This will store the given replacement text in
5252         // parser->m_declEntity->textPtr.
5253         enum XML_Error result = callStoreEntityValue(
5254             parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5255             XML_ACCOUNT_NONE);
5256         if (parser->m_declEntity) {
5257           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5258           parser->m_declEntity->textLen
5259               = (int)(poolLength(&dtd->entityValuePool));
5260           poolFinish(&dtd->entityValuePool);
5261           if (parser->m_entityDeclHandler) {
5262             *eventEndPP = s;
5263             parser->m_entityDeclHandler(
5264                 parser->m_handlerArg, parser->m_declEntity->name,
5265                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5266                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5267             handleDefault = XML_FALSE;
5268           }
5269         } else
5270           poolDiscard(&dtd->entityValuePool);
5271         if (result != XML_ERROR_NONE)
5272           return result;
5273 #else
5274         // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5275         // to end up as "&entity123;" in the handler.
5276         if (parser->m_declEntity != NULL) {
5277           const enum XML_Error result
5278               = storeSelfEntityValue(parser, parser->m_declEntity);
5279           if (result != XML_ERROR_NONE)
5280             return result;
5281 
5282           if (parser->m_entityDeclHandler) {
5283             *eventEndPP = s;
5284             parser->m_entityDeclHandler(
5285                 parser->m_handlerArg, parser->m_declEntity->name,
5286                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5287                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5288             handleDefault = XML_FALSE;
5289           }
5290         }
5291 #endif
5292       }
5293       break;
5294     case XML_ROLE_DOCTYPE_SYSTEM_ID:
5295 #ifdef XML_DTD
5296       parser->m_useForeignDTD = XML_FALSE;
5297 #endif /* XML_DTD */
5298       dtd->hasParamEntityRefs = XML_TRUE;
5299       if (parser->m_startDoctypeDeclHandler) {
5300         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5301                                                  s + enc->minBytesPerChar,
5302                                                  next - enc->minBytesPerChar);
5303         if (parser->m_doctypeSysid == NULL)
5304           return XML_ERROR_NO_MEMORY;
5305         poolFinish(&parser->m_tempPool);
5306         handleDefault = XML_FALSE;
5307       }
5308 #ifdef XML_DTD
5309       else
5310         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5311            for the case where no parser->m_startDoctypeDeclHandler is set */
5312         parser->m_doctypeSysid = externalSubsetName;
5313 #endif /* XML_DTD */
5314       if (! dtd->standalone
5315 #ifdef XML_DTD
5316           && ! parser->m_paramEntityParsing
5317 #endif /* XML_DTD */
5318           && parser->m_notStandaloneHandler
5319           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5320         return XML_ERROR_NOT_STANDALONE;
5321 #ifndef XML_DTD
5322       break;
5323 #else  /* XML_DTD */
5324       if (! parser->m_declEntity) {
5325         parser->m_declEntity = (ENTITY *)lookup(
5326             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5327         if (! parser->m_declEntity)
5328           return XML_ERROR_NO_MEMORY;
5329         parser->m_declEntity->publicId = NULL;
5330       }
5331 #endif /* XML_DTD */
5332       /* fall through */
5333     case XML_ROLE_ENTITY_SYSTEM_ID:
5334       if (dtd->keepProcessing && parser->m_declEntity) {
5335         parser->m_declEntity->systemId
5336             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5337                               next - enc->minBytesPerChar);
5338         if (! parser->m_declEntity->systemId)
5339           return XML_ERROR_NO_MEMORY;
5340         parser->m_declEntity->base = parser->m_curBase;
5341         poolFinish(&dtd->pool);
5342         /* Don't suppress the default handler if we fell through from
5343          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5344          */
5345         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5346           handleDefault = XML_FALSE;
5347       }
5348       break;
5349     case XML_ROLE_ENTITY_COMPLETE:
5350 #if XML_GE == 0
5351       // This will store "&amp;entity123;" in entity->textPtr
5352       // to end up as "&entity123;" in the handler.
5353       if (parser->m_declEntity != NULL) {
5354         const enum XML_Error result
5355             = storeSelfEntityValue(parser, parser->m_declEntity);
5356         if (result != XML_ERROR_NONE)
5357           return result;
5358       }
5359 #endif
5360       if (dtd->keepProcessing && parser->m_declEntity
5361           && parser->m_entityDeclHandler) {
5362         *eventEndPP = s;
5363         parser->m_entityDeclHandler(
5364             parser->m_handlerArg, parser->m_declEntity->name,
5365             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5366             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5367         handleDefault = XML_FALSE;
5368       }
5369       break;
5370     case XML_ROLE_ENTITY_NOTATION_NAME:
5371       if (dtd->keepProcessing && parser->m_declEntity) {
5372         parser->m_declEntity->notation
5373             = poolStoreString(&dtd->pool, enc, s, next);
5374         if (! parser->m_declEntity->notation)
5375           return XML_ERROR_NO_MEMORY;
5376         poolFinish(&dtd->pool);
5377         if (parser->m_unparsedEntityDeclHandler) {
5378           *eventEndPP = s;
5379           parser->m_unparsedEntityDeclHandler(
5380               parser->m_handlerArg, parser->m_declEntity->name,
5381               parser->m_declEntity->base, parser->m_declEntity->systemId,
5382               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5383           handleDefault = XML_FALSE;
5384         } else if (parser->m_entityDeclHandler) {
5385           *eventEndPP = s;
5386           parser->m_entityDeclHandler(
5387               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5388               parser->m_declEntity->base, parser->m_declEntity->systemId,
5389               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5390           handleDefault = XML_FALSE;
5391         }
5392       }
5393       break;
5394     case XML_ROLE_GENERAL_ENTITY_NAME: {
5395       if (XmlPredefinedEntityName(enc, s, next)) {
5396         parser->m_declEntity = NULL;
5397         break;
5398       }
5399       if (dtd->keepProcessing) {
5400         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5401         if (! name)
5402           return XML_ERROR_NO_MEMORY;
5403         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5404                                                 name, sizeof(ENTITY));
5405         if (! parser->m_declEntity)
5406           return XML_ERROR_NO_MEMORY;
5407         if (parser->m_declEntity->name != name) {
5408           poolDiscard(&dtd->pool);
5409           parser->m_declEntity = NULL;
5410         } else {
5411           poolFinish(&dtd->pool);
5412           parser->m_declEntity->publicId = NULL;
5413           parser->m_declEntity->is_param = XML_FALSE;
5414           /* if we have a parent parser or are reading an internal parameter
5415              entity, then the entity declaration is not considered "internal"
5416           */
5417           parser->m_declEntity->is_internal
5418               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5419           if (parser->m_entityDeclHandler)
5420             handleDefault = XML_FALSE;
5421         }
5422       } else {
5423         poolDiscard(&dtd->pool);
5424         parser->m_declEntity = NULL;
5425       }
5426     } break;
5427     case XML_ROLE_PARAM_ENTITY_NAME:
5428 #ifdef XML_DTD
5429       if (dtd->keepProcessing) {
5430         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5431         if (! name)
5432           return XML_ERROR_NO_MEMORY;
5433         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5434                                                 name, sizeof(ENTITY));
5435         if (! parser->m_declEntity)
5436           return XML_ERROR_NO_MEMORY;
5437         if (parser->m_declEntity->name != name) {
5438           poolDiscard(&dtd->pool);
5439           parser->m_declEntity = NULL;
5440         } else {
5441           poolFinish(&dtd->pool);
5442           parser->m_declEntity->publicId = NULL;
5443           parser->m_declEntity->is_param = XML_TRUE;
5444           /* if we have a parent parser or are reading an internal parameter
5445              entity, then the entity declaration is not considered "internal"
5446           */
5447           parser->m_declEntity->is_internal
5448               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5449           if (parser->m_entityDeclHandler)
5450             handleDefault = XML_FALSE;
5451         }
5452       } else {
5453         poolDiscard(&dtd->pool);
5454         parser->m_declEntity = NULL;
5455       }
5456 #else  /* not XML_DTD */
5457       parser->m_declEntity = NULL;
5458 #endif /* XML_DTD */
5459       break;
5460     case XML_ROLE_NOTATION_NAME:
5461       parser->m_declNotationPublicId = NULL;
5462       parser->m_declNotationName = NULL;
5463       if (parser->m_notationDeclHandler) {
5464         parser->m_declNotationName
5465             = poolStoreString(&parser->m_tempPool, enc, s, next);
5466         if (! parser->m_declNotationName)
5467           return XML_ERROR_NO_MEMORY;
5468         poolFinish(&parser->m_tempPool);
5469         handleDefault = XML_FALSE;
5470       }
5471       break;
5472     case XML_ROLE_NOTATION_PUBLIC_ID:
5473       if (! XmlIsPublicId(enc, s, next, eventPP))
5474         return XML_ERROR_PUBLICID;
5475       if (parser
5476               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5477         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5478                                         s + enc->minBytesPerChar,
5479                                         next - enc->minBytesPerChar);
5480         if (! tem)
5481           return XML_ERROR_NO_MEMORY;
5482         normalizePublicId(tem);
5483         parser->m_declNotationPublicId = tem;
5484         poolFinish(&parser->m_tempPool);
5485         handleDefault = XML_FALSE;
5486       }
5487       break;
5488     case XML_ROLE_NOTATION_SYSTEM_ID:
5489       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5490         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5491                                                    s + enc->minBytesPerChar,
5492                                                    next - enc->minBytesPerChar);
5493         if (! systemId)
5494           return XML_ERROR_NO_MEMORY;
5495         *eventEndPP = s;
5496         parser->m_notationDeclHandler(
5497             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5498             systemId, parser->m_declNotationPublicId);
5499         handleDefault = XML_FALSE;
5500       }
5501       poolClear(&parser->m_tempPool);
5502       break;
5503     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5504       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5505         *eventEndPP = s;
5506         parser->m_notationDeclHandler(
5507             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5508             0, parser->m_declNotationPublicId);
5509         handleDefault = XML_FALSE;
5510       }
5511       poolClear(&parser->m_tempPool);
5512       break;
5513     case XML_ROLE_ERROR:
5514       switch (tok) {
5515       case XML_TOK_PARAM_ENTITY_REF:
5516         /* PE references in internal subset are
5517            not allowed within declarations. */
5518         return XML_ERROR_PARAM_ENTITY_REF;
5519       case XML_TOK_XML_DECL:
5520         return XML_ERROR_MISPLACED_XML_PI;
5521       default:
5522         return XML_ERROR_SYNTAX;
5523       }
5524 #ifdef XML_DTD
5525     case XML_ROLE_IGNORE_SECT: {
5526       enum XML_Error result;
5527       if (parser->m_defaultHandler)
5528         reportDefault(parser, enc, s, next);
5529       handleDefault = XML_FALSE;
5530       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5531       if (result != XML_ERROR_NONE)
5532         return result;
5533       else if (! next) {
5534         parser->m_processor = ignoreSectionProcessor;
5535         return result;
5536       }
5537     } break;
5538 #endif /* XML_DTD */
5539     case XML_ROLE_GROUP_OPEN:
5540       if (parser->m_prologState.level >= parser->m_groupSize) {
5541         if (parser->m_groupSize) {
5542           {
5543             /* Detect and prevent integer overflow */
5544             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5545               return XML_ERROR_NO_MEMORY;
5546             }
5547 
5548             char *const new_connector = (char *)REALLOC(
5549                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5550             if (new_connector == NULL) {
5551               parser->m_groupSize /= 2;
5552               return XML_ERROR_NO_MEMORY;
5553             }
5554             parser->m_groupConnector = new_connector;
5555           }
5556 
5557           if (dtd->scaffIndex) {
5558             /* Detect and prevent integer overflow.
5559              * The preprocessor guard addresses the "always false" warning
5560              * from -Wtype-limits on platforms where
5561              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5562 #if UINT_MAX >= SIZE_MAX
5563             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5564               return XML_ERROR_NO_MEMORY;
5565             }
5566 #endif
5567 
5568             int *const new_scaff_index = (int *)REALLOC(
5569                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5570             if (new_scaff_index == NULL)
5571               return XML_ERROR_NO_MEMORY;
5572             dtd->scaffIndex = new_scaff_index;
5573           }
5574         } else {
5575           parser->m_groupConnector
5576               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5577           if (! parser->m_groupConnector) {
5578             parser->m_groupSize = 0;
5579             return XML_ERROR_NO_MEMORY;
5580           }
5581         }
5582       }
5583       parser->m_groupConnector[parser->m_prologState.level] = 0;
5584       if (dtd->in_eldecl) {
5585         int myindex = nextScaffoldPart(parser);
5586         if (myindex < 0)
5587           return XML_ERROR_NO_MEMORY;
5588         assert(dtd->scaffIndex != NULL);
5589         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5590         dtd->scaffLevel++;
5591         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5592         if (parser->m_elementDeclHandler)
5593           handleDefault = XML_FALSE;
5594       }
5595       break;
5596     case XML_ROLE_GROUP_SEQUENCE:
5597       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5598         return XML_ERROR_SYNTAX;
5599       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5600       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5601         handleDefault = XML_FALSE;
5602       break;
5603     case XML_ROLE_GROUP_CHOICE:
5604       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5605         return XML_ERROR_SYNTAX;
5606       if (dtd->in_eldecl
5607           && ! parser->m_groupConnector[parser->m_prologState.level]
5608           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5609               != XML_CTYPE_MIXED)) {
5610         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5611             = XML_CTYPE_CHOICE;
5612         if (parser->m_elementDeclHandler)
5613           handleDefault = XML_FALSE;
5614       }
5615       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5616       break;
5617     case XML_ROLE_PARAM_ENTITY_REF:
5618 #ifdef XML_DTD
5619     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5620       dtd->hasParamEntityRefs = XML_TRUE;
5621       if (! parser->m_paramEntityParsing)
5622         dtd->keepProcessing = dtd->standalone;
5623       else {
5624         const XML_Char *name;
5625         ENTITY *entity;
5626         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5627                                next - enc->minBytesPerChar);
5628         if (! name)
5629           return XML_ERROR_NO_MEMORY;
5630         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5631         poolDiscard(&dtd->pool);
5632         /* first, determine if a check for an existing declaration is needed;
5633            if yes, check that the entity exists, and that it is internal,
5634            otherwise call the skipped entity handler
5635         */
5636         if (parser->m_prologState.documentEntity
5637             && (dtd->standalone ? ! parser->m_openInternalEntities
5638                                 : ! dtd->hasParamEntityRefs)) {
5639           if (! entity)
5640             return XML_ERROR_UNDEFINED_ENTITY;
5641           else if (! entity->is_internal) {
5642             /* It's hard to exhaustively search the code to be sure,
5643              * but there doesn't seem to be a way of executing the
5644              * following line.  There are two cases:
5645              *
5646              * If 'standalone' is false, the DTD must have no
5647              * parameter entities or we wouldn't have passed the outer
5648              * 'if' statement.  That means the only entity in the hash
5649              * table is the external subset name "#" which cannot be
5650              * given as a parameter entity name in XML syntax, so the
5651              * lookup must have returned NULL and we don't even reach
5652              * the test for an internal entity.
5653              *
5654              * If 'standalone' is true, it does not seem to be
5655              * possible to create entities taking this code path that
5656              * are not internal entities, so fail the test above.
5657              *
5658              * Because this analysis is very uncertain, the code is
5659              * being left in place and merely removed from the
5660              * coverage test statistics.
5661              */
5662             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5663           }
5664         } else if (! entity) {
5665           dtd->keepProcessing = dtd->standalone;
5666           /* cannot report skipped entities in declarations */
5667           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5668               && parser->m_skippedEntityHandler) {
5669             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5670             handleDefault = XML_FALSE;
5671           }
5672           break;
5673         }
5674         if (entity->open)
5675           return XML_ERROR_RECURSIVE_ENTITY_REF;
5676         if (entity->textPtr) {
5677           enum XML_Error result;
5678           XML_Bool betweenDecl
5679               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5680           result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
5681           if (result != XML_ERROR_NONE)
5682             return result;
5683           handleDefault = XML_FALSE;
5684           break;
5685         }
5686         if (parser->m_externalEntityRefHandler) {
5687           dtd->paramEntityRead = XML_FALSE;
5688           entity->open = XML_TRUE;
5689           entityTrackingOnOpen(parser, entity, __LINE__);
5690           if (! parser->m_externalEntityRefHandler(
5691                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5692                   entity->systemId, entity->publicId)) {
5693             entityTrackingOnClose(parser, entity, __LINE__);
5694             entity->open = XML_FALSE;
5695             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5696           }
5697           entityTrackingOnClose(parser, entity, __LINE__);
5698           entity->open = XML_FALSE;
5699           handleDefault = XML_FALSE;
5700           if (! dtd->paramEntityRead) {
5701             dtd->keepProcessing = dtd->standalone;
5702             break;
5703           }
5704         } else {
5705           dtd->keepProcessing = dtd->standalone;
5706           break;
5707         }
5708       }
5709 #endif /* XML_DTD */
5710       if (! dtd->standalone && parser->m_notStandaloneHandler
5711           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5712         return XML_ERROR_NOT_STANDALONE;
5713       break;
5714 
5715       /* Element declaration stuff */
5716 
5717     case XML_ROLE_ELEMENT_NAME:
5718       if (parser->m_elementDeclHandler) {
5719         parser->m_declElementType = getElementType(parser, enc, s, next);
5720         if (! parser->m_declElementType)
5721           return XML_ERROR_NO_MEMORY;
5722         dtd->scaffLevel = 0;
5723         dtd->scaffCount = 0;
5724         dtd->in_eldecl = XML_TRUE;
5725         handleDefault = XML_FALSE;
5726       }
5727       break;
5728 
5729     case XML_ROLE_CONTENT_ANY:
5730     case XML_ROLE_CONTENT_EMPTY:
5731       if (dtd->in_eldecl) {
5732         if (parser->m_elementDeclHandler) {
5733           XML_Content *content
5734               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5735           if (! content)
5736             return XML_ERROR_NO_MEMORY;
5737           content->quant = XML_CQUANT_NONE;
5738           content->name = NULL;
5739           content->numchildren = 0;
5740           content->children = NULL;
5741           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5742                                                           : XML_CTYPE_EMPTY);
5743           *eventEndPP = s;
5744           parser->m_elementDeclHandler(
5745               parser->m_handlerArg, parser->m_declElementType->name, content);
5746           handleDefault = XML_FALSE;
5747         }
5748         dtd->in_eldecl = XML_FALSE;
5749       }
5750       break;
5751 
5752     case XML_ROLE_CONTENT_PCDATA:
5753       if (dtd->in_eldecl) {
5754         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5755             = XML_CTYPE_MIXED;
5756         if (parser->m_elementDeclHandler)
5757           handleDefault = XML_FALSE;
5758       }
5759       break;
5760 
5761     case XML_ROLE_CONTENT_ELEMENT:
5762       quant = XML_CQUANT_NONE;
5763       goto elementContent;
5764     case XML_ROLE_CONTENT_ELEMENT_OPT:
5765       quant = XML_CQUANT_OPT;
5766       goto elementContent;
5767     case XML_ROLE_CONTENT_ELEMENT_REP:
5768       quant = XML_CQUANT_REP;
5769       goto elementContent;
5770     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5771       quant = XML_CQUANT_PLUS;
5772     elementContent:
5773       if (dtd->in_eldecl) {
5774         ELEMENT_TYPE *el;
5775         const XML_Char *name;
5776         size_t nameLen;
5777         const char *nxt
5778             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5779         int myindex = nextScaffoldPart(parser);
5780         if (myindex < 0)
5781           return XML_ERROR_NO_MEMORY;
5782         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5783         dtd->scaffold[myindex].quant = quant;
5784         el = getElementType(parser, enc, s, nxt);
5785         if (! el)
5786           return XML_ERROR_NO_MEMORY;
5787         name = el->name;
5788         dtd->scaffold[myindex].name = name;
5789         nameLen = 0;
5790         for (; name[nameLen++];)
5791           ;
5792 
5793         /* Detect and prevent integer overflow */
5794         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5795           return XML_ERROR_NO_MEMORY;
5796         }
5797 
5798         dtd->contentStringLen += (unsigned)nameLen;
5799         if (parser->m_elementDeclHandler)
5800           handleDefault = XML_FALSE;
5801       }
5802       break;
5803 
5804     case XML_ROLE_GROUP_CLOSE:
5805       quant = XML_CQUANT_NONE;
5806       goto closeGroup;
5807     case XML_ROLE_GROUP_CLOSE_OPT:
5808       quant = XML_CQUANT_OPT;
5809       goto closeGroup;
5810     case XML_ROLE_GROUP_CLOSE_REP:
5811       quant = XML_CQUANT_REP;
5812       goto closeGroup;
5813     case XML_ROLE_GROUP_CLOSE_PLUS:
5814       quant = XML_CQUANT_PLUS;
5815     closeGroup:
5816       if (dtd->in_eldecl) {
5817         if (parser->m_elementDeclHandler)
5818           handleDefault = XML_FALSE;
5819         dtd->scaffLevel--;
5820         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5821         if (dtd->scaffLevel == 0) {
5822           if (! handleDefault) {
5823             XML_Content *model = build_model(parser);
5824             if (! model)
5825               return XML_ERROR_NO_MEMORY;
5826             *eventEndPP = s;
5827             parser->m_elementDeclHandler(
5828                 parser->m_handlerArg, parser->m_declElementType->name, model);
5829           }
5830           dtd->in_eldecl = XML_FALSE;
5831           dtd->contentStringLen = 0;
5832         }
5833       }
5834       break;
5835       /* End element declaration stuff */
5836 
5837     case XML_ROLE_PI:
5838       if (! reportProcessingInstruction(parser, enc, s, next))
5839         return XML_ERROR_NO_MEMORY;
5840       handleDefault = XML_FALSE;
5841       break;
5842     case XML_ROLE_COMMENT:
5843       if (! reportComment(parser, enc, s, next))
5844         return XML_ERROR_NO_MEMORY;
5845       handleDefault = XML_FALSE;
5846       break;
5847     case XML_ROLE_NONE:
5848       switch (tok) {
5849       case XML_TOK_BOM:
5850         handleDefault = XML_FALSE;
5851         break;
5852       }
5853       break;
5854     case XML_ROLE_DOCTYPE_NONE:
5855       if (parser->m_startDoctypeDeclHandler)
5856         handleDefault = XML_FALSE;
5857       break;
5858     case XML_ROLE_ENTITY_NONE:
5859       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5860         handleDefault = XML_FALSE;
5861       break;
5862     case XML_ROLE_NOTATION_NONE:
5863       if (parser->m_notationDeclHandler)
5864         handleDefault = XML_FALSE;
5865       break;
5866     case XML_ROLE_ATTLIST_NONE:
5867       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5868         handleDefault = XML_FALSE;
5869       break;
5870     case XML_ROLE_ELEMENT_NONE:
5871       if (parser->m_elementDeclHandler)
5872         handleDefault = XML_FALSE;
5873       break;
5874     } /* end of big switch */
5875 
5876     if (handleDefault && parser->m_defaultHandler)
5877       reportDefault(parser, enc, s, next);
5878 
5879     switch (parser->m_parsingStatus.parsing) {
5880     case XML_SUSPENDED:
5881       *nextPtr = next;
5882       return XML_ERROR_NONE;
5883     case XML_FINISHED:
5884       return XML_ERROR_ABORTED;
5885     case XML_PARSING:
5886       if (parser->m_reenter) {
5887         *nextPtr = next;
5888         return XML_ERROR_NONE;
5889       }
5890     /* Fall through */
5891     default:
5892       s = next;
5893       tok = XmlPrologTok(enc, s, end, &next);
5894     }
5895   }
5896   /* not reached */
5897 }
5898 
5899 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5900 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5901                 const char **nextPtr) {
5902   parser->m_processor = epilogProcessor;
5903   parser->m_eventPtr = s;
5904   for (;;) {
5905     const char *next = NULL;
5906     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5907 #if XML_GE == 1
5908     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5909                                   XML_ACCOUNT_DIRECT)) {
5910       accountingOnAbort(parser);
5911       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5912     }
5913 #endif
5914     parser->m_eventEndPtr = next;
5915     switch (tok) {
5916     /* report partial linebreak - it might be the last token */
5917     case -XML_TOK_PROLOG_S:
5918       if (parser->m_defaultHandler) {
5919         reportDefault(parser, parser->m_encoding, s, next);
5920         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5921           return XML_ERROR_ABORTED;
5922       }
5923       *nextPtr = next;
5924       return XML_ERROR_NONE;
5925     case XML_TOK_NONE:
5926       *nextPtr = s;
5927       return XML_ERROR_NONE;
5928     case XML_TOK_PROLOG_S:
5929       if (parser->m_defaultHandler)
5930         reportDefault(parser, parser->m_encoding, s, next);
5931       break;
5932     case XML_TOK_PI:
5933       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5934         return XML_ERROR_NO_MEMORY;
5935       break;
5936     case XML_TOK_COMMENT:
5937       if (! reportComment(parser, parser->m_encoding, s, next))
5938         return XML_ERROR_NO_MEMORY;
5939       break;
5940     case XML_TOK_INVALID:
5941       parser->m_eventPtr = next;
5942       return XML_ERROR_INVALID_TOKEN;
5943     case XML_TOK_PARTIAL:
5944       if (! parser->m_parsingStatus.finalBuffer) {
5945         *nextPtr = s;
5946         return XML_ERROR_NONE;
5947       }
5948       return XML_ERROR_UNCLOSED_TOKEN;
5949     case XML_TOK_PARTIAL_CHAR:
5950       if (! parser->m_parsingStatus.finalBuffer) {
5951         *nextPtr = s;
5952         return XML_ERROR_NONE;
5953       }
5954       return XML_ERROR_PARTIAL_CHAR;
5955     default:
5956       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5957     }
5958     switch (parser->m_parsingStatus.parsing) {
5959     case XML_SUSPENDED:
5960       parser->m_eventPtr = next;
5961       *nextPtr = next;
5962       return XML_ERROR_NONE;
5963     case XML_FINISHED:
5964       parser->m_eventPtr = next;
5965       return XML_ERROR_ABORTED;
5966     case XML_PARSING:
5967       if (parser->m_reenter) {
5968         return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
5969       }
5970     /* Fall through */
5971     default:;
5972       parser->m_eventPtr = s = next;
5973     }
5974   }
5975 }
5976 
5977 static enum XML_Error
processEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl,enum EntityType type)5978 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
5979               enum EntityType type) {
5980   OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
5981   switch (type) {
5982   case ENTITY_INTERNAL:
5983     parser->m_processor = internalEntityProcessor;
5984     openEntityList = &parser->m_openInternalEntities;
5985     freeEntityList = &parser->m_freeInternalEntities;
5986     break;
5987   case ENTITY_ATTRIBUTE:
5988     openEntityList = &parser->m_openAttributeEntities;
5989     freeEntityList = &parser->m_freeAttributeEntities;
5990     break;
5991   case ENTITY_VALUE:
5992     openEntityList = &parser->m_openValueEntities;
5993     freeEntityList = &parser->m_freeValueEntities;
5994     break;
5995     /* default case serves merely as a safety net in case of a
5996      * wrong entityType. Therefore we exclude the following lines
5997      * from the test coverage.
5998      *
5999      * LCOV_EXCL_START
6000      */
6001   default:
6002     // Should not reach here
6003     assert(0);
6004     /* LCOV_EXCL_STOP */
6005   }
6006 
6007   if (*freeEntityList) {
6008     openEntity = *freeEntityList;
6009     *freeEntityList = openEntity->next;
6010   } else {
6011     openEntity
6012         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6013     if (! openEntity)
6014       return XML_ERROR_NO_MEMORY;
6015   }
6016   entity->open = XML_TRUE;
6017   entity->hasMore = XML_TRUE;
6018 #if XML_GE == 1
6019   entityTrackingOnOpen(parser, entity, __LINE__);
6020 #endif
6021   entity->processed = 0;
6022   openEntity->next = *openEntityList;
6023   *openEntityList = openEntity;
6024   openEntity->entity = entity;
6025   openEntity->type = type;
6026   openEntity->startTagLevel = parser->m_tagLevel;
6027   openEntity->betweenDecl = betweenDecl;
6028   openEntity->internalEventPtr = NULL;
6029   openEntity->internalEventEndPtr = NULL;
6030 
6031   // Only internal entities make use of the reenter flag
6032   // therefore no need to set it for other entity types
6033   if (type == ENTITY_INTERNAL) {
6034     triggerReenter(parser);
6035   }
6036   return XML_ERROR_NONE;
6037 }
6038 
6039 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)6040 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6041                         const char **nextPtr) {
6042   UNUSED_P(s);
6043   UNUSED_P(end);
6044   UNUSED_P(nextPtr);
6045   ENTITY *entity;
6046   const char *textStart, *textEnd;
6047   const char *next;
6048   enum XML_Error result;
6049   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6050   if (! openEntity)
6051     return XML_ERROR_UNEXPECTED_STATE;
6052 
6053   entity = openEntity->entity;
6054 
6055   // This will return early
6056   if (entity->hasMore) {
6057     textStart = ((const char *)entity->textPtr) + entity->processed;
6058     textEnd = (const char *)(entity->textPtr + entity->textLen);
6059     /* Set a safe default value in case 'next' does not get set */
6060     next = textStart;
6061 
6062     if (entity->is_param) {
6063       int tok
6064           = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6065       result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6066                         tok, next, &next, XML_FALSE, XML_FALSE,
6067                         XML_ACCOUNT_ENTITY_EXPANSION);
6068     } else {
6069       result = doContent(parser, openEntity->startTagLevel,
6070                          parser->m_internalEncoding, textStart, textEnd, &next,
6071                          XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6072     }
6073 
6074     if (result != XML_ERROR_NONE)
6075       return result;
6076     // Check if entity is complete, if not, mark down how much of it is
6077     // processed
6078     if (textEnd != next
6079         && (parser->m_parsingStatus.parsing == XML_SUSPENDED
6080             || (parser->m_parsingStatus.parsing == XML_PARSING
6081                 && parser->m_reenter))) {
6082       entity->processed = (int)(next - (const char *)entity->textPtr);
6083       return result;
6084     }
6085 
6086     // Entity is complete. We cannot close it here since we need to first
6087     // process its possible inner entities (which are added to the
6088     // m_openInternalEntities during doProlog or doContent calls above)
6089     entity->hasMore = XML_FALSE;
6090     triggerReenter(parser);
6091     return result;
6092   } // End of entity processing, "if" block will return here
6093 
6094   // Remove fully processed openEntity from open entity list.
6095 #if XML_GE == 1
6096   entityTrackingOnClose(parser, entity, __LINE__);
6097 #endif
6098   // openEntity is m_openInternalEntities' head, as we set it at the start of
6099   // this function and we skipped doProlog and doContent calls with hasMore set
6100   // to false. This means we can directly remove the head of
6101   // m_openInternalEntities
6102   assert(parser->m_openInternalEntities == openEntity);
6103   entity->open = XML_FALSE;
6104   parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6105 
6106   /* put openEntity back in list of free instances */
6107   openEntity->next = parser->m_freeInternalEntities;
6108   parser->m_freeInternalEntities = openEntity;
6109 
6110   if (parser->m_openInternalEntities == NULL) {
6111     parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6112   }
6113   triggerReenter(parser);
6114   return XML_ERROR_NONE;
6115 }
6116 
6117 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)6118 errorProcessor(XML_Parser parser, const char *s, const char *end,
6119                const char **nextPtr) {
6120   UNUSED_P(s);
6121   UNUSED_P(end);
6122   UNUSED_P(nextPtr);
6123   return parser->m_errorCode;
6124 }
6125 
6126 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)6127 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6128                     const char *ptr, const char *end, STRING_POOL *pool,
6129                     enum XML_Account account) {
6130   const char *next = ptr;
6131   enum XML_Error result = XML_ERROR_NONE;
6132 
6133   while (1) {
6134     if (! parser->m_openAttributeEntities) {
6135       result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6136                                     account, &next);
6137     } else {
6138       OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6139       if (! openEntity)
6140         return XML_ERROR_UNEXPECTED_STATE;
6141 
6142       ENTITY *const entity = openEntity->entity;
6143       const char *const textStart
6144           = ((const char *)entity->textPtr) + entity->processed;
6145       const char *const textEnd
6146           = (const char *)(entity->textPtr + entity->textLen);
6147       /* Set a safe default value in case 'next' does not get set */
6148       const char *nextInEntity = textStart;
6149       if (entity->hasMore) {
6150         result = appendAttributeValue(
6151             parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6152             pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6153         if (result != XML_ERROR_NONE)
6154           break;
6155         // Check if entity is complete, if not, mark down how much of it is
6156         // processed. A XML_SUSPENDED check here is not required as
6157         // appendAttributeValue will never suspend the parser.
6158         if (textEnd != nextInEntity) {
6159           entity->processed
6160               = (int)(nextInEntity - (const char *)entity->textPtr);
6161           continue;
6162         }
6163 
6164         // Entity is complete. We cannot close it here since we need to first
6165         // process its possible inner entities (which are added to the
6166         // m_openAttributeEntities during appendAttributeValue)
6167         entity->hasMore = XML_FALSE;
6168         continue;
6169       } // End of entity processing, "if" block skips the rest
6170 
6171       // Remove fully processed openEntity from open entity list.
6172 #if XML_GE == 1
6173       entityTrackingOnClose(parser, entity, __LINE__);
6174 #endif
6175       // openEntity is m_openAttributeEntities' head, since we set it at the
6176       // start of this function and because we skipped appendAttributeValue call
6177       // with hasMore set to false. This means we can directly remove the head
6178       // of m_openAttributeEntities
6179       assert(parser->m_openAttributeEntities == openEntity);
6180       entity->open = XML_FALSE;
6181       parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6182 
6183       /* put openEntity back in list of free instances */
6184       openEntity->next = parser->m_freeAttributeEntities;
6185       parser->m_freeAttributeEntities = openEntity;
6186     }
6187 
6188     // Break if an error occurred or there is nothing left to process
6189     if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6190       break;
6191     }
6192   }
6193 
6194   if (result)
6195     return result;
6196   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6197     poolChop(pool);
6198   if (! poolAppendChar(pool, XML_T('\0')))
6199     return XML_ERROR_NO_MEMORY;
6200   return XML_ERROR_NONE;
6201 }
6202 
6203 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account,const char ** nextPtr)6204 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6205                      const char *ptr, const char *end, STRING_POOL *pool,
6206                      enum XML_Account account, const char **nextPtr) {
6207   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6208 #ifndef XML_DTD
6209   UNUSED_P(account);
6210 #endif
6211 
6212   for (;;) {
6213     const char *next
6214         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6215     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6216 #if XML_GE == 1
6217     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6218       accountingOnAbort(parser);
6219       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6220     }
6221 #endif
6222     switch (tok) {
6223     case XML_TOK_NONE:
6224       if (nextPtr) {
6225         *nextPtr = next;
6226       }
6227       return XML_ERROR_NONE;
6228     case XML_TOK_INVALID:
6229       if (enc == parser->m_encoding)
6230         parser->m_eventPtr = next;
6231       return XML_ERROR_INVALID_TOKEN;
6232     case XML_TOK_PARTIAL:
6233       if (enc == parser->m_encoding)
6234         parser->m_eventPtr = ptr;
6235       return XML_ERROR_INVALID_TOKEN;
6236     case XML_TOK_CHAR_REF: {
6237       XML_Char buf[XML_ENCODE_MAX];
6238       int i;
6239       int n = XmlCharRefNumber(enc, ptr);
6240       if (n < 0) {
6241         if (enc == parser->m_encoding)
6242           parser->m_eventPtr = ptr;
6243         return XML_ERROR_BAD_CHAR_REF;
6244       }
6245       if (! isCdata && n == 0x20 /* space */
6246           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6247         break;
6248       n = XmlEncode(n, (ICHAR *)buf);
6249       /* The XmlEncode() functions can never return 0 here.  That
6250        * error return happens if the code point passed in is either
6251        * negative or greater than or equal to 0x110000.  The
6252        * XmlCharRefNumber() functions will all return a number
6253        * strictly less than 0x110000 or a negative value if an error
6254        * occurred.  The negative value is intercepted above, so
6255        * XmlEncode() is never passed a value it might return an
6256        * error for.
6257        */
6258       for (i = 0; i < n; i++) {
6259         if (! poolAppendChar(pool, buf[i]))
6260           return XML_ERROR_NO_MEMORY;
6261       }
6262     } break;
6263     case XML_TOK_DATA_CHARS:
6264       if (! poolAppend(pool, enc, ptr, next))
6265         return XML_ERROR_NO_MEMORY;
6266       break;
6267     case XML_TOK_TRAILING_CR:
6268       next = ptr + enc->minBytesPerChar;
6269       /* fall through */
6270     case XML_TOK_ATTRIBUTE_VALUE_S:
6271     case XML_TOK_DATA_NEWLINE:
6272       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6273         break;
6274       if (! poolAppendChar(pool, 0x20))
6275         return XML_ERROR_NO_MEMORY;
6276       break;
6277     case XML_TOK_ENTITY_REF: {
6278       const XML_Char *name;
6279       ENTITY *entity;
6280       char checkEntityDecl;
6281       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6282           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6283       if (ch) {
6284 #if XML_GE == 1
6285         /* NOTE: We are replacing 4-6 characters original input for 1 character
6286          *       so there is no amplification and hence recording without
6287          *       protection. */
6288         accountingDiffTolerated(parser, tok, (char *)&ch,
6289                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6290                                 XML_ACCOUNT_ENTITY_EXPANSION);
6291 #endif /* XML_GE == 1 */
6292         if (! poolAppendChar(pool, ch))
6293           return XML_ERROR_NO_MEMORY;
6294         break;
6295       }
6296       name = poolStoreString(&parser->m_temp2Pool, enc,
6297                              ptr + enc->minBytesPerChar,
6298                              next - enc->minBytesPerChar);
6299       if (! name)
6300         return XML_ERROR_NO_MEMORY;
6301       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6302       poolDiscard(&parser->m_temp2Pool);
6303       /* First, determine if a check for an existing declaration is needed;
6304          if yes, check that the entity exists, and that it is internal.
6305       */
6306       if (pool == &dtd->pool) /* are we called from prolog? */
6307         checkEntityDecl =
6308 #ifdef XML_DTD
6309             parser->m_prologState.documentEntity &&
6310 #endif /* XML_DTD */
6311             (dtd->standalone ? ! parser->m_openInternalEntities
6312                              : ! dtd->hasParamEntityRefs);
6313       else /* if (pool == &parser->m_tempPool): we are called from content */
6314         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6315       if (checkEntityDecl) {
6316         if (! entity)
6317           return XML_ERROR_UNDEFINED_ENTITY;
6318         else if (! entity->is_internal)
6319           return XML_ERROR_ENTITY_DECLARED_IN_PE;
6320       } else if (! entity) {
6321         /* Cannot report skipped entity here - see comments on
6322            parser->m_skippedEntityHandler.
6323         if (parser->m_skippedEntityHandler)
6324           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6325         */
6326         /* Cannot call the default handler because this would be
6327            out of sync with the call to the startElementHandler.
6328         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6329           reportDefault(parser, enc, ptr, next);
6330         */
6331         break;
6332       }
6333       if (entity->open) {
6334         if (enc == parser->m_encoding) {
6335           /* It does not appear that this line can be executed.
6336            *
6337            * The "if (entity->open)" check catches recursive entity
6338            * definitions.  In order to be called with an open
6339            * entity, it must have gone through this code before and
6340            * been through the recursive call to
6341            * appendAttributeValue() some lines below.  That call
6342            * sets the local encoding ("enc") to the parser's
6343            * internal encoding (internal_utf8 or internal_utf16),
6344            * which can never be the same as the principle encoding.
6345            * It doesn't appear there is another code path that gets
6346            * here with entity->open being TRUE.
6347            *
6348            * Since it is not certain that this logic is watertight,
6349            * we keep the line and merely exclude it from coverage
6350            * tests.
6351            */
6352           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6353         }
6354         return XML_ERROR_RECURSIVE_ENTITY_REF;
6355       }
6356       if (entity->notation) {
6357         if (enc == parser->m_encoding)
6358           parser->m_eventPtr = ptr;
6359         return XML_ERROR_BINARY_ENTITY_REF;
6360       }
6361       if (! entity->textPtr) {
6362         if (enc == parser->m_encoding)
6363           parser->m_eventPtr = ptr;
6364         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6365       } else {
6366         enum XML_Error result;
6367         result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6368         if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6369           *nextPtr = next;
6370         }
6371         return result;
6372       }
6373     } break;
6374     default:
6375       /* The only token returned by XmlAttributeValueTok() that does
6376        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6377        * Getting that would require an entity name to contain an
6378        * incomplete XML character (e.g. \xE2\x82); however previous
6379        * tokenisers will have already recognised and rejected such
6380        * names before XmlAttributeValueTok() gets a look-in.  This
6381        * default case should be retained as a safety net, but the code
6382        * excluded from coverage tests.
6383        *
6384        * LCOV_EXCL_START
6385        */
6386       if (enc == parser->m_encoding)
6387         parser->m_eventPtr = ptr;
6388       return XML_ERROR_UNEXPECTED_STATE;
6389       /* LCOV_EXCL_STOP */
6390     }
6391     ptr = next;
6392   }
6393   /* not reached */
6394 }
6395 
6396 #if XML_GE == 1
6397 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account,const char ** nextPtr)6398 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6399                  const char *entityTextPtr, const char *entityTextEnd,
6400                  enum XML_Account account, const char **nextPtr) {
6401   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6402   STRING_POOL *pool = &(dtd->entityValuePool);
6403   enum XML_Error result = XML_ERROR_NONE;
6404 #  ifdef XML_DTD
6405   int oldInEntityValue = parser->m_prologState.inEntityValue;
6406   parser->m_prologState.inEntityValue = 1;
6407 #  else
6408   UNUSED_P(account);
6409 #  endif /* XML_DTD */
6410   /* never return Null for the value argument in EntityDeclHandler,
6411      since this would indicate an external entity; therefore we
6412      have to make sure that entityValuePool.start is not null */
6413   if (! pool->blocks) {
6414     if (! poolGrow(pool))
6415       return XML_ERROR_NO_MEMORY;
6416   }
6417 
6418   const char *next;
6419   for (;;) {
6420     next
6421         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6422     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6423 
6424     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6425                                   account)) {
6426       accountingOnAbort(parser);
6427       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6428       goto endEntityValue;
6429     }
6430 
6431     switch (tok) {
6432     case XML_TOK_PARAM_ENTITY_REF:
6433 #  ifdef XML_DTD
6434       if (parser->m_isParamEntity || enc != parser->m_encoding) {
6435         const XML_Char *name;
6436         ENTITY *entity;
6437         name = poolStoreString(&parser->m_tempPool, enc,
6438                                entityTextPtr + enc->minBytesPerChar,
6439                                next - enc->minBytesPerChar);
6440         if (! name) {
6441           result = XML_ERROR_NO_MEMORY;
6442           goto endEntityValue;
6443         }
6444         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6445         poolDiscard(&parser->m_tempPool);
6446         if (! entity) {
6447           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6448           /* cannot report skipped entity here - see comments on
6449              parser->m_skippedEntityHandler
6450           if (parser->m_skippedEntityHandler)
6451             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6452           */
6453           dtd->keepProcessing = dtd->standalone;
6454           goto endEntityValue;
6455         }
6456         if (entity->open || (entity == parser->m_declEntity)) {
6457           if (enc == parser->m_encoding)
6458             parser->m_eventPtr = entityTextPtr;
6459           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6460           goto endEntityValue;
6461         }
6462         if (entity->systemId) {
6463           if (parser->m_externalEntityRefHandler) {
6464             dtd->paramEntityRead = XML_FALSE;
6465             entity->open = XML_TRUE;
6466             entityTrackingOnOpen(parser, entity, __LINE__);
6467             if (! parser->m_externalEntityRefHandler(
6468                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6469                     entity->systemId, entity->publicId)) {
6470               entityTrackingOnClose(parser, entity, __LINE__);
6471               entity->open = XML_FALSE;
6472               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6473               goto endEntityValue;
6474             }
6475             entityTrackingOnClose(parser, entity, __LINE__);
6476             entity->open = XML_FALSE;
6477             if (! dtd->paramEntityRead)
6478               dtd->keepProcessing = dtd->standalone;
6479           } else
6480             dtd->keepProcessing = dtd->standalone;
6481         } else {
6482           result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6483           goto endEntityValue;
6484         }
6485         break;
6486       }
6487 #  endif /* XML_DTD */
6488       /* In the internal subset, PE references are not legal
6489          within markup declarations, e.g entity values in this case. */
6490       parser->m_eventPtr = entityTextPtr;
6491       result = XML_ERROR_PARAM_ENTITY_REF;
6492       goto endEntityValue;
6493     case XML_TOK_NONE:
6494       result = XML_ERROR_NONE;
6495       goto endEntityValue;
6496     case XML_TOK_ENTITY_REF:
6497     case XML_TOK_DATA_CHARS:
6498       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6499         result = XML_ERROR_NO_MEMORY;
6500         goto endEntityValue;
6501       }
6502       break;
6503     case XML_TOK_TRAILING_CR:
6504       next = entityTextPtr + enc->minBytesPerChar;
6505       /* fall through */
6506     case XML_TOK_DATA_NEWLINE:
6507       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6508         result = XML_ERROR_NO_MEMORY;
6509         goto endEntityValue;
6510       }
6511       *(pool->ptr)++ = 0xA;
6512       break;
6513     case XML_TOK_CHAR_REF: {
6514       XML_Char buf[XML_ENCODE_MAX];
6515       int i;
6516       int n = XmlCharRefNumber(enc, entityTextPtr);
6517       if (n < 0) {
6518         if (enc == parser->m_encoding)
6519           parser->m_eventPtr = entityTextPtr;
6520         result = XML_ERROR_BAD_CHAR_REF;
6521         goto endEntityValue;
6522       }
6523       n = XmlEncode(n, (ICHAR *)buf);
6524       /* The XmlEncode() functions can never return 0 here.  That
6525        * error return happens if the code point passed in is either
6526        * negative or greater than or equal to 0x110000.  The
6527        * XmlCharRefNumber() functions will all return a number
6528        * strictly less than 0x110000 or a negative value if an error
6529        * occurred.  The negative value is intercepted above, so
6530        * XmlEncode() is never passed a value it might return an
6531        * error for.
6532        */
6533       for (i = 0; i < n; i++) {
6534         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6535           result = XML_ERROR_NO_MEMORY;
6536           goto endEntityValue;
6537         }
6538         *(pool->ptr)++ = buf[i];
6539       }
6540     } break;
6541     case XML_TOK_PARTIAL:
6542       if (enc == parser->m_encoding)
6543         parser->m_eventPtr = entityTextPtr;
6544       result = XML_ERROR_INVALID_TOKEN;
6545       goto endEntityValue;
6546     case XML_TOK_INVALID:
6547       if (enc == parser->m_encoding)
6548         parser->m_eventPtr = next;
6549       result = XML_ERROR_INVALID_TOKEN;
6550       goto endEntityValue;
6551     default:
6552       /* This default case should be unnecessary -- all the tokens
6553        * that XmlEntityValueTok() can return have their own explicit
6554        * cases -- but should be retained for safety.  We do however
6555        * exclude it from the coverage statistics.
6556        *
6557        * LCOV_EXCL_START
6558        */
6559       if (enc == parser->m_encoding)
6560         parser->m_eventPtr = entityTextPtr;
6561       result = XML_ERROR_UNEXPECTED_STATE;
6562       goto endEntityValue;
6563       /* LCOV_EXCL_STOP */
6564     }
6565     entityTextPtr = next;
6566   }
6567 endEntityValue:
6568 #  ifdef XML_DTD
6569   parser->m_prologState.inEntityValue = oldInEntityValue;
6570 #  endif /* XML_DTD */
6571   // If 'nextPtr' is given, it should be updated during the processing
6572   if (nextPtr != NULL) {
6573     *nextPtr = next;
6574   }
6575   return result;
6576 }
6577 
6578 static enum XML_Error
callStoreEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6579 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6580                      const char *entityTextPtr, const char *entityTextEnd,
6581                      enum XML_Account account) {
6582   const char *next = entityTextPtr;
6583   enum XML_Error result = XML_ERROR_NONE;
6584   while (1) {
6585     if (! parser->m_openValueEntities) {
6586       result
6587           = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6588     } else {
6589       OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6590       if (! openEntity)
6591         return XML_ERROR_UNEXPECTED_STATE;
6592 
6593       ENTITY *const entity = openEntity->entity;
6594       const char *const textStart
6595           = ((const char *)entity->textPtr) + entity->processed;
6596       const char *const textEnd
6597           = (const char *)(entity->textPtr + entity->textLen);
6598       /* Set a safe default value in case 'next' does not get set */
6599       const char *nextInEntity = textStart;
6600       if (entity->hasMore) {
6601         result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6602                                   textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6603                                   &nextInEntity);
6604         if (result != XML_ERROR_NONE)
6605           break;
6606         // Check if entity is complete, if not, mark down how much of it is
6607         // processed. A XML_SUSPENDED check here is not required as
6608         // appendAttributeValue will never suspend the parser.
6609         if (textEnd != nextInEntity) {
6610           entity->processed
6611               = (int)(nextInEntity - (const char *)entity->textPtr);
6612           continue;
6613         }
6614 
6615         // Entity is complete. We cannot close it here since we need to first
6616         // process its possible inner entities (which are added to the
6617         // m_openValueEntities during storeEntityValue)
6618         entity->hasMore = XML_FALSE;
6619         continue;
6620       } // End of entity processing, "if" block skips the rest
6621 
6622       // Remove fully processed openEntity from open entity list.
6623 #  if XML_GE == 1
6624       entityTrackingOnClose(parser, entity, __LINE__);
6625 #  endif
6626       // openEntity is m_openValueEntities' head, since we set it at the
6627       // start of this function and because we skipped storeEntityValue call
6628       // with hasMore set to false. This means we can directly remove the head
6629       // of m_openValueEntities
6630       assert(parser->m_openValueEntities == openEntity);
6631       entity->open = XML_FALSE;
6632       parser->m_openValueEntities = parser->m_openValueEntities->next;
6633 
6634       /* put openEntity back in list of free instances */
6635       openEntity->next = parser->m_freeValueEntities;
6636       parser->m_freeValueEntities = openEntity;
6637     }
6638 
6639     // Break if an error occurred or there is nothing left to process
6640     if (result
6641         || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
6642       break;
6643     }
6644   }
6645 
6646   return result;
6647 }
6648 
6649 #else /* XML_GE == 0 */
6650 
6651 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6652 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6653   // This will store "&amp;entity123;" in entity->textPtr
6654   // to end up as "&entity123;" in the handler.
6655   const char *const entity_start = "&amp;";
6656   const char *const entity_end = ";";
6657 
6658   STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6659   if (! poolAppendString(pool, entity_start)
6660       || ! poolAppendString(pool, entity->name)
6661       || ! poolAppendString(pool, entity_end)) {
6662     poolDiscard(pool);
6663     return XML_ERROR_NO_MEMORY;
6664   }
6665 
6666   entity->textPtr = poolStart(pool);
6667   entity->textLen = (int)(poolLength(pool));
6668   poolFinish(pool);
6669 
6670   return XML_ERROR_NONE;
6671 }
6672 
6673 #endif /* XML_GE == 0 */
6674 
6675 static void FASTCALL
normalizeLines(XML_Char * s)6676 normalizeLines(XML_Char *s) {
6677   XML_Char *p;
6678   for (;; s++) {
6679     if (*s == XML_T('\0'))
6680       return;
6681     if (*s == 0xD)
6682       break;
6683   }
6684   p = s;
6685   do {
6686     if (*s == 0xD) {
6687       *p++ = 0xA;
6688       if (*++s == 0xA)
6689         s++;
6690     } else
6691       *p++ = *s++;
6692   } while (*s);
6693   *p = XML_T('\0');
6694 }
6695 
6696 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6697 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6698                             const char *start, const char *end) {
6699   const XML_Char *target;
6700   XML_Char *data;
6701   const char *tem;
6702   if (! parser->m_processingInstructionHandler) {
6703     if (parser->m_defaultHandler)
6704       reportDefault(parser, enc, start, end);
6705     return 1;
6706   }
6707   start += enc->minBytesPerChar * 2;
6708   tem = start + XmlNameLength(enc, start);
6709   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6710   if (! target)
6711     return 0;
6712   poolFinish(&parser->m_tempPool);
6713   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6714                          end - enc->minBytesPerChar * 2);
6715   if (! data)
6716     return 0;
6717   normalizeLines(data);
6718   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6719   poolClear(&parser->m_tempPool);
6720   return 1;
6721 }
6722 
6723 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6724 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6725               const char *end) {
6726   XML_Char *data;
6727   if (! parser->m_commentHandler) {
6728     if (parser->m_defaultHandler)
6729       reportDefault(parser, enc, start, end);
6730     return 1;
6731   }
6732   data = poolStoreString(&parser->m_tempPool, enc,
6733                          start + enc->minBytesPerChar * 4,
6734                          end - enc->minBytesPerChar * 3);
6735   if (! data)
6736     return 0;
6737   normalizeLines(data);
6738   parser->m_commentHandler(parser->m_handlerArg, data);
6739   poolClear(&parser->m_tempPool);
6740   return 1;
6741 }
6742 
6743 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6744 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6745               const char *end) {
6746   if (MUST_CONVERT(enc, s)) {
6747     enum XML_Convert_Result convert_res;
6748     const char **eventPP;
6749     const char **eventEndPP;
6750     if (enc == parser->m_encoding) {
6751       eventPP = &parser->m_eventPtr;
6752       eventEndPP = &parser->m_eventEndPtr;
6753     } else {
6754       /* To get here, two things must be true; the parser must be
6755        * using a character encoding that is not the same as the
6756        * encoding passed in, and the encoding passed in must need
6757        * conversion to the internal format (UTF-8 unless XML_UNICODE
6758        * is defined).  The only occasions on which the encoding passed
6759        * in is not the same as the parser's encoding are when it is
6760        * the internal encoding (e.g. a previously defined parameter
6761        * entity, already converted to internal format).  This by
6762        * definition doesn't need conversion, so the whole branch never
6763        * gets executed.
6764        *
6765        * For safety's sake we don't delete these lines and merely
6766        * exclude them from coverage statistics.
6767        *
6768        * LCOV_EXCL_START
6769        */
6770       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6771       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6772       /* LCOV_EXCL_STOP */
6773     }
6774     do {
6775       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6776       convert_res
6777           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6778       *eventEndPP = s;
6779       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6780                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6781       *eventPP = s;
6782     } while ((convert_res != XML_CONVERT_COMPLETED)
6783              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6784   } else
6785     parser->m_defaultHandler(
6786         parser->m_handlerArg, (const XML_Char *)s,
6787         (int)((const XML_Char *)end - (const XML_Char *)s));
6788 }
6789 
6790 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6791 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6792                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6793   DEFAULT_ATTRIBUTE *att;
6794   if (value || isId) {
6795     /* The handling of default attributes gets messed up if we have
6796        a default which duplicates a non-default. */
6797     int i;
6798     for (i = 0; i < type->nDefaultAtts; i++)
6799       if (attId == type->defaultAtts[i].id)
6800         return 1;
6801     if (isId && ! type->idAtt && ! attId->xmlns)
6802       type->idAtt = attId;
6803   }
6804   if (type->nDefaultAtts == type->allocDefaultAtts) {
6805     if (type->allocDefaultAtts == 0) {
6806       type->allocDefaultAtts = 8;
6807       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6808           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6809       if (! type->defaultAtts) {
6810         type->allocDefaultAtts = 0;
6811         return 0;
6812       }
6813     } else {
6814       DEFAULT_ATTRIBUTE *temp;
6815 
6816       /* Detect and prevent integer overflow */
6817       if (type->allocDefaultAtts > INT_MAX / 2) {
6818         return 0;
6819       }
6820 
6821       int count = type->allocDefaultAtts * 2;
6822 
6823       /* Detect and prevent integer overflow.
6824        * The preprocessor guard addresses the "always false" warning
6825        * from -Wtype-limits on platforms where
6826        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6827 #if UINT_MAX >= SIZE_MAX
6828       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6829         return 0;
6830       }
6831 #endif
6832 
6833       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6834                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6835       if (temp == NULL)
6836         return 0;
6837       type->allocDefaultAtts = count;
6838       type->defaultAtts = temp;
6839     }
6840   }
6841   att = type->defaultAtts + type->nDefaultAtts;
6842   att->id = attId;
6843   att->value = value;
6844   att->isCdata = isCdata;
6845   if (! isCdata)
6846     attId->maybeTokenized = XML_TRUE;
6847   type->nDefaultAtts += 1;
6848   return 1;
6849 }
6850 
6851 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6852 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6853   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6854   const XML_Char *name;
6855   for (name = elementType->name; *name; name++) {
6856     if (*name == XML_T(ASCII_COLON)) {
6857       PREFIX *prefix;
6858       const XML_Char *s;
6859       for (s = elementType->name; s != name; s++) {
6860         if (! poolAppendChar(&dtd->pool, *s))
6861           return 0;
6862       }
6863       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6864         return 0;
6865       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6866                                 sizeof(PREFIX));
6867       if (! prefix)
6868         return 0;
6869       if (prefix->name == poolStart(&dtd->pool))
6870         poolFinish(&dtd->pool);
6871       else
6872         poolDiscard(&dtd->pool);
6873       elementType->prefix = prefix;
6874       break;
6875     }
6876   }
6877   return 1;
6878 }
6879 
6880 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6881 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6882                const char *end) {
6883   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6884   ATTRIBUTE_ID *id;
6885   const XML_Char *name;
6886   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6887     return NULL;
6888   name = poolStoreString(&dtd->pool, enc, start, end);
6889   if (! name)
6890     return NULL;
6891   /* skip quotation mark - its storage will be reused (like in name[-1]) */
6892   ++name;
6893   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6894                               sizeof(ATTRIBUTE_ID));
6895   if (! id)
6896     return NULL;
6897   if (id->name != name)
6898     poolDiscard(&dtd->pool);
6899   else {
6900     poolFinish(&dtd->pool);
6901     if (! parser->m_ns)
6902       ;
6903     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6904              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6905              && name[4] == XML_T(ASCII_s)
6906              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6907       if (name[5] == XML_T('\0'))
6908         id->prefix = &dtd->defaultPrefix;
6909       else
6910         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6911                                       sizeof(PREFIX));
6912       id->xmlns = XML_TRUE;
6913     } else {
6914       int i;
6915       for (i = 0; name[i]; i++) {
6916         /* attributes without prefix are *not* in the default namespace */
6917         if (name[i] == XML_T(ASCII_COLON)) {
6918           int j;
6919           for (j = 0; j < i; j++) {
6920             if (! poolAppendChar(&dtd->pool, name[j]))
6921               return NULL;
6922           }
6923           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6924             return NULL;
6925           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6926                                         poolStart(&dtd->pool), sizeof(PREFIX));
6927           if (! id->prefix)
6928             return NULL;
6929           if (id->prefix->name == poolStart(&dtd->pool))
6930             poolFinish(&dtd->pool);
6931           else
6932             poolDiscard(&dtd->pool);
6933           break;
6934         }
6935       }
6936     }
6937   }
6938   return id;
6939 }
6940 
6941 #define CONTEXT_SEP XML_T(ASCII_FF)
6942 
6943 static const XML_Char *
getContext(XML_Parser parser)6944 getContext(XML_Parser parser) {
6945   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6946   HASH_TABLE_ITER iter;
6947   XML_Bool needSep = XML_FALSE;
6948 
6949   if (dtd->defaultPrefix.binding) {
6950     int i;
6951     int len;
6952     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6953       return NULL;
6954     len = dtd->defaultPrefix.binding->uriLen;
6955     if (parser->m_namespaceSeparator)
6956       len--;
6957     for (i = 0; i < len; i++) {
6958       if (! poolAppendChar(&parser->m_tempPool,
6959                            dtd->defaultPrefix.binding->uri[i])) {
6960         /* Because of memory caching, I don't believe this line can be
6961          * executed.
6962          *
6963          * This is part of a loop copying the default prefix binding
6964          * URI into the parser's temporary string pool.  Previously,
6965          * that URI was copied into the same string pool, with a
6966          * terminating NUL character, as part of setContext().  When
6967          * the pool was cleared, that leaves a block definitely big
6968          * enough to hold the URI on the free block list of the pool.
6969          * The URI copy in getContext() therefore cannot run out of
6970          * memory.
6971          *
6972          * If the pool is used between the setContext() and
6973          * getContext() calls, the worst it can do is leave a bigger
6974          * block on the front of the free list.  Given that this is
6975          * all somewhat inobvious and program logic can be changed, we
6976          * don't delete the line but we do exclude it from the test
6977          * coverage statistics.
6978          */
6979         return NULL; /* LCOV_EXCL_LINE */
6980       }
6981     }
6982     needSep = XML_TRUE;
6983   }
6984 
6985   hashTableIterInit(&iter, &(dtd->prefixes));
6986   for (;;) {
6987     int i;
6988     int len;
6989     const XML_Char *s;
6990     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6991     if (! prefix)
6992       break;
6993     if (! prefix->binding) {
6994       /* This test appears to be (justifiable) paranoia.  There does
6995        * not seem to be a way of injecting a prefix without a binding
6996        * that doesn't get errored long before this function is called.
6997        * The test should remain for safety's sake, so we instead
6998        * exclude the following line from the coverage statistics.
6999        */
7000       continue; /* LCOV_EXCL_LINE */
7001     }
7002     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7003       return NULL;
7004     for (s = prefix->name; *s; s++)
7005       if (! poolAppendChar(&parser->m_tempPool, *s))
7006         return NULL;
7007     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7008       return NULL;
7009     len = prefix->binding->uriLen;
7010     if (parser->m_namespaceSeparator)
7011       len--;
7012     for (i = 0; i < len; i++)
7013       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7014         return NULL;
7015     needSep = XML_TRUE;
7016   }
7017 
7018   hashTableIterInit(&iter, &(dtd->generalEntities));
7019   for (;;) {
7020     const XML_Char *s;
7021     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7022     if (! e)
7023       break;
7024     if (! e->open)
7025       continue;
7026     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7027       return NULL;
7028     for (s = e->name; *s; s++)
7029       if (! poolAppendChar(&parser->m_tempPool, *s))
7030         return 0;
7031     needSep = XML_TRUE;
7032   }
7033 
7034   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7035     return NULL;
7036   return parser->m_tempPool.start;
7037 }
7038 
7039 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)7040 setContext(XML_Parser parser, const XML_Char *context) {
7041   if (context == NULL) {
7042     return XML_FALSE;
7043   }
7044 
7045   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7046   const XML_Char *s = context;
7047 
7048   while (*context != XML_T('\0')) {
7049     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7050       ENTITY *e;
7051       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7052         return XML_FALSE;
7053       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7054                            poolStart(&parser->m_tempPool), 0);
7055       if (e)
7056         e->open = XML_TRUE;
7057       if (*s != XML_T('\0'))
7058         s++;
7059       context = s;
7060       poolDiscard(&parser->m_tempPool);
7061     } else if (*s == XML_T(ASCII_EQUALS)) {
7062       PREFIX *prefix;
7063       if (poolLength(&parser->m_tempPool) == 0)
7064         prefix = &dtd->defaultPrefix;
7065       else {
7066         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7067           return XML_FALSE;
7068         prefix
7069             = (PREFIX *)lookup(parser, &dtd->prefixes,
7070                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
7071         if (! prefix)
7072           return XML_FALSE;
7073         if (prefix->name == poolStart(&parser->m_tempPool)) {
7074           prefix->name = poolCopyString(&dtd->pool, prefix->name);
7075           if (! prefix->name)
7076             return XML_FALSE;
7077         }
7078         poolDiscard(&parser->m_tempPool);
7079       }
7080       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7081            context++)
7082         if (! poolAppendChar(&parser->m_tempPool, *context))
7083           return XML_FALSE;
7084       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7085         return XML_FALSE;
7086       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7087                      &parser->m_inheritedBindings)
7088           != XML_ERROR_NONE)
7089         return XML_FALSE;
7090       poolDiscard(&parser->m_tempPool);
7091       if (*context != XML_T('\0'))
7092         ++context;
7093       s = context;
7094     } else {
7095       if (! poolAppendChar(&parser->m_tempPool, *s))
7096         return XML_FALSE;
7097       s++;
7098     }
7099   }
7100   return XML_TRUE;
7101 }
7102 
7103 static void FASTCALL
normalizePublicId(XML_Char * publicId)7104 normalizePublicId(XML_Char *publicId) {
7105   XML_Char *p = publicId;
7106   XML_Char *s;
7107   for (s = publicId; *s; s++) {
7108     switch (*s) {
7109     case 0x20:
7110     case 0xD:
7111     case 0xA:
7112       if (p != publicId && p[-1] != 0x20)
7113         *p++ = 0x20;
7114       break;
7115     default:
7116       *p++ = *s;
7117     }
7118   }
7119   if (p != publicId && p[-1] == 0x20)
7120     --p;
7121   *p = XML_T('\0');
7122 }
7123 
7124 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)7125 dtdCreate(const XML_Memory_Handling_Suite *ms) {
7126   DTD *p = ms->malloc_fcn(sizeof(DTD));
7127   if (p == NULL)
7128     return p;
7129   poolInit(&(p->pool), ms);
7130   poolInit(&(p->entityValuePool), ms);
7131   hashTableInit(&(p->generalEntities), ms);
7132   hashTableInit(&(p->elementTypes), ms);
7133   hashTableInit(&(p->attributeIds), ms);
7134   hashTableInit(&(p->prefixes), ms);
7135 #ifdef XML_DTD
7136   p->paramEntityRead = XML_FALSE;
7137   hashTableInit(&(p->paramEntities), ms);
7138 #endif /* XML_DTD */
7139   p->defaultPrefix.name = NULL;
7140   p->defaultPrefix.binding = NULL;
7141 
7142   p->in_eldecl = XML_FALSE;
7143   p->scaffIndex = NULL;
7144   p->scaffold = NULL;
7145   p->scaffLevel = 0;
7146   p->scaffSize = 0;
7147   p->scaffCount = 0;
7148   p->contentStringLen = 0;
7149 
7150   p->keepProcessing = XML_TRUE;
7151   p->hasParamEntityRefs = XML_FALSE;
7152   p->standalone = XML_FALSE;
7153   return p;
7154 }
7155 
7156 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)7157 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
7158   HASH_TABLE_ITER iter;
7159   hashTableIterInit(&iter, &(p->elementTypes));
7160   for (;;) {
7161     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7162     if (! e)
7163       break;
7164     if (e->allocDefaultAtts != 0)
7165       ms->free_fcn(e->defaultAtts);
7166   }
7167   hashTableClear(&(p->generalEntities));
7168 #ifdef XML_DTD
7169   p->paramEntityRead = XML_FALSE;
7170   hashTableClear(&(p->paramEntities));
7171 #endif /* XML_DTD */
7172   hashTableClear(&(p->elementTypes));
7173   hashTableClear(&(p->attributeIds));
7174   hashTableClear(&(p->prefixes));
7175   poolClear(&(p->pool));
7176   poolClear(&(p->entityValuePool));
7177   p->defaultPrefix.name = NULL;
7178   p->defaultPrefix.binding = NULL;
7179 
7180   p->in_eldecl = XML_FALSE;
7181 
7182   ms->free_fcn(p->scaffIndex);
7183   p->scaffIndex = NULL;
7184   ms->free_fcn(p->scaffold);
7185   p->scaffold = NULL;
7186 
7187   p->scaffLevel = 0;
7188   p->scaffSize = 0;
7189   p->scaffCount = 0;
7190   p->contentStringLen = 0;
7191 
7192   p->keepProcessing = XML_TRUE;
7193   p->hasParamEntityRefs = XML_FALSE;
7194   p->standalone = XML_FALSE;
7195 }
7196 
7197 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)7198 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
7199   HASH_TABLE_ITER iter;
7200   hashTableIterInit(&iter, &(p->elementTypes));
7201   for (;;) {
7202     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7203     if (! e)
7204       break;
7205     if (e->allocDefaultAtts != 0)
7206       ms->free_fcn(e->defaultAtts);
7207   }
7208   hashTableDestroy(&(p->generalEntities));
7209 #ifdef XML_DTD
7210   hashTableDestroy(&(p->paramEntities));
7211 #endif /* XML_DTD */
7212   hashTableDestroy(&(p->elementTypes));
7213   hashTableDestroy(&(p->attributeIds));
7214   hashTableDestroy(&(p->prefixes));
7215   poolDestroy(&(p->pool));
7216   poolDestroy(&(p->entityValuePool));
7217   if (isDocEntity) {
7218     ms->free_fcn(p->scaffIndex);
7219     ms->free_fcn(p->scaffold);
7220   }
7221   ms->free_fcn(p);
7222 }
7223 
7224 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7225    The new DTD has already been initialized.
7226 */
7227 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)7228 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7229         const XML_Memory_Handling_Suite *ms) {
7230   HASH_TABLE_ITER iter;
7231 
7232   /* Copy the prefix table. */
7233 
7234   hashTableIterInit(&iter, &(oldDtd->prefixes));
7235   for (;;) {
7236     const XML_Char *name;
7237     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7238     if (! oldP)
7239       break;
7240     name = poolCopyString(&(newDtd->pool), oldP->name);
7241     if (! name)
7242       return 0;
7243     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7244       return 0;
7245   }
7246 
7247   hashTableIterInit(&iter, &(oldDtd->attributeIds));
7248 
7249   /* Copy the attribute id table. */
7250 
7251   for (;;) {
7252     ATTRIBUTE_ID *newA;
7253     const XML_Char *name;
7254     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7255 
7256     if (! oldA)
7257       break;
7258     /* Remember to allocate the scratch byte before the name. */
7259     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7260       return 0;
7261     name = poolCopyString(&(newDtd->pool), oldA->name);
7262     if (! name)
7263       return 0;
7264     ++name;
7265     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7266                                   sizeof(ATTRIBUTE_ID));
7267     if (! newA)
7268       return 0;
7269     newA->maybeTokenized = oldA->maybeTokenized;
7270     if (oldA->prefix) {
7271       newA->xmlns = oldA->xmlns;
7272       if (oldA->prefix == &oldDtd->defaultPrefix)
7273         newA->prefix = &newDtd->defaultPrefix;
7274       else
7275         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7276                                         oldA->prefix->name, 0);
7277     }
7278   }
7279 
7280   /* Copy the element type table. */
7281 
7282   hashTableIterInit(&iter, &(oldDtd->elementTypes));
7283 
7284   for (;;) {
7285     int i;
7286     ELEMENT_TYPE *newE;
7287     const XML_Char *name;
7288     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7289     if (! oldE)
7290       break;
7291     name = poolCopyString(&(newDtd->pool), oldE->name);
7292     if (! name)
7293       return 0;
7294     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7295                                   sizeof(ELEMENT_TYPE));
7296     if (! newE)
7297       return 0;
7298     if (oldE->nDefaultAtts) {
7299       /* Detect and prevent integer overflow.
7300        * The preprocessor guard addresses the "always false" warning
7301        * from -Wtype-limits on platforms where
7302        * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7303 #if UINT_MAX >= SIZE_MAX
7304       if ((size_t)oldE->nDefaultAtts
7305           > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7306         return 0;
7307       }
7308 #endif
7309       newE->defaultAtts
7310           = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7311       if (! newE->defaultAtts) {
7312         return 0;
7313       }
7314     }
7315     if (oldE->idAtt)
7316       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7317                                            oldE->idAtt->name, 0);
7318     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7319     if (oldE->prefix)
7320       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7321                                       oldE->prefix->name, 0);
7322     for (i = 0; i < newE->nDefaultAtts; i++) {
7323       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7324           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7325       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7326       if (oldE->defaultAtts[i].value) {
7327         newE->defaultAtts[i].value
7328             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7329         if (! newE->defaultAtts[i].value)
7330           return 0;
7331       } else
7332         newE->defaultAtts[i].value = NULL;
7333     }
7334   }
7335 
7336   /* Copy the entity tables. */
7337   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7338                         &(oldDtd->generalEntities)))
7339     return 0;
7340 
7341 #ifdef XML_DTD
7342   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7343                         &(oldDtd->paramEntities)))
7344     return 0;
7345   newDtd->paramEntityRead = oldDtd->paramEntityRead;
7346 #endif /* XML_DTD */
7347 
7348   newDtd->keepProcessing = oldDtd->keepProcessing;
7349   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7350   newDtd->standalone = oldDtd->standalone;
7351 
7352   /* Don't want deep copying for scaffolding */
7353   newDtd->in_eldecl = oldDtd->in_eldecl;
7354   newDtd->scaffold = oldDtd->scaffold;
7355   newDtd->contentStringLen = oldDtd->contentStringLen;
7356   newDtd->scaffSize = oldDtd->scaffSize;
7357   newDtd->scaffLevel = oldDtd->scaffLevel;
7358   newDtd->scaffIndex = oldDtd->scaffIndex;
7359 
7360   return 1;
7361 } /* End dtdCopy */
7362 
7363 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7364 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7365                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7366   HASH_TABLE_ITER iter;
7367   const XML_Char *cachedOldBase = NULL;
7368   const XML_Char *cachedNewBase = NULL;
7369 
7370   hashTableIterInit(&iter, oldTable);
7371 
7372   for (;;) {
7373     ENTITY *newE;
7374     const XML_Char *name;
7375     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7376     if (! oldE)
7377       break;
7378     name = poolCopyString(newPool, oldE->name);
7379     if (! name)
7380       return 0;
7381     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7382     if (! newE)
7383       return 0;
7384     if (oldE->systemId) {
7385       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7386       if (! tem)
7387         return 0;
7388       newE->systemId = tem;
7389       if (oldE->base) {
7390         if (oldE->base == cachedOldBase)
7391           newE->base = cachedNewBase;
7392         else {
7393           cachedOldBase = oldE->base;
7394           tem = poolCopyString(newPool, cachedOldBase);
7395           if (! tem)
7396             return 0;
7397           cachedNewBase = newE->base = tem;
7398         }
7399       }
7400       if (oldE->publicId) {
7401         tem = poolCopyString(newPool, oldE->publicId);
7402         if (! tem)
7403           return 0;
7404         newE->publicId = tem;
7405       }
7406     } else {
7407       const XML_Char *tem
7408           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7409       if (! tem)
7410         return 0;
7411       newE->textPtr = tem;
7412       newE->textLen = oldE->textLen;
7413     }
7414     if (oldE->notation) {
7415       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7416       if (! tem)
7417         return 0;
7418       newE->notation = tem;
7419     }
7420     newE->is_param = oldE->is_param;
7421     newE->is_internal = oldE->is_internal;
7422   }
7423   return 1;
7424 }
7425 
7426 #define INIT_POWER 6
7427 
7428 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7429 keyeq(KEY s1, KEY s2) {
7430   for (; *s1 == *s2; s1++, s2++)
7431     if (*s1 == 0)
7432       return XML_TRUE;
7433   return XML_FALSE;
7434 }
7435 
7436 static size_t
keylen(KEY s)7437 keylen(KEY s) {
7438   size_t len = 0;
7439   for (; *s; s++, len++)
7440     ;
7441   return len;
7442 }
7443 
7444 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7445 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7446   key->k[0] = 0;
7447   key->k[1] = get_hash_secret_salt(parser);
7448 }
7449 
7450 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7451 hash(XML_Parser parser, KEY s) {
7452   struct siphash state;
7453   struct sipkey key;
7454   (void)sip24_valid;
7455   copy_salt_to_sipkey(parser, &key);
7456   sip24_init(&state, &key);
7457   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7458   return (unsigned long)sip24_final(&state);
7459 }
7460 
7461 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)7462 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7463   size_t i;
7464   if (table->size == 0) {
7465     size_t tsize;
7466     if (! createSize)
7467       return NULL;
7468     table->power = INIT_POWER;
7469     /* table->size is a power of 2 */
7470     table->size = (size_t)1 << INIT_POWER;
7471     tsize = table->size * sizeof(NAMED *);
7472     table->v = table->mem->malloc_fcn(tsize);
7473     if (! table->v) {
7474       table->size = 0;
7475       return NULL;
7476     }
7477     memset(table->v, 0, tsize);
7478     i = hash(parser, name) & ((unsigned long)table->size - 1);
7479   } else {
7480     unsigned long h = hash(parser, name);
7481     unsigned long mask = (unsigned long)table->size - 1;
7482     unsigned char step = 0;
7483     i = h & mask;
7484     while (table->v[i]) {
7485       if (keyeq(name, table->v[i]->name))
7486         return table->v[i];
7487       if (! step)
7488         step = PROBE_STEP(h, mask, table->power);
7489       i < step ? (i += table->size - step) : (i -= step);
7490     }
7491     if (! createSize)
7492       return NULL;
7493 
7494     /* check for overflow (table is half full) */
7495     if (table->used >> (table->power - 1)) {
7496       unsigned char newPower = table->power + 1;
7497 
7498       /* Detect and prevent invalid shift */
7499       if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7500         return NULL;
7501       }
7502 
7503       size_t newSize = (size_t)1 << newPower;
7504       unsigned long newMask = (unsigned long)newSize - 1;
7505 
7506       /* Detect and prevent integer overflow */
7507       if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7508         return NULL;
7509       }
7510 
7511       size_t tsize = newSize * sizeof(NAMED *);
7512       NAMED **newV = table->mem->malloc_fcn(tsize);
7513       if (! newV)
7514         return NULL;
7515       memset(newV, 0, tsize);
7516       for (i = 0; i < table->size; i++)
7517         if (table->v[i]) {
7518           unsigned long newHash = hash(parser, table->v[i]->name);
7519           size_t j = newHash & newMask;
7520           step = 0;
7521           while (newV[j]) {
7522             if (! step)
7523               step = PROBE_STEP(newHash, newMask, newPower);
7524             j < step ? (j += newSize - step) : (j -= step);
7525           }
7526           newV[j] = table->v[i];
7527         }
7528       table->mem->free_fcn(table->v);
7529       table->v = newV;
7530       table->power = newPower;
7531       table->size = newSize;
7532       i = h & newMask;
7533       step = 0;
7534       while (table->v[i]) {
7535         if (! step)
7536           step = PROBE_STEP(h, newMask, newPower);
7537         i < step ? (i += newSize - step) : (i -= step);
7538       }
7539     }
7540   }
7541   table->v[i] = table->mem->malloc_fcn(createSize);
7542   if (! table->v[i])
7543     return NULL;
7544   memset(table->v[i], 0, createSize);
7545   table->v[i]->name = name;
7546   (table->used)++;
7547   return table->v[i];
7548 }
7549 
7550 static void FASTCALL
hashTableClear(HASH_TABLE * table)7551 hashTableClear(HASH_TABLE *table) {
7552   size_t i;
7553   for (i = 0; i < table->size; i++) {
7554     table->mem->free_fcn(table->v[i]);
7555     table->v[i] = NULL;
7556   }
7557   table->used = 0;
7558 }
7559 
7560 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7561 hashTableDestroy(HASH_TABLE *table) {
7562   size_t i;
7563   for (i = 0; i < table->size; i++)
7564     table->mem->free_fcn(table->v[i]);
7565   table->mem->free_fcn(table->v);
7566 }
7567 
7568 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7569 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7570   p->power = 0;
7571   p->size = 0;
7572   p->used = 0;
7573   p->v = NULL;
7574   p->mem = ms;
7575 }
7576 
7577 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7578 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7579   iter->p = table->v;
7580   iter->end = iter->p ? iter->p + table->size : NULL;
7581 }
7582 
7583 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7584 hashTableIterNext(HASH_TABLE_ITER *iter) {
7585   while (iter->p != iter->end) {
7586     NAMED *tem = *(iter->p)++;
7587     if (tem)
7588       return tem;
7589   }
7590   return NULL;
7591 }
7592 
7593 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7594 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7595   pool->blocks = NULL;
7596   pool->freeBlocks = NULL;
7597   pool->start = NULL;
7598   pool->ptr = NULL;
7599   pool->end = NULL;
7600   pool->mem = ms;
7601 }
7602 
7603 static void FASTCALL
poolClear(STRING_POOL * pool)7604 poolClear(STRING_POOL *pool) {
7605   if (! pool->freeBlocks)
7606     pool->freeBlocks = pool->blocks;
7607   else {
7608     BLOCK *p = pool->blocks;
7609     while (p) {
7610       BLOCK *tem = p->next;
7611       p->next = pool->freeBlocks;
7612       pool->freeBlocks = p;
7613       p = tem;
7614     }
7615   }
7616   pool->blocks = NULL;
7617   pool->start = NULL;
7618   pool->ptr = NULL;
7619   pool->end = NULL;
7620 }
7621 
7622 static void FASTCALL
poolDestroy(STRING_POOL * pool)7623 poolDestroy(STRING_POOL *pool) {
7624   BLOCK *p = pool->blocks;
7625   while (p) {
7626     BLOCK *tem = p->next;
7627     pool->mem->free_fcn(p);
7628     p = tem;
7629   }
7630   p = pool->freeBlocks;
7631   while (p) {
7632     BLOCK *tem = p->next;
7633     pool->mem->free_fcn(p);
7634     p = tem;
7635   }
7636 }
7637 
7638 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7639 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7640            const char *end) {
7641   if (! pool->ptr && ! poolGrow(pool))
7642     return NULL;
7643   for (;;) {
7644     const enum XML_Convert_Result convert_res = XmlConvert(
7645         enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7646     if ((convert_res == XML_CONVERT_COMPLETED)
7647         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7648       break;
7649     if (! poolGrow(pool))
7650       return NULL;
7651   }
7652   return pool->start;
7653 }
7654 
7655 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7656 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7657   do {
7658     if (! poolAppendChar(pool, *s))
7659       return NULL;
7660   } while (*s++);
7661   s = pool->start;
7662   poolFinish(pool);
7663   return s;
7664 }
7665 
7666 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7667 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7668   if (! pool->ptr && ! poolGrow(pool)) {
7669     /* The following line is unreachable given the current usage of
7670      * poolCopyStringN().  Currently it is called from exactly one
7671      * place to copy the text of a simple general entity.  By that
7672      * point, the name of the entity is already stored in the pool, so
7673      * pool->ptr cannot be NULL.
7674      *
7675      * If poolCopyStringN() is used elsewhere as it well might be,
7676      * this line may well become executable again.  Regardless, this
7677      * sort of check shouldn't be removed lightly, so we just exclude
7678      * it from the coverage statistics.
7679      */
7680     return NULL; /* LCOV_EXCL_LINE */
7681   }
7682   for (; n > 0; --n, s++) {
7683     if (! poolAppendChar(pool, *s))
7684       return NULL;
7685   }
7686   s = pool->start;
7687   poolFinish(pool);
7688   return s;
7689 }
7690 
7691 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7692 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7693   while (*s) {
7694     if (! poolAppendChar(pool, *s))
7695       return NULL;
7696     s++;
7697   }
7698   return pool->start;
7699 }
7700 
7701 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7702 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7703                 const char *end) {
7704   if (! poolAppend(pool, enc, ptr, end))
7705     return NULL;
7706   if (pool->ptr == pool->end && ! poolGrow(pool))
7707     return NULL;
7708   *(pool->ptr)++ = 0;
7709   return pool->start;
7710 }
7711 
7712 static size_t
poolBytesToAllocateFor(int blockSize)7713 poolBytesToAllocateFor(int blockSize) {
7714   /* Unprotected math would be:
7715   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7716   **
7717   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7718   ** For a + b * c we check b * c in isolation first, so that addition of a
7719   ** on top has no chance of making us accept a small non-negative number
7720   */
7721   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7722 
7723   if (blockSize <= 0)
7724     return 0;
7725 
7726   if (blockSize > (int)(INT_MAX / stretch))
7727     return 0;
7728 
7729   {
7730     const int stretchedBlockSize = blockSize * (int)stretch;
7731     const int bytesToAllocate
7732         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7733     if (bytesToAllocate < 0)
7734       return 0;
7735 
7736     return (size_t)bytesToAllocate;
7737   }
7738 }
7739 
7740 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7741 poolGrow(STRING_POOL *pool) {
7742   if (pool->freeBlocks) {
7743     if (pool->start == 0) {
7744       pool->blocks = pool->freeBlocks;
7745       pool->freeBlocks = pool->freeBlocks->next;
7746       pool->blocks->next = NULL;
7747       pool->start = pool->blocks->s;
7748       pool->end = pool->start + pool->blocks->size;
7749       pool->ptr = pool->start;
7750       return XML_TRUE;
7751     }
7752     if (pool->end - pool->start < pool->freeBlocks->size) {
7753       BLOCK *tem = pool->freeBlocks->next;
7754       pool->freeBlocks->next = pool->blocks;
7755       pool->blocks = pool->freeBlocks;
7756       pool->freeBlocks = tem;
7757       memcpy(pool->blocks->s, pool->start,
7758              (pool->end - pool->start) * sizeof(XML_Char));
7759       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7760       pool->start = pool->blocks->s;
7761       pool->end = pool->start + pool->blocks->size;
7762       return XML_TRUE;
7763     }
7764   }
7765   if (pool->blocks && pool->start == pool->blocks->s) {
7766     BLOCK *temp;
7767     int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7768     size_t bytesToAllocate;
7769 
7770     /* NOTE: Needs to be calculated prior to calling `realloc`
7771              to avoid dangling pointers: */
7772     const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7773 
7774     if (blockSize < 0) {
7775       /* This condition traps a situation where either more than
7776        * INT_MAX/2 bytes have already been allocated.  This isn't
7777        * readily testable, since it is unlikely that an average
7778        * machine will have that much memory, so we exclude it from the
7779        * coverage statistics.
7780        */
7781       return XML_FALSE; /* LCOV_EXCL_LINE */
7782     }
7783 
7784     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7785     if (bytesToAllocate == 0)
7786       return XML_FALSE;
7787 
7788     temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7789                                            (unsigned)bytesToAllocate);
7790     if (temp == NULL)
7791       return XML_FALSE;
7792     pool->blocks = temp;
7793     pool->blocks->size = blockSize;
7794     pool->ptr = pool->blocks->s + offsetInsideBlock;
7795     pool->start = pool->blocks->s;
7796     pool->end = pool->start + blockSize;
7797   } else {
7798     BLOCK *tem;
7799     int blockSize = (int)(pool->end - pool->start);
7800     size_t bytesToAllocate;
7801 
7802     if (blockSize < 0) {
7803       /* This condition traps a situation where either more than
7804        * INT_MAX bytes have already been allocated (which is prevented
7805        * by various pieces of program logic, not least this one, never
7806        * mind the unlikelihood of actually having that much memory) or
7807        * the pool control fields have been corrupted (which could
7808        * conceivably happen in an extremely buggy user handler
7809        * function).  Either way it isn't readily testable, so we
7810        * exclude it from the coverage statistics.
7811        */
7812       return XML_FALSE; /* LCOV_EXCL_LINE */
7813     }
7814 
7815     if (blockSize < INIT_BLOCK_SIZE)
7816       blockSize = INIT_BLOCK_SIZE;
7817     else {
7818       /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7819       if ((int)((unsigned)blockSize * 2U) < 0) {
7820         return XML_FALSE;
7821       }
7822       blockSize *= 2;
7823     }
7824 
7825     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7826     if (bytesToAllocate == 0)
7827       return XML_FALSE;
7828 
7829     tem = pool->mem->malloc_fcn(bytesToAllocate);
7830     if (! tem)
7831       return XML_FALSE;
7832     tem->size = blockSize;
7833     tem->next = pool->blocks;
7834     pool->blocks = tem;
7835     if (pool->ptr != pool->start)
7836       memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7837     pool->ptr = tem->s + (pool->ptr - pool->start);
7838     pool->start = tem->s;
7839     pool->end = tem->s + blockSize;
7840   }
7841   return XML_TRUE;
7842 }
7843 
7844 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7845 nextScaffoldPart(XML_Parser parser) {
7846   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7847   CONTENT_SCAFFOLD *me;
7848   int next;
7849 
7850   if (! dtd->scaffIndex) {
7851     /* Detect and prevent integer overflow.
7852      * The preprocessor guard addresses the "always false" warning
7853      * from -Wtype-limits on platforms where
7854      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7855 #if UINT_MAX >= SIZE_MAX
7856     if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
7857       return -1;
7858     }
7859 #endif
7860     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7861     if (! dtd->scaffIndex)
7862       return -1;
7863     dtd->scaffIndex[0] = 0;
7864   }
7865 
7866   if (dtd->scaffCount >= dtd->scaffSize) {
7867     CONTENT_SCAFFOLD *temp;
7868     if (dtd->scaffold) {
7869       /* Detect and prevent integer overflow */
7870       if (dtd->scaffSize > UINT_MAX / 2u) {
7871         return -1;
7872       }
7873       /* Detect and prevent integer overflow.
7874        * The preprocessor guard addresses the "always false" warning
7875        * from -Wtype-limits on platforms where
7876        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7877 #if UINT_MAX >= SIZE_MAX
7878       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7879         return -1;
7880       }
7881 #endif
7882 
7883       temp = (CONTENT_SCAFFOLD *)REALLOC(
7884           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7885       if (temp == NULL)
7886         return -1;
7887       dtd->scaffSize *= 2;
7888     } else {
7889       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7890                                                     * sizeof(CONTENT_SCAFFOLD));
7891       if (temp == NULL)
7892         return -1;
7893       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7894     }
7895     dtd->scaffold = temp;
7896   }
7897   next = dtd->scaffCount++;
7898   me = &dtd->scaffold[next];
7899   if (dtd->scaffLevel) {
7900     CONTENT_SCAFFOLD *parent
7901         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7902     if (parent->lastchild) {
7903       dtd->scaffold[parent->lastchild].nextsib = next;
7904     }
7905     if (! parent->childcnt)
7906       parent->firstchild = next;
7907     parent->lastchild = next;
7908     parent->childcnt++;
7909   }
7910   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7911   return next;
7912 }
7913 
7914 static XML_Content *
build_model(XML_Parser parser)7915 build_model(XML_Parser parser) {
7916   /* Function build_model transforms the existing parser->m_dtd->scaffold
7917    * array of CONTENT_SCAFFOLD tree nodes into a new array of
7918    * XML_Content tree nodes followed by a gapless list of zero-terminated
7919    * strings. */
7920   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7921   XML_Content *ret;
7922   XML_Char *str; /* the current string writing location */
7923 
7924   /* Detect and prevent integer overflow.
7925    * The preprocessor guard addresses the "always false" warning
7926    * from -Wtype-limits on platforms where
7927    * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7928 #if UINT_MAX >= SIZE_MAX
7929   if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7930     return NULL;
7931   }
7932   if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7933     return NULL;
7934   }
7935 #endif
7936   if (dtd->scaffCount * sizeof(XML_Content)
7937       > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7938     return NULL;
7939   }
7940 
7941   const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7942                             + (dtd->contentStringLen * sizeof(XML_Char)));
7943 
7944   ret = (XML_Content *)MALLOC(parser, allocsize);
7945   if (! ret)
7946     return NULL;
7947 
7948   /* What follows is an iterative implementation (of what was previously done
7949    * recursively in a dedicated function called "build_node".  The old recursive
7950    * build_node could be forced into stack exhaustion from input as small as a
7951    * few megabyte, and so that was a security issue.  Hence, a function call
7952    * stack is avoided now by resolving recursion.)
7953    *
7954    * The iterative approach works as follows:
7955    *
7956    * - We have two writing pointers, both walking up the result array; one does
7957    *   the work, the other creates "jobs" for its colleague to do, and leads
7958    *   the way:
7959    *
7960    *   - The faster one, pointer jobDest, always leads and writes "what job
7961    *     to do" by the other, once they reach that place in the
7962    *     array: leader "jobDest" stores the source node array index (relative
7963    *     to array dtd->scaffold) in field "numchildren".
7964    *
7965    *   - The slower one, pointer dest, looks at the value stored in the
7966    *     "numchildren" field (which actually holds a source node array index
7967    *     at that time) and puts the real data from dtd->scaffold in.
7968    *
7969    * - Before the loop starts, jobDest writes source array index 0
7970    *   (where the root node is located) so that dest will have something to do
7971    *   when it starts operation.
7972    *
7973    * - Whenever nodes with children are encountered, jobDest appends
7974    *   them as new jobs, in order.  As a result, tree node siblings are
7975    *   adjacent in the resulting array, for example:
7976    *
7977    *     [0] root, has two children
7978    *       [1] first child of 0, has three children
7979    *         [3] first child of 1, does not have children
7980    *         [4] second child of 1, does not have children
7981    *         [5] third child of 1, does not have children
7982    *       [2] second child of 0, does not have children
7983    *
7984    *   Or (the same data) presented in flat array view:
7985    *
7986    *     [0] root, has two children
7987    *
7988    *     [1] first child of 0, has three children
7989    *     [2] second child of 0, does not have children
7990    *
7991    *     [3] first child of 1, does not have children
7992    *     [4] second child of 1, does not have children
7993    *     [5] third child of 1, does not have children
7994    *
7995    * - The algorithm repeats until all target array indices have been processed.
7996    */
7997   XML_Content *dest = ret; /* tree node writing location, moves upwards */
7998   XML_Content *const destLimit = &ret[dtd->scaffCount];
7999   XML_Content *jobDest = ret; /* next free writing location in target array */
8000   str = (XML_Char *)&ret[dtd->scaffCount];
8001 
8002   /* Add the starting job, the root node (index 0) of the source tree  */
8003   (jobDest++)->numchildren = 0;
8004 
8005   for (; dest < destLimit; dest++) {
8006     /* Retrieve source tree array index from job storage */
8007     const int src_node = (int)dest->numchildren;
8008 
8009     /* Convert item */
8010     dest->type = dtd->scaffold[src_node].type;
8011     dest->quant = dtd->scaffold[src_node].quant;
8012     if (dest->type == XML_CTYPE_NAME) {
8013       const XML_Char *src;
8014       dest->name = str;
8015       src = dtd->scaffold[src_node].name;
8016       for (;;) {
8017         *str++ = *src;
8018         if (! *src)
8019           break;
8020         src++;
8021       }
8022       dest->numchildren = 0;
8023       dest->children = NULL;
8024     } else {
8025       unsigned int i;
8026       int cn;
8027       dest->name = NULL;
8028       dest->numchildren = dtd->scaffold[src_node].childcnt;
8029       dest->children = jobDest;
8030 
8031       /* Append scaffold indices of children to array */
8032       for (i = 0, cn = dtd->scaffold[src_node].firstchild;
8033            i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
8034         (jobDest++)->numchildren = (unsigned int)cn;
8035     }
8036   }
8037 
8038   return ret;
8039 }
8040 
8041 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)8042 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8043                const char *end) {
8044   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8045   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8046   ELEMENT_TYPE *ret;
8047 
8048   if (! name)
8049     return NULL;
8050   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8051                                sizeof(ELEMENT_TYPE));
8052   if (! ret)
8053     return NULL;
8054   if (ret->name != name)
8055     poolDiscard(&dtd->pool);
8056   else {
8057     poolFinish(&dtd->pool);
8058     if (! setElementTypePrefix(parser, ret))
8059       return NULL;
8060   }
8061   return ret;
8062 }
8063 
8064 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)8065 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
8066   size_t charsRequired = 0;
8067   XML_Char *result;
8068 
8069   /* First determine how long the string is */
8070   while (s[charsRequired] != 0) {
8071     charsRequired++;
8072   }
8073   /* Include the terminator */
8074   charsRequired++;
8075 
8076   /* Now allocate space for the copy */
8077   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
8078   if (result == NULL)
8079     return NULL;
8080   /* Copy the original into place */
8081   memcpy(result, s, charsRequired * sizeof(XML_Char));
8082   return result;
8083 }
8084 
8085 #if XML_GE == 1
8086 
8087 static float
accountingGetCurrentAmplification(XML_Parser rootParser)8088 accountingGetCurrentAmplification(XML_Parser rootParser) {
8089   //                                          1.........1.........12 => 22
8090   const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8091   const XmlBigCount countBytesOutput
8092       = rootParser->m_accounting.countBytesDirect
8093         + rootParser->m_accounting.countBytesIndirect;
8094   const float amplificationFactor
8095       = rootParser->m_accounting.countBytesDirect
8096             ? (countBytesOutput
8097                / (float)(rootParser->m_accounting.countBytesDirect))
8098             : ((lenOfShortestInclude
8099                 + rootParser->m_accounting.countBytesIndirect)
8100                / (float)lenOfShortestInclude);
8101   assert(! rootParser->m_parentParser);
8102   return amplificationFactor;
8103 }
8104 
8105 static void
accountingReportStats(XML_Parser originParser,const char * epilog)8106 accountingReportStats(XML_Parser originParser, const char *epilog) {
8107   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8108   assert(! rootParser->m_parentParser);
8109 
8110   if (rootParser->m_accounting.debugLevel == 0u) {
8111     return;
8112   }
8113 
8114   const float amplificationFactor
8115       = accountingGetCurrentAmplification(rootParser);
8116   fprintf(stderr,
8117           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8118               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8119           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8120           rootParser->m_accounting.countBytesIndirect,
8121           (double)amplificationFactor, epilog);
8122 }
8123 
8124 static void
accountingOnAbort(XML_Parser originParser)8125 accountingOnAbort(XML_Parser originParser) {
8126   accountingReportStats(originParser, " ABORTING\n");
8127 }
8128 
8129 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)8130 accountingReportDiff(XML_Parser rootParser,
8131                      unsigned int levelsAwayFromRootParser, const char *before,
8132                      const char *after, ptrdiff_t bytesMore, int source_line,
8133                      enum XML_Account account) {
8134   assert(! rootParser->m_parentParser);
8135 
8136   fprintf(stderr,
8137           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8138           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8139           levelsAwayFromRootParser, source_line, 10, "");
8140 
8141   const char ellipis[] = "[..]";
8142   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8143   const unsigned int contextLength = 10;
8144 
8145   /* Note: Performance is of no concern here */
8146   const char *walker = before;
8147   if ((rootParser->m_accounting.debugLevel >= 3u)
8148       || (after - before)
8149              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8150     for (; walker < after; walker++) {
8151       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8152     }
8153   } else {
8154     for (; walker < before + contextLength; walker++) {
8155       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8156     }
8157     fprintf(stderr, ellipis);
8158     walker = after - contextLength;
8159     for (; walker < after; walker++) {
8160       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8161     }
8162   }
8163   fprintf(stderr, "\"\n");
8164 }
8165 
8166 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)8167 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8168                         const char *after, int source_line,
8169                         enum XML_Account account) {
8170   /* Note: We need to check the token type *first* to be sure that
8171    *       we can even access variable <after>, safely.
8172    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8173   switch (tok) {
8174   case XML_TOK_INVALID:
8175   case XML_TOK_PARTIAL:
8176   case XML_TOK_PARTIAL_CHAR:
8177   case XML_TOK_NONE:
8178     return XML_TRUE;
8179   }
8180 
8181   if (account == XML_ACCOUNT_NONE)
8182     return XML_TRUE; /* because these bytes have been accounted for, already */
8183 
8184   unsigned int levelsAwayFromRootParser;
8185   const XML_Parser rootParser
8186       = getRootParserOf(originParser, &levelsAwayFromRootParser);
8187   assert(! rootParser->m_parentParser);
8188 
8189   const int isDirect
8190       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8191   const ptrdiff_t bytesMore = after - before;
8192 
8193   XmlBigCount *const additionTarget
8194       = isDirect ? &rootParser->m_accounting.countBytesDirect
8195                  : &rootParser->m_accounting.countBytesIndirect;
8196 
8197   /* Detect and avoid integer overflow */
8198   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8199     return XML_FALSE;
8200   *additionTarget += bytesMore;
8201 
8202   const XmlBigCount countBytesOutput
8203       = rootParser->m_accounting.countBytesDirect
8204         + rootParser->m_accounting.countBytesIndirect;
8205   const float amplificationFactor
8206       = accountingGetCurrentAmplification(rootParser);
8207   const XML_Bool tolerated
8208       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8209         || (amplificationFactor
8210             <= rootParser->m_accounting.maximumAmplificationFactor);
8211 
8212   if (rootParser->m_accounting.debugLevel >= 2u) {
8213     accountingReportStats(rootParser, "");
8214     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8215                          bytesMore, source_line, account);
8216   }
8217 
8218   return tolerated;
8219 }
8220 
8221 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)8222 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8223   if (! parser)
8224     return 0;
8225   return parser->m_accounting.countBytesDirect;
8226 }
8227 
8228 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)8229 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8230   if (! parser)
8231     return 0;
8232   return parser->m_accounting.countBytesIndirect;
8233 }
8234 
8235 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)8236 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8237                           const char *action, int sourceLine) {
8238   assert(! rootParser->m_parentParser);
8239   if (rootParser->m_entity_stats.debugLevel == 0u)
8240     return;
8241 
8242 #  if defined(XML_UNICODE)
8243   const char *const entityName = "[..]";
8244 #  else
8245   const char *const entityName = entity->name;
8246 #  endif
8247 
8248   fprintf(
8249       stderr,
8250       "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8251       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8252       rootParser->m_entity_stats.currentDepth,
8253       rootParser->m_entity_stats.maximumDepthSeen,
8254       ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8255       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8256       sourceLine);
8257 }
8258 
8259 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)8260 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8261   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8262   assert(! rootParser->m_parentParser);
8263 
8264   rootParser->m_entity_stats.countEverOpened++;
8265   rootParser->m_entity_stats.currentDepth++;
8266   if (rootParser->m_entity_stats.currentDepth
8267       > rootParser->m_entity_stats.maximumDepthSeen) {
8268     rootParser->m_entity_stats.maximumDepthSeen++;
8269   }
8270 
8271   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8272 }
8273 
8274 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)8275 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8276   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8277   assert(! rootParser->m_parentParser);
8278 
8279   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8280   rootParser->m_entity_stats.currentDepth--;
8281 }
8282 
8283 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8284 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8285   XML_Parser rootParser = parser;
8286   unsigned int stepsTakenUpwards = 0;
8287   while (rootParser->m_parentParser) {
8288     rootParser = rootParser->m_parentParser;
8289     stepsTakenUpwards++;
8290   }
8291   assert(! rootParser->m_parentParser);
8292   if (outLevelDiff != NULL) {
8293     *outLevelDiff = stepsTakenUpwards;
8294   }
8295   return rootParser;
8296 }
8297 
/* Maps a byte value to a static, zero-terminated, printable representation.
 *
 * Printable ASCII characters map to themselves, except that the double
 * quote and the backslash are preceded by a backslash.  NUL, tab, line feed
 * and carriage return map to "\0", "\t", "\n" and "\r" (two characters
 * each).  Every other value maps to "\x" followed by its upper-case
 * hexadecimal value without leading zeros. */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[256] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  /* Values above 255 can only exist where unsigned char is wider than
   * 8 bits; the guard keeps compilers from warning about an always-false
   * comparison everywhere else. */
#  if UCHAR_MAX > 255
  // LCOV_EXCL_START
  if (c > 255) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP
#  endif

  return printable[c];
}
8821 
8822 #endif /* XML_GE == 1 */
8823 
/* Reads a debug level from the environment variable variableName.
 *
 * Returns the variable's value if it parses completely as a base-10 number
 * via strtoul without setting errno; returns defaultDebugLevel if the
 * variable is not set, is empty, has trailing characters or cannot be
 * parsed.
 *
 * errno is reset to 0 whenever the variable is set, whether or not its
 * value could be parsed; it is left untouched when the variable is not
 * set. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  char *afterNumber = NULL;
  unsigned long parsedLevel;

  if (text == NULL) {
    return defaultDebugLevel;
  }

  errno = 0;
  parsedLevel = strtoul(text, &afterNumber, 10);

  /* Accept only if nothing went wrong, at least one character was
   * consumed and the whole value was consumed */
  if ((errno == 0) && (afterNumber != text) && (afterNumber[0] == '\0')) {
    return parsedLevel;
  }

  errno = 0;
  return defaultDebugLevel;
}
8842