xref: /freebsd/contrib/expat/lib/xmlparse.c (revision ffd294a1f4c23863c3e515d16dce31d5509bcb01)
1 /* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+)
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14    Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15    Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
16    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17    Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
18    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
19    Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
20    Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
21    Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
22    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23    Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
24    Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
25    Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
26    Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
27    Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
28    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
29    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
30    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
31    Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
32    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
33    Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34    Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
35    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
36    Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
37    Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
38    Copyright (c) 2022      Jann Horn <jannh@google.com>
39    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
40    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
41    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42    Copyright (c) 2024      Berkay Eren Ürün <berkay.ueruen@siemens.com>
43    Licensed under the MIT license:
44 
45    Permission is  hereby granted,  free of charge,  to any  person obtaining
46    a  copy  of  this  software   and  associated  documentation  files  (the
47    "Software"),  to  deal in  the  Software  without restriction,  including
48    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
49    distribute, sublicense, and/or sell copies of the Software, and to permit
50    persons  to whom  the Software  is  furnished to  do so,  subject to  the
51    following conditions:
52 
53    The above copyright  notice and this permission notice  shall be included
54    in all copies or substantial portions of the Software.
55 
56    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
57    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
58    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
59    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
60    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
61    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
62    USE OR OTHER DEALINGS IN THE SOFTWARE.
63 */
64 
65 #define XML_BUILDING_EXPAT 1
66 
67 #include "expat_config.h"
68 
69 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
70 #  error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
71 #endif
72 
73 #if defined(XML_DTD) && XML_GE == 0
74 #  error Either undefine XML_DTD or define XML_GE to 1.
75 #endif
76 
77 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2)           \
78     || (XML_CONTEXT_BYTES + 0 < 0)
79 #  error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
80 #endif
81 
82 #if defined(HAVE_SYSCALL_GETRANDOM)
83 #  if ! defined(_GNU_SOURCE)
84 #    define _GNU_SOURCE 1 /* syscall prototype */
85 #  endif
86 #endif
87 
88 #ifdef _WIN32
89 /* force stdlib to define rand_s() */
90 #  if ! defined(_CRT_RAND_S)
91 #    define _CRT_RAND_S
92 #  endif
93 #endif
94 
95 #include <stdbool.h>
96 #include <stddef.h>
97 #include <string.h> /* memset(), memcpy() */
98 #include <assert.h>
99 #include <limits.h> /* UINT_MAX */
100 #include <stdio.h>  /* fprintf */
101 #include <stdlib.h> /* getenv, rand_s */
102 #include <stdint.h> /* uintptr_t */
103 #include <math.h>   /* isnan */
104 
105 #ifdef _WIN32
106 #  define getpid GetCurrentProcessId
107 #else
108 #  include <sys/time.h>  /* gettimeofday() */
109 #  include <sys/types.h> /* getpid() */
110 #  include <unistd.h>    /* getpid() */
111 #  include <fcntl.h>     /* O_RDONLY */
112 #  include <errno.h>
113 #endif
114 
115 #ifdef _WIN32
116 #  include "winconfig.h"
117 #endif
118 
119 #include "ascii.h"
120 #include "expat.h"
121 #include "siphash.h"
122 
123 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
124 #  if defined(HAVE_GETRANDOM)
125 #    include <sys/random.h> /* getrandom */
126 #  else
127 #    include <unistd.h>      /* syscall */
128 #    include <sys/syscall.h> /* SYS_getrandom */
129 #  endif
130 #  if ! defined(GRND_NONBLOCK)
131 #    define GRND_NONBLOCK 0x0001
132 #  endif /* defined(GRND_NONBLOCK) */
133 #endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
134 
135 #if defined(HAVE_LIBBSD)                                                       \
136     && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
137 #  include <bsd/stdlib.h>
138 #endif
139 
140 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
141 #  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
142 #endif
143 
144 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)             \
145     && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
146     && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
147     && ! defined(XML_POOR_ENTROPY)
148 #  error You do not have support for any sources of high quality entropy \
149     enabled.  For end user security, that is probably not what you want. \
150     \
151     Your options include: \
152       * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
153       * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
154       * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
155       * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
156       * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
157       * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
158       * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
159       * Windows >=Vista (rand_s): _WIN32. \
160     \
161     If insist on not using any of these, bypass this error by defining \
162     XML_POOR_ENTROPY; you have been warned. \
163     \
164     If you have reasons to patch this detection code away or need changes \
165     to the build system, please open a bug.  Thank you!
166 #endif
167 
168 #ifdef XML_UNICODE
169 #  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
170 #  define XmlConvert XmlUtf16Convert
171 #  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
172 #  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
173 #  define XmlEncode XmlUtf16Encode
174 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
175 typedef unsigned short ICHAR;
176 #else
177 #  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
178 #  define XmlConvert XmlUtf8Convert
179 #  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
180 #  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
181 #  define XmlEncode XmlUtf8Encode
182 #  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
183 typedef char ICHAR;
184 #endif
185 
186 #ifndef XML_NS
187 
188 #  define XmlInitEncodingNS XmlInitEncoding
189 #  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
190 #  undef XmlGetInternalEncodingNS
191 #  define XmlGetInternalEncodingNS XmlGetInternalEncoding
192 #  define XmlParseXmlDeclNS XmlParseXmlDecl
193 
194 #endif
195 
196 #ifdef XML_UNICODE
197 
198 #  ifdef XML_UNICODE_WCHAR_T
199 #    define XML_T(x) (const wchar_t) x
200 #    define XML_L(x) L##x
201 #  else
202 #    define XML_T(x) (const unsigned short)x
203 #    define XML_L(x) x
204 #  endif
205 
206 #else
207 
208 #  define XML_T(x) x
209 #  define XML_L(x) x
210 
211 #endif
212 
213 /* Round up n to be a multiple of sz, where sz is a power of 2. */
/* Note: sz is expanded twice, so it must not have side effects. */
214 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
215 
216 /* Do safe (NULL-aware) pointer arithmetic */
/* Yields 0 when either pointer is NULL, otherwise p - q.  Both arguments are
   expanded twice. */
217 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
218 
/* Smaller of a and b; yields b when the two compare equal.  Each argument may
   be evaluated twice, so avoid side effects. */
219 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
220 
221 #include "internal.h"
222 #include "xmltok.h"
223 #include "xmlrole.h"
224 
225 typedef const XML_Char *KEY;
226 
/* Generic hash-table entry as seen by HASH_TABLE: only the key is visible.
   NOTE(review): PREFIX, ENTITY, ATTRIBUTE_ID and ELEMENT_TYPE all start with a
   name pointer, presumably so they can be stored as NAMED -- confirm in
   lookup(). */
227 typedef struct {
228   KEY name;
229 } NAMED;
230 
/* Hash table of NAMED entries, probed with double hashing (see the
   SECOND_HASH / PROBE_STEP comment in this file). */
231 typedef struct {
232   NAMED **v; /* slot array */
233   unsigned char power; /* presumably log2(size) -- confirm in lookup() */
234   size_t size; /* slot count; a power of 2 per the probing comment */
235   size_t used; /* presumably the number of occupied slots -- confirm */
236   const XML_Memory_Handling_Suite *mem; /* allocator suite for the table */
237 } HASH_TABLE;
238 
239 static size_t keylen(KEY s);
240 
241 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
242 
243 /* For probing (after a collision) we need a step size relatively prime
244    to the hash table size, which is a power of 2. We use double-hashing,
245    since we can calculate a second hash value cheaply by taking those bits
246    of the first hash value that were discarded (masked out) when the table
247    index was calculated: index = hash & mask, where mask = table->size - 1.
248    We limit the maximum step size to table->size / 4 (mask >> 2) and make
249    it odd, since odd numbers are always relatively prime to a power of 2.
250 */
251 #define SECOND_HASH(hash, mask, power)                                         \
252   ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
253 #define PROBE_STEP(hash, mask, power)                                          \
254   ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
255 
/* Cursor used by hashTableIterInit() / hashTableIterNext().
   NOTE(review): p / end presumably delimit the not-yet-visited part of
   HASH_TABLE.v -- confirm in hashTableIterInit(). */
256 typedef struct {
257   NAMED **p;
258   NAMED **end;
259 } HASH_TABLE_ITER;
260 
261 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
262 #define INIT_DATA_BUF_SIZE 1024
263 #define INIT_ATTS_SIZE 16
264 #define INIT_ATTS_VERSION 0xFFFFFFFF
265 #define INIT_BLOCK_SIZE 1024
266 #define INIT_BUFFER_SIZE 1024
267 
268 #define EXPAND_SPARE 24
269 
/* Association of a namespace prefix with a URI.
   NOTE(review): the two link fields suggest a binding sits on two lists at
   once (per tag via nextTagBinding, per prefix via prevPrefixBinding); this is
   inferred from the names only -- confirm in addBinding(). */
270 typedef struct binding {
271   struct prefix *prefix;
272   struct binding *nextTagBinding;
273   struct binding *prevPrefixBinding;
274   const struct attribute_id *attId;
275   XML_Char *uri;
276   int uriLen;   /* presumably used length of uri -- confirm units */
277   int uriAlloc; /* presumably allocated capacity of uri -- confirm units */
278 } BINDING;
279 
/* A namespace prefix and a pointer to a BINDING for it.
   NOTE(review): binding is presumably the currently active binding (NULL when
   unbound) -- confirm in addBinding(). */
280 typedef struct prefix {
281   const XML_Char *name;
282   BINDING *binding;
283 } PREFIX;
284 
/* Element name in the API encoding (see TAG.name), with pointers to its
   components.  NOTE(review): the units of the three length fields are not
   visible here -- confirm in storeAtts(). */
285 typedef struct {
286   const XML_Char *str;
287   const XML_Char *localPart;
288   const XML_Char *prefix;
289   int strLen;
290   int uriLen;
291   int prefixLen;
292 } TAG_NAME;
293 
294 /* TAG represents an open element.
295    The name of the element is stored in both the document and API
296    encodings.  The memory buffer 'buf' is a separately-allocated
297    memory area which stores the name.  During the XML_Parse()/
298    XML_ParseBuffer() call in which the element is opened, the memory for the
299    'raw' version of the name (in the document encoding) is shared with the
300    document buffer.  If the element is open across calls to
301    XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
302    contain the 'raw' name as well.
303 
304    A parser reuses these structures, maintaining a list of allocated
305    TAG objects in a free list.
306 */
307 typedef struct tag {
308   struct tag *parent;  /* parent of this element */
309   const char *rawName; /* tagName in the original encoding */
310   int rawNameLength; /* length of rawName; presumably in bytes -- confirm */
311   TAG_NAME name; /* tagName in the API encoding */
312   char *buf;     /* buffer for name components */
313   char *bufEnd;  /* end of the buffer */
314   BINDING *bindings; /* presumably bindings declared on this element -- confirm */
315 } TAG;
316 
/* A declared entity.  NOTE(review): presumably stored in DTD.generalEntities
   (and DTD.paramEntities when is_param is set) -- confirm at the lookup()
   call sites. */
317 typedef struct {
318   const XML_Char *name;
319   const XML_Char *textPtr;
320   int textLen;   /* length in XML_Chars */
321   int processed; /* # of processed bytes - when suspended */
322   const XML_Char *systemId;
323   const XML_Char *base;
324   const XML_Char *publicId;
325   const XML_Char *notation;
326   XML_Bool open;
327   XML_Bool is_param;
328   XML_Bool is_internal; /* true if declared in internal subset outside PE */
329 } ENTITY;
330 
/* Element type of the DTD.scaffold array, which DTD describes as scaffolding
   for building the content model.  NOTE(review): the int link fields are
   presumably indices into that array -- confirm in nextScaffoldPart() and
   build_model(). */
331 typedef struct {
332   enum XML_Content_Type type;
333   enum XML_Content_Quant quant;
334   const XML_Char *name;
335   int firstchild;
336   int lastchild;
337   int childcnt;
338   int nextsib;
339 } CONTENT_SCAFFOLD;
340 
341 #define INIT_SCAFFOLD_ELEMENTS 32
342 
/* One chunk of STRING_POOL storage, chained through next (see
   STRING_POOL.blocks / freeBlocks).  NOTE(review): s[1] looks like the
   pre-C99 trailing-array idiom, with the real capacity recorded in size --
   confirm in poolGrow(). */
343 typedef struct block {
344   struct block *next;
345   int size;
346   XML_Char s[1];
347 } BLOCK;
348 
/* Growable string storage, manipulated through the pool*() functions and the
   poolStart / poolLength / poolAppendChar / ... macros in this file. */
349 typedef struct {
350   BLOCK *blocks;
351   BLOCK *freeBlocks;
352   const XML_Char *end; /* poolAppendChar() calls poolGrow() when ptr == end */
353   XML_Char *ptr;       /* next write position; poolLength() is ptr - start */
354   XML_Char *start;     /* start of current string; poolFinish() sets it to ptr */
355   const XML_Memory_Handling_Suite *mem;
356 } STRING_POOL;
357 
358 /* The XML_Char before the name is used to determine whether
359    an attribute has been specified. */
360 typedef struct attribute_id {
361   XML_Char *name;
362   PREFIX *prefix;
363   XML_Bool maybeTokenized;
364   XML_Bool xmlns; /* presumably set for namespace-declaration attributes -- confirm */
365 } ATTRIBUTE_ID;
366 
/* A defaulted attribute: which attribute, whether it is CDATA, and its value.
   ELEMENT_TYPE.defaultAtts points at an array of these. */
367 typedef struct {
368   const ATTRIBUTE_ID *id;
369   XML_Bool isCdata;
370   const XML_Char *value;
371 } DEFAULT_ATTRIBUTE;
372 
/* Element type of XML_ParserStruct.m_nsAtts.  NOTE(review): version is
   presumably compared with m_nsAttsVersion to recognise stale slots --
   confirm in storeAtts(). */
373 typedef struct {
374   unsigned long version;
375   unsigned long hash;
376   const XML_Char *uriName;
377 } NS_ATT;
378 
/* Per-element-type information: name, prefix, ID attribute and the list of
   defaulted attributes. */
379 typedef struct {
380   const XML_Char *name;
381   PREFIX *prefix;
382   const ATTRIBUTE_ID *idAtt;
383   int nDefaultAtts;     /* presumably entries in use in defaultAtts -- confirm */
384   int allocDefaultAtts; /* presumably allocated capacity of defaultAtts -- confirm */
385   DEFAULT_ATTRIBUTE *defaultAtts;
386 } ELEMENT_TYPE;
387 
/* DTD state: hash tables for entities, element types, attribute ids and
   prefixes, the string pools, and scaffolding used while an element
   declaration's content model is being built.  Reached from the parser via
   XML_ParserStruct.m_dtd. */
388 typedef struct {
389   HASH_TABLE generalEntities;
390   HASH_TABLE elementTypes;
391   HASH_TABLE attributeIds;
392   HASH_TABLE prefixes;
393   STRING_POOL pool;
394   STRING_POOL entityValuePool;
395   /* false once a parameter entity reference has been skipped */
396   XML_Bool keepProcessing;
397   /* true once an internal or external PE reference has been encountered;
398      this includes the reference to an external subset */
399   XML_Bool hasParamEntityRefs;
400   XML_Bool standalone;
401 #ifdef XML_DTD
402   /* indicates if external PE has been read */
403   XML_Bool paramEntityRead;
404   HASH_TABLE paramEntities;
405 #endif /* XML_DTD */
406   PREFIX defaultPrefix;
407   /* === scaffolding for building content model === */
408   XML_Bool in_eldecl;
409   CONTENT_SCAFFOLD *scaffold;
410   unsigned contentStringLen;
411   unsigned scaffSize;  /* presumably allocated entries in scaffold -- confirm */
412   unsigned scaffCount; /* presumably entries in use in scaffold -- confirm */
413   int scaffLevel;
414   int *scaffIndex;
415 } DTD;
416 
/* List node (linked through next) for the parser's m_openInternalEntities and
   m_freeInternalEntities lists; records the entity and the saved event
   pointers.  NOTE(review): exact save/restore protocol to be confirmed in
   processInternalEntity() / internalEntityProcessor(). */
417 typedef struct open_internal_entity {
418   const char *internalEventPtr;
419   const char *internalEventEndPtr;
420   struct open_internal_entity *next;
421   ENTITY *entity;
422   int startTagLevel;
423   XML_Bool betweenDecl; /* WFC: PE Between Declarations */
424 } OPEN_INTERNAL_ENTITY;
425 
/* How a span of bytes is to be attributed by the accounting helpers; passed
   as the `account` argument of doProlog(), doContent(), doCdataSection(),
   storeAtts() and the other functions declared with that parameter. */
426 enum XML_Account {
427   XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
428   XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
429                                    expansion */
430   XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
431 };
432 
/* Bookkeeping types that exist only when general entities are enabled
   (XML_GE == 1); embedded in XML_ParserStruct as m_accounting and
   m_entity_stats. */
433 #if XML_GE == 1
434 typedef unsigned long long XmlBigCount;
/* Byte counters (direct vs. indirect input) plus the amplification limit
   settings. */
435 typedef struct accounting {
436   XmlBigCount countBytesDirect;
437   XmlBigCount countBytesIndirect;
438   unsigned long debugLevel;
439   float maximumAmplificationFactor; // >=1.0
440   unsigned long long activationThresholdBytes;
441 } ACCOUNTING;
442 
/* Counters describing entity opening and nesting depth. */
443 typedef struct entity_stats {
444   unsigned int countEverOpened;
445   unsigned int currentDepth;
446   unsigned int maximumDepthSeen;
447   unsigned long debugLevel;
448 } ENTITY_STATS;
449 #endif /* XML_GE == 1 */
450 
451 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
452                                          const char *end, const char **endPtr);
453 
454 static Processor prologProcessor;
455 static Processor prologInitProcessor;
456 static Processor contentProcessor;
457 static Processor cdataSectionProcessor;
458 #ifdef XML_DTD
459 static Processor ignoreSectionProcessor;
460 static Processor externalParEntProcessor;
461 static Processor externalParEntInitProcessor;
462 static Processor entityValueProcessor;
463 static Processor entityValueInitProcessor;
464 #endif /* XML_DTD */
465 static Processor epilogProcessor;
466 static Processor errorProcessor;
467 static Processor externalEntityInitProcessor;
468 static Processor externalEntityInitProcessor2;
469 static Processor externalEntityInitProcessor3;
470 static Processor externalEntityContentProcessor;
471 static Processor internalEntityProcessor;
472 
473 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
474                                             const XML_Char *encodingName);
475 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
476                                      const char *s, const char *next);
477 static enum XML_Error initializeEncoding(XML_Parser parser);
478 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
479                                const char *s, const char *end, int tok,
480                                const char *next, const char **nextPtr,
481                                XML_Bool haveMore, XML_Bool allowClosingDoctype,
482                                enum XML_Account account);
483 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
484                                             XML_Bool betweenDecl);
485 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
486                                 const ENCODING *enc, const char *start,
487                                 const char *end, const char **endPtr,
488                                 XML_Bool haveMore, enum XML_Account account);
489 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
490                                      const char **startPtr, const char *end,
491                                      const char **nextPtr, XML_Bool haveMore,
492                                      enum XML_Account account);
493 #ifdef XML_DTD
494 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
495                                       const char **startPtr, const char *end,
496                                       const char **nextPtr, XML_Bool haveMore);
497 #endif /* XML_DTD */
498 
499 static void freeBindings(XML_Parser parser, BINDING *bindings);
500 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
501                                 const char *attStr, TAG_NAME *tagNamePtr,
502                                 BINDING **bindingsPtr,
503                                 enum XML_Account account);
504 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
505                                  const ATTRIBUTE_ID *attId, const XML_Char *uri,
506                                  BINDING **bindingsPtr);
507 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
508                            XML_Bool isCdata, XML_Bool isId,
509                            const XML_Char *value, XML_Parser parser);
510 static enum XML_Error storeAttributeValue(XML_Parser parser,
511                                           const ENCODING *enc, XML_Bool isCdata,
512                                           const char *ptr, const char *end,
513                                           STRING_POOL *pool,
514                                           enum XML_Account account);
515 static enum XML_Error appendAttributeValue(XML_Parser parser,
516                                            const ENCODING *enc,
517                                            XML_Bool isCdata, const char *ptr,
518                                            const char *end, STRING_POOL *pool,
519                                            enum XML_Account account);
520 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
521                                     const char *start, const char *end);
522 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
523 #if XML_GE == 1
524 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
525                                        const char *start, const char *end,
526                                        enum XML_Account account);
527 #else
528 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
529 #endif
530 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
531                                        const char *start, const char *end);
532 static int reportComment(XML_Parser parser, const ENCODING *enc,
533                          const char *start, const char *end);
534 static void reportDefault(XML_Parser parser, const ENCODING *enc,
535                           const char *start, const char *end);
536 
537 static const XML_Char *getContext(XML_Parser parser);
538 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
539 
540 static void FASTCALL normalizePublicId(XML_Char *s);
541 
542 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
543 /* do not call if m_parentParser != NULL */
544 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
545 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
546                        const XML_Memory_Handling_Suite *ms);
547 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
548                    const XML_Memory_Handling_Suite *ms);
549 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
550                            STRING_POOL *newPool, const HASH_TABLE *oldTable);
551 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
552                      size_t createSize);
553 static void FASTCALL hashTableInit(HASH_TABLE *table,
554                                    const XML_Memory_Handling_Suite *ms);
555 static void FASTCALL hashTableClear(HASH_TABLE *table);
556 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
557 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
558                                        const HASH_TABLE *table);
559 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
560 
561 static void FASTCALL poolInit(STRING_POOL *pool,
562                               const XML_Memory_Handling_Suite *ms);
563 static void FASTCALL poolClear(STRING_POOL *pool);
564 static void FASTCALL poolDestroy(STRING_POOL *pool);
565 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
566                             const char *ptr, const char *end);
567 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
568                                  const char *ptr, const char *end);
569 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
570 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
571                                                const XML_Char *s);
572 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
573                                        int n);
574 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
575                                                  const XML_Char *s);
576 
577 static int FASTCALL nextScaffoldPart(XML_Parser parser);
578 static XML_Content *build_model(XML_Parser parser);
579 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
580                                     const char *ptr, const char *end);
581 
582 static XML_Char *copyString(const XML_Char *s,
583                             const XML_Memory_Handling_Suite *memsuite);
584 
585 static unsigned long generate_hash_secret_salt(XML_Parser parser);
586 static XML_Bool startParsing(XML_Parser parser);
587 
588 static XML_Parser parserCreate(const XML_Char *encodingName,
589                                const XML_Memory_Handling_Suite *memsuite,
590                                const XML_Char *nameSep, DTD *dtd);
591 
592 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
593 
594 #if XML_GE == 1
595 static float accountingGetCurrentAmplification(XML_Parser rootParser);
596 static void accountingReportStats(XML_Parser originParser, const char *epilog);
597 static void accountingOnAbort(XML_Parser originParser);
598 static void accountingReportDiff(XML_Parser rootParser,
599                                  unsigned int levelsAwayFromRootParser,
600                                  const char *before, const char *after,
601                                  ptrdiff_t bytesMore, int source_line,
602                                  enum XML_Account account);
603 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
604                                         const char *before, const char *after,
605                                         int source_line,
606                                         enum XML_Account account);
607 
608 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
609                                       const char *action, int sourceLine);
610 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
611                                  int sourceLine);
612 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
613                                   int sourceLine);
614 
615 static XML_Parser getRootParserOf(XML_Parser parser,
616                                   unsigned int *outLevelDiff);
617 #endif /* XML_GE == 1 */
618 
619 static unsigned long getDebugLevel(const char *variableName,
620                                    unsigned long defaultDebugLevel);
621 
/* Accessors for STRING_POOL.  Every macro takes a pointer to the pool; the
   argument may be an arbitrary expression (e.g. &localPool) because it is
   parenthesized at each use.  Note that `pool` is expanded more than once in
   several of these, so it must not have side effects.

   poolStart      - start of the string currently being built
   poolLength     - number of XML_Chars appended since start
   poolChop       - drop the last appended XML_Char
   poolLastChar   - lvalue for the last appended XML_Char
   poolDiscard    - abandon the string being built (ptr = start)
   poolFinish     - commit the string being built (start = ptr)
   poolAppendChar - append c, calling poolGrow() first when the pool is full;
                    yields 1 on success and 0 if poolGrow() fails */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
632 
633 #if ! defined(XML_TESTING)
634 const
635 #endif
636     XML_Bool g_reparseDeferralEnabledDefault
637     = XML_TRUE; // write ONLY in runtests.c
638 #if defined(XML_TESTING)
639 unsigned int g_bytesScanned = 0; // used for testing only
640 #endif
641 
/* Complete state of one parser instance: the parse buffer, the registered
   handlers, encoding information, the current processor function, DTD and
   namespace bookkeeping, temporary string pools and (when XML_GE == 1) the
   accounting / entity statistics.  The MALLOC / REALLOC / FREE macros in this
   file allocate through m_mem. */
642 struct XML_ParserStruct {
643   /* The first member must be m_userData so that the XML_GetUserData
644      macro works. */
645   void *m_userData;
646   void *m_handlerArg;
647 
648   // How the four parse buffer pointers below relate in time and space:
649   //
650   //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
651   //   |           |              |               |
652   //   <--parsed-->|              |               |
653   //               <---parsing--->|               |
654   //                              <--unoccupied-->|
655   //   <---------total-malloced/realloced-------->|
656 
657   char *m_buffer; // malloc/realloc base pointer of parse buffer
658   const XML_Memory_Handling_Suite m_mem;
659   const char *m_bufferPtr; // first character to be parsed
660   char *m_bufferEnd;       // past last character to be parsed
661   const char *m_bufferLim; // allocated end of m_buffer
662 
663   XML_Index m_parseEndByteIndex;
664   const char *m_parseEndPtr;
665   size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
666   XML_Bool m_reparseDeferralEnabled;
667   int m_lastBufferRequestSize;
668   XML_Char *m_dataBuf;
669   XML_Char *m_dataBufEnd;
  /* Application callbacks (one slot per XML_*Handler type). */
670   XML_StartElementHandler m_startElementHandler;
671   XML_EndElementHandler m_endElementHandler;
672   XML_CharacterDataHandler m_characterDataHandler;
673   XML_ProcessingInstructionHandler m_processingInstructionHandler;
674   XML_CommentHandler m_commentHandler;
675   XML_StartCdataSectionHandler m_startCdataSectionHandler;
676   XML_EndCdataSectionHandler m_endCdataSectionHandler;
677   XML_DefaultHandler m_defaultHandler;
678   XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
679   XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
680   XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
681   XML_NotationDeclHandler m_notationDeclHandler;
682   XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
683   XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
684   XML_NotStandaloneHandler m_notStandaloneHandler;
685   XML_ExternalEntityRefHandler m_externalEntityRefHandler;
686   XML_Parser m_externalEntityRefHandlerArg;
687   XML_SkippedEntityHandler m_skippedEntityHandler;
688   XML_UnknownEncodingHandler m_unknownEncodingHandler;
689   XML_ElementDeclHandler m_elementDeclHandler;
690   XML_AttlistDeclHandler m_attlistDeclHandler;
691   XML_EntityDeclHandler m_entityDeclHandler;
692   XML_XmlDeclHandler m_xmlDeclHandler;
693   const ENCODING *m_encoding;
694   INIT_ENCODING m_initEncoding;
695   const ENCODING *m_internalEncoding;
696   const XML_Char *m_protocolEncodingName;
697   XML_Bool m_ns;
698   XML_Bool m_ns_triplets;
699   void *m_unknownEncodingMem;
700   void *m_unknownEncodingData;
701   void *m_unknownEncodingHandlerData;
702   void(XMLCALL *m_unknownEncodingRelease)(void *);
703   PROLOG_STATE m_prologState;
704   Processor *m_processor; /* one of the static Processor functions declared above */
705   enum XML_Error m_errorCode;
706   const char *m_eventPtr;
707   const char *m_eventEndPtr;
708   const char *m_positionPtr;
709   OPEN_INTERNAL_ENTITY *m_openInternalEntities;
710   OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
711   XML_Bool m_defaultExpandInternalEntities;
712   int m_tagLevel;
713   ENTITY *m_declEntity;
714   const XML_Char *m_doctypeName;
715   const XML_Char *m_doctypeSysid;
716   const XML_Char *m_doctypePubid;
717   const XML_Char *m_declAttributeType;
718   const XML_Char *m_declNotationName;
719   const XML_Char *m_declNotationPublicId;
720   ELEMENT_TYPE *m_declElementType;
721   ATTRIBUTE_ID *m_declAttributeId;
722   XML_Bool m_declAttributeIsCdata;
723   XML_Bool m_declAttributeIsId;
724   DTD *m_dtd;
725   const XML_Char *m_curBase;
726   TAG *m_tagStack;
727   TAG *m_freeTagList; /* free list of reusable TAG objects (see the TAG comment) */
728   BINDING *m_inheritedBindings;
729   BINDING *m_freeBindingList;
730   int m_attsSize;
731   int m_nSpecifiedAtts;
732   int m_idAttIndex;
733   ATTRIBUTE *m_atts;
734   NS_ATT *m_nsAtts;
735   unsigned long m_nsAttsVersion;
736   unsigned char m_nsAttsPower;
737 #ifdef XML_ATTR_INFO
738   XML_AttrInfo *m_attInfo;
739 #endif
740   POSITION m_position;
741   STRING_POOL m_tempPool;
742   STRING_POOL m_temp2Pool;
743   char *m_groupConnector;
744   unsigned int m_groupSize;
745   XML_Char m_namespaceSeparator;
746   XML_Parser m_parentParser;
747   XML_ParsingStatus m_parsingStatus;
748 #ifdef XML_DTD
749   XML_Bool m_isParamEntity;
750   XML_Bool m_useForeignDTD;
751   enum XML_ParamEntityParsing m_paramEntityParsing;
752 #endif
753   unsigned long m_hash_secret_salt; /* presumably set via generate_hash_secret_salt() -- confirm */
754 #if XML_GE == 1
755   ACCOUNTING m_accounting;
756   ENTITY_STATS m_entity_stats;
757 #endif
758 };
759 
/* Allocation helpers that dispatch through the memory handling suite stored
   in the parser (parser->m_mem).

   parser: expression yielding a pointer to the parser structure
   p:      block to resize or release
   s:      requested size in bytes

   Every macro argument is parenthesized so that any pointer-valued
   expression (for example `&obj` or a cast) expands with the intended
   precedence. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
763 
764 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)765 XML_ParserCreate(const XML_Char *encodingName) {
766   return XML_ParserCreate_MM(encodingName, NULL, NULL);
767 }
768 
769 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)770 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
771   XML_Char tmp[2] = {nsSep, 0};
772   return XML_ParserCreate_MM(encodingName, NULL, tmp);
773 }
774 
775 // "xml=http://www.w3.org/XML/1998/namespace"
776 static const XML_Char implicitContext[]
777     = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
778        ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
779        ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
780        ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
781        ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
782        ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
783        ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
784        ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
785        '\0'};
786 
787 /* To avoid warnings about unused functions: */
788 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
789 
790 #  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
791 
/* Obtain entropy on Linux 3.17+
 *
 * Fills `count` bytes at `target` using getrandom() (or the raw syscall) in
 * non-blocking mode.
 *
 * Returns 1 when all `count` bytes were written, 0 otherwise (including the
 * case where the call reports an error other than EINTR, or makes no
 * progress).
 *
 * errno is only consulted when the call actually failed (returned < 0), so a
 * stale EINTR left behind by unrelated code can neither keep the loop
 * spinning nor be missing when a short-but-successful call needs a retry.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const int bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* full count bytes written */
      continue;   /* short write: fetch the remainder */
    }

    /* Retry only when this very call was interrupted by a signal */
    if ((bytesWrittenMore < 0) && (errno == EINTR))
      continue;

    return 0; /* hard error, or no progress */
  }
}
819 
820 #  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
821 
822 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
823 
/* Extract entropy from /dev/urandom
 *
 * Opens /dev/urandom read-only and reads until `count` bytes have been
 * stored at `target`.  The descriptor is closed before returning.
 *
 * Returns 1 when all `count` bytes were read, 0 otherwise (device could not
 * be opened, read() failed with something other than EINTR, or read() made
 * no progress).
 *
 * errno is only consulted when read() actually failed (returned < 0), so a
 * stale EINTR cannot keep the loop spinning, and a short successful read is
 * always followed by another attempt for the remainder.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    /* Retry only when this very read() was interrupted by a signal */
    if ((bytesWrittenMore < 0) && (errno == EINTR))
      continue;

    break; /* hard error, or no progress */
  }

  close(fd);
  return success;
}
851 
852 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
853 
854 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
855 
#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)

/* Fill `count` bytes at `target` from successive 32-bit arc4random() values.
   Each value is emitted least significant byte first; unused bytes of the
   final value are discarded. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (written < count);
         byteIndex++) {
      out[written] = (uint8_t)(word >> (byteIndex * 8));
      written++;
    }
  }
}

#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
875 
#ifdef _WIN32

/* MinGW-32 (not MinGW-64, which has it) did not declare rand_s() in its
   stdlib.h before version 5.3.0 of the runtime package (mingwrt), so the
   declaration is supplied here for those versions.  The upstream fix
   was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
#  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
      && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
__declspec(dllimport) int rand_s(unsigned int *);
#  endif

/* Obtain entropy on Windows through rand_s(), which generates
 * cryptographically secure random numbers.  Internally it uses the
 * RtlGenRandom API which is present in Windows XP and later.
 *
 * Each value obtained is emitted least significant byte first until `count`
 * bytes have been stored at `target`.
 * Returns 1 on success, 0 as soon as rand_s() reports a failure.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    size_t byteIndex;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (written < count);
         byteIndex++) {
      out[written] = (uint8_t)(word >> (byteIndex * 8));
      written++;
    }
  }
  return 1; /* success */
}

#endif /* _WIN32 */
912 
913 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
914 
/* Low quality, time-based entropy used only by the fallback path of
   generate_hash_secret_salt().
   Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microseconds field of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME fileTime;
  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The result is only checked in debug builds */
#    if ! defined(NDEBUG)
  assert(rc == 0);
#    else
  (void)rc;
#    endif /* ! defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
937 
938 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
939 
/* Pass-through helper: returns `entropy` unchanged.  When the debug level
   obtained for "EXPAT_ENTROPY_DEBUG" is at least 1, the value and the name
   of its source (`label`) are also printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
948 
949 static unsigned long
generate_hash_secret_salt(XML_Parser parser)950 generate_hash_secret_salt(XML_Parser parser) {
951   unsigned long entropy;
952   (void)parser;
953 
954   /* "Failproof" high quality providers: */
955 #if defined(HAVE_ARC4RANDOM_BUF)
956   arc4random_buf(&entropy, sizeof(entropy));
957   return ENTROPY_DEBUG("arc4random_buf", entropy);
958 #elif defined(HAVE_ARC4RANDOM)
959   writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
960   return ENTROPY_DEBUG("arc4random", entropy);
961 #else
962   /* Try high quality providers first .. */
963 #  ifdef _WIN32
964   if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
965     return ENTROPY_DEBUG("rand_s", entropy);
966   }
967 #  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
968   if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
969     return ENTROPY_DEBUG("getrandom", entropy);
970   }
971 #  endif
972 #  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
973   if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
974     return ENTROPY_DEBUG("/dev/urandom", entropy);
975   }
976 #  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
977   /* .. and self-made low quality for backup: */
978 
979   /* Process ID is 0 bits entropy if attacker has local access */
980   entropy = gather_time_entropy() ^ getpid();
981 
982   /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
983   if (sizeof(unsigned long) == 4) {
984     return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
985   } else {
986     return ENTROPY_DEBUG("fallback(8)",
987                          entropy * (unsigned long)2305843009213693951ULL);
988   }
989 #endif
990 }
991 
992 static unsigned long
get_hash_secret_salt(XML_Parser parser)993 get_hash_secret_salt(XML_Parser parser) {
994   if (parser->m_parentParser != NULL)
995     return get_hash_secret_salt(parser->m_parentParser);
996   return parser->m_hash_secret_salt;
997 }
998 
999 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1000 callProcessor(XML_Parser parser, const char *start, const char *end,
1001               const char **endPtr) {
1002   const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1003 
1004   if (parser->m_reparseDeferralEnabled
1005       && ! parser->m_parsingStatus.finalBuffer) {
1006     // Heuristic: don't try to parse a partial token again until the amount of
1007     // available data has increased significantly.
1008     const size_t had_before = parser->m_partialTokenBytesBefore;
1009     // ...but *do* try anyway if we're close to causing a reallocation.
1010     size_t available_buffer
1011         = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1012 #if XML_CONTEXT_BYTES > 0
1013     available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1014 #endif
1015     available_buffer
1016         += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1017     // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1018     const bool enough
1019         = (have_now >= 2 * had_before)
1020           || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1021 
1022     if (! enough) {
1023       *endPtr = start; // callers may expect this to be set
1024       return XML_ERROR_NONE;
1025     }
1026   }
1027 #if defined(XML_TESTING)
1028   g_bytesScanned += (unsigned)have_now;
1029 #endif
1030   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1031   if (ret == XML_ERROR_NONE) {
1032     // if we consumed nothing, remember what we had on this parse attempt.
1033     if (*endPtr == start) {
1034       parser->m_partialTokenBytesBefore = have_now;
1035     } else {
1036       parser->m_partialTokenBytesBefore = 0;
1037     }
1038   }
1039   return ret;
1040 }
1041 
1042 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1043 startParsing(XML_Parser parser) {
1044   /* hash functions must be initialized before setContext() is called */
1045   if (parser->m_hash_secret_salt == 0)
1046     parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1047   if (parser->m_ns) {
1048     /* implicit context only set for root parser, since child
1049        parsers (i.e. external entity parsers) will inherit it
1050     */
1051     return setContext(parser, implicitContext);
1052   }
1053   return XML_TRUE;
1054 }
1055 
1056 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1057 XML_ParserCreate_MM(const XML_Char *encodingName,
1058                     const XML_Memory_Handling_Suite *memsuite,
1059                     const XML_Char *nameSep) {
1060   return parserCreate(encodingName, memsuite, nameSep, NULL);
1061 }
1062 
1063 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1064 parserCreate(const XML_Char *encodingName,
1065              const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1066              DTD *dtd) {
1067   XML_Parser parser;
1068 
1069   if (memsuite) {
1070     XML_Memory_Handling_Suite *mtemp;
1071     parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1072     if (parser != NULL) {
1073       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1074       mtemp->malloc_fcn = memsuite->malloc_fcn;
1075       mtemp->realloc_fcn = memsuite->realloc_fcn;
1076       mtemp->free_fcn = memsuite->free_fcn;
1077     }
1078   } else {
1079     XML_Memory_Handling_Suite *mtemp;
1080     parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1081     if (parser != NULL) {
1082       mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1083       mtemp->malloc_fcn = malloc;
1084       mtemp->realloc_fcn = realloc;
1085       mtemp->free_fcn = free;
1086     }
1087   }
1088 
1089   if (! parser)
1090     return parser;
1091 
1092   parser->m_buffer = NULL;
1093   parser->m_bufferLim = NULL;
1094 
1095   parser->m_attsSize = INIT_ATTS_SIZE;
1096   parser->m_atts
1097       = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1098   if (parser->m_atts == NULL) {
1099     FREE(parser, parser);
1100     return NULL;
1101   }
1102 #ifdef XML_ATTR_INFO
1103   parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1104       parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1105   if (parser->m_attInfo == NULL) {
1106     FREE(parser, parser->m_atts);
1107     FREE(parser, parser);
1108     return NULL;
1109   }
1110 #endif
1111   parser->m_dataBuf
1112       = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1113   if (parser->m_dataBuf == NULL) {
1114     FREE(parser, parser->m_atts);
1115 #ifdef XML_ATTR_INFO
1116     FREE(parser, parser->m_attInfo);
1117 #endif
1118     FREE(parser, parser);
1119     return NULL;
1120   }
1121   parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1122 
1123   if (dtd)
1124     parser->m_dtd = dtd;
1125   else {
1126     parser->m_dtd = dtdCreate(&parser->m_mem);
1127     if (parser->m_dtd == NULL) {
1128       FREE(parser, parser->m_dataBuf);
1129       FREE(parser, parser->m_atts);
1130 #ifdef XML_ATTR_INFO
1131       FREE(parser, parser->m_attInfo);
1132 #endif
1133       FREE(parser, parser);
1134       return NULL;
1135     }
1136   }
1137 
1138   parser->m_freeBindingList = NULL;
1139   parser->m_freeTagList = NULL;
1140   parser->m_freeInternalEntities = NULL;
1141 
1142   parser->m_groupSize = 0;
1143   parser->m_groupConnector = NULL;
1144 
1145   parser->m_unknownEncodingHandler = NULL;
1146   parser->m_unknownEncodingHandlerData = NULL;
1147 
1148   parser->m_namespaceSeparator = ASCII_EXCL;
1149   parser->m_ns = XML_FALSE;
1150   parser->m_ns_triplets = XML_FALSE;
1151 
1152   parser->m_nsAtts = NULL;
1153   parser->m_nsAttsVersion = 0;
1154   parser->m_nsAttsPower = 0;
1155 
1156   parser->m_protocolEncodingName = NULL;
1157 
1158   poolInit(&parser->m_tempPool, &(parser->m_mem));
1159   poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1160   parserInit(parser, encodingName);
1161 
1162   if (encodingName && ! parser->m_protocolEncodingName) {
1163     if (dtd) {
1164       // We need to stop the upcoming call to XML_ParserFree from happily
1165       // destroying parser->m_dtd because the DTD is shared with the parent
1166       // parser and the only guard that keeps XML_ParserFree from destroying
1167       // parser->m_dtd is parser->m_isParamEntity but it will be set to
1168       // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1169       parser->m_dtd = NULL;
1170     }
1171     XML_ParserFree(parser);
1172     return NULL;
1173   }
1174 
1175   if (nameSep) {
1176     parser->m_ns = XML_TRUE;
1177     parser->m_internalEncoding = XmlGetInternalEncodingNS();
1178     parser->m_namespaceSeparator = *nameSep;
1179   } else {
1180     parser->m_internalEncoding = XmlGetInternalEncoding();
1181   }
1182 
1183   return parser;
1184 }
1185 
1186 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1187 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1188   parser->m_processor = prologInitProcessor;
1189   XmlPrologStateInit(&parser->m_prologState);
1190   if (encodingName != NULL) {
1191     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1192   }
1193   parser->m_curBase = NULL;
1194   XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1195   parser->m_userData = NULL;
1196   parser->m_handlerArg = NULL;
1197   parser->m_startElementHandler = NULL;
1198   parser->m_endElementHandler = NULL;
1199   parser->m_characterDataHandler = NULL;
1200   parser->m_processingInstructionHandler = NULL;
1201   parser->m_commentHandler = NULL;
1202   parser->m_startCdataSectionHandler = NULL;
1203   parser->m_endCdataSectionHandler = NULL;
1204   parser->m_defaultHandler = NULL;
1205   parser->m_startDoctypeDeclHandler = NULL;
1206   parser->m_endDoctypeDeclHandler = NULL;
1207   parser->m_unparsedEntityDeclHandler = NULL;
1208   parser->m_notationDeclHandler = NULL;
1209   parser->m_startNamespaceDeclHandler = NULL;
1210   parser->m_endNamespaceDeclHandler = NULL;
1211   parser->m_notStandaloneHandler = NULL;
1212   parser->m_externalEntityRefHandler = NULL;
1213   parser->m_externalEntityRefHandlerArg = parser;
1214   parser->m_skippedEntityHandler = NULL;
1215   parser->m_elementDeclHandler = NULL;
1216   parser->m_attlistDeclHandler = NULL;
1217   parser->m_entityDeclHandler = NULL;
1218   parser->m_xmlDeclHandler = NULL;
1219   parser->m_bufferPtr = parser->m_buffer;
1220   parser->m_bufferEnd = parser->m_buffer;
1221   parser->m_parseEndByteIndex = 0;
1222   parser->m_parseEndPtr = NULL;
1223   parser->m_partialTokenBytesBefore = 0;
1224   parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1225   parser->m_lastBufferRequestSize = 0;
1226   parser->m_declElementType = NULL;
1227   parser->m_declAttributeId = NULL;
1228   parser->m_declEntity = NULL;
1229   parser->m_doctypeName = NULL;
1230   parser->m_doctypeSysid = NULL;
1231   parser->m_doctypePubid = NULL;
1232   parser->m_declAttributeType = NULL;
1233   parser->m_declNotationName = NULL;
1234   parser->m_declNotationPublicId = NULL;
1235   parser->m_declAttributeIsCdata = XML_FALSE;
1236   parser->m_declAttributeIsId = XML_FALSE;
1237   memset(&parser->m_position, 0, sizeof(POSITION));
1238   parser->m_errorCode = XML_ERROR_NONE;
1239   parser->m_eventPtr = NULL;
1240   parser->m_eventEndPtr = NULL;
1241   parser->m_positionPtr = NULL;
1242   parser->m_openInternalEntities = NULL;
1243   parser->m_defaultExpandInternalEntities = XML_TRUE;
1244   parser->m_tagLevel = 0;
1245   parser->m_tagStack = NULL;
1246   parser->m_inheritedBindings = NULL;
1247   parser->m_nSpecifiedAtts = 0;
1248   parser->m_unknownEncodingMem = NULL;
1249   parser->m_unknownEncodingRelease = NULL;
1250   parser->m_unknownEncodingData = NULL;
1251   parser->m_parentParser = NULL;
1252   parser->m_parsingStatus.parsing = XML_INITIALIZED;
1253 #ifdef XML_DTD
1254   parser->m_isParamEntity = XML_FALSE;
1255   parser->m_useForeignDTD = XML_FALSE;
1256   parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1257 #endif
1258   parser->m_hash_secret_salt = 0;
1259 
1260 #if XML_GE == 1
1261   memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1262   parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1263   parser->m_accounting.maximumAmplificationFactor
1264       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1265   parser->m_accounting.activationThresholdBytes
1266       = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1267 
1268   memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1269   parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1270 #endif
1271 }
1272 
1273 /* moves list of bindings to m_freeBindingList */
1274 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1275 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1276   while (bindings) {
1277     BINDING *b = bindings;
1278     bindings = bindings->nextTagBinding;
1279     b->nextTagBinding = parser->m_freeBindingList;
1280     parser->m_freeBindingList = b;
1281   }
1282 }
1283 
1284 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1285 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1286   TAG *tStk;
1287   OPEN_INTERNAL_ENTITY *openEntityList;
1288 
1289   if (parser == NULL)
1290     return XML_FALSE;
1291 
1292   if (parser->m_parentParser)
1293     return XML_FALSE;
1294   /* move m_tagStack to m_freeTagList */
1295   tStk = parser->m_tagStack;
1296   while (tStk) {
1297     TAG *tag = tStk;
1298     tStk = tStk->parent;
1299     tag->parent = parser->m_freeTagList;
1300     moveToFreeBindingList(parser, tag->bindings);
1301     tag->bindings = NULL;
1302     parser->m_freeTagList = tag;
1303   }
1304   /* move m_openInternalEntities to m_freeInternalEntities */
1305   openEntityList = parser->m_openInternalEntities;
1306   while (openEntityList) {
1307     OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1308     openEntityList = openEntity->next;
1309     openEntity->next = parser->m_freeInternalEntities;
1310     parser->m_freeInternalEntities = openEntity;
1311   }
1312   moveToFreeBindingList(parser, parser->m_inheritedBindings);
1313   FREE(parser, parser->m_unknownEncodingMem);
1314   if (parser->m_unknownEncodingRelease)
1315     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1316   poolClear(&parser->m_tempPool);
1317   poolClear(&parser->m_temp2Pool);
1318   FREE(parser, (void *)parser->m_protocolEncodingName);
1319   parser->m_protocolEncodingName = NULL;
1320   parserInit(parser, encodingName);
1321   dtdReset(parser->m_dtd, &parser->m_mem);
1322   return XML_TRUE;
1323 }
1324 
1325 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1326 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1327   if (parser == NULL)
1328     return XML_STATUS_ERROR;
1329   /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1330      XXX There's no way for the caller to determine which of the
1331      XXX possible error cases caused the XML_STATUS_ERROR return.
1332   */
1333   if (parser->m_parsingStatus.parsing == XML_PARSING
1334       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1335     return XML_STATUS_ERROR;
1336 
1337   /* Get rid of any previous encoding name */
1338   FREE(parser, (void *)parser->m_protocolEncodingName);
1339 
1340   if (encodingName == NULL)
1341     /* No new encoding name */
1342     parser->m_protocolEncodingName = NULL;
1343   else {
1344     /* Copy the new encoding name into allocated memory */
1345     parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1346     if (! parser->m_protocolEncodingName)
1347       return XML_STATUS_ERROR;
1348   }
1349   return XML_STATUS_OK;
1350 }
1351 
1352 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1353 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1354                                const XML_Char *encodingName) {
1355   XML_Parser parser = oldParser;
1356   DTD *newDtd = NULL;
1357   DTD *oldDtd;
1358   XML_StartElementHandler oldStartElementHandler;
1359   XML_EndElementHandler oldEndElementHandler;
1360   XML_CharacterDataHandler oldCharacterDataHandler;
1361   XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1362   XML_CommentHandler oldCommentHandler;
1363   XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1364   XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1365   XML_DefaultHandler oldDefaultHandler;
1366   XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1367   XML_NotationDeclHandler oldNotationDeclHandler;
1368   XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1369   XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1370   XML_NotStandaloneHandler oldNotStandaloneHandler;
1371   XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1372   XML_SkippedEntityHandler oldSkippedEntityHandler;
1373   XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1374   XML_ElementDeclHandler oldElementDeclHandler;
1375   XML_AttlistDeclHandler oldAttlistDeclHandler;
1376   XML_EntityDeclHandler oldEntityDeclHandler;
1377   XML_XmlDeclHandler oldXmlDeclHandler;
1378   ELEMENT_TYPE *oldDeclElementType;
1379 
1380   void *oldUserData;
1381   void *oldHandlerArg;
1382   XML_Bool oldDefaultExpandInternalEntities;
1383   XML_Parser oldExternalEntityRefHandlerArg;
1384 #ifdef XML_DTD
1385   enum XML_ParamEntityParsing oldParamEntityParsing;
1386   int oldInEntityValue;
1387 #endif
1388   XML_Bool oldns_triplets;
1389   /* Note that the new parser shares the same hash secret as the old
1390      parser, so that dtdCopy and copyEntityTable can lookup values
1391      from hash tables associated with either parser without us having
1392      to worry which hash secrets each table has.
1393   */
1394   unsigned long oldhash_secret_salt;
1395   XML_Bool oldReparseDeferralEnabled;
1396 
1397   /* Validate the oldParser parameter before we pull everything out of it */
1398   if (oldParser == NULL)
1399     return NULL;
1400 
1401   /* Stash the original parser contents on the stack */
1402   oldDtd = parser->m_dtd;
1403   oldStartElementHandler = parser->m_startElementHandler;
1404   oldEndElementHandler = parser->m_endElementHandler;
1405   oldCharacterDataHandler = parser->m_characterDataHandler;
1406   oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1407   oldCommentHandler = parser->m_commentHandler;
1408   oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1409   oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1410   oldDefaultHandler = parser->m_defaultHandler;
1411   oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1412   oldNotationDeclHandler = parser->m_notationDeclHandler;
1413   oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1414   oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1415   oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1416   oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1417   oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1418   oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1419   oldElementDeclHandler = parser->m_elementDeclHandler;
1420   oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1421   oldEntityDeclHandler = parser->m_entityDeclHandler;
1422   oldXmlDeclHandler = parser->m_xmlDeclHandler;
1423   oldDeclElementType = parser->m_declElementType;
1424 
1425   oldUserData = parser->m_userData;
1426   oldHandlerArg = parser->m_handlerArg;
1427   oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1428   oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1429 #ifdef XML_DTD
1430   oldParamEntityParsing = parser->m_paramEntityParsing;
1431   oldInEntityValue = parser->m_prologState.inEntityValue;
1432 #endif
1433   oldns_triplets = parser->m_ns_triplets;
1434   /* Note that the new parser shares the same hash secret as the old
1435      parser, so that dtdCopy and copyEntityTable can lookup values
1436      from hash tables associated with either parser without us having
1437      to worry which hash secrets each table has.
1438   */
1439   oldhash_secret_salt = parser->m_hash_secret_salt;
1440   oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1441 
1442 #ifdef XML_DTD
1443   if (! context)
1444     newDtd = oldDtd;
1445 #endif /* XML_DTD */
1446 
1447   /* Note that the magical uses of the pre-processor to make field
1448      access look more like C++ require that `parser' be overwritten
1449      here.  This makes this function more painful to follow than it
1450      would be otherwise.
1451   */
1452   if (parser->m_ns) {
1453     XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1454     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1455   } else {
1456     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1457   }
1458 
1459   if (! parser)
1460     return NULL;
1461 
1462   parser->m_startElementHandler = oldStartElementHandler;
1463   parser->m_endElementHandler = oldEndElementHandler;
1464   parser->m_characterDataHandler = oldCharacterDataHandler;
1465   parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1466   parser->m_commentHandler = oldCommentHandler;
1467   parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1468   parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1469   parser->m_defaultHandler = oldDefaultHandler;
1470   parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1471   parser->m_notationDeclHandler = oldNotationDeclHandler;
1472   parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1473   parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1474   parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1475   parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1476   parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1477   parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1478   parser->m_elementDeclHandler = oldElementDeclHandler;
1479   parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1480   parser->m_entityDeclHandler = oldEntityDeclHandler;
1481   parser->m_xmlDeclHandler = oldXmlDeclHandler;
1482   parser->m_declElementType = oldDeclElementType;
1483   parser->m_userData = oldUserData;
1484   if (oldUserData == oldHandlerArg)
1485     parser->m_handlerArg = parser->m_userData;
1486   else
1487     parser->m_handlerArg = parser;
1488   if (oldExternalEntityRefHandlerArg != oldParser)
1489     parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1490   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1491   parser->m_ns_triplets = oldns_triplets;
1492   parser->m_hash_secret_salt = oldhash_secret_salt;
1493   parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1494   parser->m_parentParser = oldParser;
1495 #ifdef XML_DTD
1496   parser->m_paramEntityParsing = oldParamEntityParsing;
1497   parser->m_prologState.inEntityValue = oldInEntityValue;
1498   if (context) {
1499 #endif /* XML_DTD */
1500     if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1501         || ! setContext(parser, context)) {
1502       XML_ParserFree(parser);
1503       return NULL;
1504     }
1505     parser->m_processor = externalEntityInitProcessor;
1506 #ifdef XML_DTD
1507   } else {
1508     /* The DTD instance referenced by parser->m_dtd is shared between the
1509        document's root parser and external PE parsers, therefore one does not
1510        need to call setContext. In addition, one also *must* not call
1511        setContext, because this would overwrite existing prefix->binding
1512        pointers in parser->m_dtd with ones that get destroyed with the external
1513        PE parser. This would leave those prefixes with dangling pointers.
1514     */
1515     parser->m_isParamEntity = XML_TRUE;
1516     XmlPrologStateInitExternalEntity(&parser->m_prologState);
1517     parser->m_processor = externalParEntInitProcessor;
1518   }
1519 #endif /* XML_DTD */
1520   return parser;
1521 }
1522 
1523 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1524 destroyBindings(BINDING *bindings, XML_Parser parser) {
1525   for (;;) {
1526     BINDING *b = bindings;
1527     if (! b)
1528       break;
1529     bindings = b->nextTagBinding;
1530     FREE(parser, b->uri);
1531     FREE(parser, b);
1532   }
1533 }
1534 
1535 void XMLCALL
XML_ParserFree(XML_Parser parser)1536 XML_ParserFree(XML_Parser parser) {
1537   TAG *tagList;
1538   OPEN_INTERNAL_ENTITY *entityList;
1539   if (parser == NULL)
1540     return;
1541   /* free m_tagStack and m_freeTagList */
1542   tagList = parser->m_tagStack;
1543   for (;;) {
1544     TAG *p;
1545     if (tagList == NULL) {
1546       if (parser->m_freeTagList == NULL)
1547         break;
1548       tagList = parser->m_freeTagList;
1549       parser->m_freeTagList = NULL;
1550     }
1551     p = tagList;
1552     tagList = tagList->parent;
1553     FREE(parser, p->buf);
1554     destroyBindings(p->bindings, parser);
1555     FREE(parser, p);
1556   }
1557   /* free m_openInternalEntities and m_freeInternalEntities */
1558   entityList = parser->m_openInternalEntities;
1559   for (;;) {
1560     OPEN_INTERNAL_ENTITY *openEntity;
1561     if (entityList == NULL) {
1562       if (parser->m_freeInternalEntities == NULL)
1563         break;
1564       entityList = parser->m_freeInternalEntities;
1565       parser->m_freeInternalEntities = NULL;
1566     }
1567     openEntity = entityList;
1568     entityList = entityList->next;
1569     FREE(parser, openEntity);
1570   }
1571 
1572   destroyBindings(parser->m_freeBindingList, parser);
1573   destroyBindings(parser->m_inheritedBindings, parser);
1574   poolDestroy(&parser->m_tempPool);
1575   poolDestroy(&parser->m_temp2Pool);
1576   FREE(parser, (void *)parser->m_protocolEncodingName);
1577 #ifdef XML_DTD
1578   /* external parameter entity parsers share the DTD structure
1579      parser->m_dtd with the root parser, so we must not destroy it
1580   */
1581   if (! parser->m_isParamEntity && parser->m_dtd)
1582 #else
1583   if (parser->m_dtd)
1584 #endif /* XML_DTD */
1585     dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1586                &parser->m_mem);
1587   FREE(parser, (void *)parser->m_atts);
1588 #ifdef XML_ATTR_INFO
1589   FREE(parser, (void *)parser->m_attInfo);
1590 #endif
1591   FREE(parser, parser->m_groupConnector);
1592   FREE(parser, parser->m_buffer);
1593   FREE(parser, parser->m_dataBuf);
1594   FREE(parser, parser->m_nsAtts);
1595   FREE(parser, parser->m_unknownEncodingMem);
1596   if (parser->m_unknownEncodingRelease)
1597     parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1598   FREE(parser, parser);
1599 }
1600 
1601 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1602 XML_UseParserAsHandlerArg(XML_Parser parser) {
1603   if (parser != NULL)
1604     parser->m_handlerArg = parser;
1605 }
1606 
1607 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1608 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1609   if (parser == NULL)
1610     return XML_ERROR_INVALID_ARGUMENT;
1611 #ifdef XML_DTD
1612   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1613   if (parser->m_parsingStatus.parsing == XML_PARSING
1614       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1615     return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1616   parser->m_useForeignDTD = useDTD;
1617   return XML_ERROR_NONE;
1618 #else
1619   UNUSED_P(useDTD);
1620   return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1621 #endif
1622 }
1623 
1624 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1625 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1626   if (parser == NULL)
1627     return;
1628   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1629   if (parser->m_parsingStatus.parsing == XML_PARSING
1630       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1631     return;
1632   parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1633 }
1634 
1635 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1636 XML_SetUserData(XML_Parser parser, void *p) {
1637   if (parser == NULL)
1638     return;
1639   if (parser->m_handlerArg == parser->m_userData)
1640     parser->m_handlerArg = parser->m_userData = p;
1641   else
1642     parser->m_userData = p;
1643 }
1644 
1645 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1646 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1647   if (parser == NULL)
1648     return XML_STATUS_ERROR;
1649   if (p) {
1650     p = poolCopyString(&parser->m_dtd->pool, p);
1651     if (! p)
1652       return XML_STATUS_ERROR;
1653     parser->m_curBase = p;
1654   } else
1655     parser->m_curBase = NULL;
1656   return XML_STATUS_OK;
1657 }
1658 
1659 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1660 XML_GetBase(XML_Parser parser) {
1661   if (parser == NULL)
1662     return NULL;
1663   return parser->m_curBase;
1664 }
1665 
1666 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1667 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1668   if (parser == NULL)
1669     return -1;
1670   return parser->m_nSpecifiedAtts;
1671 }
1672 
1673 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1674 XML_GetIdAttributeIndex(XML_Parser parser) {
1675   if (parser == NULL)
1676     return -1;
1677   return parser->m_idAttIndex;
1678 }
1679 
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser. Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1688 
1689 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1690 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1691                       XML_EndElementHandler end) {
1692   if (parser == NULL)
1693     return;
1694   parser->m_startElementHandler = start;
1695   parser->m_endElementHandler = end;
1696 }
1697 
1698 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1699 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1700   if (parser != NULL)
1701     parser->m_startElementHandler = start;
1702 }
1703 
1704 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1705 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1706   if (parser != NULL)
1707     parser->m_endElementHandler = end;
1708 }
1709 
1710 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1711 XML_SetCharacterDataHandler(XML_Parser parser,
1712                             XML_CharacterDataHandler handler) {
1713   if (parser != NULL)
1714     parser->m_characterDataHandler = handler;
1715 }
1716 
1717 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1718 XML_SetProcessingInstructionHandler(XML_Parser parser,
1719                                     XML_ProcessingInstructionHandler handler) {
1720   if (parser != NULL)
1721     parser->m_processingInstructionHandler = handler;
1722 }
1723 
1724 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1725 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1726   if (parser != NULL)
1727     parser->m_commentHandler = handler;
1728 }
1729 
1730 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1731 XML_SetCdataSectionHandler(XML_Parser parser,
1732                            XML_StartCdataSectionHandler start,
1733                            XML_EndCdataSectionHandler end) {
1734   if (parser == NULL)
1735     return;
1736   parser->m_startCdataSectionHandler = start;
1737   parser->m_endCdataSectionHandler = end;
1738 }
1739 
1740 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1741 XML_SetStartCdataSectionHandler(XML_Parser parser,
1742                                 XML_StartCdataSectionHandler start) {
1743   if (parser != NULL)
1744     parser->m_startCdataSectionHandler = start;
1745 }
1746 
1747 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1748 XML_SetEndCdataSectionHandler(XML_Parser parser,
1749                               XML_EndCdataSectionHandler end) {
1750   if (parser != NULL)
1751     parser->m_endCdataSectionHandler = end;
1752 }
1753 
1754 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1755 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1756   if (parser == NULL)
1757     return;
1758   parser->m_defaultHandler = handler;
1759   parser->m_defaultExpandInternalEntities = XML_FALSE;
1760 }
1761 
1762 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1763 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1764   if (parser == NULL)
1765     return;
1766   parser->m_defaultHandler = handler;
1767   parser->m_defaultExpandInternalEntities = XML_TRUE;
1768 }
1769 
1770 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1771 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1772                           XML_EndDoctypeDeclHandler end) {
1773   if (parser == NULL)
1774     return;
1775   parser->m_startDoctypeDeclHandler = start;
1776   parser->m_endDoctypeDeclHandler = end;
1777 }
1778 
1779 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1780 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1781                                XML_StartDoctypeDeclHandler start) {
1782   if (parser != NULL)
1783     parser->m_startDoctypeDeclHandler = start;
1784 }
1785 
1786 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1787 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1788   if (parser != NULL)
1789     parser->m_endDoctypeDeclHandler = end;
1790 }
1791 
1792 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1793 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1794                                  XML_UnparsedEntityDeclHandler handler) {
1795   if (parser != NULL)
1796     parser->m_unparsedEntityDeclHandler = handler;
1797 }
1798 
1799 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1800 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1801   if (parser != NULL)
1802     parser->m_notationDeclHandler = handler;
1803 }
1804 
1805 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1806 XML_SetNamespaceDeclHandler(XML_Parser parser,
1807                             XML_StartNamespaceDeclHandler start,
1808                             XML_EndNamespaceDeclHandler end) {
1809   if (parser == NULL)
1810     return;
1811   parser->m_startNamespaceDeclHandler = start;
1812   parser->m_endNamespaceDeclHandler = end;
1813 }
1814 
1815 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1816 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1817                                  XML_StartNamespaceDeclHandler start) {
1818   if (parser != NULL)
1819     parser->m_startNamespaceDeclHandler = start;
1820 }
1821 
1822 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1823 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1824                                XML_EndNamespaceDeclHandler end) {
1825   if (parser != NULL)
1826     parser->m_endNamespaceDeclHandler = end;
1827 }
1828 
1829 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1830 XML_SetNotStandaloneHandler(XML_Parser parser,
1831                             XML_NotStandaloneHandler handler) {
1832   if (parser != NULL)
1833     parser->m_notStandaloneHandler = handler;
1834 }
1835 
1836 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1837 XML_SetExternalEntityRefHandler(XML_Parser parser,
1838                                 XML_ExternalEntityRefHandler handler) {
1839   if (parser != NULL)
1840     parser->m_externalEntityRefHandler = handler;
1841 }
1842 
1843 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1844 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1845   if (parser == NULL)
1846     return;
1847   if (arg)
1848     parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1849   else
1850     parser->m_externalEntityRefHandlerArg = parser;
1851 }
1852 
1853 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1854 XML_SetSkippedEntityHandler(XML_Parser parser,
1855                             XML_SkippedEntityHandler handler) {
1856   if (parser != NULL)
1857     parser->m_skippedEntityHandler = handler;
1858 }
1859 
1860 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1861 XML_SetUnknownEncodingHandler(XML_Parser parser,
1862                               XML_UnknownEncodingHandler handler, void *data) {
1863   if (parser == NULL)
1864     return;
1865   parser->m_unknownEncodingHandler = handler;
1866   parser->m_unknownEncodingHandlerData = data;
1867 }
1868 
1869 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1870 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1871   if (parser != NULL)
1872     parser->m_elementDeclHandler = eldecl;
1873 }
1874 
1875 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1876 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1877   if (parser != NULL)
1878     parser->m_attlistDeclHandler = attdecl;
1879 }
1880 
1881 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1882 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1883   if (parser != NULL)
1884     parser->m_entityDeclHandler = handler;
1885 }
1886 
1887 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1888 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1889   if (parser != NULL)
1890     parser->m_xmlDeclHandler = handler;
1891 }
1892 
1893 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1894 XML_SetParamEntityParsing(XML_Parser parser,
1895                           enum XML_ParamEntityParsing peParsing) {
1896   if (parser == NULL)
1897     return 0;
1898   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1899   if (parser->m_parsingStatus.parsing == XML_PARSING
1900       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1901     return 0;
1902 #ifdef XML_DTD
1903   parser->m_paramEntityParsing = peParsing;
1904   return 1;
1905 #else
1906   return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1907 #endif
1908 }
1909 
1910 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1911 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1912   if (parser == NULL)
1913     return 0;
1914   if (parser->m_parentParser)
1915     return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1916   /* block after XML_Parse()/XML_ParseBuffer() has been called */
1917   if (parser->m_parsingStatus.parsing == XML_PARSING
1918       || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1919     return 0;
1920   parser->m_hash_secret_salt = hash_salt;
1921   return 1;
1922 }
1923 
1924 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1925 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1926   if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1927     if (parser != NULL)
1928       parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1929     return XML_STATUS_ERROR;
1930   }
1931   switch (parser->m_parsingStatus.parsing) {
1932   case XML_SUSPENDED:
1933     parser->m_errorCode = XML_ERROR_SUSPENDED;
1934     return XML_STATUS_ERROR;
1935   case XML_FINISHED:
1936     parser->m_errorCode = XML_ERROR_FINISHED;
1937     return XML_STATUS_ERROR;
1938   case XML_INITIALIZED:
1939     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1940       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1941       return XML_STATUS_ERROR;
1942     }
1943     /* fall through */
1944   default:
1945     parser->m_parsingStatus.parsing = XML_PARSING;
1946   }
1947 
1948 #if XML_CONTEXT_BYTES == 0
1949   if (parser->m_bufferPtr == parser->m_bufferEnd) {
1950     const char *end;
1951     int nLeftOver;
1952     enum XML_Status result;
1953     /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1954     if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1955       parser->m_errorCode = XML_ERROR_NO_MEMORY;
1956       parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1957       parser->m_processor = errorProcessor;
1958       return XML_STATUS_ERROR;
1959     }
1960     // though this isn't a buffer request, we assume that `len` is the app's
1961     // preferred buffer fill size, and therefore save it here.
1962     parser->m_lastBufferRequestSize = len;
1963     parser->m_parseEndByteIndex += len;
1964     parser->m_positionPtr = s;
1965     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1966 
1967     parser->m_errorCode
1968         = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1969 
1970     if (parser->m_errorCode != XML_ERROR_NONE) {
1971       parser->m_eventEndPtr = parser->m_eventPtr;
1972       parser->m_processor = errorProcessor;
1973       return XML_STATUS_ERROR;
1974     } else {
1975       switch (parser->m_parsingStatus.parsing) {
1976       case XML_SUSPENDED:
1977         result = XML_STATUS_SUSPENDED;
1978         break;
1979       case XML_INITIALIZED:
1980       case XML_PARSING:
1981         if (isFinal) {
1982           parser->m_parsingStatus.parsing = XML_FINISHED;
1983           return XML_STATUS_OK;
1984         }
1985       /* fall through */
1986       default:
1987         result = XML_STATUS_OK;
1988       }
1989     }
1990 
1991     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1992                       &parser->m_position);
1993     nLeftOver = s + len - end;
1994     if (nLeftOver) {
1995       // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1996       // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1997       const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1998       parser->m_parsingStatus.parsing = XML_PARSING;
1999       void *const temp = XML_GetBuffer(parser, nLeftOver);
2000       parser->m_parsingStatus.parsing = originalStatus;
2001       // GetBuffer may have overwritten this, but we want to remember what the
2002       // app requested, not how many bytes were left over after parsing.
2003       parser->m_lastBufferRequestSize = len;
2004       if (temp == NULL) {
2005         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2006         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2007         parser->m_processor = errorProcessor;
2008         return XML_STATUS_ERROR;
2009       }
2010       // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2011       // don't have any data to preserve, and can copy straight into the start
2012       // of the buffer rather than the GetBuffer return pointer (which may be
2013       // pointing further into the allocated buffer).
2014       memcpy(parser->m_buffer, end, nLeftOver);
2015     }
2016     parser->m_bufferPtr = parser->m_buffer;
2017     parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2018     parser->m_positionPtr = parser->m_bufferPtr;
2019     parser->m_parseEndPtr = parser->m_bufferEnd;
2020     parser->m_eventPtr = parser->m_bufferPtr;
2021     parser->m_eventEndPtr = parser->m_bufferPtr;
2022     return result;
2023   }
2024 #endif /* XML_CONTEXT_BYTES == 0 */
2025   void *buff = XML_GetBuffer(parser, len);
2026   if (buff == NULL)
2027     return XML_STATUS_ERROR;
2028   if (len > 0) {
2029     assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2030     memcpy(buff, s, len);
2031   }
2032   return XML_ParseBuffer(parser, len, isFinal);
2033 }
2034 
2035 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2036 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2037   const char *start;
2038   enum XML_Status result = XML_STATUS_OK;
2039 
2040   if (parser == NULL)
2041     return XML_STATUS_ERROR;
2042 
2043   if (len < 0) {
2044     parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2045     return XML_STATUS_ERROR;
2046   }
2047 
2048   switch (parser->m_parsingStatus.parsing) {
2049   case XML_SUSPENDED:
2050     parser->m_errorCode = XML_ERROR_SUSPENDED;
2051     return XML_STATUS_ERROR;
2052   case XML_FINISHED:
2053     parser->m_errorCode = XML_ERROR_FINISHED;
2054     return XML_STATUS_ERROR;
2055   case XML_INITIALIZED:
2056     /* Has someone called XML_GetBuffer successfully before? */
2057     if (! parser->m_bufferPtr) {
2058       parser->m_errorCode = XML_ERROR_NO_BUFFER;
2059       return XML_STATUS_ERROR;
2060     }
2061 
2062     if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2063       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2064       return XML_STATUS_ERROR;
2065     }
2066     /* fall through */
2067   default:
2068     parser->m_parsingStatus.parsing = XML_PARSING;
2069   }
2070 
2071   start = parser->m_bufferPtr;
2072   parser->m_positionPtr = start;
2073   parser->m_bufferEnd += len;
2074   parser->m_parseEndPtr = parser->m_bufferEnd;
2075   parser->m_parseEndByteIndex += len;
2076   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2077 
2078   parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2079                                       &parser->m_bufferPtr);
2080 
2081   if (parser->m_errorCode != XML_ERROR_NONE) {
2082     parser->m_eventEndPtr = parser->m_eventPtr;
2083     parser->m_processor = errorProcessor;
2084     return XML_STATUS_ERROR;
2085   } else {
2086     switch (parser->m_parsingStatus.parsing) {
2087     case XML_SUSPENDED:
2088       result = XML_STATUS_SUSPENDED;
2089       break;
2090     case XML_INITIALIZED:
2091     case XML_PARSING:
2092       if (isFinal) {
2093         parser->m_parsingStatus.parsing = XML_FINISHED;
2094         return result;
2095       }
2096     default:; /* should not happen */
2097     }
2098   }
2099 
2100   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2101                     parser->m_bufferPtr, &parser->m_position);
2102   parser->m_positionPtr = parser->m_bufferPtr;
2103   return result;
2104 }
2105 
2106 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2107 XML_GetBuffer(XML_Parser parser, int len) {
2108   if (parser == NULL)
2109     return NULL;
2110   if (len < 0) {
2111     parser->m_errorCode = XML_ERROR_NO_MEMORY;
2112     return NULL;
2113   }
2114   switch (parser->m_parsingStatus.parsing) {
2115   case XML_SUSPENDED:
2116     parser->m_errorCode = XML_ERROR_SUSPENDED;
2117     return NULL;
2118   case XML_FINISHED:
2119     parser->m_errorCode = XML_ERROR_FINISHED;
2120     return NULL;
2121   default:;
2122   }
2123 
2124   // whether or not the request succeeds, `len` seems to be the app's preferred
2125   // buffer fill size; remember it.
2126   parser->m_lastBufferRequestSize = len;
2127   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2128       || parser->m_buffer == NULL) {
2129 #if XML_CONTEXT_BYTES > 0
2130     int keep;
2131 #endif /* XML_CONTEXT_BYTES > 0 */
2132     /* Do not invoke signed arithmetic overflow: */
2133     int neededSize = (int)((unsigned)len
2134                            + (unsigned)EXPAT_SAFE_PTR_DIFF(
2135                                parser->m_bufferEnd, parser->m_bufferPtr));
2136     if (neededSize < 0) {
2137       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2138       return NULL;
2139     }
2140 #if XML_CONTEXT_BYTES > 0
2141     keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2142     if (keep > XML_CONTEXT_BYTES)
2143       keep = XML_CONTEXT_BYTES;
2144     /* Detect and prevent integer overflow */
2145     if (keep > INT_MAX - neededSize) {
2146       parser->m_errorCode = XML_ERROR_NO_MEMORY;
2147       return NULL;
2148     }
2149     neededSize += keep;
2150 #endif /* XML_CONTEXT_BYTES > 0 */
2151     if (parser->m_buffer && parser->m_bufferPtr
2152         && neededSize
2153                <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2154 #if XML_CONTEXT_BYTES > 0
2155       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2156         int offset
2157             = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2158               - keep;
2159         /* The buffer pointers cannot be NULL here; we have at least some bytes
2160          * in the buffer */
2161         memmove(parser->m_buffer, &parser->m_buffer[offset],
2162                 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2163         parser->m_bufferEnd -= offset;
2164         parser->m_bufferPtr -= offset;
2165       }
2166 #else
2167       memmove(parser->m_buffer, parser->m_bufferPtr,
2168               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2169       parser->m_bufferEnd
2170           = parser->m_buffer
2171             + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2172       parser->m_bufferPtr = parser->m_buffer;
2173 #endif /* XML_CONTEXT_BYTES > 0 */
2174     } else {
2175       char *newBuf;
2176       int bufferSize
2177           = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2178       if (bufferSize == 0)
2179         bufferSize = INIT_BUFFER_SIZE;
2180       do {
2181         /* Do not invoke signed arithmetic overflow: */
2182         bufferSize = (int)(2U * (unsigned)bufferSize);
2183       } while (bufferSize < neededSize && bufferSize > 0);
2184       if (bufferSize <= 0) {
2185         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2186         return NULL;
2187       }
2188       newBuf = (char *)MALLOC(parser, bufferSize);
2189       if (newBuf == 0) {
2190         parser->m_errorCode = XML_ERROR_NO_MEMORY;
2191         return NULL;
2192       }
2193       parser->m_bufferLim = newBuf + bufferSize;
2194 #if XML_CONTEXT_BYTES > 0
2195       if (parser->m_bufferPtr) {
2196         memcpy(newBuf, &parser->m_bufferPtr[-keep],
2197                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2198                    + keep);
2199         FREE(parser, parser->m_buffer);
2200         parser->m_buffer = newBuf;
2201         parser->m_bufferEnd
2202             = parser->m_buffer
2203               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2204               + keep;
2205         parser->m_bufferPtr = parser->m_buffer + keep;
2206       } else {
2207         /* This must be a brand new buffer with no data in it yet */
2208         parser->m_bufferEnd = newBuf;
2209         parser->m_bufferPtr = parser->m_buffer = newBuf;
2210       }
2211 #else
2212       if (parser->m_bufferPtr) {
2213         memcpy(newBuf, parser->m_bufferPtr,
2214                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2215         FREE(parser, parser->m_buffer);
2216         parser->m_bufferEnd
2217             = newBuf
2218               + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2219       } else {
2220         /* This must be a brand new buffer with no data in it yet */
2221         parser->m_bufferEnd = newBuf;
2222       }
2223       parser->m_bufferPtr = parser->m_buffer = newBuf;
2224 #endif /* XML_CONTEXT_BYTES > 0 */
2225     }
2226     parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2227     parser->m_positionPtr = NULL;
2228   }
2229   return parser->m_bufferEnd;
2230 }
2231 
2232 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2233 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2234   if (parser == NULL)
2235     return XML_STATUS_ERROR;
2236   switch (parser->m_parsingStatus.parsing) {
2237   case XML_SUSPENDED:
2238     if (resumable) {
2239       parser->m_errorCode = XML_ERROR_SUSPENDED;
2240       return XML_STATUS_ERROR;
2241     }
2242     parser->m_parsingStatus.parsing = XML_FINISHED;
2243     break;
2244   case XML_FINISHED:
2245     parser->m_errorCode = XML_ERROR_FINISHED;
2246     return XML_STATUS_ERROR;
2247   default:
2248     if (resumable) {
2249 #ifdef XML_DTD
2250       if (parser->m_isParamEntity) {
2251         parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2252         return XML_STATUS_ERROR;
2253       }
2254 #endif
2255       parser->m_parsingStatus.parsing = XML_SUSPENDED;
2256     } else
2257       parser->m_parsingStatus.parsing = XML_FINISHED;
2258   }
2259   return XML_STATUS_OK;
2260 }
2261 
2262 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2263 XML_ResumeParser(XML_Parser parser) {
2264   enum XML_Status result = XML_STATUS_OK;
2265 
2266   if (parser == NULL)
2267     return XML_STATUS_ERROR;
2268   if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2269     parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2270     return XML_STATUS_ERROR;
2271   }
2272   parser->m_parsingStatus.parsing = XML_PARSING;
2273 
2274   parser->m_errorCode = callProcessor(
2275       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2276 
2277   if (parser->m_errorCode != XML_ERROR_NONE) {
2278     parser->m_eventEndPtr = parser->m_eventPtr;
2279     parser->m_processor = errorProcessor;
2280     return XML_STATUS_ERROR;
2281   } else {
2282     switch (parser->m_parsingStatus.parsing) {
2283     case XML_SUSPENDED:
2284       result = XML_STATUS_SUSPENDED;
2285       break;
2286     case XML_INITIALIZED:
2287     case XML_PARSING:
2288       if (parser->m_parsingStatus.finalBuffer) {
2289         parser->m_parsingStatus.parsing = XML_FINISHED;
2290         return result;
2291       }
2292     default:;
2293     }
2294   }
2295 
2296   XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2297                     parser->m_bufferPtr, &parser->m_position);
2298   parser->m_positionPtr = parser->m_bufferPtr;
2299   return result;
2300 }
2301 
2302 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2303 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2304   if (parser == NULL)
2305     return;
2306   assert(status != NULL);
2307   *status = parser->m_parsingStatus;
2308 }
2309 
2310 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2311 XML_GetErrorCode(XML_Parser parser) {
2312   if (parser == NULL)
2313     return XML_ERROR_INVALID_ARGUMENT;
2314   return parser->m_errorCode;
2315 }
2316 
2317 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2318 XML_GetCurrentByteIndex(XML_Parser parser) {
2319   if (parser == NULL)
2320     return -1;
2321   if (parser->m_eventPtr)
2322     return (XML_Index)(parser->m_parseEndByteIndex
2323                        - (parser->m_parseEndPtr - parser->m_eventPtr));
2324   return -1;
2325 }
2326 
2327 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2328 XML_GetCurrentByteCount(XML_Parser parser) {
2329   if (parser == NULL)
2330     return 0;
2331   if (parser->m_eventEndPtr && parser->m_eventPtr)
2332     return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2333   return 0;
2334 }
2335 
2336 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2337 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2338 #if XML_CONTEXT_BYTES > 0
2339   if (parser == NULL)
2340     return NULL;
2341   if (parser->m_eventPtr && parser->m_buffer) {
2342     if (offset != NULL)
2343       *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2344     if (size != NULL)
2345       *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2346     return parser->m_buffer;
2347   }
2348 #else
2349   (void)parser;
2350   (void)offset;
2351   (void)size;
2352 #endif /* XML_CONTEXT_BYTES > 0 */
2353   return (const char *)0;
2354 }
2355 
2356 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2357 XML_GetCurrentLineNumber(XML_Parser parser) {
2358   if (parser == NULL)
2359     return 0;
2360   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2361     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2362                       parser->m_eventPtr, &parser->m_position);
2363     parser->m_positionPtr = parser->m_eventPtr;
2364   }
2365   return parser->m_position.lineNumber + 1;
2366 }
2367 
2368 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2369 XML_GetCurrentColumnNumber(XML_Parser parser) {
2370   if (parser == NULL)
2371     return 0;
2372   if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2373     XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2374                       parser->m_eventPtr, &parser->m_position);
2375     parser->m_positionPtr = parser->m_eventPtr;
2376   }
2377   return parser->m_position.columnNumber;
2378 }
2379 
2380 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2381 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2382   if (parser != NULL)
2383     FREE(parser, model);
2384 }
2385 
2386 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2387 XML_MemMalloc(XML_Parser parser, size_t size) {
2388   if (parser == NULL)
2389     return NULL;
2390   return MALLOC(parser, size);
2391 }
2392 
2393 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2394 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2395   if (parser == NULL)
2396     return NULL;
2397   return REALLOC(parser, ptr, size);
2398 }
2399 
2400 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2401 XML_MemFree(XML_Parser parser, void *ptr) {
2402   if (parser != NULL)
2403     FREE(parser, ptr);
2404 }
2405 
2406 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2407 XML_DefaultCurrent(XML_Parser parser) {
2408   if (parser == NULL)
2409     return;
2410   if (parser->m_defaultHandler) {
2411     if (parser->m_openInternalEntities)
2412       reportDefault(parser, parser->m_internalEncoding,
2413                     parser->m_openInternalEntities->internalEventPtr,
2414                     parser->m_openInternalEntities->internalEventEndPtr);
2415     else
2416       reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2417                     parser->m_eventEndPtr);
2418   }
2419 }
2420 
2421 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2422 XML_ErrorString(enum XML_Error code) {
2423   switch (code) {
2424   case XML_ERROR_NONE:
2425     return NULL;
2426   case XML_ERROR_NO_MEMORY:
2427     return XML_L("out of memory");
2428   case XML_ERROR_SYNTAX:
2429     return XML_L("syntax error");
2430   case XML_ERROR_NO_ELEMENTS:
2431     return XML_L("no element found");
2432   case XML_ERROR_INVALID_TOKEN:
2433     return XML_L("not well-formed (invalid token)");
2434   case XML_ERROR_UNCLOSED_TOKEN:
2435     return XML_L("unclosed token");
2436   case XML_ERROR_PARTIAL_CHAR:
2437     return XML_L("partial character");
2438   case XML_ERROR_TAG_MISMATCH:
2439     return XML_L("mismatched tag");
2440   case XML_ERROR_DUPLICATE_ATTRIBUTE:
2441     return XML_L("duplicate attribute");
2442   case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2443     return XML_L("junk after document element");
2444   case XML_ERROR_PARAM_ENTITY_REF:
2445     return XML_L("illegal parameter entity reference");
2446   case XML_ERROR_UNDEFINED_ENTITY:
2447     return XML_L("undefined entity");
2448   case XML_ERROR_RECURSIVE_ENTITY_REF:
2449     return XML_L("recursive entity reference");
2450   case XML_ERROR_ASYNC_ENTITY:
2451     return XML_L("asynchronous entity");
2452   case XML_ERROR_BAD_CHAR_REF:
2453     return XML_L("reference to invalid character number");
2454   case XML_ERROR_BINARY_ENTITY_REF:
2455     return XML_L("reference to binary entity");
2456   case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2457     return XML_L("reference to external entity in attribute");
2458   case XML_ERROR_MISPLACED_XML_PI:
2459     return XML_L("XML or text declaration not at start of entity");
2460   case XML_ERROR_UNKNOWN_ENCODING:
2461     return XML_L("unknown encoding");
2462   case XML_ERROR_INCORRECT_ENCODING:
2463     return XML_L("encoding specified in XML declaration is incorrect");
2464   case XML_ERROR_UNCLOSED_CDATA_SECTION:
2465     return XML_L("unclosed CDATA section");
2466   case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2467     return XML_L("error in processing external entity reference");
2468   case XML_ERROR_NOT_STANDALONE:
2469     return XML_L("document is not standalone");
2470   case XML_ERROR_UNEXPECTED_STATE:
2471     return XML_L("unexpected parser state - please send a bug report");
2472   case XML_ERROR_ENTITY_DECLARED_IN_PE:
2473     return XML_L("entity declared in parameter entity");
2474   case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2475     return XML_L("requested feature requires XML_DTD support in Expat");
2476   case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2477     return XML_L("cannot change setting once parsing has begun");
2478   /* Added in 1.95.7. */
2479   case XML_ERROR_UNBOUND_PREFIX:
2480     return XML_L("unbound prefix");
2481   /* Added in 1.95.8. */
2482   case XML_ERROR_UNDECLARING_PREFIX:
2483     return XML_L("must not undeclare prefix");
2484   case XML_ERROR_INCOMPLETE_PE:
2485     return XML_L("incomplete markup in parameter entity");
2486   case XML_ERROR_XML_DECL:
2487     return XML_L("XML declaration not well-formed");
2488   case XML_ERROR_TEXT_DECL:
2489     return XML_L("text declaration not well-formed");
2490   case XML_ERROR_PUBLICID:
2491     return XML_L("illegal character(s) in public id");
2492   case XML_ERROR_SUSPENDED:
2493     return XML_L("parser suspended");
2494   case XML_ERROR_NOT_SUSPENDED:
2495     return XML_L("parser not suspended");
2496   case XML_ERROR_ABORTED:
2497     return XML_L("parsing aborted");
2498   case XML_ERROR_FINISHED:
2499     return XML_L("parsing finished");
2500   case XML_ERROR_SUSPEND_PE:
2501     return XML_L("cannot suspend in external parameter entity");
2502   /* Added in 2.0.0. */
2503   case XML_ERROR_RESERVED_PREFIX_XML:
2504     return XML_L(
2505         "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2506   case XML_ERROR_RESERVED_PREFIX_XMLNS:
2507     return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2508   case XML_ERROR_RESERVED_NAMESPACE_URI:
2509     return XML_L(
2510         "prefix must not be bound to one of the reserved namespace names");
2511   /* Added in 2.2.5. */
2512   case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2513     return XML_L("invalid argument");
2514     /* Added in 2.3.0. */
2515   case XML_ERROR_NO_BUFFER:
2516     return XML_L(
2517         "a successful prior call to function XML_GetBuffer is required");
2518   /* Added in 2.4.0. */
2519   case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2520     return XML_L(
2521         "limit on input amplification factor (from DTD and entities) breached");
2522   }
2523   return NULL;
2524 }
2525 
2526 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2527 XML_ExpatVersion(void) {
2528   /* V1 is used to string-ize the version number. However, it would
2529      string-ize the actual version macro *names* unless we get them
2530      substituted before being passed to V1. CPP is defined to expand
2531      a macro, then rescan for more expansions. Thus, we use V2 to expand
2532      the version macros, then CPP will expand the resulting V1() macro
2533      with the correct numerals. */
2534   /* ### I'm assuming cpp is portable in this respect... */
2535 
2536 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2537 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2538 
2539   return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2540 
2541 #undef V1
2542 #undef V2
2543 }
2544 
2545 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2546 XML_ExpatVersionInfo(void) {
2547   XML_Expat_Version version;
2548 
2549   version.major = XML_MAJOR_VERSION;
2550   version.minor = XML_MINOR_VERSION;
2551   version.micro = XML_MICRO_VERSION;
2552 
2553   return version;
2554 }
2555 
2556 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2557 XML_GetFeatureList(void) {
2558   static const XML_Feature features[] = {
2559       {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2560        sizeof(XML_Char)},
2561       {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2562        sizeof(XML_LChar)},
2563 #ifdef XML_UNICODE
2564       {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2565 #endif
2566 #ifdef XML_UNICODE_WCHAR_T
2567       {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2568 #endif
2569 #ifdef XML_DTD
2570       {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2571 #endif
2572 #if XML_CONTEXT_BYTES > 0
2573       {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2574        XML_CONTEXT_BYTES},
2575 #endif
2576 #ifdef XML_MIN_SIZE
2577       {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2578 #endif
2579 #ifdef XML_NS
2580       {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2581 #endif
2582 #ifdef XML_LARGE_SIZE
2583       {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2584 #endif
2585 #ifdef XML_ATTR_INFO
2586       {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2587 #endif
2588 #if XML_GE == 1
2589       /* Added in Expat 2.4.0 for XML_DTD defined and
2590        * added in Expat 2.6.0 for XML_GE == 1. */
2591       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2592        XML_L("XML_BLAP_MAX_AMP"),
2593        (long int)
2594            EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2595       {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2596        XML_L("XML_BLAP_ACT_THRES"),
2597        EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2598       /* Added in Expat 2.6.0. */
2599       {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2600 #endif
2601       {XML_FEATURE_END, NULL, 0}};
2602 
2603   return features;
2604 }
2605 
2606 #if XML_GE == 1
2607 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2608 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2609     XML_Parser parser, float maximumAmplificationFactor) {
2610   if ((parser == NULL) || (parser->m_parentParser != NULL)
2611       || isnan(maximumAmplificationFactor)
2612       || (maximumAmplificationFactor < 1.0f)) {
2613     return XML_FALSE;
2614   }
2615   parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2616   return XML_TRUE;
2617 }
2618 
2619 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2620 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2621     XML_Parser parser, unsigned long long activationThresholdBytes) {
2622   if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2623     return XML_FALSE;
2624   }
2625   parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2626   return XML_TRUE;
2627 }
2628 #endif /* XML_GE == 1 */
2629 
2630 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2631 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2632   if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2633     parser->m_reparseDeferralEnabled = enabled;
2634     return XML_TRUE;
2635   }
2636   return XML_FALSE;
2637 }
2638 
2639 /* Initially tag->rawName always points into the parse buffer;
2640    for those TAG instances opened while the current parse buffer was
2641    processed, and not yet closed, we need to store tag->rawName in a more
2642    permanent location, since the parse buffer is about to be discarded.
2643 */
2644 static XML_Bool
storeRawNames(XML_Parser parser)2645 storeRawNames(XML_Parser parser) {
2646   TAG *tag = parser->m_tagStack;
2647   while (tag) {
2648     int bufSize;
2649     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2650     size_t rawNameLen;
2651     char *rawNameBuf = tag->buf + nameLen;
2652     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
2653        at the first entry that has already been copied; everything
2654        below it in the stack is already been accounted for in a
2655        previous call to this function.
2656     */
2657     if (tag->rawName == rawNameBuf)
2658       break;
2659     /* For reuse purposes we need to ensure that the
2660        size of tag->buf is a multiple of sizeof(XML_Char).
2661     */
2662     rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2663     /* Detect and prevent integer overflow. */
2664     if (rawNameLen > (size_t)INT_MAX - nameLen)
2665       return XML_FALSE;
2666     bufSize = nameLen + (int)rawNameLen;
2667     if (bufSize > tag->bufEnd - tag->buf) {
2668       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2669       if (temp == NULL)
2670         return XML_FALSE;
2671       /* if tag->name.str points to tag->buf (only when namespace
2672          processing is off) then we have to update it
2673       */
2674       if (tag->name.str == (XML_Char *)tag->buf)
2675         tag->name.str = (XML_Char *)temp;
2676       /* if tag->name.localPart is set (when namespace processing is on)
2677          then update it as well, since it will always point into tag->buf
2678       */
2679       if (tag->name.localPart)
2680         tag->name.localPart
2681             = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2682       tag->buf = temp;
2683       tag->bufEnd = temp + bufSize;
2684       rawNameBuf = temp + nameLen;
2685     }
2686     memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2687     tag->rawName = rawNameBuf;
2688     tag = tag->parent;
2689   }
2690   return XML_TRUE;
2691 }
2692 
2693 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2694 contentProcessor(XML_Parser parser, const char *start, const char *end,
2695                  const char **endPtr) {
2696   enum XML_Error result = doContent(
2697       parser, 0, parser->m_encoding, start, end, endPtr,
2698       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2699   if (result == XML_ERROR_NONE) {
2700     if (! storeRawNames(parser))
2701       return XML_ERROR_NO_MEMORY;
2702   }
2703   return result;
2704 }
2705 
2706 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2707 externalEntityInitProcessor(XML_Parser parser, const char *start,
2708                             const char *end, const char **endPtr) {
2709   enum XML_Error result = initializeEncoding(parser);
2710   if (result != XML_ERROR_NONE)
2711     return result;
2712   parser->m_processor = externalEntityInitProcessor2;
2713   return externalEntityInitProcessor2(parser, start, end, endPtr);
2714 }
2715 
2716 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2717 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2718                              const char *end, const char **endPtr) {
2719   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2720   int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2721   switch (tok) {
2722   case XML_TOK_BOM:
2723 #if XML_GE == 1
2724     if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2725                                   XML_ACCOUNT_DIRECT)) {
2726       accountingOnAbort(parser);
2727       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2728     }
2729 #endif /* XML_GE == 1 */
2730 
2731     /* If we are at the end of the buffer, this would cause the next stage,
2732        i.e. externalEntityInitProcessor3, to pass control directly to
2733        doContent (by detecting XML_TOK_NONE) without processing any xml text
2734        declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2735     */
2736     if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2737       *endPtr = next;
2738       return XML_ERROR_NONE;
2739     }
2740     start = next;
2741     break;
2742   case XML_TOK_PARTIAL:
2743     if (! parser->m_parsingStatus.finalBuffer) {
2744       *endPtr = start;
2745       return XML_ERROR_NONE;
2746     }
2747     parser->m_eventPtr = start;
2748     return XML_ERROR_UNCLOSED_TOKEN;
2749   case XML_TOK_PARTIAL_CHAR:
2750     if (! parser->m_parsingStatus.finalBuffer) {
2751       *endPtr = start;
2752       return XML_ERROR_NONE;
2753     }
2754     parser->m_eventPtr = start;
2755     return XML_ERROR_PARTIAL_CHAR;
2756   }
2757   parser->m_processor = externalEntityInitProcessor3;
2758   return externalEntityInitProcessor3(parser, start, end, endPtr);
2759 }
2760 
2761 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2762 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2763                              const char *end, const char **endPtr) {
2764   int tok;
2765   const char *next = start; /* XmlContentTok doesn't always set the last arg */
2766   parser->m_eventPtr = start;
2767   tok = XmlContentTok(parser->m_encoding, start, end, &next);
2768   /* Note: These bytes are accounted later in:
2769            - processXmlDecl
2770            - externalEntityContentProcessor
2771   */
2772   parser->m_eventEndPtr = next;
2773 
2774   switch (tok) {
2775   case XML_TOK_XML_DECL: {
2776     enum XML_Error result;
2777     result = processXmlDecl(parser, 1, start, next);
2778     if (result != XML_ERROR_NONE)
2779       return result;
2780     switch (parser->m_parsingStatus.parsing) {
2781     case XML_SUSPENDED:
2782       *endPtr = next;
2783       return XML_ERROR_NONE;
2784     case XML_FINISHED:
2785       return XML_ERROR_ABORTED;
2786     default:
2787       start = next;
2788     }
2789   } break;
2790   case XML_TOK_PARTIAL:
2791     if (! parser->m_parsingStatus.finalBuffer) {
2792       *endPtr = start;
2793       return XML_ERROR_NONE;
2794     }
2795     return XML_ERROR_UNCLOSED_TOKEN;
2796   case XML_TOK_PARTIAL_CHAR:
2797     if (! parser->m_parsingStatus.finalBuffer) {
2798       *endPtr = start;
2799       return XML_ERROR_NONE;
2800     }
2801     return XML_ERROR_PARTIAL_CHAR;
2802   }
2803   parser->m_processor = externalEntityContentProcessor;
2804   parser->m_tagLevel = 1;
2805   return externalEntityContentProcessor(parser, start, end, endPtr);
2806 }
2807 
2808 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2809 externalEntityContentProcessor(XML_Parser parser, const char *start,
2810                                const char *end, const char **endPtr) {
2811   enum XML_Error result
2812       = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2813                   (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2814                   XML_ACCOUNT_ENTITY_EXPANSION);
2815   if (result == XML_ERROR_NONE) {
2816     if (! storeRawNames(parser))
2817       return XML_ERROR_NO_MEMORY;
2818   }
2819   return result;
2820 }
2821 
2822 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2823 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2824           const char *s, const char *end, const char **nextPtr,
2825           XML_Bool haveMore, enum XML_Account account) {
2826   /* save one level of indirection */
2827   DTD *const dtd = parser->m_dtd;
2828 
2829   const char **eventPP;
2830   const char **eventEndPP;
2831   if (enc == parser->m_encoding) {
2832     eventPP = &parser->m_eventPtr;
2833     eventEndPP = &parser->m_eventEndPtr;
2834   } else {
2835     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2836     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2837   }
2838   *eventPP = s;
2839 
2840   for (;;) {
2841     const char *next = s; /* XmlContentTok doesn't always set the last arg */
2842     int tok = XmlContentTok(enc, s, end, &next);
2843 #if XML_GE == 1
2844     const char *accountAfter
2845         = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2846               ? (haveMore ? s /* i.e. 0 bytes */ : end)
2847               : next;
2848     if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2849                                   account)) {
2850       accountingOnAbort(parser);
2851       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2852     }
2853 #endif
2854     *eventEndPP = next;
2855     switch (tok) {
2856     case XML_TOK_TRAILING_CR:
2857       if (haveMore) {
2858         *nextPtr = s;
2859         return XML_ERROR_NONE;
2860       }
2861       *eventEndPP = end;
2862       if (parser->m_characterDataHandler) {
2863         XML_Char c = 0xA;
2864         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2865       } else if (parser->m_defaultHandler)
2866         reportDefault(parser, enc, s, end);
2867       /* We are at the end of the final buffer, should we check for
2868          XML_SUSPENDED, XML_FINISHED?
2869       */
2870       if (startTagLevel == 0)
2871         return XML_ERROR_NO_ELEMENTS;
2872       if (parser->m_tagLevel != startTagLevel)
2873         return XML_ERROR_ASYNC_ENTITY;
2874       *nextPtr = end;
2875       return XML_ERROR_NONE;
2876     case XML_TOK_NONE:
2877       if (haveMore) {
2878         *nextPtr = s;
2879         return XML_ERROR_NONE;
2880       }
2881       if (startTagLevel > 0) {
2882         if (parser->m_tagLevel != startTagLevel)
2883           return XML_ERROR_ASYNC_ENTITY;
2884         *nextPtr = s;
2885         return XML_ERROR_NONE;
2886       }
2887       return XML_ERROR_NO_ELEMENTS;
2888     case XML_TOK_INVALID:
2889       *eventPP = next;
2890       return XML_ERROR_INVALID_TOKEN;
2891     case XML_TOK_PARTIAL:
2892       if (haveMore) {
2893         *nextPtr = s;
2894         return XML_ERROR_NONE;
2895       }
2896       return XML_ERROR_UNCLOSED_TOKEN;
2897     case XML_TOK_PARTIAL_CHAR:
2898       if (haveMore) {
2899         *nextPtr = s;
2900         return XML_ERROR_NONE;
2901       }
2902       return XML_ERROR_PARTIAL_CHAR;
2903     case XML_TOK_ENTITY_REF: {
2904       const XML_Char *name;
2905       ENTITY *entity;
2906       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2907           enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2908       if (ch) {
2909 #if XML_GE == 1
2910         /* NOTE: We are replacing 4-6 characters original input for 1 character
2911          *       so there is no amplification and hence recording without
2912          *       protection. */
2913         accountingDiffTolerated(parser, tok, (char *)&ch,
2914                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2915                                 XML_ACCOUNT_ENTITY_EXPANSION);
2916 #endif /* XML_GE == 1 */
2917         if (parser->m_characterDataHandler)
2918           parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2919         else if (parser->m_defaultHandler)
2920           reportDefault(parser, enc, s, next);
2921         break;
2922       }
2923       name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2924                              next - enc->minBytesPerChar);
2925       if (! name)
2926         return XML_ERROR_NO_MEMORY;
2927       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2928       poolDiscard(&dtd->pool);
2929       /* First, determine if a check for an existing declaration is needed;
2930          if yes, check that the entity exists, and that it is internal,
2931          otherwise call the skipped entity or default handler.
2932       */
2933       if (! dtd->hasParamEntityRefs || dtd->standalone) {
2934         if (! entity)
2935           return XML_ERROR_UNDEFINED_ENTITY;
2936         else if (! entity->is_internal)
2937           return XML_ERROR_ENTITY_DECLARED_IN_PE;
2938       } else if (! entity) {
2939         if (parser->m_skippedEntityHandler)
2940           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2941         else if (parser->m_defaultHandler)
2942           reportDefault(parser, enc, s, next);
2943         break;
2944       }
2945       if (entity->open)
2946         return XML_ERROR_RECURSIVE_ENTITY_REF;
2947       if (entity->notation)
2948         return XML_ERROR_BINARY_ENTITY_REF;
2949       if (entity->textPtr) {
2950         enum XML_Error result;
2951         if (! parser->m_defaultExpandInternalEntities) {
2952           if (parser->m_skippedEntityHandler)
2953             parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2954                                            0);
2955           else if (parser->m_defaultHandler)
2956             reportDefault(parser, enc, s, next);
2957           break;
2958         }
2959         result = processInternalEntity(parser, entity, XML_FALSE);
2960         if (result != XML_ERROR_NONE)
2961           return result;
2962       } else if (parser->m_externalEntityRefHandler) {
2963         const XML_Char *context;
2964         entity->open = XML_TRUE;
2965         context = getContext(parser);
2966         entity->open = XML_FALSE;
2967         if (! context)
2968           return XML_ERROR_NO_MEMORY;
2969         if (! parser->m_externalEntityRefHandler(
2970                 parser->m_externalEntityRefHandlerArg, context, entity->base,
2971                 entity->systemId, entity->publicId))
2972           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2973         poolDiscard(&parser->m_tempPool);
2974       } else if (parser->m_defaultHandler)
2975         reportDefault(parser, enc, s, next);
2976       break;
2977     }
2978     case XML_TOK_START_TAG_NO_ATTS:
2979       /* fall through */
2980     case XML_TOK_START_TAG_WITH_ATTS: {
2981       TAG *tag;
2982       enum XML_Error result;
2983       XML_Char *toPtr;
2984       if (parser->m_freeTagList) {
2985         tag = parser->m_freeTagList;
2986         parser->m_freeTagList = parser->m_freeTagList->parent;
2987       } else {
2988         tag = (TAG *)MALLOC(parser, sizeof(TAG));
2989         if (! tag)
2990           return XML_ERROR_NO_MEMORY;
2991         tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2992         if (! tag->buf) {
2993           FREE(parser, tag);
2994           return XML_ERROR_NO_MEMORY;
2995         }
2996         tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2997       }
2998       tag->bindings = NULL;
2999       tag->parent = parser->m_tagStack;
3000       parser->m_tagStack = tag;
3001       tag->name.localPart = NULL;
3002       tag->name.prefix = NULL;
3003       tag->rawName = s + enc->minBytesPerChar;
3004       tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3005       ++parser->m_tagLevel;
3006       {
3007         const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3008         const char *fromPtr = tag->rawName;
3009         toPtr = (XML_Char *)tag->buf;
3010         for (;;) {
3011           int bufSize;
3012           int convLen;
3013           const enum XML_Convert_Result convert_res
3014               = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3015                            (ICHAR *)tag->bufEnd - 1);
3016           convLen = (int)(toPtr - (XML_Char *)tag->buf);
3017           if ((fromPtr >= rawNameEnd)
3018               || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3019             tag->name.strLen = convLen;
3020             break;
3021           }
3022           bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3023           {
3024             char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3025             if (temp == NULL)
3026               return XML_ERROR_NO_MEMORY;
3027             tag->buf = temp;
3028             tag->bufEnd = temp + bufSize;
3029             toPtr = (XML_Char *)temp + convLen;
3030           }
3031         }
3032       }
3033       tag->name.str = (XML_Char *)tag->buf;
3034       *toPtr = XML_T('\0');
3035       result
3036           = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3037       if (result)
3038         return result;
3039       if (parser->m_startElementHandler)
3040         parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3041                                       (const XML_Char **)parser->m_atts);
3042       else if (parser->m_defaultHandler)
3043         reportDefault(parser, enc, s, next);
3044       poolClear(&parser->m_tempPool);
3045       break;
3046     }
3047     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3048       /* fall through */
3049     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3050       const char *rawName = s + enc->minBytesPerChar;
3051       enum XML_Error result;
3052       BINDING *bindings = NULL;
3053       XML_Bool noElmHandlers = XML_TRUE;
3054       TAG_NAME name;
3055       name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3056                                  rawName + XmlNameLength(enc, rawName));
3057       if (! name.str)
3058         return XML_ERROR_NO_MEMORY;
3059       poolFinish(&parser->m_tempPool);
3060       result = storeAtts(parser, enc, s, &name, &bindings,
3061                          XML_ACCOUNT_NONE /* token spans whole start tag */);
3062       if (result != XML_ERROR_NONE) {
3063         freeBindings(parser, bindings);
3064         return result;
3065       }
3066       poolFinish(&parser->m_tempPool);
3067       if (parser->m_startElementHandler) {
3068         parser->m_startElementHandler(parser->m_handlerArg, name.str,
3069                                       (const XML_Char **)parser->m_atts);
3070         noElmHandlers = XML_FALSE;
3071       }
3072       if (parser->m_endElementHandler) {
3073         if (parser->m_startElementHandler)
3074           *eventPP = *eventEndPP;
3075         parser->m_endElementHandler(parser->m_handlerArg, name.str);
3076         noElmHandlers = XML_FALSE;
3077       }
3078       if (noElmHandlers && parser->m_defaultHandler)
3079         reportDefault(parser, enc, s, next);
3080       poolClear(&parser->m_tempPool);
3081       freeBindings(parser, bindings);
3082     }
3083       if ((parser->m_tagLevel == 0)
3084           && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3085         if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3086           parser->m_processor = epilogProcessor;
3087         else
3088           return epilogProcessor(parser, next, end, nextPtr);
3089       }
3090       break;
3091     case XML_TOK_END_TAG:
3092       if (parser->m_tagLevel == startTagLevel)
3093         return XML_ERROR_ASYNC_ENTITY;
3094       else {
3095         int len;
3096         const char *rawName;
3097         TAG *tag = parser->m_tagStack;
3098         rawName = s + enc->minBytesPerChar * 2;
3099         len = XmlNameLength(enc, rawName);
3100         if (len != tag->rawNameLength
3101             || memcmp(tag->rawName, rawName, len) != 0) {
3102           *eventPP = rawName;
3103           return XML_ERROR_TAG_MISMATCH;
3104         }
3105         parser->m_tagStack = tag->parent;
3106         tag->parent = parser->m_freeTagList;
3107         parser->m_freeTagList = tag;
3108         --parser->m_tagLevel;
3109         if (parser->m_endElementHandler) {
3110           const XML_Char *localPart;
3111           const XML_Char *prefix;
3112           XML_Char *uri;
3113           localPart = tag->name.localPart;
3114           if (parser->m_ns && localPart) {
3115             /* localPart and prefix may have been overwritten in
3116                tag->name.str, since this points to the binding->uri
3117                buffer which gets reused; so we have to add them again
3118             */
3119             uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3120             /* don't need to check for space - already done in storeAtts() */
3121             while (*localPart)
3122               *uri++ = *localPart++;
3123             prefix = tag->name.prefix;
3124             if (parser->m_ns_triplets && prefix) {
3125               *uri++ = parser->m_namespaceSeparator;
3126               while (*prefix)
3127                 *uri++ = *prefix++;
3128             }
3129             *uri = XML_T('\0');
3130           }
3131           parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3132         } else if (parser->m_defaultHandler)
3133           reportDefault(parser, enc, s, next);
3134         while (tag->bindings) {
3135           BINDING *b = tag->bindings;
3136           if (parser->m_endNamespaceDeclHandler)
3137             parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3138                                               b->prefix->name);
3139           tag->bindings = tag->bindings->nextTagBinding;
3140           b->nextTagBinding = parser->m_freeBindingList;
3141           parser->m_freeBindingList = b;
3142           b->prefix->binding = b->prevPrefixBinding;
3143         }
3144         if ((parser->m_tagLevel == 0)
3145             && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3146           if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3147             parser->m_processor = epilogProcessor;
3148           else
3149             return epilogProcessor(parser, next, end, nextPtr);
3150         }
3151       }
3152       break;
3153     case XML_TOK_CHAR_REF: {
3154       int n = XmlCharRefNumber(enc, s);
3155       if (n < 0)
3156         return XML_ERROR_BAD_CHAR_REF;
3157       if (parser->m_characterDataHandler) {
3158         XML_Char buf[XML_ENCODE_MAX];
3159         parser->m_characterDataHandler(parser->m_handlerArg, buf,
3160                                        XmlEncode(n, (ICHAR *)buf));
3161       } else if (parser->m_defaultHandler)
3162         reportDefault(parser, enc, s, next);
3163     } break;
3164     case XML_TOK_XML_DECL:
3165       return XML_ERROR_MISPLACED_XML_PI;
3166     case XML_TOK_DATA_NEWLINE:
3167       if (parser->m_characterDataHandler) {
3168         XML_Char c = 0xA;
3169         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3170       } else if (parser->m_defaultHandler)
3171         reportDefault(parser, enc, s, next);
3172       break;
3173     case XML_TOK_CDATA_SECT_OPEN: {
3174       enum XML_Error result;
3175       if (parser->m_startCdataSectionHandler)
3176         parser->m_startCdataSectionHandler(parser->m_handlerArg);
3177       /* BEGIN disabled code */
3178       /* Suppose you are doing a transformation on a document that involves
3179          changing only the character data.  You set up a defaultHandler
3180          and a characterDataHandler.  The defaultHandler simply copies
3181          characters through.  The characterDataHandler does the
3182          transformation and writes the characters out escaping them as
3183          necessary.  This case will fail to work if we leave out the
3184          following two lines (because & and < inside CDATA sections will
3185          be incorrectly escaped).
3186 
3187          However, now we have a start/endCdataSectionHandler, so it seems
3188          easier to let the user deal with this.
3189       */
3190       else if ((0) && parser->m_characterDataHandler)
3191         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3192                                        0);
3193       /* END disabled code */
3194       else if (parser->m_defaultHandler)
3195         reportDefault(parser, enc, s, next);
3196       result
3197           = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3198       if (result != XML_ERROR_NONE)
3199         return result;
3200       else if (! next) {
3201         parser->m_processor = cdataSectionProcessor;
3202         return result;
3203       }
3204     } break;
3205     case XML_TOK_TRAILING_RSQB:
3206       if (haveMore) {
3207         *nextPtr = s;
3208         return XML_ERROR_NONE;
3209       }
3210       if (parser->m_characterDataHandler) {
3211         if (MUST_CONVERT(enc, s)) {
3212           ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3213           XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3214           parser->m_characterDataHandler(
3215               parser->m_handlerArg, parser->m_dataBuf,
3216               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3217         } else
3218           parser->m_characterDataHandler(
3219               parser->m_handlerArg, (const XML_Char *)s,
3220               (int)((const XML_Char *)end - (const XML_Char *)s));
3221       } else if (parser->m_defaultHandler)
3222         reportDefault(parser, enc, s, end);
3223       /* We are at the end of the final buffer, should we check for
3224          XML_SUSPENDED, XML_FINISHED?
3225       */
3226       if (startTagLevel == 0) {
3227         *eventPP = end;
3228         return XML_ERROR_NO_ELEMENTS;
3229       }
3230       if (parser->m_tagLevel != startTagLevel) {
3231         *eventPP = end;
3232         return XML_ERROR_ASYNC_ENTITY;
3233       }
3234       *nextPtr = end;
3235       return XML_ERROR_NONE;
3236     case XML_TOK_DATA_CHARS: {
3237       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3238       if (charDataHandler) {
3239         if (MUST_CONVERT(enc, s)) {
3240           for (;;) {
3241             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3242             const enum XML_Convert_Result convert_res = XmlConvert(
3243                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3244             *eventEndPP = s;
3245             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3246                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3247             if ((convert_res == XML_CONVERT_COMPLETED)
3248                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3249               break;
3250             *eventPP = s;
3251           }
3252         } else
3253           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3254                           (int)((const XML_Char *)next - (const XML_Char *)s));
3255       } else if (parser->m_defaultHandler)
3256         reportDefault(parser, enc, s, next);
3257     } break;
3258     case XML_TOK_PI:
3259       if (! reportProcessingInstruction(parser, enc, s, next))
3260         return XML_ERROR_NO_MEMORY;
3261       break;
3262     case XML_TOK_COMMENT:
3263       if (! reportComment(parser, enc, s, next))
3264         return XML_ERROR_NO_MEMORY;
3265       break;
3266     default:
3267       /* All of the tokens produced by XmlContentTok() have their own
3268        * explicit cases, so this default is not strictly necessary.
3269        * However it is a useful safety net, so we retain the code and
3270        * simply exclude it from the coverage tests.
3271        *
3272        * LCOV_EXCL_START
3273        */
3274       if (parser->m_defaultHandler)
3275         reportDefault(parser, enc, s, next);
3276       break;
3277       /* LCOV_EXCL_STOP */
3278     }
3279     *eventPP = s = next;
3280     switch (parser->m_parsingStatus.parsing) {
3281     case XML_SUSPENDED:
3282       *nextPtr = next;
3283       return XML_ERROR_NONE;
3284     case XML_FINISHED:
3285       return XML_ERROR_ABORTED;
3286     default:;
3287     }
3288   }
3289   /* not reached */
3290 }
3291 
3292 /* This function does not call free() on the allocated memory, merely
3293  * moving it to the parser's m_freeBindingList where it can be freed or
3294  * reused as appropriate.
3295  */
3296 static void
freeBindings(XML_Parser parser,BINDING * bindings)3297 freeBindings(XML_Parser parser, BINDING *bindings) {
3298   while (bindings) {
3299     BINDING *b = bindings;
3300 
3301     /* m_startNamespaceDeclHandler will have been called for this
3302      * binding in addBindings(), so call the end handler now.
3303      */
3304     if (parser->m_endNamespaceDeclHandler)
3305       parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3306 
3307     bindings = bindings->nextTagBinding;
3308     b->nextTagBinding = parser->m_freeBindingList;
3309     parser->m_freeBindingList = b;
3310     b->prefix->binding = b->prevPrefixBinding;
3311   }
3312 }
3313 
3314 /* Precondition: all arguments must be non-NULL;
3315    Purpose:
3316    - normalize attributes
3317    - check attributes for well-formedness
3318    - generate namespace aware attribute names (URI, prefix)
3319    - build list of attributes for startElementHandler
3320    - default attributes
3321    - process namespace declarations (check and report them)
3322    - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser state; m_atts, m_attInfo (with XML_ATTR_INFO) and
                  m_nsAtts may be reallocated here, and attribute values
                  and expanded names are stored in m_tempPool
   - enc:         encoding handed to XmlGetAttributes(), XmlNameLength(),
                  getAttributeId() and the value-storing helpers
   - attStr:      start of the tag text passed to XmlGetAttributes()
   - tagNamePtr:  in: str is the element name used for the lookup in
                  dtd->elementTypes; out (only with parser->m_ns set and an
                  applicable binding): localPart, uriLen, prefix and
                  prefixLen are filled in and str is pointed at binding->uri
   - bindingsPtr: passed on to addBinding() for each namespace declaration;
                  the list it heads is walked at the end to clear flags
   - account:     forwarded unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success.  Failure codes produced here are
   XML_ERROR_NO_MEMORY, XML_ERROR_DUPLICATE_ATTRIBUTE and
   XML_ERROR_UNBOUND_PREFIX; a non-zero result of storeAttributeValue() or
   addBinding() is returned as is.  Nothing in this function releases the
   bindings collected through bindingsPtr on an error return.
3323 */
3324 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3325 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3326           TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3327           enum XML_Account account) {
3328   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3329   ELEMENT_TYPE *elementType;
3330   int nDefaultAtts;
3331   const XML_Char **appAtts; /* the attribute list for the application */
3332   int attIndex = 0;
3333   int prefixLen;
3334   int i;
3335   int n;
3336   XML_Char *uri;
3337   int nPrefixes = 0;
3338   BINDING *binding;
3339   const XML_Char *localPart;
3340 
3341   /* lookup the element type name */
3342   elementType
3343       = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3344   if (! elementType) {
    /* Not in the table yet: copy the name into the DTD pool and create
       an ELEMENT_TYPE entry under that copy. */
3345     const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3346     if (! name)
3347       return XML_ERROR_NO_MEMORY;
3348     elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3349                                          sizeof(ELEMENT_TYPE));
3350     if (! elementType)
3351       return XML_ERROR_NO_MEMORY;
3352     if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3353       return XML_ERROR_NO_MEMORY;
3354   }
3355   nDefaultAtts = elementType->nDefaultAtts;
3356 
3357   /* get the attributes from the tokenizer */
3358   n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3359 
3360   /* Detect and prevent integer overflow */
3361   if (n > INT_MAX - nDefaultAtts) {
3362     return XML_ERROR_NO_MEMORY;
3363   }
3364 
  /* Grow m_atts (and m_attInfo) when specified plus defaulted attributes
     do not fit; INIT_ATTS_SIZE extra entries are added as headroom. */
3365   if (n + nDefaultAtts > parser->m_attsSize) {
3366     int oldAttsSize = parser->m_attsSize;
3367     ATTRIBUTE *temp;
3368 #ifdef XML_ATTR_INFO
3369     XML_AttrInfo *temp2;
3370 #endif
3371 
3372     /* Detect and prevent integer overflow */
3373     if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3374         || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3375       return XML_ERROR_NO_MEMORY;
3376     }
3377 
3378     parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3379 
3380     /* Detect and prevent integer overflow.
3381      * The preprocessor guard addresses the "always false" warning
3382      * from -Wtype-limits on platforms where
3383      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3384 #if UINT_MAX >= SIZE_MAX
3385     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3386       parser->m_attsSize = oldAttsSize;
3387       return XML_ERROR_NO_MEMORY;
3388     }
3389 #endif
3390 
    /* m_atts is only replaced once REALLOC succeeded; on failure the
       recorded size is rolled back to the old value. */
3391     temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3392                                 parser->m_attsSize * sizeof(ATTRIBUTE));
3393     if (temp == NULL) {
3394       parser->m_attsSize = oldAttsSize;
3395       return XML_ERROR_NO_MEMORY;
3396     }
3397     parser->m_atts = temp;
3398 #ifdef XML_ATTR_INFO
3399     /* Detect and prevent integer overflow.
3400      * The preprocessor guard addresses the "always false" warning
3401      * from -Wtype-limits on platforms where
3402      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3403 #  if UINT_MAX >= SIZE_MAX
3404     if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3405       parser->m_attsSize = oldAttsSize;
3406       return XML_ERROR_NO_MEMORY;
3407     }
3408 #  endif
3409 
3410     temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3411                                     parser->m_attsSize * sizeof(XML_AttrInfo));
3412     if (temp2 == NULL) {
3413       parser->m_attsSize = oldAttsSize;
3414       return XML_ERROR_NO_MEMORY;
3415     }
3416     parser->m_attInfo = temp2;
3417 #endif
    /* NOTE(review): presumably the first XmlGetAttributes() call could
       fill at most oldAttsSize entries, so it is repeated now that all n
       fit -- confirm against the tokenizer. */
3418     if (n > oldAttsSize)
3419       XmlGetAttributes(enc, attStr, n, parser->m_atts);
3420   }
3421 
  /* The application's name/value pointer list is built in place, on top
     of the m_atts storage: names land on even indexes, values on the odd
     index that follows. */
3422   appAtts = (const XML_Char **)parser->m_atts;
3423   for (i = 0; i < n; i++) {
3424     ATTRIBUTE *currAtt = &parser->m_atts[i];
3425 #ifdef XML_ATTR_INFO
3426     XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3427 #endif
3428     /* add the name and value to the attribute list */
3429     ATTRIBUTE_ID *attId
3430         = getAttributeId(parser, enc, currAtt->name,
3431                          currAtt->name + XmlNameLength(enc, currAtt->name));
3432     if (! attId)
3433       return XML_ERROR_NO_MEMORY;
3434 #ifdef XML_ATTR_INFO
    /* Offsets are computed relative to m_parseEndByteIndex, using the
       distance of each pointer from m_parseEndPtr. */
3435     currAttInfo->nameStart
3436         = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3437     currAttInfo->nameEnd
3438         = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3439     currAttInfo->valueStart = parser->m_parseEndByteIndex
3440                               - (parser->m_parseEndPtr - currAtt->valuePtr);
3441     currAttInfo->valueEnd = parser->m_parseEndByteIndex
3442                             - (parser->m_parseEndPtr - currAtt->valueEnd);
3443 #endif
3444     /* Detect duplicate attributes by their QNames. This does not work when
3445        namespace processing is turned on and different prefixes for the same
3446        namespace are used. For this case we have a check further down.
3447     */
    /* The XML_Char stored just before attId->name is used as a flag for
       the current tag: 0 = not seen, 1 = seen, 2 = seen and prefixed
       (assigned further down).  All flags are reset before a successful
       return. */
3448     if ((attId->name)[-1]) {
3449       if (enc == parser->m_encoding)
3450         parser->m_eventPtr = parser->m_atts[i].name;
3451       return XML_ERROR_DUPLICATE_ATTRIBUTE;
3452     }
3453     (attId->name)[-1] = 1;
3454     appAtts[attIndex++] = attId->name;
3455     if (! parser->m_atts[i].normalized) {
3456       enum XML_Error result;
3457       XML_Bool isCdata = XML_TRUE;
3458 
3459       /* figure out whether declared as other than CDATA */
3460       if (attId->maybeTokenized) {
3461         int j;
3462         for (j = 0; j < nDefaultAtts; j++) {
3463           if (attId == elementType->defaultAtts[j].id) {
3464             isCdata = elementType->defaultAtts[j].isCdata;
3465             break;
3466           }
3467         }
3468       }
3469 
3470       /* normalize the attribute value */
3471       result = storeAttributeValue(
3472           parser, enc, isCdata, parser->m_atts[i].valuePtr,
3473           parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3474       if (result)
3475         return result;
3476       appAtts[attIndex] = poolStart(&parser->m_tempPool);
3477       poolFinish(&parser->m_tempPool);
3478     } else {
3479       /* the value did not need normalizing */
3480       appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3481                                           parser->m_atts[i].valuePtr,
3482                                           parser->m_atts[i].valueEnd);
3483       if (appAtts[attIndex] == 0)
3484         return XML_ERROR_NO_MEMORY;
3485       poolFinish(&parser->m_tempPool);
3486     }
3487     /* handle prefixed attribute names */
3488     if (attId->prefix) {
3489       if (attId->xmlns) {
3490         /* deal with namespace declarations here */
3491         enum XML_Error result = addBinding(parser, attId->prefix, attId,
3492                                            appAtts[attIndex], bindingsPtr);
3493         if (result)
3494           return result;
        /* A namespace declaration is not kept in appAtts: stepping back
           lets the next attribute overwrite this name/value pair. */
3495         --attIndex;
3496       } else {
3497         /* deal with other prefixed names later */
3498         attIndex++;
3499         nPrefixes++;
3500         (attId->name)[-1] = 2;
3501       }
3502     } else
3503       attIndex++;
3504   }
3505 
3506   /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3507   parser->m_nSpecifiedAtts = attIndex;
  /* The ID attribute is searched by pointer identity among the specified
     names, and only if its flag shows that it was seen in this tag. */
3508   if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3509     for (i = 0; i < attIndex; i += 2)
3510       if (appAtts[i] == elementType->idAtt->name) {
3511         parser->m_idAttIndex = i;
3512         break;
3513       }
3514   } else
3515     parser->m_idAttIndex = -1;
3516 
3517   /* do attribute defaulting */
3518   for (i = 0; i < nDefaultAtts; i++) {
3519     const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* only attributes that were not specified (flag still 0) and that
       actually have a default value are added */
3520     if (! (da->id->name)[-1] && da->value) {
3521       if (da->id->prefix) {
3522         if (da->id->xmlns) {
3523           enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3524                                              da->value, bindingsPtr);
3525           if (result)
3526             return result;
3527         } else {
3528           (da->id->name)[-1] = 2;
3529           nPrefixes++;
3530           appAtts[attIndex++] = da->id->name;
3531           appAtts[attIndex++] = da->value;
3532         }
3533       } else {
3534         (da->id->name)[-1] = 1;
3535         appAtts[attIndex++] = da->id->name;
3536         appAtts[attIndex++] = da->value;
3537       }
3538     }
3539   }
  /* a null pointer marks the end of the name/value list */
3540   appAtts[attIndex] = 0;
3541 
3542   /* expand prefixed attribute names, check for duplicates,
3543      and clear flags that say whether attributes were specified */
3544   i = 0;
3545   if (nPrefixes) {
3546     int j; /* hash table index */
3547     unsigned long version = parser->m_nsAttsVersion;
3548 
3549     /* Detect and prevent invalid shift */
3550     if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3551       return XML_ERROR_NO_MEMORY;
3552     }
3553 
3554     unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3555     unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3556     /* size of hash table must be at least 2 * (# of prefixed attributes) */
3557     if ((nPrefixes << 1)
3558         >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3559       NS_ATT *temp;
3560       /* hash table size must also be a power of 2 and >= 8 */
3561       while (nPrefixes >> parser->m_nsAttsPower++)
3562         ;
3563       if (parser->m_nsAttsPower < 3)
3564         parser->m_nsAttsPower = 3;
3565 
3566       /* Detect and prevent invalid shift */
3567       if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3568         /* Restore actual size of memory in m_nsAtts */
3569         parser->m_nsAttsPower = oldNsAttsPower;
3570         return XML_ERROR_NO_MEMORY;
3571       }
3572 
3573       nsAttsSize = 1u << parser->m_nsAttsPower;
3574 
3575       /* Detect and prevent integer overflow.
3576        * The preprocessor guard addresses the "always false" warning
3577        * from -Wtype-limits on platforms where
3578        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3579 #if UINT_MAX >= SIZE_MAX
3580       if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3581         /* Restore actual size of memory in m_nsAtts */
3582         parser->m_nsAttsPower = oldNsAttsPower;
3583         return XML_ERROR_NO_MEMORY;
3584       }
3585 #endif
3586 
3587       temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3588                                nsAttsSize * sizeof(NS_ATT));
3589       if (! temp) {
3590         /* Restore actual size of memory in m_nsAtts */
3591         parser->m_nsAttsPower = oldNsAttsPower;
3592         return XML_ERROR_NO_MEMORY;
3593       }
3594       parser->m_nsAtts = temp;
3595       version = 0; /* force re-initialization of m_nsAtts hash table */
3596     }
3597     /* using a version flag saves us from initializing m_nsAtts every time */
3598     if (! version) { /* initialize version flags when version wraps around */
3599       version = INIT_ATTS_VERSION;
3600       for (j = nsAttsSize; j != 0;)
3601         parser->m_nsAtts[--j].version = version;
3602     }
    /* The probe loop below treats a slot as occupied only if its version
       equals the current one, so moving to a new version number makes
       every slot written under an earlier version count as empty. */
3603     parser->m_nsAttsVersion = --version;
3604 
3605     /* expand prefixed names and check for duplicates */
3606     for (; i < attIndex; i += 2) {
3607       const XML_Char *s = appAtts[i];
3608       if (s[-1] == 2) { /* prefixed */
3609         ATTRIBUTE_ID *id;
3610         const BINDING *b;
3611         unsigned long uriHash;
3612         struct siphash sip_state;
3613         struct sipkey sip_key;
3614 
3615         copy_salt_to_sipkey(parser, &sip_key);
3616         sip24_init(&sip_state, &sip_key);
3617 
3618         ((XML_Char *)s)[-1] = 0; /* clear flag */
3619         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3620         if (! id || ! id->prefix) {
3621           /* This code is walking through the appAtts array, dealing
3622            * with (in this case) a prefixed attribute name.  To be in
3623            * the array, the attribute must have already been bound, so
3624            * has to have passed through the hash table lookup once
3625            * already.  That implies that an entry for it already
3626            * exists, so the lookup above will return a pointer to
3627            * already allocated memory.  There is no opportunity for
3628            * the allocator to fail, so the condition above cannot be
3629            * fulfilled.
3630            *
3631            * Since it is difficult to be certain that the above
3632            * analysis is complete, we retain the test and merely
3633            * remove the code from coverage tests.
3634            */
3635           return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3636         }
3637         b = id->prefix->binding;
3638         if (! b)
3639           return XML_ERROR_UNBOUND_PREFIX;
3640 
        /* Build the expanded name in m_tempPool: first the bound URI ... */
3641         for (j = 0; j < b->uriLen; j++) {
3642           const XML_Char c = b->uri[j];
3643           if (! poolAppendChar(&parser->m_tempPool, c))
3644             return XML_ERROR_NO_MEMORY;
3645         }
3646 
3647         sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3648 
        /* ... then skip the prefix and the colon, so that s points at the
           local part of the name ... */
3649         while (*s++ != XML_T(ASCII_COLON))
3650           ;
3651 
3652         sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3653 
        /* ... and finally append the local part. */
3654         do { /* copies null terminator */
3655           if (! poolAppendChar(&parser->m_tempPool, *s))
3656             return XML_ERROR_NO_MEMORY;
3657         } while (*s++);
3658 
        /* the hash covers the URI followed by the local part */
3659         uriHash = (unsigned long)sip24_final(&sip_state);
3660 
3661         { /* Check hash table for duplicate of expanded name (uriName).
3662              Derived from code in lookup(parser, HASH_TABLE *table, ...).
3663           */
3664           unsigned char step = 0;
3665           unsigned long mask = nsAttsSize - 1;
3666           j = uriHash & mask; /* index into hash table */
3667           while (parser->m_nsAtts[j].version == version) {
3668             /* for speed we compare stored hash values first */
3669             if (uriHash == parser->m_nsAtts[j].hash) {
3670               const XML_Char *s1 = poolStart(&parser->m_tempPool);
3671               const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3672               /* s1 is null terminated, but not s2 */
3673               for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3674                 ;
3675               if (*s1 == 0)
3676                 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3677             }
            /* the probe step is computed once, on the first collision,
               and the index then moves backwards with wrap-around */
3678             if (! step)
3679               step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3680             j < step ? (j += nsAttsSize - step) : (j -= step);
3681           }
3682         }
3683 
3684         if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* the terminator appended by the loop above is replaced by the
             separator before the prefix (with its own terminator) follows */
3685           parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3686           s = b->prefix->name;
3687           do {
3688             if (! poolAppendChar(&parser->m_tempPool, *s))
3689               return XML_ERROR_NO_MEMORY;
3690           } while (*s++);
3691         }
3692 
3693         /* store expanded name in attribute list */
3694         s = poolStart(&parser->m_tempPool);
3695         poolFinish(&parser->m_tempPool);
3696         appAtts[i] = s;
3697 
3698         /* fill empty slot with new version, uriName and hash value */
3699         parser->m_nsAtts[j].version = version;
3700         parser->m_nsAtts[j].hash = uriHash;
3701         parser->m_nsAtts[j].uriName = s;
3702 
        /* once the last prefixed name is done, leave the loop; flags of
           the attributes after it are cleared by the loop further down */
3703         if (! --nPrefixes) {
3704           i += 2;
3705           break;
3706         }
3707       } else                     /* not prefixed */
3708         ((XML_Char *)s)[-1] = 0; /* clear flag */
3709     }
3710   }
3711   /* clear flags for the remaining attributes */
3712   for (; i < attIndex; i += 2)
3713     ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations were taken out of appAtts above, so their
     flags are cleared by walking the bindings list instead */
3714   for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3715     binding->attId->name[-1] = 0;
3716 
3717   if (! parser->m_ns)
3718     return XML_ERROR_NONE;
3719 
3720   /* expand the element type name */
3721   if (elementType->prefix) {
3722     binding = elementType->prefix->binding;
3723     if (! binding)
3724       return XML_ERROR_UNBOUND_PREFIX;
3725     localPart = tagNamePtr->str;
    /* skip the prefix and the colon */
3726     while (*localPart++ != XML_T(ASCII_COLON))
3727       ;
3728   } else if (dtd->defaultPrefix.binding) {
3729     binding = dtd->defaultPrefix.binding;
3730     localPart = tagNamePtr->str;
3731   } else
3732     return XML_ERROR_NONE;
3733   prefixLen = 0;
3734   if (parser->m_ns_triplets && binding->prefix->name) {
3735     for (; binding->prefix->name[prefixLen++];)
3736       ; /* prefixLen includes null terminator */
3737   }
3738   tagNamePtr->localPart = localPart;
3739   tagNamePtr->uriLen = binding->uriLen;
3740   tagNamePtr->prefix = binding->prefix->name;
3741   tagNamePtr->prefixLen = prefixLen;
3742   for (i = 0; localPart[i++];)
3743     ; /* i includes null terminator */
3744 
3745   /* Detect and prevent integer overflow */
3746   if (binding->uriLen > INT_MAX - prefixLen
3747       || i > INT_MAX - (binding->uriLen + prefixLen)) {
3748     return XML_ERROR_NO_MEMORY;
3749   }
3750 
  /* n = number of XML_Chars needed for URI + local part (+ prefix),
     terminators included as counted above */
3751   n = i + binding->uriLen + prefixLen;
3752   if (n > binding->uriAlloc) {
3753     TAG *p;
3754 
3755     /* Detect and prevent integer overflow */
3756     if (n > INT_MAX - EXPAND_SPARE) {
3757       return XML_ERROR_NO_MEMORY;
3758     }
3759     /* Detect and prevent integer overflow.
3760      * The preprocessor guard addresses the "always false" warning
3761      * from -Wtype-limits on platforms where
3762      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3763 #if UINT_MAX >= SIZE_MAX
3764     if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3765       return XML_ERROR_NO_MEMORY;
3766     }
3767 #endif
3768 
    /* A fresh, larger buffer replaces binding->uri; only the URI part is
       copied over, the name is appended below. */
3769     uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3770     if (! uri)
3771       return XML_ERROR_NO_MEMORY;
3772     binding->uriAlloc = n + EXPAND_SPARE;
3773     memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* open tags whose name.str pointed at the old buffer are redirected
       to the new one before the old buffer is freed */
3774     for (p = parser->m_tagStack; p; p = p->parent)
3775       if (p->name.str == binding->uri)
3776         p->name.str = uri;
3777     FREE(parser, binding->uri);
3778     binding->uri = uri;
3779   }
3780   /* if m_namespaceSeparator != '\0' then uri includes it already */
3781   uri = binding->uri + binding->uriLen;
3782   memcpy(uri, localPart, i * sizeof(XML_Char));
3783   /* we always have a namespace separator between localPart and prefix */
3784   if (prefixLen) {
3785     uri += i - 1;
3786     *uri = parser->m_namespaceSeparator; /* replace null terminator */
3787     memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3788   }
  /* the element name now lives in the binding's URI buffer */
3789   tagNamePtr->str = binding->uri;
3790   return XML_ERROR_NONE;
3791 }
3792 
3793 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3794 is_rfc3986_uri_char(XML_Char candidate) {
3795   // For the RFC 3986 ANBF grammar see
3796   // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3797 
3798   switch (candidate) {
3799   // From rule "ALPHA" (uppercase half)
3800   case 'A':
3801   case 'B':
3802   case 'C':
3803   case 'D':
3804   case 'E':
3805   case 'F':
3806   case 'G':
3807   case 'H':
3808   case 'I':
3809   case 'J':
3810   case 'K':
3811   case 'L':
3812   case 'M':
3813   case 'N':
3814   case 'O':
3815   case 'P':
3816   case 'Q':
3817   case 'R':
3818   case 'S':
3819   case 'T':
3820   case 'U':
3821   case 'V':
3822   case 'W':
3823   case 'X':
3824   case 'Y':
3825   case 'Z':
3826 
3827   // From rule "ALPHA" (lowercase half)
3828   case 'a':
3829   case 'b':
3830   case 'c':
3831   case 'd':
3832   case 'e':
3833   case 'f':
3834   case 'g':
3835   case 'h':
3836   case 'i':
3837   case 'j':
3838   case 'k':
3839   case 'l':
3840   case 'm':
3841   case 'n':
3842   case 'o':
3843   case 'p':
3844   case 'q':
3845   case 'r':
3846   case 's':
3847   case 't':
3848   case 'u':
3849   case 'v':
3850   case 'w':
3851   case 'x':
3852   case 'y':
3853   case 'z':
3854 
3855   // From rule "DIGIT"
3856   case '0':
3857   case '1':
3858   case '2':
3859   case '3':
3860   case '4':
3861   case '5':
3862   case '6':
3863   case '7':
3864   case '8':
3865   case '9':
3866 
3867   // From rule "pct-encoded"
3868   case '%':
3869 
3870   // From rule "unreserved"
3871   case '-':
3872   case '.':
3873   case '_':
3874   case '~':
3875 
3876   // From rule "gen-delims"
3877   case ':':
3878   case '/':
3879   case '?':
3880   case '#':
3881   case '[':
3882   case ']':
3883   case '@':
3884 
3885   // From rule "sub-delims"
3886   case '!':
3887   case '$':
3888   case '&':
3889   case '\'':
3890   case '(':
3891   case ')':
3892   case '*':
3893   case '+':
3894   case ',':
3895   case ';':
3896   case '=':
3897     return XML_TRUE;
3898 
3899   default:
3900     return XML_FALSE;
3901   }
3902 }
3903 
3904 /* addBinding() overwrites the value of prefix->binding without checking.
3905    Therefore one must keep track of the old value outside of addBinding().
3906 */
3907 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3908 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3909            const XML_Char *uri, BINDING **bindingsPtr) {
3910   // "http://www.w3.org/XML/1998/namespace"
3911   static const XML_Char xmlNamespace[]
3912       = {ASCII_h,      ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,
3913          ASCII_SLASH,  ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,
3914          ASCII_PERIOD, ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,
3915          ASCII_r,      ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,
3916          ASCII_L,      ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,
3917          ASCII_8,      ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,
3918          ASCII_e,      ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,
3919          ASCII_e,      '\0'};
3920   static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3921   // "http://www.w3.org/2000/xmlns/"
3922   static const XML_Char xmlnsNamespace[]
3923       = {ASCII_h,     ASCII_t,      ASCII_t, ASCII_p, ASCII_COLON,  ASCII_SLASH,
3924          ASCII_SLASH, ASCII_w,      ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3925          ASCII_3,     ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,      ASCII_SLASH,
3926          ASCII_2,     ASCII_0,      ASCII_0, ASCII_0, ASCII_SLASH,  ASCII_x,
3927          ASCII_m,     ASCII_l,      ASCII_n, ASCII_s, ASCII_SLASH,  '\0'};
3928   static const int xmlnsLen
3929       = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3930 
3931   XML_Bool mustBeXML = XML_FALSE;
3932   XML_Bool isXML = XML_TRUE;
3933   XML_Bool isXMLNS = XML_TRUE;
3934 
3935   BINDING *b;
3936   int len;
3937 
3938   /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3939   if (*uri == XML_T('\0') && prefix->name)
3940     return XML_ERROR_UNDECLARING_PREFIX;
3941 
3942   if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3943       && prefix->name[1] == XML_T(ASCII_m)
3944       && prefix->name[2] == XML_T(ASCII_l)) {
3945     /* Not allowed to bind xmlns */
3946     if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3947         && prefix->name[5] == XML_T('\0'))
3948       return XML_ERROR_RESERVED_PREFIX_XMLNS;
3949 
3950     if (prefix->name[3] == XML_T('\0'))
3951       mustBeXML = XML_TRUE;
3952   }
3953 
3954   for (len = 0; uri[len]; len++) {
3955     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3956       isXML = XML_FALSE;
3957 
3958     if (! mustBeXML && isXMLNS
3959         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3960       isXMLNS = XML_FALSE;
3961 
3962     // NOTE: While Expat does not validate namespace URIs against RFC 3986
3963     //       today (and is not REQUIRED to do so with regard to the XML 1.0
3964     //       namespaces specification) we have to at least make sure, that
3965     //       the application on top of Expat (that is likely splitting expanded
3966     //       element names ("qualified names") of form
3967     //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3968     //       in its element handler code) cannot be confused by an attacker
3969     //       putting additional namespace separator characters into namespace
3970     //       declarations.  That would be ambiguous and not to be expected.
3971     //
3972     //       While the HTML API docs of function XML_ParserCreateNS have been
3973     //       advising against use of a namespace separator character that can
3974     //       appear in a URI for >20 years now, some widespread applications
3975     //       are using URI characters (':' (colon) in particular) for a
3976     //       namespace separator, in practice.  To keep these applications
3977     //       functional, we only reject namespaces URIs containing the
3978     //       application-chosen namespace separator if the chosen separator
3979     //       is a non-URI character with regard to RFC 3986.
3980     if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3981         && ! is_rfc3986_uri_char(uri[len])) {
3982       return XML_ERROR_SYNTAX;
3983     }
3984   }
3985   isXML = isXML && len == xmlLen;
3986   isXMLNS = isXMLNS && len == xmlnsLen;
3987 
3988   if (mustBeXML != isXML)
3989     return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3990                      : XML_ERROR_RESERVED_NAMESPACE_URI;
3991 
3992   if (isXMLNS)
3993     return XML_ERROR_RESERVED_NAMESPACE_URI;
3994 
3995   if (parser->m_namespaceSeparator)
3996     len++;
3997   if (parser->m_freeBindingList) {
3998     b = parser->m_freeBindingList;
3999     if (len > b->uriAlloc) {
4000       /* Detect and prevent integer overflow */
4001       if (len > INT_MAX - EXPAND_SPARE) {
4002         return XML_ERROR_NO_MEMORY;
4003       }
4004 
4005       /* Detect and prevent integer overflow.
4006        * The preprocessor guard addresses the "always false" warning
4007        * from -Wtype-limits on platforms where
4008        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4009 #if UINT_MAX >= SIZE_MAX
4010       if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4011         return XML_ERROR_NO_MEMORY;
4012       }
4013 #endif
4014 
4015       XML_Char *temp = (XML_Char *)REALLOC(
4016           parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4017       if (temp == NULL)
4018         return XML_ERROR_NO_MEMORY;
4019       b->uri = temp;
4020       b->uriAlloc = len + EXPAND_SPARE;
4021     }
4022     parser->m_freeBindingList = b->nextTagBinding;
4023   } else {
4024     b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4025     if (! b)
4026       return XML_ERROR_NO_MEMORY;
4027 
4028     /* Detect and prevent integer overflow */
4029     if (len > INT_MAX - EXPAND_SPARE) {
4030       return XML_ERROR_NO_MEMORY;
4031     }
4032     /* Detect and prevent integer overflow.
4033      * The preprocessor guard addresses the "always false" warning
4034      * from -Wtype-limits on platforms where
4035      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4036 #if UINT_MAX >= SIZE_MAX
4037     if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4038       return XML_ERROR_NO_MEMORY;
4039     }
4040 #endif
4041 
4042     b->uri
4043         = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4044     if (! b->uri) {
4045       FREE(parser, b);
4046       return XML_ERROR_NO_MEMORY;
4047     }
4048     b->uriAlloc = len + EXPAND_SPARE;
4049   }
4050   b->uriLen = len;
4051   memcpy(b->uri, uri, len * sizeof(XML_Char));
4052   if (parser->m_namespaceSeparator)
4053     b->uri[len - 1] = parser->m_namespaceSeparator;
4054   b->prefix = prefix;
4055   b->attId = attId;
4056   b->prevPrefixBinding = prefix->binding;
4057   /* NULL binding when default namespace undeclared */
4058   if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4059     prefix->binding = NULL;
4060   else
4061     prefix->binding = b;
4062   b->nextTagBinding = *bindingsPtr;
4063   *bindingsPtr = b;
4064   /* if attId == NULL then we are not starting a namespace scope */
4065   if (attId && parser->m_startNamespaceDeclHandler)
4066     parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4067                                         prefix->binding ? uri : 0);
4068   return XML_ERROR_NONE;
4069 }
4070 
4071 /* The idea here is to avoid using stack for each CDATA section when
4072    the whole file is parsed with one call.
4073 */
4074 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4075 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4076                       const char **endPtr) {
4077   enum XML_Error result = doCdataSection(
4078       parser, parser->m_encoding, &start, end, endPtr,
4079       (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4080   if (result != XML_ERROR_NONE)
4081     return result;
4082   if (start) {
4083     if (parser->m_parentParser) { /* we are parsing an external entity */
4084       parser->m_processor = externalEntityContentProcessor;
4085       return externalEntityContentProcessor(parser, start, end, endPtr);
4086     } else {
4087       parser->m_processor = contentProcessor;
4088       return contentProcessor(parser, start, end, endPtr);
4089     }
4090   }
4091   return result;
4092 }
4093 
4094 /* startPtr gets set to non-null if the section is closed, and to null if
4095    the section is not yet closed.
4096 */
4097 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4098 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4099                const char *end, const char **nextPtr, XML_Bool haveMore,
4100                enum XML_Account account) {
4101   const char *s = *startPtr;
4102   const char **eventPP;
4103   const char **eventEndPP;
4104   if (enc == parser->m_encoding) {
4105     eventPP = &parser->m_eventPtr;
4106     *eventPP = s;
4107     eventEndPP = &parser->m_eventEndPtr;
4108   } else {
4109     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4110     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4111   }
4112   *eventPP = s;
4113   *startPtr = NULL;
4114 
4115   for (;;) {
4116     const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4117     int tok = XmlCdataSectionTok(enc, s, end, &next);
4118 #if XML_GE == 1
4119     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4120       accountingOnAbort(parser);
4121       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4122     }
4123 #else
4124     UNUSED_P(account);
4125 #endif
4126     *eventEndPP = next;
4127     switch (tok) {
4128     case XML_TOK_CDATA_SECT_CLOSE:
4129       if (parser->m_endCdataSectionHandler)
4130         parser->m_endCdataSectionHandler(parser->m_handlerArg);
4131       /* BEGIN disabled code */
4132       /* see comment under XML_TOK_CDATA_SECT_OPEN */
4133       else if ((0) && parser->m_characterDataHandler)
4134         parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4135                                        0);
4136       /* END disabled code */
4137       else if (parser->m_defaultHandler)
4138         reportDefault(parser, enc, s, next);
4139       *startPtr = next;
4140       *nextPtr = next;
4141       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4142         return XML_ERROR_ABORTED;
4143       else
4144         return XML_ERROR_NONE;
4145     case XML_TOK_DATA_NEWLINE:
4146       if (parser->m_characterDataHandler) {
4147         XML_Char c = 0xA;
4148         parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4149       } else if (parser->m_defaultHandler)
4150         reportDefault(parser, enc, s, next);
4151       break;
4152     case XML_TOK_DATA_CHARS: {
4153       XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4154       if (charDataHandler) {
4155         if (MUST_CONVERT(enc, s)) {
4156           for (;;) {
4157             ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4158             const enum XML_Convert_Result convert_res = XmlConvert(
4159                 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4160             *eventEndPP = next;
4161             charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4162                             (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4163             if ((convert_res == XML_CONVERT_COMPLETED)
4164                 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4165               break;
4166             *eventPP = s;
4167           }
4168         } else
4169           charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4170                           (int)((const XML_Char *)next - (const XML_Char *)s));
4171       } else if (parser->m_defaultHandler)
4172         reportDefault(parser, enc, s, next);
4173     } break;
4174     case XML_TOK_INVALID:
4175       *eventPP = next;
4176       return XML_ERROR_INVALID_TOKEN;
4177     case XML_TOK_PARTIAL_CHAR:
4178       if (haveMore) {
4179         *nextPtr = s;
4180         return XML_ERROR_NONE;
4181       }
4182       return XML_ERROR_PARTIAL_CHAR;
4183     case XML_TOK_PARTIAL:
4184     case XML_TOK_NONE:
4185       if (haveMore) {
4186         *nextPtr = s;
4187         return XML_ERROR_NONE;
4188       }
4189       return XML_ERROR_UNCLOSED_CDATA_SECTION;
4190     default:
4191       /* Every token returned by XmlCdataSectionTok() has its own
4192        * explicit case, so this default case will never be executed.
4193        * We retain it as a safety net and exclude it from the coverage
4194        * statistics.
4195        *
4196        * LCOV_EXCL_START
4197        */
4198       *eventPP = next;
4199       return XML_ERROR_UNEXPECTED_STATE;
4200       /* LCOV_EXCL_STOP */
4201     }
4202 
4203     *eventPP = s = next;
4204     switch (parser->m_parsingStatus.parsing) {
4205     case XML_SUSPENDED:
4206       *nextPtr = next;
4207       return XML_ERROR_NONE;
4208     case XML_FINISHED:
4209       return XML_ERROR_ABORTED;
4210     default:;
4211     }
4212   }
4213   /* not reached */
4214 }
4215 
4216 #ifdef XML_DTD
4217 
4218 /* The idea here is to avoid using stack for each IGNORE section when
4219    the whole file is parsed with one call.
4220 */
4221 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4222 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4223                        const char **endPtr) {
4224   enum XML_Error result
4225       = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4226                         (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4227   if (result != XML_ERROR_NONE)
4228     return result;
4229   if (start) {
4230     parser->m_processor = prologProcessor;
4231     return prologProcessor(parser, start, end, endPtr);
4232   }
4233   return result;
4234 }
4235 
4236 /* startPtr gets set to non-null is the section is closed, and to null
4237    if the section is not yet closed.
4238 */
4239 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4240 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4241                 const char *end, const char **nextPtr, XML_Bool haveMore) {
4242   const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4243   int tok;
4244   const char *s = *startPtr;
4245   const char **eventPP;
4246   const char **eventEndPP;
4247   if (enc == parser->m_encoding) {
4248     eventPP = &parser->m_eventPtr;
4249     *eventPP = s;
4250     eventEndPP = &parser->m_eventEndPtr;
4251   } else {
4252     /* It's not entirely clear, but it seems the following two lines
4253      * of code cannot be executed.  The only occasions on which 'enc'
4254      * is not 'encoding' are when this function is called
4255      * from the internal entity processing, and IGNORE sections are an
4256      * error in internal entities.
4257      *
4258      * Since it really isn't clear that this is true, we keep the code
4259      * and just remove it from our coverage tests.
4260      *
4261      * LCOV_EXCL_START
4262      */
4263     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4264     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4265     /* LCOV_EXCL_STOP */
4266   }
4267   *eventPP = s;
4268   *startPtr = NULL;
4269   tok = XmlIgnoreSectionTok(enc, s, end, &next);
4270 #  if XML_GE == 1
4271   if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4272                                 XML_ACCOUNT_DIRECT)) {
4273     accountingOnAbort(parser);
4274     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4275   }
4276 #  endif
4277   *eventEndPP = next;
4278   switch (tok) {
4279   case XML_TOK_IGNORE_SECT:
4280     if (parser->m_defaultHandler)
4281       reportDefault(parser, enc, s, next);
4282     *startPtr = next;
4283     *nextPtr = next;
4284     if (parser->m_parsingStatus.parsing == XML_FINISHED)
4285       return XML_ERROR_ABORTED;
4286     else
4287       return XML_ERROR_NONE;
4288   case XML_TOK_INVALID:
4289     *eventPP = next;
4290     return XML_ERROR_INVALID_TOKEN;
4291   case XML_TOK_PARTIAL_CHAR:
4292     if (haveMore) {
4293       *nextPtr = s;
4294       return XML_ERROR_NONE;
4295     }
4296     return XML_ERROR_PARTIAL_CHAR;
4297   case XML_TOK_PARTIAL:
4298   case XML_TOK_NONE:
4299     if (haveMore) {
4300       *nextPtr = s;
4301       return XML_ERROR_NONE;
4302     }
4303     return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4304   default:
4305     /* All of the tokens that XmlIgnoreSectionTok() returns have
4306      * explicit cases to handle them, so this default case is never
4307      * executed.  We keep it as a safety net anyway, and remove it
4308      * from our test coverage statistics.
4309      *
4310      * LCOV_EXCL_START
4311      */
4312     *eventPP = next;
4313     return XML_ERROR_UNEXPECTED_STATE;
4314     /* LCOV_EXCL_STOP */
4315   }
4316   /* not reached */
4317 }
4318 
4319 #endif /* XML_DTD */
4320 
4321 static enum XML_Error
initializeEncoding(XML_Parser parser)4322 initializeEncoding(XML_Parser parser) {
4323   const char *s;
4324 #ifdef XML_UNICODE
4325   char encodingBuf[128];
4326   /* See comments about `protocolEncodingName` in parserInit() */
4327   if (! parser->m_protocolEncodingName)
4328     s = NULL;
4329   else {
4330     int i;
4331     for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4332       if (i == sizeof(encodingBuf) - 1
4333           || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4334         encodingBuf[0] = '\0';
4335         break;
4336       }
4337       encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4338     }
4339     encodingBuf[i] = '\0';
4340     s = encodingBuf;
4341   }
4342 #else
4343   s = parser->m_protocolEncodingName;
4344 #endif
4345   if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4346           &parser->m_initEncoding, &parser->m_encoding, s))
4347     return XML_ERROR_NONE;
4348   return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4349 }
4350 
4351 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4352 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4353                const char *next) {
4354   const char *encodingName = NULL;
4355   const XML_Char *storedEncName = NULL;
4356   const ENCODING *newEncoding = NULL;
4357   const char *version = NULL;
4358   const char *versionend = NULL;
4359   const XML_Char *storedversion = NULL;
4360   int standalone = -1;
4361 
4362 #if XML_GE == 1
4363   if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4364                                 XML_ACCOUNT_DIRECT)) {
4365     accountingOnAbort(parser);
4366     return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4367   }
4368 #endif
4369 
4370   if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4371           isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4372           &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4373     if (isGeneralTextEntity)
4374       return XML_ERROR_TEXT_DECL;
4375     else
4376       return XML_ERROR_XML_DECL;
4377   }
4378   if (! isGeneralTextEntity && standalone == 1) {
4379     parser->m_dtd->standalone = XML_TRUE;
4380 #ifdef XML_DTD
4381     if (parser->m_paramEntityParsing
4382         == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4383       parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4384 #endif /* XML_DTD */
4385   }
4386   if (parser->m_xmlDeclHandler) {
4387     if (encodingName != NULL) {
4388       storedEncName = poolStoreString(
4389           &parser->m_temp2Pool, parser->m_encoding, encodingName,
4390           encodingName + XmlNameLength(parser->m_encoding, encodingName));
4391       if (! storedEncName)
4392         return XML_ERROR_NO_MEMORY;
4393       poolFinish(&parser->m_temp2Pool);
4394     }
4395     if (version) {
4396       storedversion
4397           = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4398                             versionend - parser->m_encoding->minBytesPerChar);
4399       if (! storedversion)
4400         return XML_ERROR_NO_MEMORY;
4401     }
4402     parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4403                              standalone);
4404   } else if (parser->m_defaultHandler)
4405     reportDefault(parser, parser->m_encoding, s, next);
4406   if (parser->m_protocolEncodingName == NULL) {
4407     if (newEncoding) {
4408       /* Check that the specified encoding does not conflict with what
4409        * the parser has already deduced.  Do we have the same number
4410        * of bytes in the smallest representation of a character?  If
4411        * this is UTF-16, is it the same endianness?
4412        */
4413       if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4414           || (newEncoding->minBytesPerChar == 2
4415               && newEncoding != parser->m_encoding)) {
4416         parser->m_eventPtr = encodingName;
4417         return XML_ERROR_INCORRECT_ENCODING;
4418       }
4419       parser->m_encoding = newEncoding;
4420     } else if (encodingName) {
4421       enum XML_Error result;
4422       if (! storedEncName) {
4423         storedEncName = poolStoreString(
4424             &parser->m_temp2Pool, parser->m_encoding, encodingName,
4425             encodingName + XmlNameLength(parser->m_encoding, encodingName));
4426         if (! storedEncName)
4427           return XML_ERROR_NO_MEMORY;
4428       }
4429       result = handleUnknownEncoding(parser, storedEncName);
4430       poolClear(&parser->m_temp2Pool);
4431       if (result == XML_ERROR_UNKNOWN_ENCODING)
4432         parser->m_eventPtr = encodingName;
4433       return result;
4434     }
4435   }
4436 
4437   if (storedEncName || storedversion)
4438     poolClear(&parser->m_temp2Pool);
4439 
4440   return XML_ERROR_NONE;
4441 }
4442 
4443 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4444 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4445   if (parser->m_unknownEncodingHandler) {
4446     XML_Encoding info;
4447     int i;
4448     for (i = 0; i < 256; i++)
4449       info.map[i] = -1;
4450     info.convert = NULL;
4451     info.data = NULL;
4452     info.release = NULL;
4453     if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4454                                          encodingName, &info)) {
4455       ENCODING *enc;
4456       parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4457       if (! parser->m_unknownEncodingMem) {
4458         if (info.release)
4459           info.release(info.data);
4460         return XML_ERROR_NO_MEMORY;
4461       }
4462       enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4463           parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4464       if (enc) {
4465         parser->m_unknownEncodingData = info.data;
4466         parser->m_unknownEncodingRelease = info.release;
4467         parser->m_encoding = enc;
4468         return XML_ERROR_NONE;
4469       }
4470     }
4471     if (info.release != NULL)
4472       info.release(info.data);
4473   }
4474   return XML_ERROR_UNKNOWN_ENCODING;
4475 }
4476 
4477 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4478 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4479                     const char **nextPtr) {
4480   enum XML_Error result = initializeEncoding(parser);
4481   if (result != XML_ERROR_NONE)
4482     return result;
4483   parser->m_processor = prologProcessor;
4484   return prologProcessor(parser, s, end, nextPtr);
4485 }
4486 
4487 #ifdef XML_DTD
4488 
4489 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4490 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4491                             const char **nextPtr) {
4492   enum XML_Error result = initializeEncoding(parser);
4493   if (result != XML_ERROR_NONE)
4494     return result;
4495 
4496   /* we know now that XML_Parse(Buffer) has been called,
4497      so we consider the external parameter entity read */
4498   parser->m_dtd->paramEntityRead = XML_TRUE;
4499 
4500   if (parser->m_prologState.inEntityValue) {
4501     parser->m_processor = entityValueInitProcessor;
4502     return entityValueInitProcessor(parser, s, end, nextPtr);
4503   } else {
4504     parser->m_processor = externalParEntProcessor;
4505     return externalParEntProcessor(parser, s, end, nextPtr);
4506   }
4507 }
4508 
4509 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4510 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4511                          const char **nextPtr) {
4512   int tok;
4513   const char *start = s;
4514   const char *next = start;
4515   parser->m_eventPtr = start;
4516 
4517   for (;;) {
4518     tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4519     /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4520              - storeEntityValue
4521              - processXmlDecl
4522     */
4523     parser->m_eventEndPtr = next;
4524     if (tok <= 0) {
4525       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4526         *nextPtr = s;
4527         return XML_ERROR_NONE;
4528       }
4529       switch (tok) {
4530       case XML_TOK_INVALID:
4531         return XML_ERROR_INVALID_TOKEN;
4532       case XML_TOK_PARTIAL:
4533         return XML_ERROR_UNCLOSED_TOKEN;
4534       case XML_TOK_PARTIAL_CHAR:
4535         return XML_ERROR_PARTIAL_CHAR;
4536       case XML_TOK_NONE: /* start == end */
4537       default:
4538         break;
4539       }
4540       /* found end of entity value - can store it now */
4541       return storeEntityValue(parser, parser->m_encoding, s, end,
4542                               XML_ACCOUNT_DIRECT);
4543     } else if (tok == XML_TOK_XML_DECL) {
4544       enum XML_Error result;
4545       result = processXmlDecl(parser, 0, start, next);
4546       if (result != XML_ERROR_NONE)
4547         return result;
4548       /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
4549        * that to happen, a parameter entity parsing handler must have attempted
4550        * to suspend the parser, which fails and raises an error.  The parser can
4551        * be aborted, but can't be suspended.
4552        */
4553       if (parser->m_parsingStatus.parsing == XML_FINISHED)
4554         return XML_ERROR_ABORTED;
4555       *nextPtr = next;
4556       /* stop scanning for text declaration - we found one */
4557       parser->m_processor = entityValueProcessor;
4558       return entityValueProcessor(parser, next, end, nextPtr);
4559     }
4560     /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4561        must move s and nextPtr forward to consume the BOM.
4562 
4563        If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4564        would leave the BOM in the buffer and return. On the next call to this
4565        function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4566        is not valid to have multiple BOMs.
4567     */
4568     else if (tok == XML_TOK_BOM) {
4569 #  if XML_GE == 1
4570       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4571                                     XML_ACCOUNT_DIRECT)) {
4572         accountingOnAbort(parser);
4573         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4574       }
4575 #  endif
4576 
4577       *nextPtr = next;
4578       s = next;
4579     }
4580     /* If we get this token, we have the start of what might be a
4581        normal tag, but not a declaration (i.e. it doesn't begin with
4582        "<!").  In a DTD context, that isn't legal.
4583     */
4584     else if (tok == XML_TOK_INSTANCE_START) {
4585       *nextPtr = next;
4586       return XML_ERROR_SYNTAX;
4587     }
4588     start = next;
4589     parser->m_eventPtr = start;
4590   }
4591 }
4592 
4593 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4594 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4595                         const char **nextPtr) {
4596   const char *next = s;
4597   int tok;
4598 
4599   tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4600   if (tok <= 0) {
4601     if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4602       *nextPtr = s;
4603       return XML_ERROR_NONE;
4604     }
4605     switch (tok) {
4606     case XML_TOK_INVALID:
4607       return XML_ERROR_INVALID_TOKEN;
4608     case XML_TOK_PARTIAL:
4609       return XML_ERROR_UNCLOSED_TOKEN;
4610     case XML_TOK_PARTIAL_CHAR:
4611       return XML_ERROR_PARTIAL_CHAR;
4612     case XML_TOK_NONE: /* start == end */
4613     default:
4614       break;
4615     }
4616   }
4617   /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4618      However, when parsing an external subset, doProlog will not accept a BOM
4619      as valid, and report a syntax error, so we have to skip the BOM, and
4620      account for the BOM bytes.
4621   */
4622   else if (tok == XML_TOK_BOM) {
4623     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4624                                   XML_ACCOUNT_DIRECT)) {
4625       accountingOnAbort(parser);
4626       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4627     }
4628 
4629     s = next;
4630     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4631   }
4632 
4633   parser->m_processor = prologProcessor;
4634   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4635                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4636                   XML_ACCOUNT_DIRECT);
4637 }
4638 
4639 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4640 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4641                      const char **nextPtr) {
4642   const char *start = s;
4643   const char *next = s;
4644   const ENCODING *enc = parser->m_encoding;
4645   int tok;
4646 
4647   for (;;) {
4648     tok = XmlPrologTok(enc, start, end, &next);
4649     /* Note: These bytes are accounted later in:
4650              - storeEntityValue
4651     */
4652     if (tok <= 0) {
4653       if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4654         *nextPtr = s;
4655         return XML_ERROR_NONE;
4656       }
4657       switch (tok) {
4658       case XML_TOK_INVALID:
4659         return XML_ERROR_INVALID_TOKEN;
4660       case XML_TOK_PARTIAL:
4661         return XML_ERROR_UNCLOSED_TOKEN;
4662       case XML_TOK_PARTIAL_CHAR:
4663         return XML_ERROR_PARTIAL_CHAR;
4664       case XML_TOK_NONE: /* start == end */
4665       default:
4666         break;
4667       }
4668       /* found end of entity value - can store it now */
4669       return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4670     }
4671     start = next;
4672   }
4673 }
4674 
4675 #endif /* XML_DTD */
4676 
4677 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4678 prologProcessor(XML_Parser parser, const char *s, const char *end,
4679                 const char **nextPtr) {
4680   const char *next = s;
4681   int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4682   return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4683                   (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4684                   XML_ACCOUNT_DIRECT);
4685 }
4686 
4687 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4688 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4689          int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4690          XML_Bool allowClosingDoctype, enum XML_Account account) {
4691 #ifdef XML_DTD
4692   static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4693 #endif /* XML_DTD */
4694   static const XML_Char atypeCDATA[]
4695       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4696   static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4697   static const XML_Char atypeIDREF[]
4698       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4699   static const XML_Char atypeIDREFS[]
4700       = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4701   static const XML_Char atypeENTITY[]
4702       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4703   static const XML_Char atypeENTITIES[]
4704       = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4705          ASCII_I, ASCII_E, ASCII_S, '\0'};
4706   static const XML_Char atypeNMTOKEN[]
4707       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4708   static const XML_Char atypeNMTOKENS[]
4709       = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4710          ASCII_E, ASCII_N, ASCII_S, '\0'};
4711   static const XML_Char notationPrefix[]
4712       = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
4713          ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4714   static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4715   static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4716 
4717 #ifndef XML_DTD
4718   UNUSED_P(account);
4719 #endif
4720 
4721   /* save one level of indirection */
4722   DTD *const dtd = parser->m_dtd;
4723 
4724   const char **eventPP;
4725   const char **eventEndPP;
4726   enum XML_Content_Quant quant;
4727 
4728   if (enc == parser->m_encoding) {
4729     eventPP = &parser->m_eventPtr;
4730     eventEndPP = &parser->m_eventEndPtr;
4731   } else {
4732     eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4733     eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4734   }
4735 
4736   for (;;) {
4737     int role;
4738     XML_Bool handleDefault = XML_TRUE;
4739     *eventPP = s;
4740     *eventEndPP = next;
4741     if (tok <= 0) {
4742       if (haveMore && tok != XML_TOK_INVALID) {
4743         *nextPtr = s;
4744         return XML_ERROR_NONE;
4745       }
4746       switch (tok) {
4747       case XML_TOK_INVALID:
4748         *eventPP = next;
4749         return XML_ERROR_INVALID_TOKEN;
4750       case XML_TOK_PARTIAL:
4751         return XML_ERROR_UNCLOSED_TOKEN;
4752       case XML_TOK_PARTIAL_CHAR:
4753         return XML_ERROR_PARTIAL_CHAR;
4754       case -XML_TOK_PROLOG_S:
4755         tok = -tok;
4756         break;
4757       case XML_TOK_NONE:
4758 #ifdef XML_DTD
4759         /* for internal PE NOT referenced between declarations */
4760         if (enc != parser->m_encoding
4761             && ! parser->m_openInternalEntities->betweenDecl) {
4762           *nextPtr = s;
4763           return XML_ERROR_NONE;
4764         }
4765         /* WFC: PE Between Declarations - must check that PE contains
4766            complete markup, not only for external PEs, but also for
4767            internal PEs if the reference occurs between declarations.
4768         */
4769         if (parser->m_isParamEntity || enc != parser->m_encoding) {
4770           if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4771               == XML_ROLE_ERROR)
4772             return XML_ERROR_INCOMPLETE_PE;
4773           *nextPtr = s;
4774           return XML_ERROR_NONE;
4775         }
4776 #endif /* XML_DTD */
4777         return XML_ERROR_NO_ELEMENTS;
4778       default:
4779         tok = -tok;
4780         next = end;
4781         break;
4782       }
4783     }
4784     role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4785 #if XML_GE == 1
4786     switch (role) {
4787     case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4788     case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
4789 #  ifdef XML_DTD
4790     case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4791 #  endif
4792       break;
4793     default:
4794       if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4795         accountingOnAbort(parser);
4796         return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4797       }
4798     }
4799 #endif
4800     switch (role) {
4801     case XML_ROLE_XML_DECL: {
4802       enum XML_Error result = processXmlDecl(parser, 0, s, next);
4803       if (result != XML_ERROR_NONE)
4804         return result;
4805       enc = parser->m_encoding;
4806       handleDefault = XML_FALSE;
4807     } break;
4808     case XML_ROLE_DOCTYPE_NAME:
4809       if (parser->m_startDoctypeDeclHandler) {
4810         parser->m_doctypeName
4811             = poolStoreString(&parser->m_tempPool, enc, s, next);
4812         if (! parser->m_doctypeName)
4813           return XML_ERROR_NO_MEMORY;
4814         poolFinish(&parser->m_tempPool);
4815         parser->m_doctypePubid = NULL;
4816         handleDefault = XML_FALSE;
4817       }
4818       parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4819       break;
4820     case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4821       if (parser->m_startDoctypeDeclHandler) {
4822         parser->m_startDoctypeDeclHandler(
4823             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4824             parser->m_doctypePubid, 1);
4825         parser->m_doctypeName = NULL;
4826         poolClear(&parser->m_tempPool);
4827         handleDefault = XML_FALSE;
4828       }
4829       break;
4830 #ifdef XML_DTD
4831     case XML_ROLE_TEXT_DECL: {
4832       enum XML_Error result = processXmlDecl(parser, 1, s, next);
4833       if (result != XML_ERROR_NONE)
4834         return result;
4835       enc = parser->m_encoding;
4836       handleDefault = XML_FALSE;
4837     } break;
4838 #endif /* XML_DTD */
4839     case XML_ROLE_DOCTYPE_PUBLIC_ID:
4840 #ifdef XML_DTD
4841       parser->m_useForeignDTD = XML_FALSE;
4842       parser->m_declEntity = (ENTITY *)lookup(
4843           parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4844       if (! parser->m_declEntity)
4845         return XML_ERROR_NO_MEMORY;
4846 #endif /* XML_DTD */
4847       dtd->hasParamEntityRefs = XML_TRUE;
4848       if (parser->m_startDoctypeDeclHandler) {
4849         XML_Char *pubId;
4850         if (! XmlIsPublicId(enc, s, next, eventPP))
4851           return XML_ERROR_PUBLICID;
4852         pubId = poolStoreString(&parser->m_tempPool, enc,
4853                                 s + enc->minBytesPerChar,
4854                                 next - enc->minBytesPerChar);
4855         if (! pubId)
4856           return XML_ERROR_NO_MEMORY;
4857         normalizePublicId(pubId);
4858         poolFinish(&parser->m_tempPool);
4859         parser->m_doctypePubid = pubId;
4860         handleDefault = XML_FALSE;
4861         goto alreadyChecked;
4862       }
4863       /* fall through */
4864     case XML_ROLE_ENTITY_PUBLIC_ID:
4865       if (! XmlIsPublicId(enc, s, next, eventPP))
4866         return XML_ERROR_PUBLICID;
4867     alreadyChecked:
4868       if (dtd->keepProcessing && parser->m_declEntity) {
4869         XML_Char *tem
4870             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4871                               next - enc->minBytesPerChar);
4872         if (! tem)
4873           return XML_ERROR_NO_MEMORY;
4874         normalizePublicId(tem);
4875         parser->m_declEntity->publicId = tem;
4876         poolFinish(&dtd->pool);
4877         /* Don't suppress the default handler if we fell through from
4878          * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4879          */
4880         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4881           handleDefault = XML_FALSE;
4882       }
4883       break;
4884     case XML_ROLE_DOCTYPE_CLOSE:
4885       if (allowClosingDoctype != XML_TRUE) {
4886         /* Must not close doctype from within expanded parameter entities */
4887         return XML_ERROR_INVALID_TOKEN;
4888       }
4889 
4890       if (parser->m_doctypeName) {
4891         parser->m_startDoctypeDeclHandler(
4892             parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4893             parser->m_doctypePubid, 0);
4894         poolClear(&parser->m_tempPool);
4895         handleDefault = XML_FALSE;
4896       }
4897       /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4898          XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4899          was not set, indicating an external subset
4900       */
4901 #ifdef XML_DTD
4902       if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4903         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4904         dtd->hasParamEntityRefs = XML_TRUE;
4905         if (parser->m_paramEntityParsing
4906             && parser->m_externalEntityRefHandler) {
4907           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4908                                             externalSubsetName, sizeof(ENTITY));
4909           if (! entity) {
4910             /* The external subset name "#" will have already been
4911              * inserted into the hash table at the start of the
4912              * external entity parsing, so no allocation will happen
4913              * and lookup() cannot fail.
4914              */
4915             return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4916           }
4917           if (parser->m_useForeignDTD)
4918             entity->base = parser->m_curBase;
4919           dtd->paramEntityRead = XML_FALSE;
4920           if (! parser->m_externalEntityRefHandler(
4921                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4922                   entity->systemId, entity->publicId))
4923             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4924           if (dtd->paramEntityRead) {
4925             if (! dtd->standalone && parser->m_notStandaloneHandler
4926                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4927               return XML_ERROR_NOT_STANDALONE;
4928           }
4929           /* if we didn't read the foreign DTD then this means that there
4930              is no external subset and we must reset dtd->hasParamEntityRefs
4931           */
4932           else if (! parser->m_doctypeSysid)
4933             dtd->hasParamEntityRefs = hadParamEntityRefs;
4934           /* end of DTD - no need to update dtd->keepProcessing */
4935         }
4936         parser->m_useForeignDTD = XML_FALSE;
4937       }
4938 #endif /* XML_DTD */
4939       if (parser->m_endDoctypeDeclHandler) {
4940         parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4941         handleDefault = XML_FALSE;
4942       }
4943       break;
4944     case XML_ROLE_INSTANCE_START:
4945 #ifdef XML_DTD
4946       /* if there is no DOCTYPE declaration then now is the
4947          last chance to read the foreign DTD
4948       */
4949       if (parser->m_useForeignDTD) {
4950         XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4951         dtd->hasParamEntityRefs = XML_TRUE;
4952         if (parser->m_paramEntityParsing
4953             && parser->m_externalEntityRefHandler) {
4954           ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4955                                             externalSubsetName, sizeof(ENTITY));
4956           if (! entity)
4957             return XML_ERROR_NO_MEMORY;
4958           entity->base = parser->m_curBase;
4959           dtd->paramEntityRead = XML_FALSE;
4960           if (! parser->m_externalEntityRefHandler(
4961                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
4962                   entity->systemId, entity->publicId))
4963             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4964           if (dtd->paramEntityRead) {
4965             if (! dtd->standalone && parser->m_notStandaloneHandler
4966                 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4967               return XML_ERROR_NOT_STANDALONE;
4968           }
4969           /* if we didn't read the foreign DTD then this means that there
4970              is no external subset and we must reset dtd->hasParamEntityRefs
4971           */
4972           else
4973             dtd->hasParamEntityRefs = hadParamEntityRefs;
4974           /* end of DTD - no need to update dtd->keepProcessing */
4975         }
4976       }
4977 #endif /* XML_DTD */
4978       parser->m_processor = contentProcessor;
4979       return contentProcessor(parser, s, end, nextPtr);
4980     case XML_ROLE_ATTLIST_ELEMENT_NAME:
4981       parser->m_declElementType = getElementType(parser, enc, s, next);
4982       if (! parser->m_declElementType)
4983         return XML_ERROR_NO_MEMORY;
4984       goto checkAttListDeclHandler;
4985     case XML_ROLE_ATTRIBUTE_NAME:
4986       parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4987       if (! parser->m_declAttributeId)
4988         return XML_ERROR_NO_MEMORY;
4989       parser->m_declAttributeIsCdata = XML_FALSE;
4990       parser->m_declAttributeType = NULL;
4991       parser->m_declAttributeIsId = XML_FALSE;
4992       goto checkAttListDeclHandler;
4993     case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4994       parser->m_declAttributeIsCdata = XML_TRUE;
4995       parser->m_declAttributeType = atypeCDATA;
4996       goto checkAttListDeclHandler;
4997     case XML_ROLE_ATTRIBUTE_TYPE_ID:
4998       parser->m_declAttributeIsId = XML_TRUE;
4999       parser->m_declAttributeType = atypeID;
5000       goto checkAttListDeclHandler;
5001     case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5002       parser->m_declAttributeType = atypeIDREF;
5003       goto checkAttListDeclHandler;
5004     case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5005       parser->m_declAttributeType = atypeIDREFS;
5006       goto checkAttListDeclHandler;
5007     case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5008       parser->m_declAttributeType = atypeENTITY;
5009       goto checkAttListDeclHandler;
5010     case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5011       parser->m_declAttributeType = atypeENTITIES;
5012       goto checkAttListDeclHandler;
5013     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5014       parser->m_declAttributeType = atypeNMTOKEN;
5015       goto checkAttListDeclHandler;
5016     case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5017       parser->m_declAttributeType = atypeNMTOKENS;
5018     checkAttListDeclHandler:
5019       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5020         handleDefault = XML_FALSE;
5021       break;
5022     case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5023     case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5024       if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5025         const XML_Char *prefix;
5026         if (parser->m_declAttributeType) {
5027           prefix = enumValueSep;
5028         } else {
5029           prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5030                                                               : enumValueStart);
5031         }
5032         if (! poolAppendString(&parser->m_tempPool, prefix))
5033           return XML_ERROR_NO_MEMORY;
5034         if (! poolAppend(&parser->m_tempPool, enc, s, next))
5035           return XML_ERROR_NO_MEMORY;
5036         parser->m_declAttributeType = parser->m_tempPool.start;
5037         handleDefault = XML_FALSE;
5038       }
5039       break;
5040     case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5041     case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5042       if (dtd->keepProcessing) {
5043         if (! defineAttribute(parser->m_declElementType,
5044                               parser->m_declAttributeId,
5045                               parser->m_declAttributeIsCdata,
5046                               parser->m_declAttributeIsId, 0, parser))
5047           return XML_ERROR_NO_MEMORY;
5048         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5049           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5050               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5051                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5052             /* Enumerated or Notation type */
5053             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5054                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5055               return XML_ERROR_NO_MEMORY;
5056             parser->m_declAttributeType = parser->m_tempPool.start;
5057             poolFinish(&parser->m_tempPool);
5058           }
5059           *eventEndPP = s;
5060           parser->m_attlistDeclHandler(
5061               parser->m_handlerArg, parser->m_declElementType->name,
5062               parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5063               role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5064           handleDefault = XML_FALSE;
5065         }
5066       }
5067       poolClear(&parser->m_tempPool);
5068       break;
5069     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5070     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5071       if (dtd->keepProcessing) {
5072         const XML_Char *attVal;
5073         enum XML_Error result = storeAttributeValue(
5074             parser, enc, parser->m_declAttributeIsCdata,
5075             s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5076             XML_ACCOUNT_NONE);
5077         if (result)
5078           return result;
5079         attVal = poolStart(&dtd->pool);
5080         poolFinish(&dtd->pool);
5081         /* ID attributes aren't allowed to have a default */
5082         if (! defineAttribute(
5083                 parser->m_declElementType, parser->m_declAttributeId,
5084                 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5085           return XML_ERROR_NO_MEMORY;
5086         if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5087           if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5088               || (*parser->m_declAttributeType == XML_T(ASCII_N)
5089                   && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5090             /* Enumerated or Notation type */
5091             if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5092                 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5093               return XML_ERROR_NO_MEMORY;
5094             parser->m_declAttributeType = parser->m_tempPool.start;
5095             poolFinish(&parser->m_tempPool);
5096           }
5097           *eventEndPP = s;
5098           parser->m_attlistDeclHandler(
5099               parser->m_handlerArg, parser->m_declElementType->name,
5100               parser->m_declAttributeId->name, parser->m_declAttributeType,
5101               attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5102           poolClear(&parser->m_tempPool);
5103           handleDefault = XML_FALSE;
5104         }
5105       }
5106       break;
5107     case XML_ROLE_ENTITY_VALUE:
5108       if (dtd->keepProcessing) {
5109 #if XML_GE == 1
5110         // This will store the given replacement text in
5111         // parser->m_declEntity->textPtr.
5112         enum XML_Error result
5113             = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5114                                next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5115         if (parser->m_declEntity) {
5116           parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5117           parser->m_declEntity->textLen
5118               = (int)(poolLength(&dtd->entityValuePool));
5119           poolFinish(&dtd->entityValuePool);
5120           if (parser->m_entityDeclHandler) {
5121             *eventEndPP = s;
5122             parser->m_entityDeclHandler(
5123                 parser->m_handlerArg, parser->m_declEntity->name,
5124                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5125                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5126             handleDefault = XML_FALSE;
5127           }
5128         } else
5129           poolDiscard(&dtd->entityValuePool);
5130         if (result != XML_ERROR_NONE)
5131           return result;
5132 #else
5133         // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
5134         // to end up as "&entity123;" in the handler.
5135         if (parser->m_declEntity != NULL) {
5136           const enum XML_Error result
5137               = storeSelfEntityValue(parser, parser->m_declEntity);
5138           if (result != XML_ERROR_NONE)
5139             return result;
5140 
5141           if (parser->m_entityDeclHandler) {
5142             *eventEndPP = s;
5143             parser->m_entityDeclHandler(
5144                 parser->m_handlerArg, parser->m_declEntity->name,
5145                 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5146                 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5147             handleDefault = XML_FALSE;
5148           }
5149         }
5150 #endif
5151       }
5152       break;
5153     case XML_ROLE_DOCTYPE_SYSTEM_ID:
5154 #ifdef XML_DTD
5155       parser->m_useForeignDTD = XML_FALSE;
5156 #endif /* XML_DTD */
5157       dtd->hasParamEntityRefs = XML_TRUE;
5158       if (parser->m_startDoctypeDeclHandler) {
5159         parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5160                                                  s + enc->minBytesPerChar,
5161                                                  next - enc->minBytesPerChar);
5162         if (parser->m_doctypeSysid == NULL)
5163           return XML_ERROR_NO_MEMORY;
5164         poolFinish(&parser->m_tempPool);
5165         handleDefault = XML_FALSE;
5166       }
5167 #ifdef XML_DTD
5168       else
5169         /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5170            for the case where no parser->m_startDoctypeDeclHandler is set */
5171         parser->m_doctypeSysid = externalSubsetName;
5172 #endif /* XML_DTD */
5173       if (! dtd->standalone
5174 #ifdef XML_DTD
5175           && ! parser->m_paramEntityParsing
5176 #endif /* XML_DTD */
5177           && parser->m_notStandaloneHandler
5178           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5179         return XML_ERROR_NOT_STANDALONE;
5180 #ifndef XML_DTD
5181       break;
5182 #else  /* XML_DTD */
5183       if (! parser->m_declEntity) {
5184         parser->m_declEntity = (ENTITY *)lookup(
5185             parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5186         if (! parser->m_declEntity)
5187           return XML_ERROR_NO_MEMORY;
5188         parser->m_declEntity->publicId = NULL;
5189       }
5190 #endif /* XML_DTD */
5191       /* fall through */
5192     case XML_ROLE_ENTITY_SYSTEM_ID:
5193       if (dtd->keepProcessing && parser->m_declEntity) {
5194         parser->m_declEntity->systemId
5195             = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5196                               next - enc->minBytesPerChar);
5197         if (! parser->m_declEntity->systemId)
5198           return XML_ERROR_NO_MEMORY;
5199         parser->m_declEntity->base = parser->m_curBase;
5200         poolFinish(&dtd->pool);
5201         /* Don't suppress the default handler if we fell through from
5202          * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5203          */
5204         if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5205           handleDefault = XML_FALSE;
5206       }
5207       break;
5208     case XML_ROLE_ENTITY_COMPLETE:
5209 #if XML_GE == 0
5210       // This will store "&amp;entity123;" in entity->textPtr
5211       // to end up as "&entity123;" in the handler.
5212       if (parser->m_declEntity != NULL) {
5213         const enum XML_Error result
5214             = storeSelfEntityValue(parser, parser->m_declEntity);
5215         if (result != XML_ERROR_NONE)
5216           return result;
5217       }
5218 #endif
5219       if (dtd->keepProcessing && parser->m_declEntity
5220           && parser->m_entityDeclHandler) {
5221         *eventEndPP = s;
5222         parser->m_entityDeclHandler(
5223             parser->m_handlerArg, parser->m_declEntity->name,
5224             parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5225             parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5226         handleDefault = XML_FALSE;
5227       }
5228       break;
5229     case XML_ROLE_ENTITY_NOTATION_NAME:
5230       if (dtd->keepProcessing && parser->m_declEntity) {
5231         parser->m_declEntity->notation
5232             = poolStoreString(&dtd->pool, enc, s, next);
5233         if (! parser->m_declEntity->notation)
5234           return XML_ERROR_NO_MEMORY;
5235         poolFinish(&dtd->pool);
5236         if (parser->m_unparsedEntityDeclHandler) {
5237           *eventEndPP = s;
5238           parser->m_unparsedEntityDeclHandler(
5239               parser->m_handlerArg, parser->m_declEntity->name,
5240               parser->m_declEntity->base, parser->m_declEntity->systemId,
5241               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5242           handleDefault = XML_FALSE;
5243         } else if (parser->m_entityDeclHandler) {
5244           *eventEndPP = s;
5245           parser->m_entityDeclHandler(
5246               parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5247               parser->m_declEntity->base, parser->m_declEntity->systemId,
5248               parser->m_declEntity->publicId, parser->m_declEntity->notation);
5249           handleDefault = XML_FALSE;
5250         }
5251       }
5252       break;
5253     case XML_ROLE_GENERAL_ENTITY_NAME: {
5254       if (XmlPredefinedEntityName(enc, s, next)) {
5255         parser->m_declEntity = NULL;
5256         break;
5257       }
5258       if (dtd->keepProcessing) {
5259         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5260         if (! name)
5261           return XML_ERROR_NO_MEMORY;
5262         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5263                                                 name, sizeof(ENTITY));
5264         if (! parser->m_declEntity)
5265           return XML_ERROR_NO_MEMORY;
5266         if (parser->m_declEntity->name != name) {
5267           poolDiscard(&dtd->pool);
5268           parser->m_declEntity = NULL;
5269         } else {
5270           poolFinish(&dtd->pool);
5271           parser->m_declEntity->publicId = NULL;
5272           parser->m_declEntity->is_param = XML_FALSE;
5273           /* if we have a parent parser or are reading an internal parameter
5274              entity, then the entity declaration is not considered "internal"
5275           */
5276           parser->m_declEntity->is_internal
5277               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5278           if (parser->m_entityDeclHandler)
5279             handleDefault = XML_FALSE;
5280         }
5281       } else {
5282         poolDiscard(&dtd->pool);
5283         parser->m_declEntity = NULL;
5284       }
5285     } break;
5286     case XML_ROLE_PARAM_ENTITY_NAME:
5287 #ifdef XML_DTD
5288       if (dtd->keepProcessing) {
5289         const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5290         if (! name)
5291           return XML_ERROR_NO_MEMORY;
5292         parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5293                                                 name, sizeof(ENTITY));
5294         if (! parser->m_declEntity)
5295           return XML_ERROR_NO_MEMORY;
5296         if (parser->m_declEntity->name != name) {
5297           poolDiscard(&dtd->pool);
5298           parser->m_declEntity = NULL;
5299         } else {
5300           poolFinish(&dtd->pool);
5301           parser->m_declEntity->publicId = NULL;
5302           parser->m_declEntity->is_param = XML_TRUE;
5303           /* if we have a parent parser or are reading an internal parameter
5304              entity, then the entity declaration is not considered "internal"
5305           */
5306           parser->m_declEntity->is_internal
5307               = ! (parser->m_parentParser || parser->m_openInternalEntities);
5308           if (parser->m_entityDeclHandler)
5309             handleDefault = XML_FALSE;
5310         }
5311       } else {
5312         poolDiscard(&dtd->pool);
5313         parser->m_declEntity = NULL;
5314       }
5315 #else  /* not XML_DTD */
5316       parser->m_declEntity = NULL;
5317 #endif /* XML_DTD */
5318       break;
5319     case XML_ROLE_NOTATION_NAME:
5320       parser->m_declNotationPublicId = NULL;
5321       parser->m_declNotationName = NULL;
5322       if (parser->m_notationDeclHandler) {
5323         parser->m_declNotationName
5324             = poolStoreString(&parser->m_tempPool, enc, s, next);
5325         if (! parser->m_declNotationName)
5326           return XML_ERROR_NO_MEMORY;
5327         poolFinish(&parser->m_tempPool);
5328         handleDefault = XML_FALSE;
5329       }
5330       break;
5331     case XML_ROLE_NOTATION_PUBLIC_ID:
5332       if (! XmlIsPublicId(enc, s, next, eventPP))
5333         return XML_ERROR_PUBLICID;
5334       if (parser
5335               ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5336         XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5337                                         s + enc->minBytesPerChar,
5338                                         next - enc->minBytesPerChar);
5339         if (! tem)
5340           return XML_ERROR_NO_MEMORY;
5341         normalizePublicId(tem);
5342         parser->m_declNotationPublicId = tem;
5343         poolFinish(&parser->m_tempPool);
5344         handleDefault = XML_FALSE;
5345       }
5346       break;
5347     case XML_ROLE_NOTATION_SYSTEM_ID:
5348       if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5349         const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5350                                                    s + enc->minBytesPerChar,
5351                                                    next - enc->minBytesPerChar);
5352         if (! systemId)
5353           return XML_ERROR_NO_MEMORY;
5354         *eventEndPP = s;
5355         parser->m_notationDeclHandler(
5356             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5357             systemId, parser->m_declNotationPublicId);
5358         handleDefault = XML_FALSE;
5359       }
5360       poolClear(&parser->m_tempPool);
5361       break;
5362     case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5363       if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5364         *eventEndPP = s;
5365         parser->m_notationDeclHandler(
5366             parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5367             0, parser->m_declNotationPublicId);
5368         handleDefault = XML_FALSE;
5369       }
5370       poolClear(&parser->m_tempPool);
5371       break;
5372     case XML_ROLE_ERROR:
5373       switch (tok) {
5374       case XML_TOK_PARAM_ENTITY_REF:
5375         /* PE references in internal subset are
5376            not allowed within declarations. */
5377         return XML_ERROR_PARAM_ENTITY_REF;
5378       case XML_TOK_XML_DECL:
5379         return XML_ERROR_MISPLACED_XML_PI;
5380       default:
5381         return XML_ERROR_SYNTAX;
5382       }
5383 #ifdef XML_DTD
5384     case XML_ROLE_IGNORE_SECT: {
5385       enum XML_Error result;
5386       if (parser->m_defaultHandler)
5387         reportDefault(parser, enc, s, next);
5388       handleDefault = XML_FALSE;
5389       result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5390       if (result != XML_ERROR_NONE)
5391         return result;
5392       else if (! next) {
5393         parser->m_processor = ignoreSectionProcessor;
5394         return result;
5395       }
5396     } break;
5397 #endif /* XML_DTD */
5398     case XML_ROLE_GROUP_OPEN:
5399       if (parser->m_prologState.level >= parser->m_groupSize) {
5400         if (parser->m_groupSize) {
5401           {
5402             /* Detect and prevent integer overflow */
5403             if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5404               return XML_ERROR_NO_MEMORY;
5405             }
5406 
5407             char *const new_connector = (char *)REALLOC(
5408                 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5409             if (new_connector == NULL) {
5410               parser->m_groupSize /= 2;
5411               return XML_ERROR_NO_MEMORY;
5412             }
5413             parser->m_groupConnector = new_connector;
5414           }
5415 
5416           if (dtd->scaffIndex) {
5417             /* Detect and prevent integer overflow.
5418              * The preprocessor guard addresses the "always false" warning
5419              * from -Wtype-limits on platforms where
5420              * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5421 #if UINT_MAX >= SIZE_MAX
5422             if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5423               return XML_ERROR_NO_MEMORY;
5424             }
5425 #endif
5426 
5427             int *const new_scaff_index = (int *)REALLOC(
5428                 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5429             if (new_scaff_index == NULL)
5430               return XML_ERROR_NO_MEMORY;
5431             dtd->scaffIndex = new_scaff_index;
5432           }
5433         } else {
5434           parser->m_groupConnector
5435               = (char *)MALLOC(parser, parser->m_groupSize = 32);
5436           if (! parser->m_groupConnector) {
5437             parser->m_groupSize = 0;
5438             return XML_ERROR_NO_MEMORY;
5439           }
5440         }
5441       }
5442       parser->m_groupConnector[parser->m_prologState.level] = 0;
5443       if (dtd->in_eldecl) {
5444         int myindex = nextScaffoldPart(parser);
5445         if (myindex < 0)
5446           return XML_ERROR_NO_MEMORY;
5447         assert(dtd->scaffIndex != NULL);
5448         dtd->scaffIndex[dtd->scaffLevel] = myindex;
5449         dtd->scaffLevel++;
5450         dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5451         if (parser->m_elementDeclHandler)
5452           handleDefault = XML_FALSE;
5453       }
5454       break;
5455     case XML_ROLE_GROUP_SEQUENCE:
5456       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5457         return XML_ERROR_SYNTAX;
5458       parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5459       if (dtd->in_eldecl && parser->m_elementDeclHandler)
5460         handleDefault = XML_FALSE;
5461       break;
5462     case XML_ROLE_GROUP_CHOICE:
5463       if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5464         return XML_ERROR_SYNTAX;
5465       if (dtd->in_eldecl
5466           && ! parser->m_groupConnector[parser->m_prologState.level]
5467           && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5468               != XML_CTYPE_MIXED)) {
5469         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5470             = XML_CTYPE_CHOICE;
5471         if (parser->m_elementDeclHandler)
5472           handleDefault = XML_FALSE;
5473       }
5474       parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5475       break;
5476     case XML_ROLE_PARAM_ENTITY_REF:
5477 #ifdef XML_DTD
5478     case XML_ROLE_INNER_PARAM_ENTITY_REF:
5479       dtd->hasParamEntityRefs = XML_TRUE;
5480       if (! parser->m_paramEntityParsing)
5481         dtd->keepProcessing = dtd->standalone;
5482       else {
5483         const XML_Char *name;
5484         ENTITY *entity;
5485         name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5486                                next - enc->minBytesPerChar);
5487         if (! name)
5488           return XML_ERROR_NO_MEMORY;
5489         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5490         poolDiscard(&dtd->pool);
5491         /* first, determine if a check for an existing declaration is needed;
5492            if yes, check that the entity exists, and that it is internal,
5493            otherwise call the skipped entity handler
5494         */
5495         if (parser->m_prologState.documentEntity
5496             && (dtd->standalone ? ! parser->m_openInternalEntities
5497                                 : ! dtd->hasParamEntityRefs)) {
5498           if (! entity)
5499             return XML_ERROR_UNDEFINED_ENTITY;
5500           else if (! entity->is_internal) {
5501             /* It's hard to exhaustively search the code to be sure,
5502              * but there doesn't seem to be a way of executing the
5503              * following line.  There are two cases:
5504              *
5505              * If 'standalone' is false, the DTD must have no
5506              * parameter entities or we wouldn't have passed the outer
5507              * 'if' statement.  That means the only entity in the hash
5508              * table is the external subset name "#" which cannot be
5509              * given as a parameter entity name in XML syntax, so the
5510              * lookup must have returned NULL and we don't even reach
5511              * the test for an internal entity.
5512              *
5513              * If 'standalone' is true, it does not seem to be
5514              * possible to create entities taking this code path that
5515              * are not internal entities, so fail the test above.
5516              *
5517              * Because this analysis is very uncertain, the code is
5518              * being left in place and merely removed from the
5519              * coverage test statistics.
5520              */
5521             return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5522           }
5523         } else if (! entity) {
5524           dtd->keepProcessing = dtd->standalone;
5525           /* cannot report skipped entities in declarations */
5526           if ((role == XML_ROLE_PARAM_ENTITY_REF)
5527               && parser->m_skippedEntityHandler) {
5528             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5529             handleDefault = XML_FALSE;
5530           }
5531           break;
5532         }
5533         if (entity->open)
5534           return XML_ERROR_RECURSIVE_ENTITY_REF;
5535         if (entity->textPtr) {
5536           enum XML_Error result;
5537           XML_Bool betweenDecl
5538               = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5539           result = processInternalEntity(parser, entity, betweenDecl);
5540           if (result != XML_ERROR_NONE)
5541             return result;
5542           handleDefault = XML_FALSE;
5543           break;
5544         }
5545         if (parser->m_externalEntityRefHandler) {
5546           dtd->paramEntityRead = XML_FALSE;
5547           entity->open = XML_TRUE;
5548           entityTrackingOnOpen(parser, entity, __LINE__);
5549           if (! parser->m_externalEntityRefHandler(
5550                   parser->m_externalEntityRefHandlerArg, 0, entity->base,
5551                   entity->systemId, entity->publicId)) {
5552             entityTrackingOnClose(parser, entity, __LINE__);
5553             entity->open = XML_FALSE;
5554             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5555           }
5556           entityTrackingOnClose(parser, entity, __LINE__);
5557           entity->open = XML_FALSE;
5558           handleDefault = XML_FALSE;
5559           if (! dtd->paramEntityRead) {
5560             dtd->keepProcessing = dtd->standalone;
5561             break;
5562           }
5563         } else {
5564           dtd->keepProcessing = dtd->standalone;
5565           break;
5566         }
5567       }
5568 #endif /* XML_DTD */
5569       if (! dtd->standalone && parser->m_notStandaloneHandler
5570           && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5571         return XML_ERROR_NOT_STANDALONE;
5572       break;
5573 
5574       /* Element declaration stuff */
5575 
5576     case XML_ROLE_ELEMENT_NAME:
5577       if (parser->m_elementDeclHandler) {
5578         parser->m_declElementType = getElementType(parser, enc, s, next);
5579         if (! parser->m_declElementType)
5580           return XML_ERROR_NO_MEMORY;
5581         dtd->scaffLevel = 0;
5582         dtd->scaffCount = 0;
5583         dtd->in_eldecl = XML_TRUE;
5584         handleDefault = XML_FALSE;
5585       }
5586       break;
5587 
5588     case XML_ROLE_CONTENT_ANY:
5589     case XML_ROLE_CONTENT_EMPTY:
5590       if (dtd->in_eldecl) {
5591         if (parser->m_elementDeclHandler) {
5592           XML_Content *content
5593               = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5594           if (! content)
5595             return XML_ERROR_NO_MEMORY;
5596           content->quant = XML_CQUANT_NONE;
5597           content->name = NULL;
5598           content->numchildren = 0;
5599           content->children = NULL;
5600           content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5601                                                           : XML_CTYPE_EMPTY);
5602           *eventEndPP = s;
5603           parser->m_elementDeclHandler(
5604               parser->m_handlerArg, parser->m_declElementType->name, content);
5605           handleDefault = XML_FALSE;
5606         }
5607         dtd->in_eldecl = XML_FALSE;
5608       }
5609       break;
5610 
5611     case XML_ROLE_CONTENT_PCDATA:
5612       if (dtd->in_eldecl) {
5613         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5614             = XML_CTYPE_MIXED;
5615         if (parser->m_elementDeclHandler)
5616           handleDefault = XML_FALSE;
5617       }
5618       break;
5619 
5620     case XML_ROLE_CONTENT_ELEMENT:
5621       quant = XML_CQUANT_NONE;
5622       goto elementContent;
5623     case XML_ROLE_CONTENT_ELEMENT_OPT:
5624       quant = XML_CQUANT_OPT;
5625       goto elementContent;
5626     case XML_ROLE_CONTENT_ELEMENT_REP:
5627       quant = XML_CQUANT_REP;
5628       goto elementContent;
5629     case XML_ROLE_CONTENT_ELEMENT_PLUS:
5630       quant = XML_CQUANT_PLUS;
5631     elementContent:
5632       if (dtd->in_eldecl) {
5633         ELEMENT_TYPE *el;
5634         const XML_Char *name;
5635         size_t nameLen;
5636         const char *nxt
5637             = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5638         int myindex = nextScaffoldPart(parser);
5639         if (myindex < 0)
5640           return XML_ERROR_NO_MEMORY;
5641         dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5642         dtd->scaffold[myindex].quant = quant;
5643         el = getElementType(parser, enc, s, nxt);
5644         if (! el)
5645           return XML_ERROR_NO_MEMORY;
5646         name = el->name;
5647         dtd->scaffold[myindex].name = name;
5648         nameLen = 0;
5649         for (; name[nameLen++];)
5650           ;
5651 
5652         /* Detect and prevent integer overflow */
5653         if (nameLen > UINT_MAX - dtd->contentStringLen) {
5654           return XML_ERROR_NO_MEMORY;
5655         }
5656 
5657         dtd->contentStringLen += (unsigned)nameLen;
5658         if (parser->m_elementDeclHandler)
5659           handleDefault = XML_FALSE;
5660       }
5661       break;
5662 
5663     case XML_ROLE_GROUP_CLOSE:
5664       quant = XML_CQUANT_NONE;
5665       goto closeGroup;
5666     case XML_ROLE_GROUP_CLOSE_OPT:
5667       quant = XML_CQUANT_OPT;
5668       goto closeGroup;
5669     case XML_ROLE_GROUP_CLOSE_REP:
5670       quant = XML_CQUANT_REP;
5671       goto closeGroup;
5672     case XML_ROLE_GROUP_CLOSE_PLUS:
5673       quant = XML_CQUANT_PLUS;
5674     closeGroup:
5675       if (dtd->in_eldecl) {
5676         if (parser->m_elementDeclHandler)
5677           handleDefault = XML_FALSE;
5678         dtd->scaffLevel--;
5679         dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5680         if (dtd->scaffLevel == 0) {
5681           if (! handleDefault) {
5682             XML_Content *model = build_model(parser);
5683             if (! model)
5684               return XML_ERROR_NO_MEMORY;
5685             *eventEndPP = s;
5686             parser->m_elementDeclHandler(
5687                 parser->m_handlerArg, parser->m_declElementType->name, model);
5688           }
5689           dtd->in_eldecl = XML_FALSE;
5690           dtd->contentStringLen = 0;
5691         }
5692       }
5693       break;
5694       /* End element declaration stuff */
5695 
5696     case XML_ROLE_PI:
5697       if (! reportProcessingInstruction(parser, enc, s, next))
5698         return XML_ERROR_NO_MEMORY;
5699       handleDefault = XML_FALSE;
5700       break;
5701     case XML_ROLE_COMMENT:
5702       if (! reportComment(parser, enc, s, next))
5703         return XML_ERROR_NO_MEMORY;
5704       handleDefault = XML_FALSE;
5705       break;
5706     case XML_ROLE_NONE:
5707       switch (tok) {
5708       case XML_TOK_BOM:
5709         handleDefault = XML_FALSE;
5710         break;
5711       }
5712       break;
5713     case XML_ROLE_DOCTYPE_NONE:
5714       if (parser->m_startDoctypeDeclHandler)
5715         handleDefault = XML_FALSE;
5716       break;
5717     case XML_ROLE_ENTITY_NONE:
5718       if (dtd->keepProcessing && parser->m_entityDeclHandler)
5719         handleDefault = XML_FALSE;
5720       break;
5721     case XML_ROLE_NOTATION_NONE:
5722       if (parser->m_notationDeclHandler)
5723         handleDefault = XML_FALSE;
5724       break;
5725     case XML_ROLE_ATTLIST_NONE:
5726       if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5727         handleDefault = XML_FALSE;
5728       break;
5729     case XML_ROLE_ELEMENT_NONE:
5730       if (parser->m_elementDeclHandler)
5731         handleDefault = XML_FALSE;
5732       break;
5733     } /* end of big switch */
5734 
5735     if (handleDefault && parser->m_defaultHandler)
5736       reportDefault(parser, enc, s, next);
5737 
5738     switch (parser->m_parsingStatus.parsing) {
5739     case XML_SUSPENDED:
5740       *nextPtr = next;
5741       return XML_ERROR_NONE;
5742     case XML_FINISHED:
5743       return XML_ERROR_ABORTED;
5744     default:
5745       s = next;
5746       tok = XmlPrologTok(enc, s, end, &next);
5747     }
5748   }
5749   /* not reached */
5750 }
5751 
5752 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5753 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5754                 const char **nextPtr) {
5755   parser->m_processor = epilogProcessor;
5756   parser->m_eventPtr = s;
5757   for (;;) {
5758     const char *next = NULL;
5759     int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5760 #if XML_GE == 1
5761     if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5762                                   XML_ACCOUNT_DIRECT)) {
5763       accountingOnAbort(parser);
5764       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5765     }
5766 #endif
5767     parser->m_eventEndPtr = next;
5768     switch (tok) {
5769     /* report partial linebreak - it might be the last token */
5770     case -XML_TOK_PROLOG_S:
5771       if (parser->m_defaultHandler) {
5772         reportDefault(parser, parser->m_encoding, s, next);
5773         if (parser->m_parsingStatus.parsing == XML_FINISHED)
5774           return XML_ERROR_ABORTED;
5775       }
5776       *nextPtr = next;
5777       return XML_ERROR_NONE;
5778     case XML_TOK_NONE:
5779       *nextPtr = s;
5780       return XML_ERROR_NONE;
5781     case XML_TOK_PROLOG_S:
5782       if (parser->m_defaultHandler)
5783         reportDefault(parser, parser->m_encoding, s, next);
5784       break;
5785     case XML_TOK_PI:
5786       if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5787         return XML_ERROR_NO_MEMORY;
5788       break;
5789     case XML_TOK_COMMENT:
5790       if (! reportComment(parser, parser->m_encoding, s, next))
5791         return XML_ERROR_NO_MEMORY;
5792       break;
5793     case XML_TOK_INVALID:
5794       parser->m_eventPtr = next;
5795       return XML_ERROR_INVALID_TOKEN;
5796     case XML_TOK_PARTIAL:
5797       if (! parser->m_parsingStatus.finalBuffer) {
5798         *nextPtr = s;
5799         return XML_ERROR_NONE;
5800       }
5801       return XML_ERROR_UNCLOSED_TOKEN;
5802     case XML_TOK_PARTIAL_CHAR:
5803       if (! parser->m_parsingStatus.finalBuffer) {
5804         *nextPtr = s;
5805         return XML_ERROR_NONE;
5806       }
5807       return XML_ERROR_PARTIAL_CHAR;
5808     default:
5809       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5810     }
5811     parser->m_eventPtr = s = next;
5812     switch (parser->m_parsingStatus.parsing) {
5813     case XML_SUSPENDED:
5814       *nextPtr = next;
5815       return XML_ERROR_NONE;
5816     case XML_FINISHED:
5817       return XML_ERROR_ABORTED;
5818     default:;
5819     }
5820   }
5821 }
5822 
5823 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5824 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5825   const char *textStart, *textEnd;
5826   const char *next;
5827   enum XML_Error result;
5828   OPEN_INTERNAL_ENTITY *openEntity;
5829 
5830   if (parser->m_freeInternalEntities) {
5831     openEntity = parser->m_freeInternalEntities;
5832     parser->m_freeInternalEntities = openEntity->next;
5833   } else {
5834     openEntity
5835         = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5836     if (! openEntity)
5837       return XML_ERROR_NO_MEMORY;
5838   }
5839   entity->open = XML_TRUE;
5840 #if XML_GE == 1
5841   entityTrackingOnOpen(parser, entity, __LINE__);
5842 #endif
5843   entity->processed = 0;
5844   openEntity->next = parser->m_openInternalEntities;
5845   parser->m_openInternalEntities = openEntity;
5846   openEntity->entity = entity;
5847   openEntity->startTagLevel = parser->m_tagLevel;
5848   openEntity->betweenDecl = betweenDecl;
5849   openEntity->internalEventPtr = NULL;
5850   openEntity->internalEventEndPtr = NULL;
5851   textStart = (const char *)entity->textPtr;
5852   textEnd = (const char *)(entity->textPtr + entity->textLen);
5853   /* Set a safe default value in case 'next' does not get set */
5854   next = textStart;
5855 
5856   if (entity->is_param) {
5857     int tok
5858         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5859     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5860                       tok, next, &next, XML_FALSE, XML_FALSE,
5861                       XML_ACCOUNT_ENTITY_EXPANSION);
5862   } else {
5863     result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5864                        textStart, textEnd, &next, XML_FALSE,
5865                        XML_ACCOUNT_ENTITY_EXPANSION);
5866   }
5867 
5868   if (result == XML_ERROR_NONE) {
5869     if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5870       entity->processed = (int)(next - textStart);
5871       parser->m_processor = internalEntityProcessor;
5872     } else if (parser->m_openInternalEntities->entity == entity) {
5873 #if XML_GE == 1
5874       entityTrackingOnClose(parser, entity, __LINE__);
5875 #endif /* XML_GE == 1 */
5876       entity->open = XML_FALSE;
5877       parser->m_openInternalEntities = openEntity->next;
5878       /* put openEntity back in list of free instances */
5879       openEntity->next = parser->m_freeInternalEntities;
5880       parser->m_freeInternalEntities = openEntity;
5881     }
5882   }
5883   return result;
5884 }
5885 
5886 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5887 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5888                         const char **nextPtr) {
5889   ENTITY *entity;
5890   const char *textStart, *textEnd;
5891   const char *next;
5892   enum XML_Error result;
5893   OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5894   if (! openEntity)
5895     return XML_ERROR_UNEXPECTED_STATE;
5896 
5897   entity = openEntity->entity;
5898   textStart = ((const char *)entity->textPtr) + entity->processed;
5899   textEnd = (const char *)(entity->textPtr + entity->textLen);
5900   /* Set a safe default value in case 'next' does not get set */
5901   next = textStart;
5902 
5903   if (entity->is_param) {
5904     int tok
5905         = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5906     result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5907                       tok, next, &next, XML_FALSE, XML_TRUE,
5908                       XML_ACCOUNT_ENTITY_EXPANSION);
5909   } else {
5910     result = doContent(parser, openEntity->startTagLevel,
5911                        parser->m_internalEncoding, textStart, textEnd, &next,
5912                        XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5913   }
5914 
5915   if (result != XML_ERROR_NONE)
5916     return result;
5917 
5918   if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5919     entity->processed = (int)(next - (const char *)entity->textPtr);
5920     return result;
5921   }
5922 
5923 #if XML_GE == 1
5924   entityTrackingOnClose(parser, entity, __LINE__);
5925 #endif
5926   entity->open = XML_FALSE;
5927   parser->m_openInternalEntities = openEntity->next;
5928   /* put openEntity back in list of free instances */
5929   openEntity->next = parser->m_freeInternalEntities;
5930   parser->m_freeInternalEntities = openEntity;
5931 
5932   // If there are more open entities we want to stop right here and have the
5933   // upcoming call to XML_ResumeParser continue with entity content, or it would
5934   // be ignored altogether.
5935   if (parser->m_openInternalEntities != NULL
5936       && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5937     return XML_ERROR_NONE;
5938   }
5939 
5940   if (entity->is_param) {
5941     int tok;
5942     parser->m_processor = prologProcessor;
5943     tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5944     return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5945                     (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5946                     XML_ACCOUNT_DIRECT);
5947   } else {
5948     parser->m_processor = contentProcessor;
5949     /* see externalEntityContentProcessor vs contentProcessor */
5950     result = doContent(parser, parser->m_parentParser ? 1 : 0,
5951                        parser->m_encoding, s, end, nextPtr,
5952                        (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5953                        XML_ACCOUNT_DIRECT);
5954     if (result == XML_ERROR_NONE) {
5955       if (! storeRawNames(parser))
5956         return XML_ERROR_NO_MEMORY;
5957     }
5958     return result;
5959   }
5960 }
5961 
5962 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5963 errorProcessor(XML_Parser parser, const char *s, const char *end,
5964                const char **nextPtr) {
5965   UNUSED_P(s);
5966   UNUSED_P(end);
5967   UNUSED_P(nextPtr);
5968   return parser->m_errorCode;
5969 }
5970 
5971 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5972 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5973                     const char *ptr, const char *end, STRING_POOL *pool,
5974                     enum XML_Account account) {
5975   enum XML_Error result
5976       = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5977   if (result)
5978     return result;
5979   if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5980     poolChop(pool);
5981   if (! poolAppendChar(pool, XML_T('\0')))
5982     return XML_ERROR_NO_MEMORY;
5983   return XML_ERROR_NONE;
5984 }
5985 
5986 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5987 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5988                      const char *ptr, const char *end, STRING_POOL *pool,
5989                      enum XML_Account account) {
5990   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5991 #ifndef XML_DTD
5992   UNUSED_P(account);
5993 #endif
5994 
5995   for (;;) {
5996     const char *next
5997         = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5998     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5999 #if XML_GE == 1
6000     if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6001       accountingOnAbort(parser);
6002       return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6003     }
6004 #endif
6005     switch (tok) {
6006     case XML_TOK_NONE:
6007       return XML_ERROR_NONE;
6008     case XML_TOK_INVALID:
6009       if (enc == parser->m_encoding)
6010         parser->m_eventPtr = next;
6011       return XML_ERROR_INVALID_TOKEN;
6012     case XML_TOK_PARTIAL:
6013       if (enc == parser->m_encoding)
6014         parser->m_eventPtr = ptr;
6015       return XML_ERROR_INVALID_TOKEN;
6016     case XML_TOK_CHAR_REF: {
6017       XML_Char buf[XML_ENCODE_MAX];
6018       int i;
6019       int n = XmlCharRefNumber(enc, ptr);
6020       if (n < 0) {
6021         if (enc == parser->m_encoding)
6022           parser->m_eventPtr = ptr;
6023         return XML_ERROR_BAD_CHAR_REF;
6024       }
6025       if (! isCdata && n == 0x20 /* space */
6026           && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6027         break;
6028       n = XmlEncode(n, (ICHAR *)buf);
6029       /* The XmlEncode() functions can never return 0 here.  That
6030        * error return happens if the code point passed in is either
6031        * negative or greater than or equal to 0x110000.  The
6032        * XmlCharRefNumber() functions will all return a number
6033        * strictly less than 0x110000 or a negative value if an error
6034        * occurred.  The negative value is intercepted above, so
6035        * XmlEncode() is never passed a value it might return an
6036        * error for.
6037        */
6038       for (i = 0; i < n; i++) {
6039         if (! poolAppendChar(pool, buf[i]))
6040           return XML_ERROR_NO_MEMORY;
6041       }
6042     } break;
6043     case XML_TOK_DATA_CHARS:
6044       if (! poolAppend(pool, enc, ptr, next))
6045         return XML_ERROR_NO_MEMORY;
6046       break;
6047     case XML_TOK_TRAILING_CR:
6048       next = ptr + enc->minBytesPerChar;
6049       /* fall through */
6050     case XML_TOK_ATTRIBUTE_VALUE_S:
6051     case XML_TOK_DATA_NEWLINE:
6052       if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6053         break;
6054       if (! poolAppendChar(pool, 0x20))
6055         return XML_ERROR_NO_MEMORY;
6056       break;
6057     case XML_TOK_ENTITY_REF: {
6058       const XML_Char *name;
6059       ENTITY *entity;
6060       char checkEntityDecl;
6061       XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6062           enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6063       if (ch) {
6064 #if XML_GE == 1
6065         /* NOTE: We are replacing 4-6 characters original input for 1 character
6066          *       so there is no amplification and hence recording without
6067          *       protection. */
6068         accountingDiffTolerated(parser, tok, (char *)&ch,
6069                                 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6070                                 XML_ACCOUNT_ENTITY_EXPANSION);
6071 #endif /* XML_GE == 1 */
6072         if (! poolAppendChar(pool, ch))
6073           return XML_ERROR_NO_MEMORY;
6074         break;
6075       }
6076       name = poolStoreString(&parser->m_temp2Pool, enc,
6077                              ptr + enc->minBytesPerChar,
6078                              next - enc->minBytesPerChar);
6079       if (! name)
6080         return XML_ERROR_NO_MEMORY;
6081       entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6082       poolDiscard(&parser->m_temp2Pool);
6083       /* First, determine if a check for an existing declaration is needed;
6084          if yes, check that the entity exists, and that it is internal.
6085       */
6086       if (pool == &dtd->pool) /* are we called from prolog? */
6087         checkEntityDecl =
6088 #ifdef XML_DTD
6089             parser->m_prologState.documentEntity &&
6090 #endif /* XML_DTD */
6091             (dtd->standalone ? ! parser->m_openInternalEntities
6092                              : ! dtd->hasParamEntityRefs);
6093       else /* if (pool == &parser->m_tempPool): we are called from content */
6094         checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6095       if (checkEntityDecl) {
6096         if (! entity)
6097           return XML_ERROR_UNDEFINED_ENTITY;
6098         else if (! entity->is_internal)
6099           return XML_ERROR_ENTITY_DECLARED_IN_PE;
6100       } else if (! entity) {
6101         /* Cannot report skipped entity here - see comments on
6102            parser->m_skippedEntityHandler.
6103         if (parser->m_skippedEntityHandler)
6104           parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6105         */
6106         /* Cannot call the default handler because this would be
6107            out of sync with the call to the startElementHandler.
6108         if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6109           reportDefault(parser, enc, ptr, next);
6110         */
6111         break;
6112       }
6113       if (entity->open) {
6114         if (enc == parser->m_encoding) {
6115           /* It does not appear that this line can be executed.
6116            *
6117            * The "if (entity->open)" check catches recursive entity
6118            * definitions.  In order to be called with an open
6119            * entity, it must have gone through this code before and
6120            * been through the recursive call to
6121            * appendAttributeValue() some lines below.  That call
6122            * sets the local encoding ("enc") to the parser's
6123            * internal encoding (internal_utf8 or internal_utf16),
6124            * which can never be the same as the principal encoding.
6125            * It doesn't appear there is another code path that gets
6126            * here with entity->open being TRUE.
6127            *
6128            * Since it is not certain that this logic is watertight,
6129            * we keep the line and merely exclude it from coverage
6130            * tests.
6131            */
6132           parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6133         }
6134         return XML_ERROR_RECURSIVE_ENTITY_REF;
6135       }
6136       if (entity->notation) {
6137         if (enc == parser->m_encoding)
6138           parser->m_eventPtr = ptr;
6139         return XML_ERROR_BINARY_ENTITY_REF;
6140       }
6141       if (! entity->textPtr) {
6142         if (enc == parser->m_encoding)
6143           parser->m_eventPtr = ptr;
6144         return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6145       } else {
6146         enum XML_Error result;
6147         const XML_Char *textEnd = entity->textPtr + entity->textLen;
6148         entity->open = XML_TRUE;
6149 #if XML_GE == 1
6150         entityTrackingOnOpen(parser, entity, __LINE__);
6151 #endif
6152         result = appendAttributeValue(parser, parser->m_internalEncoding,
6153                                       isCdata, (const char *)entity->textPtr,
6154                                       (const char *)textEnd, pool,
6155                                       XML_ACCOUNT_ENTITY_EXPANSION);
6156 #if XML_GE == 1
6157         entityTrackingOnClose(parser, entity, __LINE__);
6158 #endif
6159         entity->open = XML_FALSE;
6160         if (result)
6161           return result;
6162       }
6163     } break;
6164     default:
6165       /* The only token returned by XmlAttributeValueTok() that does
6166        * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6167        * Getting that would require an entity name to contain an
6168        * incomplete XML character (e.g. \xE2\x82); however previous
6169        * tokenisers will have already recognised and rejected such
6170        * names before XmlAttributeValueTok() gets a look-in.  This
6171        * default case should be retained as a safety net, but the code
6172        * excluded from coverage tests.
6173        *
6174        * LCOV_EXCL_START
6175        */
6176       if (enc == parser->m_encoding)
6177         parser->m_eventPtr = ptr;
6178       return XML_ERROR_UNEXPECTED_STATE;
6179       /* LCOV_EXCL_STOP */
6180     }
6181     ptr = next;
6182   }
6183   /* not reached */
6184 }
6185 
6186 #if XML_GE == 1
6187 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6188 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6189                  const char *entityTextPtr, const char *entityTextEnd,
6190                  enum XML_Account account) {
6191   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6192   STRING_POOL *pool = &(dtd->entityValuePool);
6193   enum XML_Error result = XML_ERROR_NONE;
6194 #  ifdef XML_DTD
6195   int oldInEntityValue = parser->m_prologState.inEntityValue;
6196   parser->m_prologState.inEntityValue = 1;
6197 #  else
6198   UNUSED_P(account);
6199 #  endif /* XML_DTD */
6200   /* never return Null for the value argument in EntityDeclHandler,
6201      since this would indicate an external entity; therefore we
6202      have to make sure that entityValuePool.start is not null */
6203   if (! pool->blocks) {
6204     if (! poolGrow(pool))
6205       return XML_ERROR_NO_MEMORY;
6206   }
6207 
6208   for (;;) {
6209     const char *next
6210         = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6211     int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6212 
6213     if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6214                                   account)) {
6215       accountingOnAbort(parser);
6216       result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6217       goto endEntityValue;
6218     }
6219 
6220     switch (tok) {
6221     case XML_TOK_PARAM_ENTITY_REF:
6222 #  ifdef XML_DTD
6223       if (parser->m_isParamEntity || enc != parser->m_encoding) {
6224         const XML_Char *name;
6225         ENTITY *entity;
6226         name = poolStoreString(&parser->m_tempPool, enc,
6227                                entityTextPtr + enc->minBytesPerChar,
6228                                next - enc->minBytesPerChar);
6229         if (! name) {
6230           result = XML_ERROR_NO_MEMORY;
6231           goto endEntityValue;
6232         }
6233         entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6234         poolDiscard(&parser->m_tempPool);
6235         if (! entity) {
6236           /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6237           /* cannot report skipped entity here - see comments on
6238              parser->m_skippedEntityHandler
6239           if (parser->m_skippedEntityHandler)
6240             parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6241           */
6242           dtd->keepProcessing = dtd->standalone;
6243           goto endEntityValue;
6244         }
6245         if (entity->open || (entity == parser->m_declEntity)) {
6246           if (enc == parser->m_encoding)
6247             parser->m_eventPtr = entityTextPtr;
6248           result = XML_ERROR_RECURSIVE_ENTITY_REF;
6249           goto endEntityValue;
6250         }
6251         if (entity->systemId) {
6252           if (parser->m_externalEntityRefHandler) {
6253             dtd->paramEntityRead = XML_FALSE;
6254             entity->open = XML_TRUE;
6255             entityTrackingOnOpen(parser, entity, __LINE__);
6256             if (! parser->m_externalEntityRefHandler(
6257                     parser->m_externalEntityRefHandlerArg, 0, entity->base,
6258                     entity->systemId, entity->publicId)) {
6259               entityTrackingOnClose(parser, entity, __LINE__);
6260               entity->open = XML_FALSE;
6261               result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6262               goto endEntityValue;
6263             }
6264             entityTrackingOnClose(parser, entity, __LINE__);
6265             entity->open = XML_FALSE;
6266             if (! dtd->paramEntityRead)
6267               dtd->keepProcessing = dtd->standalone;
6268           } else
6269             dtd->keepProcessing = dtd->standalone;
6270         } else {
6271           entity->open = XML_TRUE;
6272           entityTrackingOnOpen(parser, entity, __LINE__);
6273           result = storeEntityValue(
6274               parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6275               (const char *)(entity->textPtr + entity->textLen),
6276               XML_ACCOUNT_ENTITY_EXPANSION);
6277           entityTrackingOnClose(parser, entity, __LINE__);
6278           entity->open = XML_FALSE;
6279           if (result)
6280             goto endEntityValue;
6281         }
6282         break;
6283       }
6284 #  endif /* XML_DTD */
6285       /* In the internal subset, PE references are not legal
6286          within markup declarations, e.g entity values in this case. */
6287       parser->m_eventPtr = entityTextPtr;
6288       result = XML_ERROR_PARAM_ENTITY_REF;
6289       goto endEntityValue;
6290     case XML_TOK_NONE:
6291       result = XML_ERROR_NONE;
6292       goto endEntityValue;
6293     case XML_TOK_ENTITY_REF:
6294     case XML_TOK_DATA_CHARS:
6295       if (! poolAppend(pool, enc, entityTextPtr, next)) {
6296         result = XML_ERROR_NO_MEMORY;
6297         goto endEntityValue;
6298       }
6299       break;
6300     case XML_TOK_TRAILING_CR:
6301       next = entityTextPtr + enc->minBytesPerChar;
6302       /* fall through */
6303     case XML_TOK_DATA_NEWLINE:
6304       if (pool->end == pool->ptr && ! poolGrow(pool)) {
6305         result = XML_ERROR_NO_MEMORY;
6306         goto endEntityValue;
6307       }
6308       *(pool->ptr)++ = 0xA;
6309       break;
6310     case XML_TOK_CHAR_REF: {
6311       XML_Char buf[XML_ENCODE_MAX];
6312       int i;
6313       int n = XmlCharRefNumber(enc, entityTextPtr);
6314       if (n < 0) {
6315         if (enc == parser->m_encoding)
6316           parser->m_eventPtr = entityTextPtr;
6317         result = XML_ERROR_BAD_CHAR_REF;
6318         goto endEntityValue;
6319       }
6320       n = XmlEncode(n, (ICHAR *)buf);
6321       /* The XmlEncode() functions can never return 0 here.  That
6322        * error return happens if the code point passed in is either
6323        * negative or greater than or equal to 0x110000.  The
6324        * XmlCharRefNumber() functions will all return a number
6325        * strictly less than 0x110000 or a negative value if an error
6326        * occurred.  The negative value is intercepted above, so
6327        * XmlEncode() is never passed a value it might return an
6328        * error for.
6329        */
6330       for (i = 0; i < n; i++) {
6331         if (pool->end == pool->ptr && ! poolGrow(pool)) {
6332           result = XML_ERROR_NO_MEMORY;
6333           goto endEntityValue;
6334         }
6335         *(pool->ptr)++ = buf[i];
6336       }
6337     } break;
6338     case XML_TOK_PARTIAL:
6339       if (enc == parser->m_encoding)
6340         parser->m_eventPtr = entityTextPtr;
6341       result = XML_ERROR_INVALID_TOKEN;
6342       goto endEntityValue;
6343     case XML_TOK_INVALID:
6344       if (enc == parser->m_encoding)
6345         parser->m_eventPtr = next;
6346       result = XML_ERROR_INVALID_TOKEN;
6347       goto endEntityValue;
6348     default:
6349       /* This default case should be unnecessary -- all the tokens
6350        * that XmlEntityValueTok() can return have their own explicit
6351        * cases -- but should be retained for safety.  We do however
6352        * exclude it from the coverage statistics.
6353        *
6354        * LCOV_EXCL_START
6355        */
6356       if (enc == parser->m_encoding)
6357         parser->m_eventPtr = entityTextPtr;
6358       result = XML_ERROR_UNEXPECTED_STATE;
6359       goto endEntityValue;
6360       /* LCOV_EXCL_STOP */
6361     }
6362     entityTextPtr = next;
6363   }
6364 endEntityValue:
6365 #  ifdef XML_DTD
6366   parser->m_prologState.inEntityValue = oldInEntityValue;
6367 #  endif /* XML_DTD */
6368   return result;
6369 }
6370 
6371 #else /* XML_GE == 0 */
6372 
6373 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6374 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6375   // This will store "&amp;entity123;" in entity->textPtr
6376   // to end up as "&entity123;" in the handler.
6377   const char *const entity_start = "&amp;";
6378   const char *const entity_end = ";";
6379 
6380   STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6381   if (! poolAppendString(pool, entity_start)
6382       || ! poolAppendString(pool, entity->name)
6383       || ! poolAppendString(pool, entity_end)) {
6384     poolDiscard(pool);
6385     return XML_ERROR_NO_MEMORY;
6386   }
6387 
6388   entity->textPtr = poolStart(pool);
6389   entity->textLen = (int)(poolLength(pool));
6390   poolFinish(pool);
6391 
6392   return XML_ERROR_NONE;
6393 }
6394 
6395 #endif /* XML_GE == 0 */
6396 
6397 static void FASTCALL
normalizeLines(XML_Char * s)6398 normalizeLines(XML_Char *s) {
6399   XML_Char *p;
6400   for (;; s++) {
6401     if (*s == XML_T('\0'))
6402       return;
6403     if (*s == 0xD)
6404       break;
6405   }
6406   p = s;
6407   do {
6408     if (*s == 0xD) {
6409       *p++ = 0xA;
6410       if (*++s == 0xA)
6411         s++;
6412     } else
6413       *p++ = *s++;
6414   } while (*s);
6415   *p = XML_T('\0');
6416 }
6417 
6418 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6419 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6420                             const char *start, const char *end) {
6421   const XML_Char *target;
6422   XML_Char *data;
6423   const char *tem;
6424   if (! parser->m_processingInstructionHandler) {
6425     if (parser->m_defaultHandler)
6426       reportDefault(parser, enc, start, end);
6427     return 1;
6428   }
6429   start += enc->minBytesPerChar * 2;
6430   tem = start + XmlNameLength(enc, start);
6431   target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6432   if (! target)
6433     return 0;
6434   poolFinish(&parser->m_tempPool);
6435   data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6436                          end - enc->minBytesPerChar * 2);
6437   if (! data)
6438     return 0;
6439   normalizeLines(data);
6440   parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6441   poolClear(&parser->m_tempPool);
6442   return 1;
6443 }
6444 
6445 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6446 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6447               const char *end) {
6448   XML_Char *data;
6449   if (! parser->m_commentHandler) {
6450     if (parser->m_defaultHandler)
6451       reportDefault(parser, enc, start, end);
6452     return 1;
6453   }
6454   data = poolStoreString(&parser->m_tempPool, enc,
6455                          start + enc->minBytesPerChar * 4,
6456                          end - enc->minBytesPerChar * 3);
6457   if (! data)
6458     return 0;
6459   normalizeLines(data);
6460   parser->m_commentHandler(parser->m_handlerArg, data);
6461   poolClear(&parser->m_tempPool);
6462   return 1;
6463 }
6464 
6465 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6466 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6467               const char *end) {
6468   if (MUST_CONVERT(enc, s)) {
6469     enum XML_Convert_Result convert_res;
6470     const char **eventPP;
6471     const char **eventEndPP;
6472     if (enc == parser->m_encoding) {
6473       eventPP = &parser->m_eventPtr;
6474       eventEndPP = &parser->m_eventEndPtr;
6475     } else {
6476       /* To get here, two things must be true; the parser must be
6477        * using a character encoding that is not the same as the
6478        * encoding passed in, and the encoding passed in must need
6479        * conversion to the internal format (UTF-8 unless XML_UNICODE
6480        * is defined).  The only occasions on which the encoding passed
6481        * in is not the same as the parser's encoding are when it is
6482        * the internal encoding (e.g. a previously defined parameter
6483        * entity, already converted to internal format).  This by
6484        * definition doesn't need conversion, so the whole branch never
6485        * gets executed.
6486        *
6487        * For safety's sake we don't delete these lines and merely
6488        * exclude them from coverage statistics.
6489        *
6490        * LCOV_EXCL_START
6491        */
6492       eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6493       eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6494       /* LCOV_EXCL_STOP */
6495     }
6496     do {
6497       ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6498       convert_res
6499           = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6500       *eventEndPP = s;
6501       parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6502                                (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6503       *eventPP = s;
6504     } while ((convert_res != XML_CONVERT_COMPLETED)
6505              && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6506   } else
6507     parser->m_defaultHandler(
6508         parser->m_handlerArg, (const XML_Char *)s,
6509         (int)((const XML_Char *)end - (const XML_Char *)s));
6510 }
6511 
6512 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6513 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6514                 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6515   DEFAULT_ATTRIBUTE *att;
6516   if (value || isId) {
6517     /* The handling of default attributes gets messed up if we have
6518        a default which duplicates a non-default. */
6519     int i;
6520     for (i = 0; i < type->nDefaultAtts; i++)
6521       if (attId == type->defaultAtts[i].id)
6522         return 1;
6523     if (isId && ! type->idAtt && ! attId->xmlns)
6524       type->idAtt = attId;
6525   }
6526   if (type->nDefaultAtts == type->allocDefaultAtts) {
6527     if (type->allocDefaultAtts == 0) {
6528       type->allocDefaultAtts = 8;
6529       type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6530           parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6531       if (! type->defaultAtts) {
6532         type->allocDefaultAtts = 0;
6533         return 0;
6534       }
6535     } else {
6536       DEFAULT_ATTRIBUTE *temp;
6537 
6538       /* Detect and prevent integer overflow */
6539       if (type->allocDefaultAtts > INT_MAX / 2) {
6540         return 0;
6541       }
6542 
6543       int count = type->allocDefaultAtts * 2;
6544 
6545       /* Detect and prevent integer overflow.
6546        * The preprocessor guard addresses the "always false" warning
6547        * from -Wtype-limits on platforms where
6548        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6549 #if UINT_MAX >= SIZE_MAX
6550       if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6551         return 0;
6552       }
6553 #endif
6554 
6555       temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6556                                           (count * sizeof(DEFAULT_ATTRIBUTE)));
6557       if (temp == NULL)
6558         return 0;
6559       type->allocDefaultAtts = count;
6560       type->defaultAtts = temp;
6561     }
6562   }
6563   att = type->defaultAtts + type->nDefaultAtts;
6564   att->id = attId;
6565   att->value = value;
6566   att->isCdata = isCdata;
6567   if (! isCdata)
6568     attId->maybeTokenized = XML_TRUE;
6569   type->nDefaultAtts += 1;
6570   return 1;
6571 }
6572 
6573 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6574 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6575   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6576   const XML_Char *name;
6577   for (name = elementType->name; *name; name++) {
6578     if (*name == XML_T(ASCII_COLON)) {
6579       PREFIX *prefix;
6580       const XML_Char *s;
6581       for (s = elementType->name; s != name; s++) {
6582         if (! poolAppendChar(&dtd->pool, *s))
6583           return 0;
6584       }
6585       if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6586         return 0;
6587       prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6588                                 sizeof(PREFIX));
6589       if (! prefix)
6590         return 0;
6591       if (prefix->name == poolStart(&dtd->pool))
6592         poolFinish(&dtd->pool);
6593       else
6594         poolDiscard(&dtd->pool);
6595       elementType->prefix = prefix;
6596       break;
6597     }
6598   }
6599   return 1;
6600 }
6601 
6602 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6603 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6604                const char *end) {
6605   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6606   ATTRIBUTE_ID *id;
6607   const XML_Char *name;
6608   if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6609     return NULL;
6610   name = poolStoreString(&dtd->pool, enc, start, end);
6611   if (! name)
6612     return NULL;
6613   /* skip quotation mark - its storage will be reused (like in name[-1]) */
6614   ++name;
6615   id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6616                               sizeof(ATTRIBUTE_ID));
6617   if (! id)
6618     return NULL;
6619   if (id->name != name)
6620     poolDiscard(&dtd->pool);
6621   else {
6622     poolFinish(&dtd->pool);
6623     if (! parser->m_ns)
6624       ;
6625     else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6626              && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6627              && name[4] == XML_T(ASCII_s)
6628              && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6629       if (name[5] == XML_T('\0'))
6630         id->prefix = &dtd->defaultPrefix;
6631       else
6632         id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6633                                       sizeof(PREFIX));
6634       id->xmlns = XML_TRUE;
6635     } else {
6636       int i;
6637       for (i = 0; name[i]; i++) {
6638         /* attributes without prefix are *not* in the default namespace */
6639         if (name[i] == XML_T(ASCII_COLON)) {
6640           int j;
6641           for (j = 0; j < i; j++) {
6642             if (! poolAppendChar(&dtd->pool, name[j]))
6643               return NULL;
6644           }
6645           if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6646             return NULL;
6647           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6648                                         poolStart(&dtd->pool), sizeof(PREFIX));
6649           if (! id->prefix)
6650             return NULL;
6651           if (id->prefix->name == poolStart(&dtd->pool))
6652             poolFinish(&dtd->pool);
6653           else
6654             poolDiscard(&dtd->pool);
6655           break;
6656         }
6657       }
6658     }
6659   }
6660   return id;
6661 }
6662 
6663 #define CONTEXT_SEP XML_T(ASCII_FF)
6664 
6665 static const XML_Char *
getContext(XML_Parser parser)6666 getContext(XML_Parser parser) {
6667   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6668   HASH_TABLE_ITER iter;
6669   XML_Bool needSep = XML_FALSE;
6670 
6671   if (dtd->defaultPrefix.binding) {
6672     int i;
6673     int len;
6674     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6675       return NULL;
6676     len = dtd->defaultPrefix.binding->uriLen;
6677     if (parser->m_namespaceSeparator)
6678       len--;
6679     for (i = 0; i < len; i++) {
6680       if (! poolAppendChar(&parser->m_tempPool,
6681                            dtd->defaultPrefix.binding->uri[i])) {
6682         /* Because of memory caching, I don't believe this line can be
6683          * executed.
6684          *
6685          * This is part of a loop copying the default prefix binding
6686          * URI into the parser's temporary string pool.  Previously,
6687          * that URI was copied into the same string pool, with a
6688          * terminating NUL character, as part of setContext().  When
6689          * the pool was cleared, that leaves a block definitely big
6690          * enough to hold the URI on the free block list of the pool.
6691          * The URI copy in getContext() therefore cannot run out of
6692          * memory.
6693          *
6694          * If the pool is used between the setContext() and
6695          * getContext() calls, the worst it can do is leave a bigger
6696          * block on the front of the free list.  Given that this is
6697          * all somewhat inobvious and program logic can be changed, we
6698          * don't delete the line but we do exclude it from the test
6699          * coverage statistics.
6700          */
6701         return NULL; /* LCOV_EXCL_LINE */
6702       }
6703     }
6704     needSep = XML_TRUE;
6705   }
6706 
6707   hashTableIterInit(&iter, &(dtd->prefixes));
6708   for (;;) {
6709     int i;
6710     int len;
6711     const XML_Char *s;
6712     PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6713     if (! prefix)
6714       break;
6715     if (! prefix->binding) {
6716       /* This test appears to be (justifiable) paranoia.  There does
6717        * not seem to be a way of injecting a prefix without a binding
6718        * that doesn't get errored long before this function is called.
6719        * The test should remain for safety's sake, so we instead
6720        * exclude the following line from the coverage statistics.
6721        */
6722       continue; /* LCOV_EXCL_LINE */
6723     }
6724     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6725       return NULL;
6726     for (s = prefix->name; *s; s++)
6727       if (! poolAppendChar(&parser->m_tempPool, *s))
6728         return NULL;
6729     if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6730       return NULL;
6731     len = prefix->binding->uriLen;
6732     if (parser->m_namespaceSeparator)
6733       len--;
6734     for (i = 0; i < len; i++)
6735       if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6736         return NULL;
6737     needSep = XML_TRUE;
6738   }
6739 
6740   hashTableIterInit(&iter, &(dtd->generalEntities));
6741   for (;;) {
6742     const XML_Char *s;
6743     ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6744     if (! e)
6745       break;
6746     if (! e->open)
6747       continue;
6748     if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6749       return NULL;
6750     for (s = e->name; *s; s++)
6751       if (! poolAppendChar(&parser->m_tempPool, *s))
6752         return 0;
6753     needSep = XML_TRUE;
6754   }
6755 
6756   if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6757     return NULL;
6758   return parser->m_tempPool.start;
6759 }
6760 
6761 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6762 setContext(XML_Parser parser, const XML_Char *context) {
6763   if (context == NULL) {
6764     return XML_FALSE;
6765   }
6766 
6767   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6768   const XML_Char *s = context;
6769 
6770   while (*context != XML_T('\0')) {
6771     if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6772       ENTITY *e;
6773       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6774         return XML_FALSE;
6775       e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6776                            poolStart(&parser->m_tempPool), 0);
6777       if (e)
6778         e->open = XML_TRUE;
6779       if (*s != XML_T('\0'))
6780         s++;
6781       context = s;
6782       poolDiscard(&parser->m_tempPool);
6783     } else if (*s == XML_T(ASCII_EQUALS)) {
6784       PREFIX *prefix;
6785       if (poolLength(&parser->m_tempPool) == 0)
6786         prefix = &dtd->defaultPrefix;
6787       else {
6788         if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6789           return XML_FALSE;
6790         prefix
6791             = (PREFIX *)lookup(parser, &dtd->prefixes,
6792                                poolStart(&parser->m_tempPool), sizeof(PREFIX));
6793         if (! prefix)
6794           return XML_FALSE;
6795         if (prefix->name == poolStart(&parser->m_tempPool)) {
6796           prefix->name = poolCopyString(&dtd->pool, prefix->name);
6797           if (! prefix->name)
6798             return XML_FALSE;
6799         }
6800         poolDiscard(&parser->m_tempPool);
6801       }
6802       for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6803            context++)
6804         if (! poolAppendChar(&parser->m_tempPool, *context))
6805           return XML_FALSE;
6806       if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6807         return XML_FALSE;
6808       if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6809                      &parser->m_inheritedBindings)
6810           != XML_ERROR_NONE)
6811         return XML_FALSE;
6812       poolDiscard(&parser->m_tempPool);
6813       if (*context != XML_T('\0'))
6814         ++context;
6815       s = context;
6816     } else {
6817       if (! poolAppendChar(&parser->m_tempPool, *s))
6818         return XML_FALSE;
6819       s++;
6820     }
6821   }
6822   return XML_TRUE;
6823 }
6824 
6825 static void FASTCALL
normalizePublicId(XML_Char * publicId)6826 normalizePublicId(XML_Char *publicId) {
6827   XML_Char *p = publicId;
6828   XML_Char *s;
6829   for (s = publicId; *s; s++) {
6830     switch (*s) {
6831     case 0x20:
6832     case 0xD:
6833     case 0xA:
6834       if (p != publicId && p[-1] != 0x20)
6835         *p++ = 0x20;
6836       break;
6837     default:
6838       *p++ = *s;
6839     }
6840   }
6841   if (p != publicId && p[-1] == 0x20)
6842     --p;
6843   *p = XML_T('\0');
6844 }
6845 
6846 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6847 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6848   DTD *p = ms->malloc_fcn(sizeof(DTD));
6849   if (p == NULL)
6850     return p;
6851   poolInit(&(p->pool), ms);
6852   poolInit(&(p->entityValuePool), ms);
6853   hashTableInit(&(p->generalEntities), ms);
6854   hashTableInit(&(p->elementTypes), ms);
6855   hashTableInit(&(p->attributeIds), ms);
6856   hashTableInit(&(p->prefixes), ms);
6857 #ifdef XML_DTD
6858   p->paramEntityRead = XML_FALSE;
6859   hashTableInit(&(p->paramEntities), ms);
6860 #endif /* XML_DTD */
6861   p->defaultPrefix.name = NULL;
6862   p->defaultPrefix.binding = NULL;
6863 
6864   p->in_eldecl = XML_FALSE;
6865   p->scaffIndex = NULL;
6866   p->scaffold = NULL;
6867   p->scaffLevel = 0;
6868   p->scaffSize = 0;
6869   p->scaffCount = 0;
6870   p->contentStringLen = 0;
6871 
6872   p->keepProcessing = XML_TRUE;
6873   p->hasParamEntityRefs = XML_FALSE;
6874   p->standalone = XML_FALSE;
6875   return p;
6876 }
6877 
6878 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6879 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6880   HASH_TABLE_ITER iter;
6881   hashTableIterInit(&iter, &(p->elementTypes));
6882   for (;;) {
6883     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6884     if (! e)
6885       break;
6886     if (e->allocDefaultAtts != 0)
6887       ms->free_fcn(e->defaultAtts);
6888   }
6889   hashTableClear(&(p->generalEntities));
6890 #ifdef XML_DTD
6891   p->paramEntityRead = XML_FALSE;
6892   hashTableClear(&(p->paramEntities));
6893 #endif /* XML_DTD */
6894   hashTableClear(&(p->elementTypes));
6895   hashTableClear(&(p->attributeIds));
6896   hashTableClear(&(p->prefixes));
6897   poolClear(&(p->pool));
6898   poolClear(&(p->entityValuePool));
6899   p->defaultPrefix.name = NULL;
6900   p->defaultPrefix.binding = NULL;
6901 
6902   p->in_eldecl = XML_FALSE;
6903 
6904   ms->free_fcn(p->scaffIndex);
6905   p->scaffIndex = NULL;
6906   ms->free_fcn(p->scaffold);
6907   p->scaffold = NULL;
6908 
6909   p->scaffLevel = 0;
6910   p->scaffSize = 0;
6911   p->scaffCount = 0;
6912   p->contentStringLen = 0;
6913 
6914   p->keepProcessing = XML_TRUE;
6915   p->hasParamEntityRefs = XML_FALSE;
6916   p->standalone = XML_FALSE;
6917 }
6918 
6919 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6920 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6921   HASH_TABLE_ITER iter;
6922   hashTableIterInit(&iter, &(p->elementTypes));
6923   for (;;) {
6924     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6925     if (! e)
6926       break;
6927     if (e->allocDefaultAtts != 0)
6928       ms->free_fcn(e->defaultAtts);
6929   }
6930   hashTableDestroy(&(p->generalEntities));
6931 #ifdef XML_DTD
6932   hashTableDestroy(&(p->paramEntities));
6933 #endif /* XML_DTD */
6934   hashTableDestroy(&(p->elementTypes));
6935   hashTableDestroy(&(p->attributeIds));
6936   hashTableDestroy(&(p->prefixes));
6937   poolDestroy(&(p->pool));
6938   poolDestroy(&(p->entityValuePool));
6939   if (isDocEntity) {
6940     ms->free_fcn(p->scaffIndex);
6941     ms->free_fcn(p->scaffold);
6942   }
6943   ms->free_fcn(p);
6944 }
6945 
6946 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6947    The new DTD has already been initialized.
6948 */
6949 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6950 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6951         const XML_Memory_Handling_Suite *ms) {
6952   HASH_TABLE_ITER iter;
6953 
6954   /* Copy the prefix table. */
6955 
6956   hashTableIterInit(&iter, &(oldDtd->prefixes));
6957   for (;;) {
6958     const XML_Char *name;
6959     const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6960     if (! oldP)
6961       break;
6962     name = poolCopyString(&(newDtd->pool), oldP->name);
6963     if (! name)
6964       return 0;
6965     if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6966       return 0;
6967   }
6968 
6969   hashTableIterInit(&iter, &(oldDtd->attributeIds));
6970 
6971   /* Copy the attribute id table. */
6972 
6973   for (;;) {
6974     ATTRIBUTE_ID *newA;
6975     const XML_Char *name;
6976     const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6977 
6978     if (! oldA)
6979       break;
6980     /* Remember to allocate the scratch byte before the name. */
6981     if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6982       return 0;
6983     name = poolCopyString(&(newDtd->pool), oldA->name);
6984     if (! name)
6985       return 0;
6986     ++name;
6987     newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6988                                   sizeof(ATTRIBUTE_ID));
6989     if (! newA)
6990       return 0;
6991     newA->maybeTokenized = oldA->maybeTokenized;
6992     if (oldA->prefix) {
6993       newA->xmlns = oldA->xmlns;
6994       if (oldA->prefix == &oldDtd->defaultPrefix)
6995         newA->prefix = &newDtd->defaultPrefix;
6996       else
6997         newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6998                                         oldA->prefix->name, 0);
6999     }
7000   }
7001 
7002   /* Copy the element type table. */
7003 
7004   hashTableIterInit(&iter, &(oldDtd->elementTypes));
7005 
7006   for (;;) {
7007     int i;
7008     ELEMENT_TYPE *newE;
7009     const XML_Char *name;
7010     const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7011     if (! oldE)
7012       break;
7013     name = poolCopyString(&(newDtd->pool), oldE->name);
7014     if (! name)
7015       return 0;
7016     newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7017                                   sizeof(ELEMENT_TYPE));
7018     if (! newE)
7019       return 0;
7020     if (oldE->nDefaultAtts) {
7021       /* Detect and prevent integer overflow.
7022        * The preprocessor guard addresses the "always false" warning
7023        * from -Wtype-limits on platforms where
7024        * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7025 #if UINT_MAX >= SIZE_MAX
7026       if ((size_t)oldE->nDefaultAtts
7027           > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7028         return 0;
7029       }
7030 #endif
7031       newE->defaultAtts
7032           = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7033       if (! newE->defaultAtts) {
7034         return 0;
7035       }
7036     }
7037     if (oldE->idAtt)
7038       newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7039                                            oldE->idAtt->name, 0);
7040     newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7041     if (oldE->prefix)
7042       newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7043                                       oldE->prefix->name, 0);
7044     for (i = 0; i < newE->nDefaultAtts; i++) {
7045       newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7046           oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7047       newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7048       if (oldE->defaultAtts[i].value) {
7049         newE->defaultAtts[i].value
7050             = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7051         if (! newE->defaultAtts[i].value)
7052           return 0;
7053       } else
7054         newE->defaultAtts[i].value = NULL;
7055     }
7056   }
7057 
7058   /* Copy the entity tables. */
7059   if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7060                         &(oldDtd->generalEntities)))
7061     return 0;
7062 
7063 #ifdef XML_DTD
7064   if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7065                         &(oldDtd->paramEntities)))
7066     return 0;
7067   newDtd->paramEntityRead = oldDtd->paramEntityRead;
7068 #endif /* XML_DTD */
7069 
7070   newDtd->keepProcessing = oldDtd->keepProcessing;
7071   newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7072   newDtd->standalone = oldDtd->standalone;
7073 
7074   /* Don't want deep copying for scaffolding */
7075   newDtd->in_eldecl = oldDtd->in_eldecl;
7076   newDtd->scaffold = oldDtd->scaffold;
7077   newDtd->contentStringLen = oldDtd->contentStringLen;
7078   newDtd->scaffSize = oldDtd->scaffSize;
7079   newDtd->scaffLevel = oldDtd->scaffLevel;
7080   newDtd->scaffIndex = oldDtd->scaffIndex;
7081 
7082   return 1;
7083 } /* End dtdCopy */
7084 
7085 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7086 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7087                 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7088   HASH_TABLE_ITER iter;
7089   const XML_Char *cachedOldBase = NULL;
7090   const XML_Char *cachedNewBase = NULL;
7091 
7092   hashTableIterInit(&iter, oldTable);
7093 
7094   for (;;) {
7095     ENTITY *newE;
7096     const XML_Char *name;
7097     const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7098     if (! oldE)
7099       break;
7100     name = poolCopyString(newPool, oldE->name);
7101     if (! name)
7102       return 0;
7103     newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7104     if (! newE)
7105       return 0;
7106     if (oldE->systemId) {
7107       const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7108       if (! tem)
7109         return 0;
7110       newE->systemId = tem;
7111       if (oldE->base) {
7112         if (oldE->base == cachedOldBase)
7113           newE->base = cachedNewBase;
7114         else {
7115           cachedOldBase = oldE->base;
7116           tem = poolCopyString(newPool, cachedOldBase);
7117           if (! tem)
7118             return 0;
7119           cachedNewBase = newE->base = tem;
7120         }
7121       }
7122       if (oldE->publicId) {
7123         tem = poolCopyString(newPool, oldE->publicId);
7124         if (! tem)
7125           return 0;
7126         newE->publicId = tem;
7127       }
7128     } else {
7129       const XML_Char *tem
7130           = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7131       if (! tem)
7132         return 0;
7133       newE->textPtr = tem;
7134       newE->textLen = oldE->textLen;
7135     }
7136     if (oldE->notation) {
7137       const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7138       if (! tem)
7139         return 0;
7140       newE->notation = tem;
7141     }
7142     newE->is_param = oldE->is_param;
7143     newE->is_internal = oldE->is_internal;
7144   }
7145   return 1;
7146 }
7147 
7148 #define INIT_POWER 6
7149 
7150 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7151 keyeq(KEY s1, KEY s2) {
7152   for (; *s1 == *s2; s1++, s2++)
7153     if (*s1 == 0)
7154       return XML_TRUE;
7155   return XML_FALSE;
7156 }
7157 
7158 static size_t
keylen(KEY s)7159 keylen(KEY s) {
7160   size_t len = 0;
7161   for (; *s; s++, len++)
7162     ;
7163   return len;
7164 }
7165 
7166 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7167 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7168   key->k[0] = 0;
7169   key->k[1] = get_hash_secret_salt(parser);
7170 }
7171 
7172 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7173 hash(XML_Parser parser, KEY s) {
7174   struct siphash state;
7175   struct sipkey key;
7176   (void)sip24_valid;
7177   copy_salt_to_sipkey(parser, &key);
7178   sip24_init(&state, &key);
7179   sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7180   return (unsigned long)sip24_final(&state);
7181 }
7182 
7183 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)7184 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7185   size_t i;
7186   if (table->size == 0) {
7187     size_t tsize;
7188     if (! createSize)
7189       return NULL;
7190     table->power = INIT_POWER;
7191     /* table->size is a power of 2 */
7192     table->size = (size_t)1 << INIT_POWER;
7193     tsize = table->size * sizeof(NAMED *);
7194     table->v = table->mem->malloc_fcn(tsize);
7195     if (! table->v) {
7196       table->size = 0;
7197       return NULL;
7198     }
7199     memset(table->v, 0, tsize);
7200     i = hash(parser, name) & ((unsigned long)table->size - 1);
7201   } else {
7202     unsigned long h = hash(parser, name);
7203     unsigned long mask = (unsigned long)table->size - 1;
7204     unsigned char step = 0;
7205     i = h & mask;
7206     while (table->v[i]) {
7207       if (keyeq(name, table->v[i]->name))
7208         return table->v[i];
7209       if (! step)
7210         step = PROBE_STEP(h, mask, table->power);
7211       i < step ? (i += table->size - step) : (i -= step);
7212     }
7213     if (! createSize)
7214       return NULL;
7215 
7216     /* check for overflow (table is half full) */
7217     if (table->used >> (table->power - 1)) {
7218       unsigned char newPower = table->power + 1;
7219 
7220       /* Detect and prevent invalid shift */
7221       if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7222         return NULL;
7223       }
7224 
7225       size_t newSize = (size_t)1 << newPower;
7226       unsigned long newMask = (unsigned long)newSize - 1;
7227 
7228       /* Detect and prevent integer overflow */
7229       if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7230         return NULL;
7231       }
7232 
7233       size_t tsize = newSize * sizeof(NAMED *);
7234       NAMED **newV = table->mem->malloc_fcn(tsize);
7235       if (! newV)
7236         return NULL;
7237       memset(newV, 0, tsize);
7238       for (i = 0; i < table->size; i++)
7239         if (table->v[i]) {
7240           unsigned long newHash = hash(parser, table->v[i]->name);
7241           size_t j = newHash & newMask;
7242           step = 0;
7243           while (newV[j]) {
7244             if (! step)
7245               step = PROBE_STEP(newHash, newMask, newPower);
7246             j < step ? (j += newSize - step) : (j -= step);
7247           }
7248           newV[j] = table->v[i];
7249         }
7250       table->mem->free_fcn(table->v);
7251       table->v = newV;
7252       table->power = newPower;
7253       table->size = newSize;
7254       i = h & newMask;
7255       step = 0;
7256       while (table->v[i]) {
7257         if (! step)
7258           step = PROBE_STEP(h, newMask, newPower);
7259         i < step ? (i += newSize - step) : (i -= step);
7260       }
7261     }
7262   }
7263   table->v[i] = table->mem->malloc_fcn(createSize);
7264   if (! table->v[i])
7265     return NULL;
7266   memset(table->v[i], 0, createSize);
7267   table->v[i]->name = name;
7268   (table->used)++;
7269   return table->v[i];
7270 }
7271 
7272 static void FASTCALL
hashTableClear(HASH_TABLE * table)7273 hashTableClear(HASH_TABLE *table) {
7274   size_t i;
7275   for (i = 0; i < table->size; i++) {
7276     table->mem->free_fcn(table->v[i]);
7277     table->v[i] = NULL;
7278   }
7279   table->used = 0;
7280 }
7281 
7282 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7283 hashTableDestroy(HASH_TABLE *table) {
7284   size_t i;
7285   for (i = 0; i < table->size; i++)
7286     table->mem->free_fcn(table->v[i]);
7287   table->mem->free_fcn(table->v);
7288 }
7289 
7290 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7291 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7292   p->power = 0;
7293   p->size = 0;
7294   p->used = 0;
7295   p->v = NULL;
7296   p->mem = ms;
7297 }
7298 
7299 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7300 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7301   iter->p = table->v;
7302   iter->end = iter->p ? iter->p + table->size : NULL;
7303 }
7304 
7305 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7306 hashTableIterNext(HASH_TABLE_ITER *iter) {
7307   while (iter->p != iter->end) {
7308     NAMED *tem = *(iter->p)++;
7309     if (tem)
7310       return tem;
7311   }
7312   return NULL;
7313 }
7314 
7315 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7316 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7317   pool->blocks = NULL;
7318   pool->freeBlocks = NULL;
7319   pool->start = NULL;
7320   pool->ptr = NULL;
7321   pool->end = NULL;
7322   pool->mem = ms;
7323 }
7324 
7325 static void FASTCALL
poolClear(STRING_POOL * pool)7326 poolClear(STRING_POOL *pool) {
7327   if (! pool->freeBlocks)
7328     pool->freeBlocks = pool->blocks;
7329   else {
7330     BLOCK *p = pool->blocks;
7331     while (p) {
7332       BLOCK *tem = p->next;
7333       p->next = pool->freeBlocks;
7334       pool->freeBlocks = p;
7335       p = tem;
7336     }
7337   }
7338   pool->blocks = NULL;
7339   pool->start = NULL;
7340   pool->ptr = NULL;
7341   pool->end = NULL;
7342 }
7343 
7344 static void FASTCALL
poolDestroy(STRING_POOL * pool)7345 poolDestroy(STRING_POOL *pool) {
7346   BLOCK *p = pool->blocks;
7347   while (p) {
7348     BLOCK *tem = p->next;
7349     pool->mem->free_fcn(p);
7350     p = tem;
7351   }
7352   p = pool->freeBlocks;
7353   while (p) {
7354     BLOCK *tem = p->next;
7355     pool->mem->free_fcn(p);
7356     p = tem;
7357   }
7358 }
7359 
7360 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7361 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7362            const char *end) {
7363   if (! pool->ptr && ! poolGrow(pool))
7364     return NULL;
7365   for (;;) {
7366     const enum XML_Convert_Result convert_res = XmlConvert(
7367         enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7368     if ((convert_res == XML_CONVERT_COMPLETED)
7369         || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7370       break;
7371     if (! poolGrow(pool))
7372       return NULL;
7373   }
7374   return pool->start;
7375 }
7376 
7377 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7378 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7379   do {
7380     if (! poolAppendChar(pool, *s))
7381       return NULL;
7382   } while (*s++);
7383   s = pool->start;
7384   poolFinish(pool);
7385   return s;
7386 }
7387 
7388 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7389 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7390   if (! pool->ptr && ! poolGrow(pool)) {
7391     /* The following line is unreachable given the current usage of
7392      * poolCopyStringN().  Currently it is called from exactly one
7393      * place to copy the text of a simple general entity.  By that
7394      * point, the name of the entity is already stored in the pool, so
7395      * pool->ptr cannot be NULL.
7396      *
7397      * If poolCopyStringN() is used elsewhere as it well might be,
7398      * this line may well become executable again.  Regardless, this
7399      * sort of check shouldn't be removed lightly, so we just exclude
7400      * it from the coverage statistics.
7401      */
7402     return NULL; /* LCOV_EXCL_LINE */
7403   }
7404   for (; n > 0; --n, s++) {
7405     if (! poolAppendChar(pool, *s))
7406       return NULL;
7407   }
7408   s = pool->start;
7409   poolFinish(pool);
7410   return s;
7411 }
7412 
7413 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7414 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7415   while (*s) {
7416     if (! poolAppendChar(pool, *s))
7417       return NULL;
7418     s++;
7419   }
7420   return pool->start;
7421 }
7422 
7423 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7424 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7425                 const char *end) {
7426   if (! poolAppend(pool, enc, ptr, end))
7427     return NULL;
7428   if (pool->ptr == pool->end && ! poolGrow(pool))
7429     return NULL;
7430   *(pool->ptr)++ = 0;
7431   return pool->start;
7432 }
7433 
7434 static size_t
poolBytesToAllocateFor(int blockSize)7435 poolBytesToAllocateFor(int blockSize) {
7436   /* Unprotected math would be:
7437   ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7438   **
7439   ** Detect overflow, avoiding _signed_ overflow undefined behavior
7440   ** For a + b * c we check b * c in isolation first, so that addition of a
7441   ** on top has no chance of making us accept a small non-negative number
7442   */
7443   const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7444 
7445   if (blockSize <= 0)
7446     return 0;
7447 
7448   if (blockSize > (int)(INT_MAX / stretch))
7449     return 0;
7450 
7451   {
7452     const int stretchedBlockSize = blockSize * (int)stretch;
7453     const int bytesToAllocate
7454         = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7455     if (bytesToAllocate < 0)
7456       return 0;
7457 
7458     return (size_t)bytesToAllocate;
7459   }
7460 }
7461 
7462 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7463 poolGrow(STRING_POOL *pool) {
7464   if (pool->freeBlocks) {
7465     if (pool->start == 0) {
7466       pool->blocks = pool->freeBlocks;
7467       pool->freeBlocks = pool->freeBlocks->next;
7468       pool->blocks->next = NULL;
7469       pool->start = pool->blocks->s;
7470       pool->end = pool->start + pool->blocks->size;
7471       pool->ptr = pool->start;
7472       return XML_TRUE;
7473     }
7474     if (pool->end - pool->start < pool->freeBlocks->size) {
7475       BLOCK *tem = pool->freeBlocks->next;
7476       pool->freeBlocks->next = pool->blocks;
7477       pool->blocks = pool->freeBlocks;
7478       pool->freeBlocks = tem;
7479       memcpy(pool->blocks->s, pool->start,
7480              (pool->end - pool->start) * sizeof(XML_Char));
7481       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7482       pool->start = pool->blocks->s;
7483       pool->end = pool->start + pool->blocks->size;
7484       return XML_TRUE;
7485     }
7486   }
7487   if (pool->blocks && pool->start == pool->blocks->s) {
7488     BLOCK *temp;
7489     int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7490     size_t bytesToAllocate;
7491 
7492     /* NOTE: Needs to be calculated prior to calling `realloc`
7493              to avoid dangling pointers: */
7494     const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7495 
7496     if (blockSize < 0) {
7497       /* This condition traps a situation where either more than
7498        * INT_MAX/2 bytes have already been allocated.  This isn't
7499        * readily testable, since it is unlikely that an average
7500        * machine will have that much memory, so we exclude it from the
7501        * coverage statistics.
7502        */
7503       return XML_FALSE; /* LCOV_EXCL_LINE */
7504     }
7505 
7506     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7507     if (bytesToAllocate == 0)
7508       return XML_FALSE;
7509 
7510     temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7511                                            (unsigned)bytesToAllocate);
7512     if (temp == NULL)
7513       return XML_FALSE;
7514     pool->blocks = temp;
7515     pool->blocks->size = blockSize;
7516     pool->ptr = pool->blocks->s + offsetInsideBlock;
7517     pool->start = pool->blocks->s;
7518     pool->end = pool->start + blockSize;
7519   } else {
7520     BLOCK *tem;
7521     int blockSize = (int)(pool->end - pool->start);
7522     size_t bytesToAllocate;
7523 
7524     if (blockSize < 0) {
7525       /* This condition traps a situation where either more than
7526        * INT_MAX bytes have already been allocated (which is prevented
7527        * by various pieces of program logic, not least this one, never
7528        * mind the unlikelihood of actually having that much memory) or
7529        * the pool control fields have been corrupted (which could
7530        * conceivably happen in an extremely buggy user handler
7531        * function).  Either way it isn't readily testable, so we
7532        * exclude it from the coverage statistics.
7533        */
7534       return XML_FALSE; /* LCOV_EXCL_LINE */
7535     }
7536 
7537     if (blockSize < INIT_BLOCK_SIZE)
7538       blockSize = INIT_BLOCK_SIZE;
7539     else {
7540       /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7541       if ((int)((unsigned)blockSize * 2U) < 0) {
7542         return XML_FALSE;
7543       }
7544       blockSize *= 2;
7545     }
7546 
7547     bytesToAllocate = poolBytesToAllocateFor(blockSize);
7548     if (bytesToAllocate == 0)
7549       return XML_FALSE;
7550 
7551     tem = pool->mem->malloc_fcn(bytesToAllocate);
7552     if (! tem)
7553       return XML_FALSE;
7554     tem->size = blockSize;
7555     tem->next = pool->blocks;
7556     pool->blocks = tem;
7557     if (pool->ptr != pool->start)
7558       memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7559     pool->ptr = tem->s + (pool->ptr - pool->start);
7560     pool->start = tem->s;
7561     pool->end = tem->s + blockSize;
7562   }
7563   return XML_TRUE;
7564 }
7565 
7566 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7567 nextScaffoldPart(XML_Parser parser) {
7568   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7569   CONTENT_SCAFFOLD *me;
7570   int next;
7571 
7572   if (! dtd->scaffIndex) {
7573     /* Detect and prevent integer overflow.
7574      * The preprocessor guard addresses the "always false" warning
7575      * from -Wtype-limits on platforms where
7576      * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7577 #if UINT_MAX >= SIZE_MAX
7578     if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
7579       return -1;
7580     }
7581 #endif
7582     dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7583     if (! dtd->scaffIndex)
7584       return -1;
7585     dtd->scaffIndex[0] = 0;
7586   }
7587 
7588   if (dtd->scaffCount >= dtd->scaffSize) {
7589     CONTENT_SCAFFOLD *temp;
7590     if (dtd->scaffold) {
7591       /* Detect and prevent integer overflow */
7592       if (dtd->scaffSize > UINT_MAX / 2u) {
7593         return -1;
7594       }
7595       /* Detect and prevent integer overflow.
7596        * The preprocessor guard addresses the "always false" warning
7597        * from -Wtype-limits on platforms where
7598        * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7599 #if UINT_MAX >= SIZE_MAX
7600       if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7601         return -1;
7602       }
7603 #endif
7604 
7605       temp = (CONTENT_SCAFFOLD *)REALLOC(
7606           parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7607       if (temp == NULL)
7608         return -1;
7609       dtd->scaffSize *= 2;
7610     } else {
7611       temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7612                                                     * sizeof(CONTENT_SCAFFOLD));
7613       if (temp == NULL)
7614         return -1;
7615       dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7616     }
7617     dtd->scaffold = temp;
7618   }
7619   next = dtd->scaffCount++;
7620   me = &dtd->scaffold[next];
7621   if (dtd->scaffLevel) {
7622     CONTENT_SCAFFOLD *parent
7623         = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7624     if (parent->lastchild) {
7625       dtd->scaffold[parent->lastchild].nextsib = next;
7626     }
7627     if (! parent->childcnt)
7628       parent->firstchild = next;
7629     parent->lastchild = next;
7630     parent->childcnt++;
7631   }
7632   me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7633   return next;
7634 }
7635 
7636 static XML_Content *
build_model(XML_Parser parser)7637 build_model(XML_Parser parser) {
7638   /* Function build_model transforms the existing parser->m_dtd->scaffold
7639    * array of CONTENT_SCAFFOLD tree nodes into a new array of
7640    * XML_Content tree nodes followed by a gapless list of zero-terminated
7641    * strings. */
7642   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7643   XML_Content *ret;
7644   XML_Char *str; /* the current string writing location */
7645 
7646   /* Detect and prevent integer overflow.
7647    * The preprocessor guard addresses the "always false" warning
7648    * from -Wtype-limits on platforms where
7649    * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7650 #if UINT_MAX >= SIZE_MAX
7651   if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7652     return NULL;
7653   }
7654   if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7655     return NULL;
7656   }
7657 #endif
7658   if (dtd->scaffCount * sizeof(XML_Content)
7659       > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7660     return NULL;
7661   }
7662 
7663   const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7664                             + (dtd->contentStringLen * sizeof(XML_Char)));
7665 
7666   ret = (XML_Content *)MALLOC(parser, allocsize);
7667   if (! ret)
7668     return NULL;
7669 
7670   /* What follows is an iterative implementation (of what was previously done
7671    * recursively in a dedicated function called "build_node".  The old recursive
7672    * build_node could be forced into stack exhaustion from input as small as a
7673    * few megabyte, and so that was a security issue.  Hence, a function call
7674    * stack is avoided now by resolving recursion.)
7675    *
7676    * The iterative approach works as follows:
7677    *
7678    * - We have two writing pointers, both walking up the result array; one does
7679    *   the work, the other creates "jobs" for its colleague to do, and leads
7680    *   the way:
7681    *
7682    *   - The faster one, pointer jobDest, always leads and writes "what job
7683    *     to do" by the other, once they reach that place in the
7684    *     array: leader "jobDest" stores the source node array index (relative
7685    *     to array dtd->scaffold) in field "numchildren".
7686    *
7687    *   - The slower one, pointer dest, looks at the value stored in the
7688    *     "numchildren" field (which actually holds a source node array index
7689    *     at that time) and puts the real data from dtd->scaffold in.
7690    *
7691    * - Before the loop starts, jobDest writes source array index 0
7692    *   (where the root node is located) so that dest will have something to do
7693    *   when it starts operation.
7694    *
7695    * - Whenever nodes with children are encountered, jobDest appends
7696    *   them as new jobs, in order.  As a result, tree node siblings are
7697    *   adjacent in the resulting array, for example:
7698    *
7699    *     [0] root, has two children
7700    *       [1] first child of 0, has three children
7701    *         [3] first child of 1, does not have children
7702    *         [4] second child of 1, does not have children
7703    *         [5] third child of 1, does not have children
7704    *       [2] second child of 0, does not have children
7705    *
7706    *   Or (the same data) presented in flat array view:
7707    *
7708    *     [0] root, has two children
7709    *
7710    *     [1] first child of 0, has three children
7711    *     [2] second child of 0, does not have children
7712    *
7713    *     [3] first child of 1, does not have children
7714    *     [4] second child of 1, does not have children
7715    *     [5] third child of 1, does not have children
7716    *
7717    * - The algorithm repeats until all target array indices have been processed.
7718    */
7719   XML_Content *dest = ret; /* tree node writing location, moves upwards */
7720   XML_Content *const destLimit = &ret[dtd->scaffCount];
7721   XML_Content *jobDest = ret; /* next free writing location in target array */
7722   str = (XML_Char *)&ret[dtd->scaffCount];
7723 
7724   /* Add the starting job, the root node (index 0) of the source tree  */
7725   (jobDest++)->numchildren = 0;
7726 
7727   for (; dest < destLimit; dest++) {
7728     /* Retrieve source tree array index from job storage */
7729     const int src_node = (int)dest->numchildren;
7730 
7731     /* Convert item */
7732     dest->type = dtd->scaffold[src_node].type;
7733     dest->quant = dtd->scaffold[src_node].quant;
7734     if (dest->type == XML_CTYPE_NAME) {
7735       const XML_Char *src;
7736       dest->name = str;
7737       src = dtd->scaffold[src_node].name;
7738       for (;;) {
7739         *str++ = *src;
7740         if (! *src)
7741           break;
7742         src++;
7743       }
7744       dest->numchildren = 0;
7745       dest->children = NULL;
7746     } else {
7747       unsigned int i;
7748       int cn;
7749       dest->name = NULL;
7750       dest->numchildren = dtd->scaffold[src_node].childcnt;
7751       dest->children = jobDest;
7752 
7753       /* Append scaffold indices of children to array */
7754       for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7755            i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7756         (jobDest++)->numchildren = (unsigned int)cn;
7757     }
7758   }
7759 
7760   return ret;
7761 }
7762 
7763 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7764 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7765                const char *end) {
7766   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7767   const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7768   ELEMENT_TYPE *ret;
7769 
7770   if (! name)
7771     return NULL;
7772   ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7773                                sizeof(ELEMENT_TYPE));
7774   if (! ret)
7775     return NULL;
7776   if (ret->name != name)
7777     poolDiscard(&dtd->pool);
7778   else {
7779     poolFinish(&dtd->pool);
7780     if (! setElementTypePrefix(parser, ret))
7781       return NULL;
7782   }
7783   return ret;
7784 }
7785 
7786 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7787 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7788   size_t charsRequired = 0;
7789   XML_Char *result;
7790 
7791   /* First determine how long the string is */
7792   while (s[charsRequired] != 0) {
7793     charsRequired++;
7794   }
7795   /* Include the terminator */
7796   charsRequired++;
7797 
7798   /* Now allocate space for the copy */
7799   result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7800   if (result == NULL)
7801     return NULL;
7802   /* Copy the original into place */
7803   memcpy(result, s, charsRequired * sizeof(XML_Char));
7804   return result;
7805 }
7806 
7807 #if XML_GE == 1
7808 
7809 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7810 accountingGetCurrentAmplification(XML_Parser rootParser) {
7811   //                                          1.........1.........12 => 22
7812   const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7813   const XmlBigCount countBytesOutput
7814       = rootParser->m_accounting.countBytesDirect
7815         + rootParser->m_accounting.countBytesIndirect;
7816   const float amplificationFactor
7817       = rootParser->m_accounting.countBytesDirect
7818             ? (countBytesOutput
7819                / (float)(rootParser->m_accounting.countBytesDirect))
7820             : ((lenOfShortestInclude
7821                 + rootParser->m_accounting.countBytesIndirect)
7822                / (float)lenOfShortestInclude);
7823   assert(! rootParser->m_parentParser);
7824   return amplificationFactor;
7825 }
7826 
7827 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7828 accountingReportStats(XML_Parser originParser, const char *epilog) {
7829   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7830   assert(! rootParser->m_parentParser);
7831 
7832   if (rootParser->m_accounting.debugLevel == 0u) {
7833     return;
7834   }
7835 
7836   const float amplificationFactor
7837       = accountingGetCurrentAmplification(rootParser);
7838   fprintf(stderr,
7839           "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7840               "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7841           (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7842           rootParser->m_accounting.countBytesIndirect,
7843           (double)amplificationFactor, epilog);
7844 }
7845 
7846 static void
accountingOnAbort(XML_Parser originParser)7847 accountingOnAbort(XML_Parser originParser) {
7848   accountingReportStats(originParser, " ABORTING\n");
7849 }
7850 
7851 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7852 accountingReportDiff(XML_Parser rootParser,
7853                      unsigned int levelsAwayFromRootParser, const char *before,
7854                      const char *after, ptrdiff_t bytesMore, int source_line,
7855                      enum XML_Account account) {
7856   assert(! rootParser->m_parentParser);
7857 
7858   fprintf(stderr,
7859           " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7860           bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7861           levelsAwayFromRootParser, source_line, 10, "");
7862 
7863   const char ellipis[] = "[..]";
7864   const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7865   const unsigned int contextLength = 10;
7866 
7867   /* Note: Performance is of no concern here */
7868   const char *walker = before;
7869   if ((rootParser->m_accounting.debugLevel >= 3u)
7870       || (after - before)
7871              <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7872     for (; walker < after; walker++) {
7873       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7874     }
7875   } else {
7876     for (; walker < before + contextLength; walker++) {
7877       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7878     }
7879     fprintf(stderr, ellipis);
7880     walker = after - contextLength;
7881     for (; walker < after; walker++) {
7882       fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7883     }
7884   }
7885   fprintf(stderr, "\"\n");
7886 }
7887 
7888 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7889 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7890                         const char *after, int source_line,
7891                         enum XML_Account account) {
7892   /* Note: We need to check the token type *first* to be sure that
7893    *       we can even access variable <after>, safely.
7894    *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7895   switch (tok) {
7896   case XML_TOK_INVALID:
7897   case XML_TOK_PARTIAL:
7898   case XML_TOK_PARTIAL_CHAR:
7899   case XML_TOK_NONE:
7900     return XML_TRUE;
7901   }
7902 
7903   if (account == XML_ACCOUNT_NONE)
7904     return XML_TRUE; /* because these bytes have been accounted for, already */
7905 
7906   unsigned int levelsAwayFromRootParser;
7907   const XML_Parser rootParser
7908       = getRootParserOf(originParser, &levelsAwayFromRootParser);
7909   assert(! rootParser->m_parentParser);
7910 
7911   const int isDirect
7912       = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7913   const ptrdiff_t bytesMore = after - before;
7914 
7915   XmlBigCount *const additionTarget
7916       = isDirect ? &rootParser->m_accounting.countBytesDirect
7917                  : &rootParser->m_accounting.countBytesIndirect;
7918 
7919   /* Detect and avoid integer overflow */
7920   if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7921     return XML_FALSE;
7922   *additionTarget += bytesMore;
7923 
7924   const XmlBigCount countBytesOutput
7925       = rootParser->m_accounting.countBytesDirect
7926         + rootParser->m_accounting.countBytesIndirect;
7927   const float amplificationFactor
7928       = accountingGetCurrentAmplification(rootParser);
7929   const XML_Bool tolerated
7930       = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7931         || (amplificationFactor
7932             <= rootParser->m_accounting.maximumAmplificationFactor);
7933 
7934   if (rootParser->m_accounting.debugLevel >= 2u) {
7935     accountingReportStats(rootParser, "");
7936     accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7937                          bytesMore, source_line, account);
7938   }
7939 
7940   return tolerated;
7941 }
7942 
7943 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7944 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7945   if (! parser)
7946     return 0;
7947   return parser->m_accounting.countBytesDirect;
7948 }
7949 
7950 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7951 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7952   if (! parser)
7953     return 0;
7954   return parser->m_accounting.countBytesIndirect;
7955 }
7956 
7957 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7958 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7959                           const char *action, int sourceLine) {
7960   assert(! rootParser->m_parentParser);
7961   if (rootParser->m_entity_stats.debugLevel == 0u)
7962     return;
7963 
7964 #  if defined(XML_UNICODE)
7965   const char *const entityName = "[..]";
7966 #  else
7967   const char *const entityName = entity->name;
7968 #  endif
7969 
7970   fprintf(
7971       stderr,
7972       "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7973       (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7974       rootParser->m_entity_stats.currentDepth,
7975       rootParser->m_entity_stats.maximumDepthSeen,
7976       (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7977       entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7978       sourceLine);
7979 }
7980 
7981 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7982 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7983   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7984   assert(! rootParser->m_parentParser);
7985 
7986   rootParser->m_entity_stats.countEverOpened++;
7987   rootParser->m_entity_stats.currentDepth++;
7988   if (rootParser->m_entity_stats.currentDepth
7989       > rootParser->m_entity_stats.maximumDepthSeen) {
7990     rootParser->m_entity_stats.maximumDepthSeen++;
7991   }
7992 
7993   entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7994 }
7995 
7996 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7997 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7998   const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7999   assert(! rootParser->m_parentParser);
8000 
8001   entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8002   rootParser->m_entity_stats.currentDepth--;
8003 }
8004 
8005 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8006 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8007   XML_Parser rootParser = parser;
8008   unsigned int stepsTakenUpwards = 0;
8009   while (rootParser->m_parentParser) {
8010     rootParser = rootParser->m_parentParser;
8011     stepsTakenUpwards++;
8012   }
8013   assert(! rootParser->m_parentParser);
8014   if (outLevelDiff != NULL) {
8015     *outLevelDiff = stepsTakenUpwards;
8016   }
8017   return rootParser;
8018 }
8019 
/* Map a byte value to a printable, zero-terminated string with static
   storage duration.

   Printable ASCII characters map to themselves, except that the double
   quote and the backslash are returned backslash-escaped.  NUL, tab,
   newline and carriage return map to "\0", "\t", "\n" and "\r"
   (as literal backslash sequences); every other byte maps to "\x" followed
   by its value in upper-case hexadecimal without leading zeros. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value, indexed by that value */
  static const char *const printable[] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  const size_t index = c;
  if (index >= sizeof(printable) / sizeof(printable[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return printable[index];
}
8541 
8542 #endif /* XML_GE == 1 */
8543 
/* Read a debug level from the environment variable variableName.

   Returns defaultDebugLevel if the variable is not set, or if its value is
   not accepted in full by strtoul (base 10): no digits consumed, trailing
   characters after the number, or strtoul reporting an error through errno.
   Otherwise returns the parsed value.

   errno is only used internally to detect a strtoul failure; the value it
   had on entry is restored before returning, so callers never see their
   errno cleared or overwritten by this function. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul signals range errors only via errno, so it has to be zeroed
     for the call; remember the caller's value to put it back afterwards */
  const int savedErrno = errno;
  errno = 0;
  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);
  const int parseFailed
      = (errno != 0) || (afterValue == value) || (afterValue[0] != '\0');
  errno = savedErrno;

  return parseFailed ? defaultDebugLevel : debugLevel;
}
8562