1 /* a5d18f6a50f536615ac1c70304f87d94f99cc85a86b502188952440610ccf0f8 (2.8.0+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Copyright (c) 2025-2026 Matthew Fernandez <matthew.fernandez@gmail.com>
45 Copyright (c) 2025 Atrem Borovik <polzovatellllk@gmail.com>
46 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com>
47 Copyright (c) 2026 Rosen Penev <rosenp@gmail.com>
48 Copyright (c) 2026 Francesco Bertolaccini
49 Copyright (c) 2026 Christian Ng <christianrng@berkeley.edu>
50 Licensed under the MIT license:
51
52 Permission is hereby granted, free of charge, to any person obtaining
53 a copy of this software and associated documentation files (the
54 "Software"), to deal in the Software without restriction, including
55 without limitation the rights to use, copy, modify, merge, publish,
56 distribute, sublicense, and/or sell copies of the Software, and to permit
57 persons to whom the Software is furnished to do so, subject to the
58 following conditions:
59
60 The above copyright notice and this permission notice shall be included
61 in all copies or substantial portions of the Software.
62
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
66 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
67 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
68 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
69 USE OR OTHER DEALINGS IN THE SOFTWARE.
70 */
71
72 #define XML_BUILDING_EXPAT 1
73
74 #include "expat_config.h"
75
76 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
77 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
78 #endif
79
80 #if defined(XML_DTD) && XML_GE == 0
81 # error Either undefine XML_DTD or define XML_GE to 1.
82 #endif
83
84 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
85 || (XML_CONTEXT_BYTES + 0 < 0)
86 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
87 #endif
88
89 #include <stdbool.h>
90 #include <stddef.h>
91 #include <string.h> /* memset(), memcpy() */
92 #include <assert.h>
93 #include <limits.h> /* INT_MAX, UINT_MAX */
94 #include <stdio.h> /* fprintf */
95 #include <stdlib.h> /* getenv */
96 #include <stdint.h> /* SIZE_MAX, uintptr_t */
97 #include <math.h> /* isnan */
98 #include <errno.h>
99
100 #ifdef _WIN32
101 # define getpid GetCurrentProcessId
102 #else
103 # include <sys/time.h> /* gettimeofday() */
104 # include <sys/types.h> /* getpid() */
105 # include <unistd.h> /* getpid() */
106 # include <fcntl.h> /* O_RDONLY */
107 # include <errno.h>
108 #endif
109
110 #ifdef _WIN32
111 # include "winconfig.h"
112 #endif
113
114 #include "ascii.h"
115 #include "expat.h"
116 #include "siphash.h"
117
118 #if defined(HAVE_ARC4RANDOM)
119 # include "random_arc4random.h"
120 #endif /* defined(HAVE_ARC4RANDOM) */
121
122 #if defined(HAVE_ARC4RANDOM_BUF)
123 # include "random_arc4random_buf.h"
124 #endif // defined(HAVE_ARC4RANDOM_BUF)
125
126 #if defined(XML_DEV_URANDOM)
127 # include "random_dev_urandom.h"
128 #endif /* defined(XML_DEV_URANDOM) */
129
130 #if defined(HAVE_GETENTROPY)
131 # include "random_getentropy.h"
132 #endif // defined(HAVE_GETENTROPY)
133
134 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
135 # include "random_getrandom.h"
136 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
137
138 #if defined(_WIN32)
139 # include "random_rand_s.h"
140 #endif /* defined(_WIN32) */
141
142 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
143 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
144 && ! defined(HAVE_GETENTROPY) && ! defined(XML_DEV_URANDOM) \
145 && ! defined(_WIN32) && ! defined(XML_POOR_ENTROPY)
146 # error You do not have support for any sources of high quality entropy \
147 enabled. For end user security, that is probably not what you want. \
148 \
149 Your options include: \
150 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
151 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
152 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
153 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
154 * BSD / macOS >=10.12 / glibc >=2.25 (getentropy): HAVE_GETENTROPY, \
155 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
156 * Windows >=Vista (rand_s): _WIN32. \
157 \
158 If you insist on not using any of these, bypass this error by defining \
159 XML_POOR_ENTROPY; you have been warned. \
160 \
161 If you have reasons to patch this detection code away or need changes \
162 to the build system, please open a bug. Thank you!
163 #endif
164
165 #ifdef XML_UNICODE
166 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
167 # define XmlConvert XmlUtf16Convert
168 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
169 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
170 # define XmlEncode XmlUtf16Encode
171 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
172 typedef unsigned short ICHAR;
173 #else
174 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
175 # define XmlConvert XmlUtf8Convert
176 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
177 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
178 # define XmlEncode XmlUtf8Encode
179 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
180 typedef char ICHAR;
181 #endif
182
183 #ifndef XML_NS
184
185 # define XmlInitEncodingNS XmlInitEncoding
186 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
187 # undef XmlGetInternalEncodingNS
188 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
189 # define XmlParseXmlDeclNS XmlParseXmlDecl
190
191 #endif
192
193 #ifdef XML_UNICODE
194
195 # ifdef XML_UNICODE_WCHAR_T
196 # define XML_T(x) (const wchar_t) x
197 # define XML_L(x) L##x
198 # else
199 # define XML_T(x) (const unsigned short)x
200 # define XML_L(x) x
201 # endif
202
203 #else
204
205 # define XML_T(x) x
206 # define XML_L(x) x
207
208 #endif
209
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Note that sz is evaluated twice, so it must not have side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: yields (p - q) only when both
   pointers are non-NULL, and 0 as soon as either one is NULL. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* Smaller of a and b as compared with "<".  One of the two arguments is
   evaluated twice, so avoid operands with side effects. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
217
218 #include "internal.h"
219 #include "xmltok.h"
220 #include "xmlrole.h"
221
222 typedef const XML_Char *KEY;
223
224 typedef struct {
225 KEY name;
226 } NAMED;
227
/* Hash table of NAMED entries, initialized through hashTableInit() and
   queried through lookup().  Collisions are resolved by probing; see the
   notes on SECOND_HASH / PROBE_STEP. */
typedef struct {
  NAMED **v;           /* array of entry pointers (the table slots) */
  unsigned char power; /* presumably log2(size) -- TODO confirm in lookup() */
  size_t size;         /* table size; a power of 2 per the probing notes */
  size_t used;         /* presumably the number of occupied slots -- confirm */
  XML_Parser parser;   /* parser handed to hashTableInit(); presumably used
                          for allocations -- confirm */
} HASH_TABLE;
235
236 static size_t keylen(KEY s);
237
238 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
239
/* For probing (after a collision) we need a step size that is relatively
   prime to the hash table size, which is a power of 2.  We use
   double-hashing, since we can calculate a second hash value cheaply by
   taking those bits of the first hash value that were discarded (masked
   out) when the table index was calculated: index = hash & mask, where
   mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH extracts the discarded bits and clamps them with (mask >> 2).
   PROBE_STEP forces the result odd with "| 1" and narrows it to
   unsigned char, so the step never exceeds 255.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
252
253 typedef struct {
254 NAMED **p;
255 NAMED **end;
256 } HASH_TABLE_ITER;
257
258 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
259 #define INIT_DATA_BUF_SIZE 1024
260 #define INIT_ATTS_SIZE 16
261 #define INIT_ATTS_VERSION 0xFFFFFFFF
262 #define INIT_BLOCK_SIZE 1024
263 #define INIT_BUFFER_SIZE 1024
264
265 #define EXPAND_SPARE 24
266
267 typedef struct binding {
268 struct prefix *prefix;
269 struct binding *nextTagBinding;
270 struct binding *prevPrefixBinding;
271 const struct attribute_id *attId;
272 XML_Char *uri;
273 int uriLen;
274 int uriAlloc;
275 } BINDING;
276
277 typedef struct prefix {
278 const XML_Char *name;
279 BINDING *binding;
280 } PREFIX;
281
282 typedef struct {
283 const XML_Char *str;
284 const XML_Char *localPart;
285 const XML_Char *prefix;
286 int strLen;
287 int uriLen;
288 int prefixLen;
289 } TAG_NAME;
290
291 /* TAG represents an open element.
292 The name of the element is stored in both the document and API
293 encodings. The memory buffer 'buf' is a separately-allocated
294 memory area which stores the name. During the XML_Parse()/
295 XML_ParseBuffer() when the element is open, the memory for the 'raw'
296 version of the name (in the document encoding) is shared with the
297 document buffer. If the element is open across calls to
298 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
299 contain the 'raw' name as well.
300
301 A parser reuses these structures, maintaining a list of allocated
302 TAG objects in a free list.
303 */
304 typedef struct tag {
305 struct tag *parent; /* parent of this element */
306 const char *rawName; /* tagName in the original encoding */
307 int rawNameLength;
308 TAG_NAME name; /* tagName in the API encoding */
309 union {
310 char *raw; /* for byte-level access (rawName storage) */
311 XML_Char *str; /* for character-level access (converted name) */
312 } buf; /* buffer for name components */
313 char *bufEnd; /* end of the buffer */
314 BINDING *bindings;
315 } TAG;
316
317 typedef struct {
318 const XML_Char *name;
319 const XML_Char *textPtr;
320 int textLen; /* length in XML_Chars */
321 int processed; /* # of processed bytes - when suspended */
322 const XML_Char *systemId;
323 const XML_Char *base;
324 const XML_Char *publicId;
325 const XML_Char *notation;
326 XML_Bool open;
327 XML_Bool hasMore; /* true if entity has not been completely processed */
328 /* An entity can be open while being already completely processed (hasMore ==
329 XML_FALSE). The reason is the delayed closing of entities until their inner
330 entities are processed and closed */
331 XML_Bool is_param;
332 XML_Bool is_internal; /* true if declared in internal subset outside PE */
333 } ENTITY;
334
335 typedef struct {
336 enum XML_Content_Type type;
337 enum XML_Content_Quant quant;
338 const XML_Char *name;
339 int firstchild;
340 int lastchild;
341 int childcnt;
342 int nextsib;
343 } CONTENT_SCAFFOLD;
344
345 #define INIT_SCAFFOLD_ELEMENTS 32
346
/* One chunk of character storage owned by a STRING_POOL (see the "blocks"
   and "freeBlocks" members there).  Chunks are chained through "next". */
typedef struct block {
  struct block *next; /* next chunk in the same singly linked list */
  int size;           /* presumably the capacity of s[] in XML_Chars --
                         TODO confirm against poolGrow() */
  XML_Char s[];       /* flexible array member holding the characters */
} BLOCK;
352
/* Growable pool in which strings are built one XML_Char at a time.
   The pool* accessor macros define how the three cursor members relate:
   the string under construction occupies [start, ptr), and a new character
   can be stored without growing the pool as long as ptr != end. */
typedef struct {
  BLOCK *blocks;       /* presumably the chunks currently holding data --
                          confirm against poolGrow() / poolClear() */
  BLOCK *freeBlocks;   /* presumably chunks kept around for reuse -- confirm */
  const XML_Char *end; /* limit of the writable area; poolAppendChar() calls
                          poolGrow() once ptr reaches it */
  XML_Char *ptr;       /* position where the next character is written */
  XML_Char *start;     /* first character of the string being built;
                          poolFinish() advances it to ptr, poolDiscard()
                          rewinds ptr back to it */
  XML_Parser parser;   /* parser handed to poolInit(); presumably used for
                          allocations -- confirm */
} STRING_POOL;
361
362 /* The XML_Char before the name is used to determine whether
363 an attribute has been specified. */
364 typedef struct attribute_id {
365 XML_Char *name;
366 PREFIX *prefix;
367 XML_Bool maybeTokenized;
368 XML_Bool xmlns;
369 } ATTRIBUTE_ID;
370
371 typedef struct {
372 const ATTRIBUTE_ID *id;
373 XML_Bool isCdata;
374 const XML_Char *value;
375 } DEFAULT_ATTRIBUTE;
376
377 typedef struct {
378 unsigned long version;
379 unsigned long hash;
380 const XML_Char *uriName;
381 } NS_ATT;
382
383 typedef struct {
384 const XML_Char *name;
385 PREFIX *prefix;
386 const ATTRIBUTE_ID *idAtt;
387 int nDefaultAtts;
388 int allocDefaultAtts;
389 DEFAULT_ATTRIBUTE *defaultAtts;
390 } ELEMENT_TYPE;
391
392 typedef struct {
393 HASH_TABLE generalEntities;
394 HASH_TABLE elementTypes;
395 HASH_TABLE attributeIds;
396 HASH_TABLE prefixes;
397 STRING_POOL pool;
398 STRING_POOL entityValuePool;
399 /* false once a parameter entity reference has been skipped */
400 XML_Bool keepProcessing;
401 /* true once an internal or external PE reference has been encountered;
402 this includes the reference to an external subset */
403 XML_Bool hasParamEntityRefs;
404 XML_Bool standalone;
405 #ifdef XML_DTD
406 /* indicates if external PE has been read */
407 XML_Bool paramEntityRead;
408 HASH_TABLE paramEntities;
409 #endif /* XML_DTD */
410 PREFIX defaultPrefix;
411 /* === scaffolding for building content model === */
412 XML_Bool in_eldecl;
413 CONTENT_SCAFFOLD *scaffold;
414 unsigned contentStringLen;
415 unsigned scaffSize;
416 unsigned scaffCount;
417 int scaffLevel;
418 int *scaffIndex;
419 } DTD;
420
421 enum EntityType {
422 ENTITY_INTERNAL,
423 ENTITY_ATTRIBUTE,
424 ENTITY_VALUE,
425 };
426
/* List node describing one entity that is being processed.  The parser
   struct keeps separate "open" and "free" lists of these nodes for each
   EntityType (internal, attribute, value), chained through "next". */
typedef struct open_internal_entity {
  const char *internalEventPtr;    /* presumably start of the current event
                                      inside the entity text -- confirm */
  const char *internalEventEndPtr; /* presumably end of that event -- confirm */
  struct open_internal_entity *next; /* next node in the same list */
  ENTITY *entity;                  /* the entity this node refers to */
  int startTagLevel;               /* presumably the tag nesting level at the
                                      time the entity was opened -- confirm */
  XML_Bool betweenDecl;            /* WFC: PE Between Declarations */
  enum EntityType type;            /* which kind of entity processing this
                                      node belongs to */
} OPEN_INTERNAL_ENTITY;
436
437 enum XML_Account {
438 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
439 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
440 expansion */
441 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
442 };
443
#if XML_GE == 1
typedef unsigned long long XmlBigCount;

/* Byte accounting for input processed by the parser.  countBytesDirect is
   also the denominator of the allocation amplification ratio computed in
   expat_heap_stat() and expat_heap_increase_tolerable(). */
typedef struct accounting {
  XmlBigCount countBytesDirect;   /* see XML_ACCOUNT_DIRECT */
  XmlBigCount countBytesIndirect; /* presumably bytes accounted as
                                     XML_ACCOUNT_ENTITY_EXPANSION -- confirm */
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* State of the heap-usage limiter.  The allocation helpers consult and
   update the instance stored in the root parser. */
typedef struct MALLOC_TRACKER {
  XmlBigCount bytesAllocated; // running total; expat_malloc() adds the
                              // requested size plus its per-block header
  XmlBigCount peakBytesAllocated; // updated live only for debug level >=2
  unsigned long debugLevel; // >=1: report rejected increases,
                            // >=2: also report every successful allocation
  float maximumAmplificationFactor; // >=1.0; upper bound for the ratio
                                    // (new total / countBytesDirect)
  XmlBigCount activationThresholdBytes; // the ratio is only enforced once
                                        // the new total reaches this value
} MALLOC_TRACKER;

/* Counters about entity usage; presumably maintained by the
   entityTracking*() helpers declared further down -- confirm. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
#endif /* XML_GE == 1 */
469
470 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
471 const char *end, const char **endPtr);
472
473 static Processor prologProcessor;
474 static Processor prologInitProcessor;
475 static Processor contentProcessor;
476 static Processor cdataSectionProcessor;
477 #ifdef XML_DTD
478 static Processor ignoreSectionProcessor;
479 static Processor externalParEntProcessor;
480 static Processor externalParEntInitProcessor;
481 static Processor entityValueProcessor;
482 static Processor entityValueInitProcessor;
483 #endif /* XML_DTD */
484 static Processor epilogProcessor;
485 static Processor errorProcessor;
486 static Processor externalEntityInitProcessor;
487 static Processor externalEntityInitProcessor2;
488 static Processor externalEntityInitProcessor3;
489 static Processor externalEntityContentProcessor;
490 static Processor internalEntityProcessor;
491
492 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
493 const XML_Char *encodingName);
494 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
495 const char *s, const char *next);
496 static enum XML_Error initializeEncoding(XML_Parser parser);
497 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
498 const char *s, const char *end, int tok,
499 const char *next, const char **nextPtr,
500 XML_Bool haveMore, XML_Bool allowClosingDoctype,
501 enum XML_Account account);
502 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
503 XML_Bool betweenDecl, enum EntityType type);
504 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
505 const ENCODING *enc, const char *start,
506 const char *end, const char **endPtr,
507 XML_Bool haveMore, enum XML_Account account);
508 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
509 const char **startPtr, const char *end,
510 const char **nextPtr, XML_Bool haveMore,
511 enum XML_Account account);
512 #ifdef XML_DTD
513 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
514 const char **startPtr, const char *end,
515 const char **nextPtr, XML_Bool haveMore);
516 #endif /* XML_DTD */
517
518 static void freeBindings(XML_Parser parser, BINDING *bindings);
519 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
520 const char *attStr, TAG_NAME *tagNamePtr,
521 BINDING **bindingsPtr,
522 enum XML_Account account);
523 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
524 const ATTRIBUTE_ID *attId, const XML_Char *uri,
525 BINDING **bindingsPtr);
526 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
527 XML_Bool isCdata, XML_Bool isId,
528 const XML_Char *value, XML_Parser parser);
529 static enum XML_Error storeAttributeValue(XML_Parser parser,
530 const ENCODING *enc, XML_Bool isCdata,
531 const char *ptr, const char *end,
532 STRING_POOL *pool,
533 enum XML_Account account);
534 static enum XML_Error
535 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
536 const char *ptr, const char *end, STRING_POOL *pool,
537 enum XML_Account account, const char **nextPtr);
538 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
539 const char *start, const char *end);
540 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
541 #if XML_GE == 1
542 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
543 const char *start, const char *end,
544 enum XML_Account account,
545 const char **nextPtr);
546 static enum XML_Error callStoreEntityValue(XML_Parser parser,
547 const ENCODING *enc,
548 const char *start, const char *end,
549 enum XML_Account account);
550 #else
551 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
552 #endif
553 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
554 const char *start, const char *end);
555 static int reportComment(XML_Parser parser, const ENCODING *enc,
556 const char *start, const char *end);
557 static void reportDefault(XML_Parser parser, const ENCODING *enc,
558 const char *start, const char *end);
559
560 static const XML_Char *getContext(XML_Parser parser);
561 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
562
563 static void FASTCALL normalizePublicId(XML_Char *s);
564
565 static DTD *dtdCreate(XML_Parser parser);
566 /* do not call if m_parentParser != NULL */
567 static void dtdReset(DTD *p, XML_Parser parser);
568 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser);
569 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
570 XML_Parser parser);
571 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
572 STRING_POOL *newPool, const HASH_TABLE *oldTable);
573 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
574 size_t createSize);
575 static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser);
576 static void FASTCALL hashTableClear(HASH_TABLE *table);
577 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
578 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
579 const HASH_TABLE *table);
580 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
581
582 static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser);
583 static void FASTCALL poolClear(STRING_POOL *pool);
584 static void FASTCALL poolDestroy(STRING_POOL *pool);
585 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
586 const char *ptr, const char *end);
587 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
588 const char *ptr, const char *end);
589 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
590 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
591 const XML_Char *s);
592 static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool,
593 const XML_Char *s);
594 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
595 int n);
596 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
597 const XML_Char *s);
598
599 static int FASTCALL nextScaffoldPart(XML_Parser parser);
600 static XML_Content *build_model(XML_Parser parser);
601 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
602 const char *ptr, const char *end);
603
604 static XML_Char *copyString(const XML_Char *s, XML_Parser parser);
605
606 static struct sipkey generate_hash_secret_salt(void);
607 static XML_Bool startParsing(XML_Parser parser);
608
609 static XML_Parser parserCreate(const XML_Char *encodingName,
610 const XML_Memory_Handling_Suite *memsuite,
611 const XML_Char *nameSep, DTD *dtd,
612 XML_Parser parentParser);
613
614 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
615
616 #if XML_GE == 1
617 static float accountingGetCurrentAmplification(XML_Parser rootParser);
618 static void accountingReportStats(XML_Parser originParser, const char *epilog);
619 static void accountingOnAbort(XML_Parser originParser);
620 static void accountingReportDiff(XML_Parser rootParser,
621 unsigned int levelsAwayFromRootParser,
622 const char *before, const char *after,
623 ptrdiff_t bytesMore, int source_line,
624 enum XML_Account account);
625 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
626 const char *before, const char *after,
627 int source_line,
628 enum XML_Account account);
629
630 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
631 const char *action, int sourceLine);
632 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
633 int sourceLine);
634 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
635 int sourceLine);
636 #endif /* XML_GE == 1 */
637
638 static XML_Parser getRootParserOf(XML_Parser parser,
639 unsigned int *outLevelDiff);
640
641 static unsigned long getDebugLevel(const char *variableName,
642 unsigned long defaultDebugLevel);
643
/* Accessors for STRING_POOL.  Each macro takes a pointer-valued expression
   as "pool"; all arguments are fully parenthesized so that expressions such
   as "&somePool" expand correctly.  "pool" may be evaluated more than once.

   poolStart      - first character of the string under construction
   poolLength     - number of XML_Chars in the string under construction
   poolChop       - drop the last character (caller ensures there is one)
   poolLastChar   - the most recently appended character (lvalue)
   poolDiscard    - throw away the string under construction
   poolFinish     - commit the string; the next string starts after it
   poolAppendChar - append c, growing the pool via poolGrow() when it is
                    full; evaluates to 1 on success and 0 if growing failed
*/
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
654
655 #if ! defined(XML_TESTING)
656 const
657 #endif
658 XML_Bool g_reparseDeferralEnabledDefault
659 = XML_TRUE; // write ONLY in runtests.c
660 #if defined(XML_TESTING)
661 unsigned int g_bytesScanned = 0; // used for testing only
662 #endif
663
/* Complete state of one parser instance (the object behind XML_Parser).
   A parser whose m_parentParser is NULL is a root parser; the allocation
   helpers keep their bookkeeping (m_accounting, m_alloc_tracker) in the
   root parser obtained via getRootParserOf(). */
struct XML_ParserStruct {
  /* The first member must be m_userData so that the XML_GetUserData
     macro works. */
  void *m_userData;
  void *m_handlerArg;

  // How the four parse buffer pointers below relate in time and space:
  //
  //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
  //   |           |              |               |
  //   <--parsed-->|              |               |
  //               <---parsing--->|               |
  //                              <--unoccupied-->|
  //   <---------total-malloced/realloced-------->|

  char *m_buffer; // malloc/realloc base pointer of parse buffer
  const XML_Memory_Handling_Suite m_mem; // allocator callbacks; expat_malloc()
                                         // calls m_mem.malloc_fcn
  const char *m_bufferPtr; // first character to be parsed
  char *m_bufferEnd;       // past last character to be parsed
  const char *m_bufferLim; // allocated end of m_buffer

  XML_Index m_parseEndByteIndex;
  const char *m_parseEndPtr;
  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
  XML_Bool m_reparseDeferralEnabled;
  int m_lastBufferRequestSize;
  XML_Char *m_dataBuf;
  XML_Char *m_dataBufEnd;
  /* Handler callbacks */
  XML_StartElementHandler m_startElementHandler;
  XML_EndElementHandler m_endElementHandler;
  XML_CharacterDataHandler m_characterDataHandler;
  XML_ProcessingInstructionHandler m_processingInstructionHandler;
  XML_CommentHandler m_commentHandler;
  XML_StartCdataSectionHandler m_startCdataSectionHandler;
  XML_EndCdataSectionHandler m_endCdataSectionHandler;
  XML_DefaultHandler m_defaultHandler;
  XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
  XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
  XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
  XML_NotationDeclHandler m_notationDeclHandler;
  XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
  XML_NotStandaloneHandler m_notStandaloneHandler;
  XML_ExternalEntityRefHandler m_externalEntityRefHandler;
  XML_Parser m_externalEntityRefHandlerArg;
  XML_SkippedEntityHandler m_skippedEntityHandler;
  XML_UnknownEncodingHandler m_unknownEncodingHandler;
  XML_ElementDeclHandler m_elementDeclHandler;
  XML_AttlistDeclHandler m_attlistDeclHandler;
  XML_EntityDeclHandler m_entityDeclHandler;
  XML_XmlDeclHandler m_xmlDeclHandler;
  /* Encoding state */
  const ENCODING *m_encoding;
  INIT_ENCODING m_initEncoding;
  const ENCODING *m_internalEncoding;
  const XML_Char *m_protocolEncodingName;
  XML_Bool m_ns;
  XML_Bool m_ns_triplets;
  void *m_unknownEncodingMem;
  void *m_unknownEncodingData;
  void *m_unknownEncodingHandlerData;
  void(XMLCALL *m_unknownEncodingRelease)(void *);
  PROLOG_STATE m_prologState;
  Processor *m_processor; /* one of the Processor functions declared above */
  enum XML_Error m_errorCode;
  const char *m_eventPtr;
  const char *m_eventEndPtr;
  const char *m_positionPtr;
  /* One "open" and one "free" list of OPEN_INTERNAL_ENTITY nodes per
     enum EntityType */
  OPEN_INTERNAL_ENTITY *m_openInternalEntities;
  OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
  OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_openValueEntities;
  OPEN_INTERNAL_ENTITY *m_freeValueEntities;
  XML_Bool m_defaultExpandInternalEntities;
  int m_tagLevel;
  ENTITY *m_declEntity;
  const XML_Char *m_doctypeName;
  const XML_Char *m_doctypeSysid;
  const XML_Char *m_doctypePubid;
  const XML_Char *m_declAttributeType;
  const XML_Char *m_declNotationName;
  const XML_Char *m_declNotationPublicId;
  ELEMENT_TYPE *m_declElementType;
  ATTRIBUTE_ID *m_declAttributeId;
  XML_Bool m_declAttributeIsCdata;
  XML_Bool m_declAttributeIsId;
  DTD *m_dtd;
  const XML_Char *m_curBase;
  TAG *m_tagStack;
  TAG *m_freeTagList;
  BINDING *m_inheritedBindings;
  BINDING *m_freeBindingList;
  int m_attsSize;
  int m_nSpecifiedAtts;
  int m_idAttIndex;
  ATTRIBUTE *m_atts;
  NS_ATT *m_nsAtts;
  unsigned long m_nsAttsVersion;
  unsigned char m_nsAttsPower;
#ifdef XML_ATTR_INFO
  XML_AttrInfo *m_attInfo;
#endif
  POSITION m_position;
  STRING_POOL m_tempPool;
  STRING_POOL m_temp2Pool;
  char *m_groupConnector;
  unsigned int m_groupSize;
  XML_Char m_namespaceSeparator;
  XML_Parser m_parentParser; /* NULL for a root parser */
  XML_ParsingStatus m_parsingStatus;
#ifdef XML_DTD
  XML_Bool m_isParamEntity;
  XML_Bool m_useForeignDTD;
  enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
  struct sipkey m_hash_secret_salt_128;
  XML_Bool m_hash_secret_salt_set;
#if XML_GE == 1
  ACCOUNTING m_accounting;
  MALLOC_TRACKER m_alloc_tracker; /* consulted/updated on the root parser by
                                     the expat_malloc() family */
  ENTITY_STATS m_entity_stats;
#endif
  XML_Bool m_reenter;
};
788
/* Allocation entry points used throughout this file.

   With XML_GE == 1 every request is routed through the tracking wrappers
   (expat_malloc() and friends), which also receive the calling source line
   for debug reports.  Otherwise the parser's memory handling suite is
   called directly.

   All macro arguments are parenthesized in both variants, so that "parser"
   may be any pointer-valued expression (e.g. "&localParserStruct"). */
#if XML_GE == 1
#  define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__))
#  define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__))
#  define FREE(parser, p) (expat_free((parser), (p), __LINE__))
#else
#  define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#  define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#  define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
#endif
798
799 #if XML_GE == 1
800 static void
expat_heap_stat(XML_Parser rootParser,char operator,XmlBigCount absDiff,XmlBigCount newTotal,XmlBigCount peakTotal,int sourceLine)801 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff,
802 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) {
803 // NOTE: This can be +infinity or -nan
804 const float amplification
805 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
806 fprintf(
807 stderr,
808 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL(
809 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n",
810 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator,
811 absDiff, newTotal, peakTotal, (double)amplification, sourceLine);
812 }
813
814 static bool
expat_heap_increase_tolerable(XML_Parser rootParser,XmlBigCount increase,int sourceLine)815 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase,
816 int sourceLine) {
817 assert(rootParser != NULL);
818 assert(increase > 0);
819
820 XmlBigCount newTotal = 0;
821 bool tolerable = true;
822
823 // Detect integer overflow
824 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) {
825 tolerable = false;
826 } else {
827 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase;
828
829 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) {
830 assert(newTotal > 0);
831 // NOTE: This can be +infinity when dividing by zero but not -nan
832 const float amplification
833 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
834 if (amplification
835 > rootParser->m_alloc_tracker.maximumAmplificationFactor) {
836 tolerable = false;
837 }
838 }
839 }
840
841 if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) {
842 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine);
843 }
844
845 return tolerable;
846 }
847
848 # if defined(XML_TESTING)
849 void *
850 # else
851 static void *
852 # endif
expat_malloc(XML_Parser parser,size_t size,int sourceLine)853 expat_malloc(XML_Parser parser, size_t size, int sourceLine) {
854 // Detect integer overflow
855 if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) {
856 return NULL;
857 }
858
859 const XML_Parser rootParser = getRootParserOf(parser, NULL);
860 assert(rootParser->m_parentParser == NULL);
861
862 const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size;
863
864 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
865 < bytesToAllocate) {
866 return NULL; // i.e. signal integer overflow as out-of-memory
867 }
868
869 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate,
870 sourceLine)) {
871 return NULL; // i.e. signal violation as out-of-memory
872 }
873
874 // Actually allocate
875 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate);
876
877 if (mallocedPtr == NULL) {
878 return NULL;
879 }
880
881 // Update in-block recorded size
882 *(size_t *)mallocedPtr = size;
883
884 // Update accounting
885 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate;
886
887 // Report as needed
888 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
889 if (rootParser->m_alloc_tracker.bytesAllocated
890 > rootParser->m_alloc_tracker.peakBytesAllocated) {
891 rootParser->m_alloc_tracker.peakBytesAllocated
892 = rootParser->m_alloc_tracker.bytesAllocated;
893 }
894 expat_heap_stat(rootParser, '+', bytesToAllocate,
895 rootParser->m_alloc_tracker.bytesAllocated,
896 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
897 }
898
899 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
900 }
901
902 # if defined(XML_TESTING)
903 void
904 # else
905 static void
906 # endif
expat_free(XML_Parser parser,void * ptr,int sourceLine)907 expat_free(XML_Parser parser, void *ptr, int sourceLine) {
908 assert(parser != NULL);
909
910 if (ptr == NULL) {
911 return;
912 }
913
914 const XML_Parser rootParser = getRootParserOf(parser, NULL);
915 assert(rootParser->m_parentParser == NULL);
916
917 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and
918 // the original pointer returned by malloc/realloc
919 void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
920 const size_t bytesAllocated
921 = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr;
922
923 // Update accounting
924 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated);
925 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated;
926
927 // Report as needed
928 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
929 expat_heap_stat(rootParser, '-', bytesAllocated,
930 rootParser->m_alloc_tracker.bytesAllocated,
931 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
932 }
933
934 // NOTE: This may be freeing rootParser, so freeing has to come last
935 parser->m_mem.free_fcn(mallocedPtr);
936 }
937
938 # if defined(XML_TESTING)
939 void *
940 # else
941 static void *
942 # endif
expat_realloc(XML_Parser parser,void * ptr,size_t size,int sourceLine)943 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) {
944 assert(parser != NULL);
945
946 if (ptr == NULL) {
947 return expat_malloc(parser, size, sourceLine);
948 }
949
950 if (size == 0) {
951 expat_free(parser, ptr, sourceLine);
952 return NULL;
953 }
954
955 const XML_Parser rootParser = getRootParserOf(parser, NULL);
956 assert(rootParser->m_parentParser == NULL);
957
958 // Extract original size (to the eyes of the caller) and the original
959 // pointer returned by malloc/realloc
960 void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
961 const size_t prevSize = *(size_t *)mallocedPtr;
962
963 // Classify upcoming change
964 const bool isIncrease = (size > prevSize);
965 const size_t absDiff
966 = (size > prevSize) ? (size - prevSize) : (prevSize - size);
967
968 // Ask for permission from accounting
969 if (isIncrease) {
970 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) {
971 return NULL; // i.e. signal violation as out-of-memory
972 }
973 }
974
975 // NOTE: Integer overflow detection has already been done for us
976 // by expat_heap_increase_tolerable(..) above
977 assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size);
978
979 // Actually allocate
980 mallocedPtr = parser->m_mem.realloc_fcn(
981 mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size);
982
983 if (mallocedPtr == NULL) {
984 return NULL;
985 }
986
987 // Update accounting
988 if (isIncrease) {
989 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
990 >= absDiff);
991 rootParser->m_alloc_tracker.bytesAllocated += absDiff;
992 } else { // i.e. decrease
993 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff);
994 rootParser->m_alloc_tracker.bytesAllocated -= absDiff;
995 }
996
997 // Report as needed
998 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
999 if (rootParser->m_alloc_tracker.bytesAllocated
1000 > rootParser->m_alloc_tracker.peakBytesAllocated) {
1001 rootParser->m_alloc_tracker.peakBytesAllocated
1002 = rootParser->m_alloc_tracker.bytesAllocated;
1003 }
1004 expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff,
1005 rootParser->m_alloc_tracker.bytesAllocated,
1006 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
1007 }
1008
1009 // Update in-block recorded size
1010 *(size_t *)mallocedPtr = size;
1011
1012 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
1013 }
1014 #endif // XML_GE == 1
1015
1016 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)1017 XML_ParserCreate(const XML_Char *encodingName) {
1018 return XML_ParserCreate_MM(encodingName, NULL, NULL);
1019 }
1020
1021 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)1022 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
1023 XML_Char tmp[2] = {nsSep, 0};
1024 return XML_ParserCreate_MM(encodingName, NULL, tmp);
1025 }
1026
1027 // "xml=http://www.w3.org/XML/1998/namespace"
1028 static const XML_Char implicitContext[]
1029 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
1030 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
1031 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
1032 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
1033 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
1034 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
1035 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
1036 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
1037 '\0'};
1038
1039 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1040
/* Derives a small amount of entropy from the current time.

   On _WIN32 this is the XOR of the two halves of the current FILETIME;
   elsewhere it is the microseconds field reported by gettimeofday. */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of an
     unused-variable warning */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
1063
1064 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1065
1066 static struct sipkey
ENTROPY_DEBUG(const char * label,struct sipkey entropy_128)1067 ENTROPY_DEBUG(const char *label, struct sipkey entropy_128) {
1068 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
1069 fprintf(stderr,
1070 "expat: Entropy: %s --> [0x" EXPAT_FMT_LLX(
1071 "016") ", 0x" EXPAT_FMT_LLX("016") "] (16 bytes)\n",
1072 label, (unsigned long long)entropy_128.k[0],
1073 (unsigned long long)entropy_128.k[1]);
1074 }
1075 return entropy_128;
1076 }
1077
1078 static struct sipkey
generate_hash_secret_salt(void)1079 generate_hash_secret_salt(void) {
1080 struct sipkey entropy;
1081
1082 /* "Failproof" high quality providers: */
1083 #if defined(HAVE_ARC4RANDOM_BUF)
1084 writeRandomBytes_arc4random_buf(&entropy, sizeof(entropy));
1085 return ENTROPY_DEBUG("arc4random_buf", entropy);
1086 #elif defined(HAVE_ARC4RANDOM)
1087 writeRandomBytes_arc4random(&entropy, sizeof(entropy));
1088 return ENTROPY_DEBUG("arc4random", entropy);
1089 #else
1090 /* Try high quality providers first .. */
1091 # ifdef _WIN32
1092 if (writeRandomBytes_rand_s(&entropy, sizeof(entropy))) {
1093 return ENTROPY_DEBUG("rand_s", entropy);
1094 }
1095 # elif defined(HAVE_GETENTROPY)
1096 if (writeRandomBytes_getentropy(&entropy, sizeof(entropy))) {
1097 return ENTROPY_DEBUG("getentropy", entropy);
1098 }
1099 errno = 0;
1100 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1101 if (writeRandomBytes_getrandom_nonblock(&entropy, sizeof(entropy))) {
1102 return ENTROPY_DEBUG("getrandom", entropy);
1103 }
1104 # endif
1105 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1106 if (writeRandomBytes_dev_urandom(&entropy, sizeof(entropy))) {
1107 return ENTROPY_DEBUG("/dev/urandom", entropy);
1108 }
1109 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1110 /* .. and self-made low quality for backup: */
1111
1112 entropy.k[0] = 0;
1113 entropy.k[1] = gather_time_entropy();
1114 # if ! defined(__wasi__)
1115 /* Process ID is 0 bits entropy if attacker has local access */
1116 entropy.k[1] ^= getpid();
1117 # endif
1118
1119 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1120 if (sizeof(unsigned long) == 4) {
1121 entropy.k[1] *= 2147483647;
1122 return ENTROPY_DEBUG("fallback(4)", entropy);
1123 } else {
1124 entropy.k[1] *= 2305843009213693951ULL;
1125 return ENTROPY_DEBUG("fallback(8)", entropy);
1126 }
1127 #endif
1128 }
1129
1130 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1131 callProcessor(XML_Parser parser, const char *start, const char *end,
1132 const char **endPtr) {
1133 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1134
1135 if (parser->m_reparseDeferralEnabled
1136 && ! parser->m_parsingStatus.finalBuffer) {
1137 // Heuristic: don't try to parse a partial token again until the amount of
1138 // available data has increased significantly.
1139 const size_t had_before = parser->m_partialTokenBytesBefore;
1140 // ...but *do* try anyway if we're close to causing a reallocation.
1141 size_t available_buffer
1142 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1143 #if XML_CONTEXT_BYTES > 0
1144 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1145 #endif
1146 available_buffer
1147 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1148 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1149 const bool enough
1150 = (have_now >= 2 * had_before)
1151 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1152
1153 if (! enough) {
1154 *endPtr = start; // callers may expect this to be set
1155 return XML_ERROR_NONE;
1156 }
1157 }
1158 #if defined(XML_TESTING)
1159 g_bytesScanned += (unsigned)have_now;
1160 #endif
1161 // Run in a loop to eliminate dangerous recursion depths
1162 enum XML_Error ret;
1163 *endPtr = start;
1164 while (1) {
1165 // Use endPtr as the new start in each iteration, since it will
1166 // be set to the next start point by m_processor.
1167 ret = parser->m_processor(parser, *endPtr, end, endPtr);
1168
1169 // Make parsing status (and in particular XML_SUSPENDED) take
1170 // precedence over re-enter flag when they disagree
1171 if (parser->m_parsingStatus.parsing != XML_PARSING) {
1172 parser->m_reenter = XML_FALSE;
1173 }
1174
1175 if (! parser->m_reenter) {
1176 break;
1177 }
1178
1179 parser->m_reenter = XML_FALSE;
1180 if (ret != XML_ERROR_NONE)
1181 return ret;
1182 }
1183
1184 if (ret == XML_ERROR_NONE) {
1185 // if we consumed nothing, remember what we had on this parse attempt.
1186 if (*endPtr == start) {
1187 parser->m_partialTokenBytesBefore = have_now;
1188 } else {
1189 parser->m_partialTokenBytesBefore = 0;
1190 }
1191 }
1192 return ret;
1193 }
1194
1195 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1196 startParsing(XML_Parser parser) {
1197 /* hash functions must be initialized before setContext() is called */
1198 if (parser->m_hash_secret_salt_set != XML_TRUE) {
1199 parser->m_hash_secret_salt_128 = generate_hash_secret_salt();
1200 parser->m_hash_secret_salt_set = XML_TRUE;
1201 }
1202 if (parser->m_ns) {
1203 /* implicit context only set for root parser, since child
1204 parsers (i.e. external entity parsers) will inherit it
1205 */
1206 return setContext(parser, implicitContext);
1207 }
1208 return XML_TRUE;
1209 }
1210
1211 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1212 XML_ParserCreate_MM(const XML_Char *encodingName,
1213 const XML_Memory_Handling_Suite *memsuite,
1214 const XML_Char *nameSep) {
1215 return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
1216 }
1217
1218 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd,XML_Parser parentParser)1219 parserCreate(const XML_Char *encodingName,
1220 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1221 DTD *dtd, XML_Parser parentParser) {
1222 XML_Parser parser = NULL;
1223
1224 #if XML_GE == 1
1225 const size_t increase
1226 = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);
1227
1228 if (parentParser != NULL) {
1229 const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
1230 if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
1231 return NULL;
1232 }
1233 }
1234 #else
1235 UNUSED_P(parentParser);
1236 #endif
1237
1238 if (memsuite) {
1239 XML_Memory_Handling_Suite *mtemp;
1240 #if XML_GE == 1
1241 void *const sizeAndParser
1242 = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
1243 + sizeof(struct XML_ParserStruct));
1244 if (sizeAndParser != NULL) {
1245 *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1246 parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
1247 + EXPAT_MALLOC_PADDING);
1248 #else
1249 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1250 if (parser != NULL) {
1251 #endif
1252 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1253 mtemp->malloc_fcn = memsuite->malloc_fcn;
1254 mtemp->realloc_fcn = memsuite->realloc_fcn;
1255 mtemp->free_fcn = memsuite->free_fcn;
1256 }
1257 } else {
1258 XML_Memory_Handling_Suite *mtemp;
1259 #if XML_GE == 1
1260 void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
1261 + sizeof(struct XML_ParserStruct));
1262 if (sizeAndParser != NULL) {
1263 *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1264 parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
1265 + EXPAT_MALLOC_PADDING);
1266 #else
1267 parser = malloc(sizeof(struct XML_ParserStruct));
1268 if (parser != NULL) {
1269 #endif
1270 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1271 mtemp->malloc_fcn = malloc;
1272 mtemp->realloc_fcn = realloc;
1273 mtemp->free_fcn = free;
1274 }
1275 } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0
1276
1277 if (! parser)
1278 return parser;
1279
1280 #if XML_GE == 1
1281 // Initialize .m_alloc_tracker
1282 memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
1283 if (parentParser == NULL) {
1284 parser->m_alloc_tracker.debugLevel
1285 = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
1286 parser->m_alloc_tracker.maximumAmplificationFactor
1287 = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
1288 parser->m_alloc_tracker.activationThresholdBytes
1289 = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;
1290
1291 // NOTE: This initialization needs to come this early because these fields
1292 // are read by allocation tracking code
1293 parser->m_parentParser = NULL;
1294 parser->m_accounting.countBytesDirect = 0;
1295 } else {
1296 parser->m_parentParser = parentParser;
1297 }
1298
1299 // Record XML_ParserStruct allocation we did a few lines up before
1300 const XML_Parser rootParser = getRootParserOf(parser, NULL);
1301 assert(rootParser->m_parentParser == NULL);
1302 assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
1303 rootParser->m_alloc_tracker.bytesAllocated += increase;
1304
1305 // Report on allocation
1306 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
1307 if (rootParser->m_alloc_tracker.bytesAllocated
1308 > rootParser->m_alloc_tracker.peakBytesAllocated) {
1309 rootParser->m_alloc_tracker.peakBytesAllocated
1310 = rootParser->m_alloc_tracker.bytesAllocated;
1311 }
1312
1313 expat_heap_stat(rootParser, '+', increase,
1314 rootParser->m_alloc_tracker.bytesAllocated,
1315 rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
1316 }
1317 #else
1318 parser->m_parentParser = NULL;
1319 #endif // XML_GE == 1
1320
1321 parser->m_buffer = NULL;
1322 parser->m_bufferLim = NULL;
1323
1324 parser->m_attsSize = INIT_ATTS_SIZE;
1325 parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1326 if (parser->m_atts == NULL) {
1327 FREE(parser, parser);
1328 return NULL;
1329 }
1330 #ifdef XML_ATTR_INFO
1331 parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1332 if (parser->m_attInfo == NULL) {
1333 FREE(parser, parser->m_atts);
1334 FREE(parser, parser);
1335 return NULL;
1336 }
1337 #endif
1338 parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1339 if (parser->m_dataBuf == NULL) {
1340 FREE(parser, parser->m_atts);
1341 #ifdef XML_ATTR_INFO
1342 FREE(parser, parser->m_attInfo);
1343 #endif
1344 FREE(parser, parser);
1345 return NULL;
1346 }
1347 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1348
1349 if (dtd)
1350 parser->m_dtd = dtd;
1351 else {
1352 parser->m_dtd = dtdCreate(parser);
1353 if (parser->m_dtd == NULL) {
1354 FREE(parser, parser->m_dataBuf);
1355 FREE(parser, parser->m_atts);
1356 #ifdef XML_ATTR_INFO
1357 FREE(parser, parser->m_attInfo);
1358 #endif
1359 FREE(parser, parser);
1360 return NULL;
1361 }
1362 }
1363
1364 parser->m_freeBindingList = NULL;
1365 parser->m_freeTagList = NULL;
1366 parser->m_freeInternalEntities = NULL;
1367 parser->m_freeAttributeEntities = NULL;
1368 parser->m_freeValueEntities = NULL;
1369
1370 parser->m_groupSize = 0;
1371 parser->m_groupConnector = NULL;
1372
1373 parser->m_unknownEncodingHandler = NULL;
1374 parser->m_unknownEncodingHandlerData = NULL;
1375
1376 parser->m_namespaceSeparator = ASCII_EXCL;
1377 parser->m_ns = XML_FALSE;
1378 parser->m_ns_triplets = XML_FALSE;
1379
1380 parser->m_nsAtts = NULL;
1381 parser->m_nsAttsVersion = 0;
1382 parser->m_nsAttsPower = 0;
1383
1384 parser->m_protocolEncodingName = NULL;
1385
1386 poolInit(&parser->m_tempPool, parser);
1387 poolInit(&parser->m_temp2Pool, parser);
1388 parserInit(parser, encodingName);
1389
1390 if (encodingName && ! parser->m_protocolEncodingName) {
1391 if (dtd) {
1392 // We need to stop the upcoming call to XML_ParserFree from happily
1393 // destroying parser->m_dtd because the DTD is shared with the parent
1394 // parser and the only guard that keeps XML_ParserFree from destroying
1395 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1396 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1397 parser->m_dtd = NULL;
1398 }
1399 XML_ParserFree(parser);
1400 return NULL;
1401 }
1402
1403 if (nameSep) {
1404 parser->m_ns = XML_TRUE;
1405 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1406 parser->m_namespaceSeparator = *nameSep;
1407 } else {
1408 parser->m_internalEncoding = XmlGetInternalEncoding();
1409 }
1410
1411 return parser;
1412 }
1413
1414 static void
1415 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1416 parser->m_processor = prologInitProcessor;
1417 XmlPrologStateInit(&parser->m_prologState);
1418 if (encodingName != NULL) {
1419 parser->m_protocolEncodingName = copyString(encodingName, parser);
1420 }
1421 parser->m_curBase = NULL;
1422 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1423 parser->m_userData = NULL;
1424 parser->m_handlerArg = NULL;
1425 parser->m_startElementHandler = NULL;
1426 parser->m_endElementHandler = NULL;
1427 parser->m_characterDataHandler = NULL;
1428 parser->m_processingInstructionHandler = NULL;
1429 parser->m_commentHandler = NULL;
1430 parser->m_startCdataSectionHandler = NULL;
1431 parser->m_endCdataSectionHandler = NULL;
1432 parser->m_defaultHandler = NULL;
1433 parser->m_startDoctypeDeclHandler = NULL;
1434 parser->m_endDoctypeDeclHandler = NULL;
1435 parser->m_unparsedEntityDeclHandler = NULL;
1436 parser->m_notationDeclHandler = NULL;
1437 parser->m_startNamespaceDeclHandler = NULL;
1438 parser->m_endNamespaceDeclHandler = NULL;
1439 parser->m_notStandaloneHandler = NULL;
1440 parser->m_externalEntityRefHandler = NULL;
1441 parser->m_externalEntityRefHandlerArg = parser;
1442 parser->m_skippedEntityHandler = NULL;
1443 parser->m_elementDeclHandler = NULL;
1444 parser->m_attlistDeclHandler = NULL;
1445 parser->m_entityDeclHandler = NULL;
1446 parser->m_xmlDeclHandler = NULL;
1447 parser->m_bufferPtr = parser->m_buffer;
1448 parser->m_bufferEnd = parser->m_buffer;
1449 parser->m_parseEndByteIndex = 0;
1450 parser->m_parseEndPtr = NULL;
1451 parser->m_partialTokenBytesBefore = 0;
1452 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1453 parser->m_lastBufferRequestSize = 0;
1454 parser->m_declElementType = NULL;
1455 parser->m_declAttributeId = NULL;
1456 parser->m_declEntity = NULL;
1457 parser->m_doctypeName = NULL;
1458 parser->m_doctypeSysid = NULL;
1459 parser->m_doctypePubid = NULL;
1460 parser->m_declAttributeType = NULL;
1461 parser->m_declNotationName = NULL;
1462 parser->m_declNotationPublicId = NULL;
1463 parser->m_declAttributeIsCdata = XML_FALSE;
1464 parser->m_declAttributeIsId = XML_FALSE;
1465 memset(&parser->m_position, 0, sizeof(POSITION));
1466 parser->m_errorCode = XML_ERROR_NONE;
1467 parser->m_eventPtr = NULL;
1468 parser->m_eventEndPtr = NULL;
1469 parser->m_positionPtr = NULL;
1470 parser->m_openInternalEntities = NULL;
1471 parser->m_openAttributeEntities = NULL;
1472 parser->m_openValueEntities = NULL;
1473 parser->m_defaultExpandInternalEntities = XML_TRUE;
1474 parser->m_tagLevel = 0;
1475 parser->m_tagStack = NULL;
1476 parser->m_inheritedBindings = NULL;
1477 parser->m_nSpecifiedAtts = 0;
1478 parser->m_unknownEncodingMem = NULL;
1479 parser->m_unknownEncodingRelease = NULL;
1480 parser->m_unknownEncodingData = NULL;
1481 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1482 // Reentry can only be triggered inside m_processor calls
1483 parser->m_reenter = XML_FALSE;
1484 #ifdef XML_DTD
1485 parser->m_isParamEntity = XML_FALSE;
1486 parser->m_useForeignDTD = XML_FALSE;
1487 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1488 #endif
1489 parser->m_hash_secret_salt_128.k[0] = 0;
1490 parser->m_hash_secret_salt_128.k[1] = 0;
1491 parser->m_hash_secret_salt_set = XML_FALSE;
1492
1493 #if XML_GE == 1
1494 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1495 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1496 parser->m_accounting.maximumAmplificationFactor
1497 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1498 parser->m_accounting.activationThresholdBytes
1499 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1500
1501 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1502 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1503 #endif
1504 }
1505
1506 /* moves list of bindings to m_freeBindingList */
1507 static void FASTCALL
1508 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1509 while (bindings) {
1510 BINDING *b = bindings;
1511 bindings = bindings->nextTagBinding;
1512 b->nextTagBinding = parser->m_freeBindingList;
1513 parser->m_freeBindingList = b;
1514 }
1515 }
1516
1517 XML_Bool XMLCALL
1518 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1519 TAG *tStk;
1520 OPEN_INTERNAL_ENTITY *openEntityList;
1521
1522 if (parser == NULL)
1523 return XML_FALSE;
1524
1525 if (parser->m_parentParser)
1526 return XML_FALSE;
1527 /* move m_tagStack to m_freeTagList */
1528 tStk = parser->m_tagStack;
1529 while (tStk) {
1530 TAG *tag = tStk;
1531 tStk = tStk->parent;
1532 tag->parent = parser->m_freeTagList;
1533 moveToFreeBindingList(parser, tag->bindings);
1534 tag->bindings = NULL;
1535 parser->m_freeTagList = tag;
1536 }
1537 /* move m_openInternalEntities to m_freeInternalEntities */
1538 openEntityList = parser->m_openInternalEntities;
1539 while (openEntityList) {
1540 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1541 openEntityList = openEntity->next;
1542 openEntity->next = parser->m_freeInternalEntities;
1543 parser->m_freeInternalEntities = openEntity;
1544 }
1545 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1546 * for attributes) */
1547 openEntityList = parser->m_openAttributeEntities;
1548 while (openEntityList) {
1549 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1550 openEntityList = openEntity->next;
1551 openEntity->next = parser->m_freeAttributeEntities;
1552 parser->m_freeAttributeEntities = openEntity;
1553 }
1554 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1555 * for value entities) */
1556 openEntityList = parser->m_openValueEntities;
1557 while (openEntityList) {
1558 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1559 openEntityList = openEntity->next;
1560 openEntity->next = parser->m_freeValueEntities;
1561 parser->m_freeValueEntities = openEntity;
1562 }
1563 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1564 FREE(parser, parser->m_unknownEncodingMem);
1565 if (parser->m_unknownEncodingRelease)
1566 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1567 poolClear(&parser->m_tempPool);
1568 poolClear(&parser->m_temp2Pool);
1569 FREE(parser, (void *)parser->m_protocolEncodingName);
1570 parser->m_protocolEncodingName = NULL;
1571 parserInit(parser, encodingName);
1572 dtdReset(parser->m_dtd, parser);
1573 return XML_TRUE;
1574 }
1575
1576 static XML_Bool
1577 parserBusy(XML_Parser parser) {
1578 switch (parser->m_parsingStatus.parsing) {
1579 case XML_PARSING:
1580 case XML_SUSPENDED:
1581 return XML_TRUE;
1582 case XML_INITIALIZED:
1583 case XML_FINISHED:
1584 default:
1585 return XML_FALSE;
1586 }
1587 }
1588
1589 enum XML_Status XMLCALL
1590 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1591 if (parser == NULL)
1592 return XML_STATUS_ERROR;
1593 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1594 XXX There's no way for the caller to determine which of the
1595 XXX possible error cases caused the XML_STATUS_ERROR return.
1596 */
1597 if (parserBusy(parser))
1598 return XML_STATUS_ERROR;
1599
1600 /* Get rid of any previous encoding name */
1601 FREE(parser, (void *)parser->m_protocolEncodingName);
1602
1603 if (encodingName == NULL)
1604 /* No new encoding name */
1605 parser->m_protocolEncodingName = NULL;
1606 else {
1607 /* Copy the new encoding name into allocated memory */
1608 parser->m_protocolEncodingName = copyString(encodingName, parser);
1609 if (! parser->m_protocolEncodingName)
1610 return XML_STATUS_ERROR;
1611 }
1612 return XML_STATUS_OK;
1613 }
1614
1615 XML_Parser XMLCALL
1616 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1617 const XML_Char *encodingName) {
1618 XML_Parser parser = oldParser;
1619 DTD *newDtd = NULL;
1620 DTD *oldDtd;
1621 XML_StartElementHandler oldStartElementHandler;
1622 XML_EndElementHandler oldEndElementHandler;
1623 XML_CharacterDataHandler oldCharacterDataHandler;
1624 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1625 XML_CommentHandler oldCommentHandler;
1626 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1627 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1628 XML_DefaultHandler oldDefaultHandler;
1629 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1630 XML_NotationDeclHandler oldNotationDeclHandler;
1631 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1632 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1633 XML_NotStandaloneHandler oldNotStandaloneHandler;
1634 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1635 XML_SkippedEntityHandler oldSkippedEntityHandler;
1636 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1637 void *oldUnknownEncodingHandlerData;
1638 XML_ElementDeclHandler oldElementDeclHandler;
1639 XML_AttlistDeclHandler oldAttlistDeclHandler;
1640 XML_EntityDeclHandler oldEntityDeclHandler;
1641 XML_XmlDeclHandler oldXmlDeclHandler;
1642 ELEMENT_TYPE *oldDeclElementType;
1643
1644 void *oldUserData;
1645 void *oldHandlerArg;
1646 XML_Bool oldDefaultExpandInternalEntities;
1647 XML_Parser oldExternalEntityRefHandlerArg;
1648 #ifdef XML_DTD
1649 enum XML_ParamEntityParsing oldParamEntityParsing;
1650 int oldInEntityValue;
1651 #endif
1652 XML_Bool oldns_triplets;
1653 /* Note that the new parser shares the same hash secret as the old
1654 parser, so that dtdCopy and copyEntityTable can lookup values
1655 from hash tables associated with either parser without us having
1656 to worry which hash secrets each table has.
1657 */
1658 struct sipkey oldhash_secret_salt_128;
1659 XML_Bool oldhash_secret_salt_set;
1660 XML_Bool oldReparseDeferralEnabled;
1661
1662 /* Validate the oldParser parameter before we pull everything out of it */
1663 if (oldParser == NULL)
1664 return NULL;
1665
1666 /* Stash the original parser contents on the stack */
1667 oldDtd = parser->m_dtd;
1668 oldStartElementHandler = parser->m_startElementHandler;
1669 oldEndElementHandler = parser->m_endElementHandler;
1670 oldCharacterDataHandler = parser->m_characterDataHandler;
1671 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1672 oldCommentHandler = parser->m_commentHandler;
1673 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1674 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1675 oldDefaultHandler = parser->m_defaultHandler;
1676 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1677 oldNotationDeclHandler = parser->m_notationDeclHandler;
1678 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1679 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1680 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1681 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1682 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1683 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1684 oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData;
1685 oldElementDeclHandler = parser->m_elementDeclHandler;
1686 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1687 oldEntityDeclHandler = parser->m_entityDeclHandler;
1688 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1689 oldDeclElementType = parser->m_declElementType;
1690
1691 oldUserData = parser->m_userData;
1692 oldHandlerArg = parser->m_handlerArg;
1693 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1694 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1695 #ifdef XML_DTD
1696 oldParamEntityParsing = parser->m_paramEntityParsing;
1697 oldInEntityValue = parser->m_prologState.inEntityValue;
1698 #endif
1699 oldns_triplets = parser->m_ns_triplets;
1700 /* Note that the new parser shares the same hash secret as the old
1701 parser, so that dtdCopy and copyEntityTable can lookup values
1702 from hash tables associated with either parser without us having
1703 to worry which hash secrets each table has.
1704 */
1705 oldhash_secret_salt_128 = parser->m_hash_secret_salt_128;
1706 oldhash_secret_salt_set = parser->m_hash_secret_salt_set;
1707 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1708
1709 #ifdef XML_DTD
1710 if (! context)
1711 newDtd = oldDtd;
1712 #endif /* XML_DTD */
1713
1714 /* Note that the magical uses of the pre-processor to make field
1715 access look more like C++ require that `parser' be overwritten
1716 here. This makes this function more painful to follow than it
1717 would be otherwise.
1718 */
1719 if (parser->m_ns) {
1720 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1721 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
1722 } else {
1723 parser
1724 = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
1725 }
1726
1727 if (! parser)
1728 return NULL;
1729
1730 parser->m_startElementHandler = oldStartElementHandler;
1731 parser->m_endElementHandler = oldEndElementHandler;
1732 parser->m_characterDataHandler = oldCharacterDataHandler;
1733 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1734 parser->m_commentHandler = oldCommentHandler;
1735 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1736 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1737 parser->m_defaultHandler = oldDefaultHandler;
1738 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1739 parser->m_notationDeclHandler = oldNotationDeclHandler;
1740 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1741 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1742 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1743 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1744 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1745 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1746 parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData;
1747 parser->m_elementDeclHandler = oldElementDeclHandler;
1748 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1749 parser->m_entityDeclHandler = oldEntityDeclHandler;
1750 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1751 parser->m_declElementType = oldDeclElementType;
1752 parser->m_userData = oldUserData;
1753 if (oldUserData == oldHandlerArg)
1754 parser->m_handlerArg = parser->m_userData;
1755 else
1756 parser->m_handlerArg = parser;
1757 if (oldExternalEntityRefHandlerArg != oldParser)
1758 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1759 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1760 parser->m_ns_triplets = oldns_triplets;
1761 parser->m_hash_secret_salt_128 = oldhash_secret_salt_128;
1762 parser->m_hash_secret_salt_set = oldhash_secret_salt_set;
1763 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1764 parser->m_parentParser = oldParser;
1765 #ifdef XML_DTD
1766 parser->m_paramEntityParsing = oldParamEntityParsing;
1767 parser->m_prologState.inEntityValue = oldInEntityValue;
1768 if (context) {
1769 #endif /* XML_DTD */
1770 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
1771 || ! setContext(parser, context)) {
1772 XML_ParserFree(parser);
1773 return NULL;
1774 }
1775 parser->m_processor = externalEntityInitProcessor;
1776 #ifdef XML_DTD
1777 } else {
1778 /* The DTD instance referenced by parser->m_dtd is shared between the
1779 document's root parser and external PE parsers, therefore one does not
1780 need to call setContext. In addition, one also *must* not call
1781 setContext, because this would overwrite existing prefix->binding
1782 pointers in parser->m_dtd with ones that get destroyed with the external
1783 PE parser. This would leave those prefixes with dangling pointers.
1784 */
1785 parser->m_isParamEntity = XML_TRUE;
1786 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1787 parser->m_processor = externalParEntInitProcessor;
1788 }
1789 #endif /* XML_DTD */
1790 return parser;
1791 }
1792
1793 static void FASTCALL
1794 destroyBindings(BINDING *bindings, XML_Parser parser) {
1795 for (;;) {
1796 BINDING *b = bindings;
1797 if (! b)
1798 break;
1799 bindings = b->nextTagBinding;
1800 FREE(parser, b->uri);
1801 FREE(parser, b);
1802 }
1803 }
1804
1805 void XMLCALL
1806 XML_ParserFree(XML_Parser parser) {
1807 TAG *tagList;
1808 OPEN_INTERNAL_ENTITY *entityList;
1809 if (parser == NULL)
1810 return;
1811 /* free m_tagStack and m_freeTagList */
1812 tagList = parser->m_tagStack;
1813 for (;;) {
1814 TAG *p;
1815 if (tagList == NULL) {
1816 if (parser->m_freeTagList == NULL)
1817 break;
1818 tagList = parser->m_freeTagList;
1819 parser->m_freeTagList = NULL;
1820 }
1821 p = tagList;
1822 tagList = tagList->parent;
1823 FREE(parser, p->buf.raw);
1824 destroyBindings(p->bindings, parser);
1825 FREE(parser, p);
1826 }
1827 /* free m_openInternalEntities and m_freeInternalEntities */
1828 entityList = parser->m_openInternalEntities;
1829 for (;;) {
1830 OPEN_INTERNAL_ENTITY *openEntity;
1831 if (entityList == NULL) {
1832 if (parser->m_freeInternalEntities == NULL)
1833 break;
1834 entityList = parser->m_freeInternalEntities;
1835 parser->m_freeInternalEntities = NULL;
1836 }
1837 openEntity = entityList;
1838 entityList = entityList->next;
1839 FREE(parser, openEntity);
1840 }
1841 /* free m_openAttributeEntities and m_freeAttributeEntities */
1842 entityList = parser->m_openAttributeEntities;
1843 for (;;) {
1844 OPEN_INTERNAL_ENTITY *openEntity;
1845 if (entityList == NULL) {
1846 if (parser->m_freeAttributeEntities == NULL)
1847 break;
1848 entityList = parser->m_freeAttributeEntities;
1849 parser->m_freeAttributeEntities = NULL;
1850 }
1851 openEntity = entityList;
1852 entityList = entityList->next;
1853 FREE(parser, openEntity);
1854 }
1855 /* free m_openValueEntities and m_freeValueEntities */
1856 entityList = parser->m_openValueEntities;
1857 for (;;) {
1858 OPEN_INTERNAL_ENTITY *openEntity;
1859 if (entityList == NULL) {
1860 if (parser->m_freeValueEntities == NULL)
1861 break;
1862 entityList = parser->m_freeValueEntities;
1863 parser->m_freeValueEntities = NULL;
1864 }
1865 openEntity = entityList;
1866 entityList = entityList->next;
1867 FREE(parser, openEntity);
1868 }
1869 destroyBindings(parser->m_freeBindingList, parser);
1870 destroyBindings(parser->m_inheritedBindings, parser);
1871 poolDestroy(&parser->m_tempPool);
1872 poolDestroy(&parser->m_temp2Pool);
1873 FREE(parser, (void *)parser->m_protocolEncodingName);
1874 #ifdef XML_DTD
1875 /* external parameter entity parsers share the DTD structure
1876 parser->m_dtd with the root parser, so we must not destroy it
1877 */
1878 if (! parser->m_isParamEntity && parser->m_dtd)
1879 #else
1880 if (parser->m_dtd)
1881 #endif /* XML_DTD */
1882 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser);
1883 FREE(parser, parser->m_atts);
1884 #ifdef XML_ATTR_INFO
1885 FREE(parser, parser->m_attInfo);
1886 #endif
1887 FREE(parser, parser->m_groupConnector);
1888 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
1889 // is not being allocated with MALLOC(..) but with plain
1890 // .malloc_fcn(..).
1891 parser->m_mem.free_fcn(parser->m_buffer);
1892 FREE(parser, parser->m_dataBuf);
1893 FREE(parser, parser->m_nsAtts);
1894 FREE(parser, parser->m_unknownEncodingMem);
1895 if (parser->m_unknownEncodingRelease)
1896 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1897 FREE(parser, parser);
1898 }
1899
1900 void XMLCALL
1901 XML_UseParserAsHandlerArg(XML_Parser parser) {
1902 if (parser != NULL)
1903 parser->m_handlerArg = parser;
1904 }
1905
1906 enum XML_Error XMLCALL
1907 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1908 if (parser == NULL)
1909 return XML_ERROR_INVALID_ARGUMENT;
1910 #ifdef XML_DTD
1911 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1912 if (parserBusy(parser))
1913 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1914 parser->m_useForeignDTD = useDTD;
1915 return XML_ERROR_NONE;
1916 #else
1917 UNUSED_P(useDTD);
1918 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1919 #endif
1920 }
1921
1922 void XMLCALL
1923 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1924 if (parser == NULL)
1925 return;
1926 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1927 if (parserBusy(parser))
1928 return;
1929 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1930 }
1931
1932 void XMLCALL
1933 XML_SetUserData(XML_Parser parser, void *p) {
1934 if (parser == NULL)
1935 return;
1936 if (parser->m_handlerArg == parser->m_userData)
1937 parser->m_handlerArg = parser->m_userData = p;
1938 else
1939 parser->m_userData = p;
1940 }
1941
1942 enum XML_Status XMLCALL
1943 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1944 if (parser == NULL)
1945 return XML_STATUS_ERROR;
1946 if (p) {
1947 p = poolCopyString(&parser->m_dtd->pool, p);
1948 if (! p)
1949 return XML_STATUS_ERROR;
1950 parser->m_curBase = p;
1951 } else
1952 parser->m_curBase = NULL;
1953 return XML_STATUS_OK;
1954 }
1955
1956 const XML_Char *XMLCALL
1957 XML_GetBase(XML_Parser parser) {
1958 if (parser == NULL)
1959 return NULL;
1960 return parser->m_curBase;
1961 }
1962
1963 int XMLCALL
1964 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1965 if (parser == NULL)
1966 return -1;
1967 return parser->m_nSpecifiedAtts;
1968 }
1969
1970 int XMLCALL
1971 XML_GetIdAttributeIndex(XML_Parser parser) {
1972 if (parser == NULL)
1973 return -1;
1974 return parser->m_idAttIndex;
1975 }
1976
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo array, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1985
1986 void XMLCALL
1987 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1988 XML_EndElementHandler end) {
1989 if (parser == NULL)
1990 return;
1991 parser->m_startElementHandler = start;
1992 parser->m_endElementHandler = end;
1993 }
1994
1995 void XMLCALL
1996 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1997 if (parser != NULL)
1998 parser->m_startElementHandler = start;
1999 }
2000
2001 void XMLCALL
2002 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
2003 if (parser != NULL)
2004 parser->m_endElementHandler = end;
2005 }
2006
2007 void XMLCALL
2008 XML_SetCharacterDataHandler(XML_Parser parser,
2009 XML_CharacterDataHandler handler) {
2010 if (parser != NULL)
2011 parser->m_characterDataHandler = handler;
2012 }
2013
2014 void XMLCALL
2015 XML_SetProcessingInstructionHandler(XML_Parser parser,
2016 XML_ProcessingInstructionHandler handler) {
2017 if (parser != NULL)
2018 parser->m_processingInstructionHandler = handler;
2019 }
2020
2021 void XMLCALL
2022 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
2023 if (parser != NULL)
2024 parser->m_commentHandler = handler;
2025 }
2026
2027 void XMLCALL
2028 XML_SetCdataSectionHandler(XML_Parser parser,
2029 XML_StartCdataSectionHandler start,
2030 XML_EndCdataSectionHandler end) {
2031 if (parser == NULL)
2032 return;
2033 parser->m_startCdataSectionHandler = start;
2034 parser->m_endCdataSectionHandler = end;
2035 }
2036
2037 void XMLCALL
2038 XML_SetStartCdataSectionHandler(XML_Parser parser,
2039 XML_StartCdataSectionHandler start) {
2040 if (parser != NULL)
2041 parser->m_startCdataSectionHandler = start;
2042 }
2043
2044 void XMLCALL
2045 XML_SetEndCdataSectionHandler(XML_Parser parser,
2046 XML_EndCdataSectionHandler end) {
2047 if (parser != NULL)
2048 parser->m_endCdataSectionHandler = end;
2049 }
2050
2051 void XMLCALL
2052 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
2053 if (parser == NULL)
2054 return;
2055 parser->m_defaultHandler = handler;
2056 parser->m_defaultExpandInternalEntities = XML_FALSE;
2057 }
2058
2059 void XMLCALL
2060 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
2061 if (parser == NULL)
2062 return;
2063 parser->m_defaultHandler = handler;
2064 parser->m_defaultExpandInternalEntities = XML_TRUE;
2065 }
2066
2067 void XMLCALL
2068 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
2069 XML_EndDoctypeDeclHandler end) {
2070 if (parser == NULL)
2071 return;
2072 parser->m_startDoctypeDeclHandler = start;
2073 parser->m_endDoctypeDeclHandler = end;
2074 }
2075
2076 void XMLCALL
2077 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
2078 XML_StartDoctypeDeclHandler start) {
2079 if (parser != NULL)
2080 parser->m_startDoctypeDeclHandler = start;
2081 }
2082
2083 void XMLCALL
2084 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
2085 if (parser != NULL)
2086 parser->m_endDoctypeDeclHandler = end;
2087 }
2088
2089 void XMLCALL
2090 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
2091 XML_UnparsedEntityDeclHandler handler) {
2092 if (parser != NULL)
2093 parser->m_unparsedEntityDeclHandler = handler;
2094 }
2095
2096 void XMLCALL
2097 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
2098 if (parser != NULL)
2099 parser->m_notationDeclHandler = handler;
2100 }
2101
2102 void XMLCALL
2103 XML_SetNamespaceDeclHandler(XML_Parser parser,
2104 XML_StartNamespaceDeclHandler start,
2105 XML_EndNamespaceDeclHandler end) {
2106 if (parser == NULL)
2107 return;
2108 parser->m_startNamespaceDeclHandler = start;
2109 parser->m_endNamespaceDeclHandler = end;
2110 }
2111
2112 void XMLCALL
2113 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
2114 XML_StartNamespaceDeclHandler start) {
2115 if (parser != NULL)
2116 parser->m_startNamespaceDeclHandler = start;
2117 }
2118
2119 void XMLCALL
2120 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
2121 XML_EndNamespaceDeclHandler end) {
2122 if (parser != NULL)
2123 parser->m_endNamespaceDeclHandler = end;
2124 }
2125
2126 void XMLCALL
2127 XML_SetNotStandaloneHandler(XML_Parser parser,
2128 XML_NotStandaloneHandler handler) {
2129 if (parser != NULL)
2130 parser->m_notStandaloneHandler = handler;
2131 }
2132
2133 void XMLCALL
2134 XML_SetExternalEntityRefHandler(XML_Parser parser,
2135 XML_ExternalEntityRefHandler handler) {
2136 if (parser != NULL)
2137 parser->m_externalEntityRefHandler = handler;
2138 }
2139
2140 void XMLCALL
2141 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
2142 if (parser == NULL)
2143 return;
2144 if (arg)
2145 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
2146 else
2147 parser->m_externalEntityRefHandlerArg = parser;
2148 }
2149
2150 void XMLCALL
2151 XML_SetSkippedEntityHandler(XML_Parser parser,
2152 XML_SkippedEntityHandler handler) {
2153 if (parser != NULL)
2154 parser->m_skippedEntityHandler = handler;
2155 }
2156
2157 void XMLCALL
2158 XML_SetUnknownEncodingHandler(XML_Parser parser,
2159 XML_UnknownEncodingHandler handler, void *data) {
2160 if (parser == NULL)
2161 return;
2162 parser->m_unknownEncodingHandler = handler;
2163 parser->m_unknownEncodingHandlerData = data;
2164 }
2165
2166 void XMLCALL
2167 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
2168 if (parser != NULL)
2169 parser->m_elementDeclHandler = eldecl;
2170 }
2171
2172 void XMLCALL
2173 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
2174 if (parser != NULL)
2175 parser->m_attlistDeclHandler = attdecl;
2176 }
2177
2178 void XMLCALL
2179 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
2180 if (parser != NULL)
2181 parser->m_entityDeclHandler = handler;
2182 }
2183
2184 void XMLCALL
2185 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
2186 if (parser != NULL)
2187 parser->m_xmlDeclHandler = handler;
2188 }
2189
2190 int XMLCALL
2191 XML_SetParamEntityParsing(XML_Parser parser,
2192 enum XML_ParamEntityParsing peParsing) {
2193 if (parser == NULL)
2194 return 0;
2195 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2196 if (parserBusy(parser))
2197 return 0;
2198 #ifdef XML_DTD
2199 parser->m_paramEntityParsing = peParsing;
2200 return 1;
2201 #else
2202 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2203 #endif
2204 }
2205
2206 // DEPRECATED since Expat 2.8.0.
2207 int XMLCALL
2208 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2209 if (parser == NULL)
2210 return 0;
2211
2212 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2213 assert(! rootParser->m_parentParser);
2214
2215 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2216 if (parserBusy(rootParser))
2217 return 0;
2218
2219 rootParser->m_hash_secret_salt_128.k[0] = 0;
2220 rootParser->m_hash_secret_salt_128.k[1] = hash_salt;
2221
2222 if (hash_salt != 0) { // to remain backwards compatible
2223 rootParser->m_hash_secret_salt_set = XML_TRUE;
2224
2225 if (sizeof(unsigned long) == 4)
2226 ENTROPY_DEBUG("explicit(4)", rootParser->m_hash_secret_salt_128);
2227 else
2228 ENTROPY_DEBUG("explicit(8)", rootParser->m_hash_secret_salt_128);
2229 }
2230
2231 return 1;
2232 }
2233
2234 XML_Bool XMLCALL
2235 XML_SetHashSalt16Bytes(XML_Parser parser, const uint8_t entropy[16]) {
2236 if (parser == NULL)
2237 return XML_FALSE;
2238
2239 if (entropy == NULL)
2240 return XML_FALSE;
2241
2242 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2243 assert(! rootParser->m_parentParser);
2244
2245 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2246 if (parserBusy(rootParser))
2247 return XML_FALSE;
2248
2249 sip_tokey(&(rootParser->m_hash_secret_salt_128), entropy);
2250
2251 rootParser->m_hash_secret_salt_set = XML_TRUE;
2252
2253 ENTROPY_DEBUG("explicit(16)", rootParser->m_hash_secret_salt_128);
2254
2255 return XML_TRUE;
2256 }
2257
2258 enum XML_Status XMLCALL
2259 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2260 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2261 if (parser != NULL)
2262 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2263 return XML_STATUS_ERROR;
2264 }
2265 switch (parser->m_parsingStatus.parsing) {
2266 case XML_SUSPENDED:
2267 parser->m_errorCode = XML_ERROR_SUSPENDED;
2268 return XML_STATUS_ERROR;
2269 case XML_FINISHED:
2270 parser->m_errorCode = XML_ERROR_FINISHED;
2271 return XML_STATUS_ERROR;
2272 case XML_INITIALIZED:
2273 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2274 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2275 return XML_STATUS_ERROR;
2276 }
2277 /* fall through */
2278 default:
2279 parser->m_parsingStatus.parsing = XML_PARSING;
2280 }
2281
2282 #if XML_CONTEXT_BYTES == 0
2283 if (parser->m_bufferPtr == parser->m_bufferEnd) {
2284 const char *end;
2285 int nLeftOver;
2286 enum XML_Status result;
2287 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2288 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2289 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2290 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2291 parser->m_processor = errorProcessor;
2292 return XML_STATUS_ERROR;
2293 }
2294 // though this isn't a buffer request, we assume that `len` is the app's
2295 // preferred buffer fill size, and therefore save it here.
2296 parser->m_lastBufferRequestSize = len;
2297 parser->m_parseEndByteIndex += len;
2298 parser->m_positionPtr = s;
2299 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2300
2301 parser->m_errorCode
2302 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2303
2304 if (parser->m_errorCode != XML_ERROR_NONE) {
2305 parser->m_eventEndPtr = parser->m_eventPtr;
2306 parser->m_processor = errorProcessor;
2307 return XML_STATUS_ERROR;
2308 } else {
2309 switch (parser->m_parsingStatus.parsing) {
2310 case XML_SUSPENDED:
2311 result = XML_STATUS_SUSPENDED;
2312 break;
2313 case XML_INITIALIZED:
2314 case XML_PARSING:
2315 if (isFinal) {
2316 parser->m_parsingStatus.parsing = XML_FINISHED;
2317 return XML_STATUS_OK;
2318 }
2319 /* fall through */
2320 default:
2321 result = XML_STATUS_OK;
2322 }
2323 }
2324
2325 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2326 &parser->m_position);
2327 nLeftOver = s + len - end;
2328 if (nLeftOver) {
2329 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2330 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2331 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2332 parser->m_parsingStatus.parsing = XML_PARSING;
2333 void *const temp = XML_GetBuffer(parser, nLeftOver);
2334 parser->m_parsingStatus.parsing = originalStatus;
2335 // GetBuffer may have overwritten this, but we want to remember what the
2336 // app requested, not how many bytes were left over after parsing.
2337 parser->m_lastBufferRequestSize = len;
2338 if (temp == NULL) {
2339 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2340 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2341 parser->m_processor = errorProcessor;
2342 return XML_STATUS_ERROR;
2343 }
2344 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2345 // don't have any data to preserve, and can copy straight into the start
2346 // of the buffer rather than the GetBuffer return pointer (which may be
2347 // pointing further into the allocated buffer).
2348 memcpy(parser->m_buffer, end, nLeftOver);
2349 }
2350 parser->m_bufferPtr = parser->m_buffer;
2351 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2352 parser->m_positionPtr = parser->m_bufferPtr;
2353 parser->m_parseEndPtr = parser->m_bufferEnd;
2354 parser->m_eventPtr = parser->m_bufferPtr;
2355 parser->m_eventEndPtr = parser->m_bufferPtr;
2356 return result;
2357 }
2358 #endif /* XML_CONTEXT_BYTES == 0 */
2359 void *buff = XML_GetBuffer(parser, len);
2360 if (buff == NULL)
2361 return XML_STATUS_ERROR;
2362 if (len > 0) {
2363 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2364 memcpy(buff, s, len);
2365 }
2366 return XML_ParseBuffer(parser, len, isFinal);
2367 }
2368
2369 enum XML_Status XMLCALL
2370 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2371 const char *start;
2372 enum XML_Status result = XML_STATUS_OK;
2373
2374 if (parser == NULL)
2375 return XML_STATUS_ERROR;
2376
2377 if (len < 0) {
2378 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2379 return XML_STATUS_ERROR;
2380 }
2381
2382 switch (parser->m_parsingStatus.parsing) {
2383 case XML_SUSPENDED:
2384 parser->m_errorCode = XML_ERROR_SUSPENDED;
2385 return XML_STATUS_ERROR;
2386 case XML_FINISHED:
2387 parser->m_errorCode = XML_ERROR_FINISHED;
2388 return XML_STATUS_ERROR;
2389 case XML_INITIALIZED:
2390 /* Has someone called XML_GetBuffer successfully before? */
2391 if (! parser->m_bufferPtr) {
2392 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2393 return XML_STATUS_ERROR;
2394 }
2395
2396 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2397 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2398 return XML_STATUS_ERROR;
2399 }
2400 /* fall through */
2401 default:
2402 parser->m_parsingStatus.parsing = XML_PARSING;
2403 }
2404
2405 start = parser->m_bufferPtr;
2406 parser->m_positionPtr = start;
2407 parser->m_bufferEnd += len;
2408 parser->m_parseEndPtr = parser->m_bufferEnd;
2409 parser->m_parseEndByteIndex += len;
2410 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2411
2412 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2413 &parser->m_bufferPtr);
2414
2415 if (parser->m_errorCode != XML_ERROR_NONE) {
2416 parser->m_eventEndPtr = parser->m_eventPtr;
2417 parser->m_processor = errorProcessor;
2418 return XML_STATUS_ERROR;
2419 } else {
2420 switch (parser->m_parsingStatus.parsing) {
2421 case XML_SUSPENDED:
2422 result = XML_STATUS_SUSPENDED;
2423 break;
2424 case XML_INITIALIZED:
2425 case XML_PARSING:
2426 if (isFinal) {
2427 parser->m_parsingStatus.parsing = XML_FINISHED;
2428 return result;
2429 }
2430 default:; /* should not happen */
2431 }
2432 }
2433
2434 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2435 parser->m_bufferPtr, &parser->m_position);
2436 parser->m_positionPtr = parser->m_bufferPtr;
2437 return result;
2438 }
2439
2440 void *XMLCALL
2441 XML_GetBuffer(XML_Parser parser, int len) {
2442 if (parser == NULL)
2443 return NULL;
2444 if (len < 0) {
2445 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2446 return NULL;
2447 }
2448 switch (parser->m_parsingStatus.parsing) {
2449 case XML_SUSPENDED:
2450 parser->m_errorCode = XML_ERROR_SUSPENDED;
2451 return NULL;
2452 case XML_FINISHED:
2453 parser->m_errorCode = XML_ERROR_FINISHED;
2454 return NULL;
2455 default:;
2456 }
2457
2458 // whether or not the request succeeds, `len` seems to be the app's preferred
2459 // buffer fill size; remember it.
2460 parser->m_lastBufferRequestSize = len;
2461 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2462 || parser->m_buffer == NULL) {
2463 #if XML_CONTEXT_BYTES > 0
2464 int keep;
2465 #endif /* XML_CONTEXT_BYTES > 0 */
2466 /* Do not invoke signed arithmetic overflow: */
2467 int neededSize = (int)((unsigned)len
2468 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2469 parser->m_bufferEnd, parser->m_bufferPtr));
2470 if (neededSize < 0) {
2471 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2472 return NULL;
2473 }
2474 #if XML_CONTEXT_BYTES > 0
2475 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2476 if (keep > XML_CONTEXT_BYTES)
2477 keep = XML_CONTEXT_BYTES;
2478 /* Detect and prevent integer overflow */
2479 if (keep > INT_MAX - neededSize) {
2480 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2481 return NULL;
2482 }
2483 neededSize += keep;
2484 #endif /* XML_CONTEXT_BYTES > 0 */
2485 if (parser->m_buffer && parser->m_bufferPtr
2486 && neededSize
2487 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2488 #if XML_CONTEXT_BYTES > 0
2489 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2490 int offset
2491 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2492 - keep;
2493 /* The buffer pointers cannot be NULL here; we have at least some bytes
2494 * in the buffer */
2495 memmove(parser->m_buffer, &parser->m_buffer[offset],
2496 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2497 parser->m_bufferEnd -= offset;
2498 parser->m_bufferPtr -= offset;
2499 }
2500 #else
2501 memmove(parser->m_buffer, parser->m_bufferPtr,
2502 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2503 parser->m_bufferEnd
2504 = parser->m_buffer
2505 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2506 parser->m_bufferPtr = parser->m_buffer;
2507 #endif /* XML_CONTEXT_BYTES > 0 */
2508 } else {
2509 char *newBuf;
2510 int bufferSize
2511 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2512 if (bufferSize == 0)
2513 bufferSize = INIT_BUFFER_SIZE;
2514 do {
2515 /* Do not invoke signed arithmetic overflow: */
2516 bufferSize = (int)(2U * (unsigned)bufferSize);
2517 } while (bufferSize < neededSize && bufferSize > 0);
2518 if (bufferSize <= 0) {
2519 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2520 return NULL;
2521 }
2522 // NOTE: We are avoiding MALLOC(..) here to leave limiting
2523 // the input size to the application using Expat.
2524 newBuf = parser->m_mem.malloc_fcn(bufferSize);
2525 if (newBuf == NULL) {
2526 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2527 return NULL;
2528 }
2529 parser->m_bufferLim = newBuf + bufferSize;
2530 #if XML_CONTEXT_BYTES > 0
2531 if (parser->m_bufferPtr) {
2532 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2533 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2534 + keep);
2535 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2536 // is not being allocated with MALLOC(..) but with plain
2537 // .malloc_fcn(..).
2538 parser->m_mem.free_fcn(parser->m_buffer);
2539 parser->m_buffer = newBuf;
2540 parser->m_bufferEnd
2541 = parser->m_buffer
2542 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2543 + keep;
2544 parser->m_bufferPtr = parser->m_buffer + keep;
2545 } else {
2546 /* This must be a brand new buffer with no data in it yet */
2547 parser->m_bufferEnd = newBuf;
2548 parser->m_bufferPtr = parser->m_buffer = newBuf;
2549 }
2550 #else
2551 if (parser->m_bufferPtr) {
2552 memcpy(newBuf, parser->m_bufferPtr,
2553 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2554 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2555 // is not being allocated with MALLOC(..) but with plain
2556 // .malloc_fcn(..).
2557 parser->m_mem.free_fcn(parser->m_buffer);
2558 parser->m_bufferEnd
2559 = newBuf
2560 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2561 } else {
2562 /* This must be a brand new buffer with no data in it yet */
2563 parser->m_bufferEnd = newBuf;
2564 }
2565 parser->m_bufferPtr = parser->m_buffer = newBuf;
2566 #endif /* XML_CONTEXT_BYTES > 0 */
2567 }
2568 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2569 parser->m_positionPtr = NULL;
2570 }
2571 return parser->m_bufferEnd;
2572 }
2573
2574 static void
2575 triggerReenter(XML_Parser parser) {
2576 parser->m_reenter = XML_TRUE;
2577 }
2578
2579 enum XML_Status XMLCALL
2580 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2581 if (parser == NULL)
2582 return XML_STATUS_ERROR;
2583 switch (parser->m_parsingStatus.parsing) {
2584 case XML_INITIALIZED:
2585 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2586 return XML_STATUS_ERROR;
2587 case XML_SUSPENDED:
2588 if (resumable) {
2589 parser->m_errorCode = XML_ERROR_SUSPENDED;
2590 return XML_STATUS_ERROR;
2591 }
2592 parser->m_parsingStatus.parsing = XML_FINISHED;
2593 break;
2594 case XML_FINISHED:
2595 parser->m_errorCode = XML_ERROR_FINISHED;
2596 return XML_STATUS_ERROR;
2597 case XML_PARSING:
2598 if (resumable) {
2599 #ifdef XML_DTD
2600 if (parser->m_isParamEntity) {
2601 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2602 return XML_STATUS_ERROR;
2603 }
2604 #endif
2605 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2606 } else
2607 parser->m_parsingStatus.parsing = XML_FINISHED;
2608 break;
2609 default:
2610 assert(0);
2611 }
2612 return XML_STATUS_OK;
2613 }
2614
2615 enum XML_Status XMLCALL
2616 XML_ResumeParser(XML_Parser parser) {
2617 enum XML_Status result = XML_STATUS_OK;
2618
2619 if (parser == NULL)
2620 return XML_STATUS_ERROR;
2621 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2622 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2623 return XML_STATUS_ERROR;
2624 }
2625 parser->m_parsingStatus.parsing = XML_PARSING;
2626
2627 parser->m_errorCode = callProcessor(
2628 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2629
2630 if (parser->m_errorCode != XML_ERROR_NONE) {
2631 parser->m_eventEndPtr = parser->m_eventPtr;
2632 parser->m_processor = errorProcessor;
2633 return XML_STATUS_ERROR;
2634 } else {
2635 switch (parser->m_parsingStatus.parsing) {
2636 case XML_SUSPENDED:
2637 result = XML_STATUS_SUSPENDED;
2638 break;
2639 case XML_INITIALIZED:
2640 case XML_PARSING:
2641 if (parser->m_parsingStatus.finalBuffer) {
2642 parser->m_parsingStatus.parsing = XML_FINISHED;
2643 return result;
2644 }
2645 default:;
2646 }
2647 }
2648
2649 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2650 parser->m_bufferPtr, &parser->m_position);
2651 parser->m_positionPtr = parser->m_bufferPtr;
2652 return result;
2653 }
2654
2655 void XMLCALL
2656 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2657 if (parser == NULL)
2658 return;
2659 assert(status != NULL);
2660 *status = parser->m_parsingStatus;
2661 }
2662
2663 enum XML_Error XMLCALL
2664 XML_GetErrorCode(XML_Parser parser) {
2665 if (parser == NULL)
2666 return XML_ERROR_INVALID_ARGUMENT;
2667 return parser->m_errorCode;
2668 }
2669
2670 XML_Index XMLCALL
2671 XML_GetCurrentByteIndex(XML_Parser parser) {
2672 if (parser == NULL)
2673 return -1;
2674 if (parser->m_eventPtr)
2675 return (XML_Index)(parser->m_parseEndByteIndex
2676 - (parser->m_parseEndPtr - parser->m_eventPtr));
2677 return -1;
2678 }
2679
2680 int XMLCALL
2681 XML_GetCurrentByteCount(XML_Parser parser) {
2682 if (parser == NULL)
2683 return 0;
2684 if (parser->m_eventEndPtr && parser->m_eventPtr)
2685 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2686 return 0;
2687 }
2688
2689 const char *XMLCALL
2690 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2691 #if XML_CONTEXT_BYTES > 0
2692 if (parser == NULL)
2693 return NULL;
2694 if (parser->m_eventPtr && parser->m_buffer) {
2695 if (offset != NULL)
2696 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2697 if (size != NULL)
2698 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2699 return parser->m_buffer;
2700 }
2701 #else
2702 (void)parser;
2703 (void)offset;
2704 (void)size;
2705 #endif /* XML_CONTEXT_BYTES > 0 */
2706 return (const char *)0;
2707 }
2708
2709 XML_Size XMLCALL
2710 XML_GetCurrentLineNumber(XML_Parser parser) {
2711 if (parser == NULL)
2712 return 0;
2713 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2714 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2715 parser->m_eventPtr, &parser->m_position);
2716 parser->m_positionPtr = parser->m_eventPtr;
2717 }
2718 return parser->m_position.lineNumber + 1;
2719 }
2720
2721 XML_Size XMLCALL
2722 XML_GetCurrentColumnNumber(XML_Parser parser) {
2723 if (parser == NULL)
2724 return 0;
2725 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2726 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2727 parser->m_eventPtr, &parser->m_position);
2728 parser->m_positionPtr = parser->m_eventPtr;
2729 }
2730 return parser->m_position.columnNumber;
2731 }
2732
2733 void XMLCALL
2734 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2735 if (parser == NULL)
2736 return;
2737
2738 // NOTE: We are avoiding FREE(..) here because the content model
2739 // has been created using plain .malloc_fcn(..) rather than MALLOC(..).
2740 parser->m_mem.free_fcn(model);
2741 }
2742
2743 void *XMLCALL
2744 XML_MemMalloc(XML_Parser parser, size_t size) {
2745 if (parser == NULL)
2746 return NULL;
2747
2748 // NOTE: We are avoiding MALLOC(..) here to not include
2749 // user allocations with allocation tracking and limiting.
2750 return parser->m_mem.malloc_fcn(size);
2751 }
2752
2753 void *XMLCALL
2754 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2755 if (parser == NULL)
2756 return NULL;
2757
2758 // NOTE: We are avoiding REALLOC(..) here to not include
2759 // user allocations with allocation tracking and limiting.
2760 return parser->m_mem.realloc_fcn(ptr, size);
2761 }
2762
2763 void XMLCALL
2764 XML_MemFree(XML_Parser parser, void *ptr) {
2765 if (parser == NULL)
2766 return;
2767
2768 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and
2769 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..)
2770 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally.
2771 parser->m_mem.free_fcn(ptr);
2772 }
2773
2774 void XMLCALL
2775 XML_DefaultCurrent(XML_Parser parser) {
2776 if (parser == NULL)
2777 return;
2778 if (parser->m_defaultHandler) {
2779 if (parser->m_openInternalEntities)
2780 reportDefault(parser, parser->m_internalEncoding,
2781 parser->m_openInternalEntities->internalEventPtr,
2782 parser->m_openInternalEntities->internalEventEndPtr);
2783 else
2784 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2785 parser->m_eventEndPtr);
2786 }
2787 }
2788
/* Map an error code to a human-readable message.

   Returns a string literal for every code listed in the switch below.
   Returns NULL for XML_ERROR_NONE and for any value that has no case
   label (such values leave the switch and reach the final return).
   The "Added in ..." comments are the file's own notes on when the
   following cases were introduced.
*/
const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code) {
  switch (code) {
  case XML_ERROR_NONE:
    return NULL;
  case XML_ERROR_NO_MEMORY:
    return XML_L("out of memory");
  case XML_ERROR_SYNTAX:
    return XML_L("syntax error");
  case XML_ERROR_NO_ELEMENTS:
    return XML_L("no element found");
  case XML_ERROR_INVALID_TOKEN:
    return XML_L("not well-formed (invalid token)");
  case XML_ERROR_UNCLOSED_TOKEN:
    return XML_L("unclosed token");
  case XML_ERROR_PARTIAL_CHAR:
    return XML_L("partial character");
  case XML_ERROR_TAG_MISMATCH:
    return XML_L("mismatched tag");
  case XML_ERROR_DUPLICATE_ATTRIBUTE:
    return XML_L("duplicate attribute");
  case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
    return XML_L("junk after document element");
  case XML_ERROR_PARAM_ENTITY_REF:
    return XML_L("illegal parameter entity reference");
  case XML_ERROR_UNDEFINED_ENTITY:
    return XML_L("undefined entity");
  case XML_ERROR_RECURSIVE_ENTITY_REF:
    return XML_L("recursive entity reference");
  case XML_ERROR_ASYNC_ENTITY:
    return XML_L("asynchronous entity");
  case XML_ERROR_BAD_CHAR_REF:
    return XML_L("reference to invalid character number");
  case XML_ERROR_BINARY_ENTITY_REF:
    return XML_L("reference to binary entity");
  case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
    return XML_L("reference to external entity in attribute");
  case XML_ERROR_MISPLACED_XML_PI:
    return XML_L("XML or text declaration not at start of entity");
  case XML_ERROR_UNKNOWN_ENCODING:
    return XML_L("unknown encoding");
  case XML_ERROR_INCORRECT_ENCODING:
    return XML_L("encoding specified in XML declaration is incorrect");
  case XML_ERROR_UNCLOSED_CDATA_SECTION:
    return XML_L("unclosed CDATA section");
  case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
    return XML_L("error in processing external entity reference");
  case XML_ERROR_NOT_STANDALONE:
    return XML_L("document is not standalone");
  case XML_ERROR_UNEXPECTED_STATE:
    return XML_L("unexpected parser state - please send a bug report");
  case XML_ERROR_ENTITY_DECLARED_IN_PE:
    return XML_L("entity declared in parameter entity");
  case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
    return XML_L("requested feature requires XML_DTD support in Expat");
  case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
    return XML_L("cannot change setting once parsing has begun");
  /* Added in 1.95.7. */
  case XML_ERROR_UNBOUND_PREFIX:
    return XML_L("unbound prefix");
  /* Added in 1.95.8. */
  case XML_ERROR_UNDECLARING_PREFIX:
    return XML_L("must not undeclare prefix");
  case XML_ERROR_INCOMPLETE_PE:
    return XML_L("incomplete markup in parameter entity");
  case XML_ERROR_XML_DECL:
    return XML_L("XML declaration not well-formed");
  case XML_ERROR_TEXT_DECL:
    return XML_L("text declaration not well-formed");
  case XML_ERROR_PUBLICID:
    return XML_L("illegal character(s) in public id");
  case XML_ERROR_SUSPENDED:
    return XML_L("parser suspended");
  case XML_ERROR_NOT_SUSPENDED:
    return XML_L("parser not suspended");
  case XML_ERROR_ABORTED:
    return XML_L("parsing aborted");
  case XML_ERROR_FINISHED:
    return XML_L("parsing finished");
  case XML_ERROR_SUSPEND_PE:
    return XML_L("cannot suspend in external parameter entity");
  /* Added in 2.0.0. */
  case XML_ERROR_RESERVED_PREFIX_XML:
    return XML_L(
        "reserved prefix (xml) must not be undeclared or bound to another namespace name");
  case XML_ERROR_RESERVED_PREFIX_XMLNS:
    return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
  case XML_ERROR_RESERVED_NAMESPACE_URI:
    return XML_L(
        "prefix must not be bound to one of the reserved namespace names");
  /* Added in 2.2.5. */
  case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
    return XML_L("invalid argument");
  /* Added in 2.3.0. */
  case XML_ERROR_NO_BUFFER:
    return XML_L(
        "a successful prior call to function XML_GetBuffer is required");
  /* Added in 2.4.0. */
  case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
    return XML_L(
        "limit on input amplification factor (from DTD and entities) breached");
  /* Added in 2.6.4. */
  case XML_ERROR_NOT_STARTED:
    return XML_L("parser not started");
  }
  /* Reached only for values without a case label above. */
  return NULL;
}
2896
/* Return the library version as a string literal of the form
   "expat_<major>.<minor>.<micro>", assembled at compile time from
   XML_MAJOR_VERSION, XML_MINOR_VERSION and XML_MICRO_VERSION. */
const XML_LChar *XMLCALL
XML_ExpatVersion(void) {
  /* V1 string-izes its three arguments with the # operator and joins
     them with "." via adjacent string literal concatenation.  Because
     # suppresses macro expansion of its operand, invoking V1 directly
     with the version macros would string-ize the macro *names*.
     V2 therefore takes the version macros as ordinary arguments, which
     are fully expanded to their numerals before being substituted into
     the V1(...) invocation that the preprocessor then rescans.  V2 also
     prepends the "expat_" prefix. */
  /* ### This relies on standard preprocessor argument pre-expansion. */

#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
#define V2(a, b, c) XML_L("expat_") V1(a, b, c)

  return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);

  /* Keep the helper macros local to this function. */
#undef V1
#undef V2
}
2915
2916 XML_Expat_Version XMLCALL
2917 XML_ExpatVersionInfo(void) {
2918 XML_Expat_Version version;
2919
2920 version.major = XML_MAJOR_VERSION;
2921 version.minor = XML_MINOR_VERSION;
2922 version.micro = XML_MICRO_VERSION;
2923
2924 return version;
2925 }
2926
/* Return a pointer to a static, constant table describing the features
   this build was compiled with.

   Each entry holds a feature identifier, its name, and a numeric value
   (0 for entries below that carry no value).  Which entries are present
   depends on the preprocessor configuration.  The table is terminated by
   an entry with identifier XML_FEATURE_END and a NULL name.
*/
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
  static const XML_Feature features[] = {
      /* Always present: character type sizes. */
      {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
       sizeof(XML_Char)},
      {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
       sizeof(XML_LChar)},
#ifdef XML_UNICODE
      {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
      {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
      {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
#if XML_CONTEXT_BYTES > 0
      /* The value is the configured number of context bytes. */
      {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
       XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
      {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
      {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
      {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
      {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
#if XML_GE == 1
      /* Added in Expat 2.4.0 for XML_DTD defined and
       * added in Expat 2.6.0 for XML_GE == 1. */
      /* The amplification default is converted to long int for the
         table's value field. */
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_BLAP_MAX_AMP"),
       (long int)
           EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_BLAP_ACT_THRES"),
       EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
      /* Added in Expat 2.6.0. */
      {XML_FEATURE_GE, XML_L("XML_GE"), 0},
      /* Added in Expat 2.7.2. */
      {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_AT_MAX_AMP"),
       (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_AT_ACT_THRES"),
       (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
#endif
      /* Terminating entry. */
      {XML_FEATURE_END, NULL, 0}};

  return features;
}
2983
2984 #if XML_GE == 1
2985 XML_Bool XMLCALL
2986 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2987 XML_Parser parser, float maximumAmplificationFactor) {
2988 if ((parser == NULL) || (parser->m_parentParser != NULL)
2989 || isnan(maximumAmplificationFactor)
2990 || (maximumAmplificationFactor < 1.0f)) {
2991 return XML_FALSE;
2992 }
2993 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2994 return XML_TRUE;
2995 }
2996
2997 XML_Bool XMLCALL
2998 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2999 XML_Parser parser, unsigned long long activationThresholdBytes) {
3000 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3001 return XML_FALSE;
3002 }
3003 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
3004 return XML_TRUE;
3005 }
3006
3007 XML_Bool XMLCALL
3008 XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
3009 float maximumAmplificationFactor) {
3010 if ((parser == NULL) || (parser->m_parentParser != NULL)
3011 || isnan(maximumAmplificationFactor)
3012 || (maximumAmplificationFactor < 1.0f)) {
3013 return XML_FALSE;
3014 }
3015 parser->m_alloc_tracker.maximumAmplificationFactor
3016 = maximumAmplificationFactor;
3017 return XML_TRUE;
3018 }
3019
3020 XML_Bool XMLCALL
3021 XML_SetAllocTrackerActivationThreshold(
3022 XML_Parser parser, unsigned long long activationThresholdBytes) {
3023 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3024 return XML_FALSE;
3025 }
3026 parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
3027 return XML_TRUE;
3028 }
3029 #endif /* XML_GE == 1 */
3030
3031 XML_Bool XMLCALL
3032 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
3033 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
3034 parser->m_reparseDeferralEnabled = enabled;
3035 return XML_TRUE;
3036 }
3037 return XML_FALSE;
3038 }
3039
3040 /* Initially tag->rawName always points into the parse buffer;
3041 for those TAG instances opened while the current parse buffer was
3042 processed, and not yet closed, we need to store tag->rawName in a more
3043 permanent location, since the parse buffer is about to be discarded.
3044 */
3045 static XML_Bool
3046 storeRawNames(XML_Parser parser) {
3047 TAG *tag = parser->m_tagStack;
3048 while (tag) {
3049 size_t bufSize;
3050 size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
3051 size_t rawNameLen;
3052 char *rawNameBuf = tag->buf.raw + nameLen;
3053 /* Stop if already stored. Since m_tagStack is a stack, we can stop
3054 at the first entry that has already been copied; everything
3055 below it in the stack is already been accounted for in a
3056 previous call to this function.
3057 */
3058 if (tag->rawName == rawNameBuf)
3059 break;
3060 /* For reuse purposes we need to ensure that the
3061 size of tag->buf is a multiple of sizeof(XML_Char).
3062 */
3063 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
3064 /* Detect and prevent integer overflow. */
3065 if (rawNameLen > (size_t)INT_MAX - nameLen)
3066 return XML_FALSE;
3067 bufSize = nameLen + rawNameLen;
3068 if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) {
3069 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3070 if (temp == NULL)
3071 return XML_FALSE;
3072 /* if tag->name.str points to tag->buf.str (only when namespace
3073 processing is off) then we have to update it
3074 */
3075 if (tag->name.str == tag->buf.str)
3076 tag->name.str = (XML_Char *)temp;
3077 /* if tag->name.localPart is set (when namespace processing is on)
3078 then update it as well, since it will always point into tag->buf
3079 */
3080 if (tag->name.localPart)
3081 tag->name.localPart
3082 = (XML_Char *)temp + (tag->name.localPart - tag->buf.str);
3083 tag->buf.raw = temp;
3084 tag->bufEnd = temp + bufSize;
3085 rawNameBuf = temp + nameLen;
3086 }
3087 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
3088 tag->rawName = rawNameBuf;
3089 tag = tag->parent;
3090 }
3091 return XML_TRUE;
3092 }
3093
3094 static enum XML_Error PTRCALL
3095 contentProcessor(XML_Parser parser, const char *start, const char *end,
3096 const char **endPtr) {
3097 enum XML_Error result = doContent(
3098 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
3099 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3100 XML_ACCOUNT_DIRECT);
3101 if (result == XML_ERROR_NONE) {
3102 if (! storeRawNames(parser))
3103 return XML_ERROR_NO_MEMORY;
3104 }
3105 return result;
3106 }
3107
3108 static enum XML_Error PTRCALL
3109 externalEntityInitProcessor(XML_Parser parser, const char *start,
3110 const char *end, const char **endPtr) {
3111 enum XML_Error result = initializeEncoding(parser);
3112 if (result != XML_ERROR_NONE)
3113 return result;
3114 parser->m_processor = externalEntityInitProcessor2;
3115 return externalEntityInitProcessor2(parser, start, end, endPtr);
3116 }
3117
3118 static enum XML_Error PTRCALL
3119 externalEntityInitProcessor2(XML_Parser parser, const char *start,
3120 const char *end, const char **endPtr) {
3121 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3122 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
3123 switch (tok) {
3124 case XML_TOK_BOM:
3125 #if XML_GE == 1
3126 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
3127 XML_ACCOUNT_DIRECT)) {
3128 accountingOnAbort(parser);
3129 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3130 }
3131 #endif /* XML_GE == 1 */
3132
3133 /* If we are at the end of the buffer, this would cause the next stage,
3134 i.e. externalEntityInitProcessor3, to pass control directly to
3135 doContent (by detecting XML_TOK_NONE) without processing any xml text
3136 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
3137 */
3138 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
3139 *endPtr = next;
3140 return XML_ERROR_NONE;
3141 }
3142 start = next;
3143 break;
3144 case XML_TOK_PARTIAL:
3145 if (! parser->m_parsingStatus.finalBuffer) {
3146 *endPtr = start;
3147 return XML_ERROR_NONE;
3148 }
3149 parser->m_eventPtr = start;
3150 return XML_ERROR_UNCLOSED_TOKEN;
3151 case XML_TOK_PARTIAL_CHAR:
3152 if (! parser->m_parsingStatus.finalBuffer) {
3153 *endPtr = start;
3154 return XML_ERROR_NONE;
3155 }
3156 parser->m_eventPtr = start;
3157 return XML_ERROR_PARTIAL_CHAR;
3158 }
3159 parser->m_processor = externalEntityInitProcessor3;
3160 return externalEntityInitProcessor3(parser, start, end, endPtr);
3161 }
3162
3163 static enum XML_Error PTRCALL
3164 externalEntityInitProcessor3(XML_Parser parser, const char *start,
3165 const char *end, const char **endPtr) {
3166 int tok;
3167 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3168 parser->m_eventPtr = start;
3169 tok = XmlContentTok(parser->m_encoding, start, end, &next);
3170 /* Note: These bytes are accounted later in:
3171 - processXmlDecl
3172 - externalEntityContentProcessor
3173 */
3174 parser->m_eventEndPtr = next;
3175
3176 switch (tok) {
3177 case XML_TOK_XML_DECL: {
3178 enum XML_Error result;
3179 result = processXmlDecl(parser, 1, start, next);
3180 if (result != XML_ERROR_NONE)
3181 return result;
3182 switch (parser->m_parsingStatus.parsing) {
3183 case XML_SUSPENDED:
3184 *endPtr = next;
3185 return XML_ERROR_NONE;
3186 case XML_FINISHED:
3187 return XML_ERROR_ABORTED;
3188 case XML_PARSING:
3189 if (parser->m_reenter) {
3190 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
3191 }
3192 /* Fall through */
3193 default:
3194 start = next;
3195 }
3196 } break;
3197 case XML_TOK_PARTIAL:
3198 if (! parser->m_parsingStatus.finalBuffer) {
3199 *endPtr = start;
3200 return XML_ERROR_NONE;
3201 }
3202 return XML_ERROR_UNCLOSED_TOKEN;
3203 case XML_TOK_PARTIAL_CHAR:
3204 if (! parser->m_parsingStatus.finalBuffer) {
3205 *endPtr = start;
3206 return XML_ERROR_NONE;
3207 }
3208 return XML_ERROR_PARTIAL_CHAR;
3209 }
3210 parser->m_processor = externalEntityContentProcessor;
3211 parser->m_tagLevel = 1;
3212 return externalEntityContentProcessor(parser, start, end, endPtr);
3213 }
3214
3215 static enum XML_Error PTRCALL
3216 externalEntityContentProcessor(XML_Parser parser, const char *start,
3217 const char *end, const char **endPtr) {
3218 enum XML_Error result
3219 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
3220 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3221 XML_ACCOUNT_ENTITY_EXPANSION);
3222 if (result == XML_ERROR_NONE) {
3223 if (! storeRawNames(parser))
3224 return XML_ERROR_NO_MEMORY;
3225 }
3226 return result;
3227 }
3228
3229 static enum XML_Error
3230 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
3231 const char *s, const char *end, const char **nextPtr,
3232 XML_Bool haveMore, enum XML_Account account) {
3233 /* save one level of indirection */
3234 DTD *const dtd = parser->m_dtd;
3235
3236 const char **eventPP;
3237 const char **eventEndPP;
3238 if (enc == parser->m_encoding) {
3239 eventPP = &parser->m_eventPtr;
3240 eventEndPP = &parser->m_eventEndPtr;
3241 } else {
3242 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3243 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3244 }
3245 *eventPP = s;
3246
3247 for (;;) {
3248 const char *next = s; /* XmlContentTok doesn't always set the last arg */
3249 int tok = XmlContentTok(enc, s, end, &next);
3250 #if XML_GE == 1
3251 const char *accountAfter
3252 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
3253 ? (haveMore ? s /* i.e. 0 bytes */ : end)
3254 : next;
3255 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
3256 account)) {
3257 accountingOnAbort(parser);
3258 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3259 }
3260 #endif
3261 *eventEndPP = next;
3262 switch (tok) {
3263 case XML_TOK_TRAILING_CR:
3264 if (haveMore) {
3265 *nextPtr = s;
3266 return XML_ERROR_NONE;
3267 }
3268 *eventEndPP = end;
3269 if (parser->m_characterDataHandler) {
3270 XML_Char c = 0xA;
3271 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3272 } else if (parser->m_defaultHandler)
3273 reportDefault(parser, enc, s, end);
3274 /* We are at the end of the final buffer, should we check for
3275 XML_SUSPENDED, XML_FINISHED?
3276 */
3277 if (startTagLevel == 0)
3278 return XML_ERROR_NO_ELEMENTS;
3279 if (parser->m_tagLevel != startTagLevel)
3280 return XML_ERROR_ASYNC_ENTITY;
3281 *nextPtr = end;
3282 return XML_ERROR_NONE;
3283 case XML_TOK_NONE:
3284 if (haveMore) {
3285 *nextPtr = s;
3286 return XML_ERROR_NONE;
3287 }
3288 if (startTagLevel > 0) {
3289 if (parser->m_tagLevel != startTagLevel)
3290 return XML_ERROR_ASYNC_ENTITY;
3291 *nextPtr = s;
3292 return XML_ERROR_NONE;
3293 }
3294 return XML_ERROR_NO_ELEMENTS;
3295 case XML_TOK_INVALID:
3296 *eventPP = next;
3297 return XML_ERROR_INVALID_TOKEN;
3298 case XML_TOK_PARTIAL:
3299 if (haveMore) {
3300 *nextPtr = s;
3301 return XML_ERROR_NONE;
3302 }
3303 return XML_ERROR_UNCLOSED_TOKEN;
3304 case XML_TOK_PARTIAL_CHAR:
3305 if (haveMore) {
3306 *nextPtr = s;
3307 return XML_ERROR_NONE;
3308 }
3309 return XML_ERROR_PARTIAL_CHAR;
3310 case XML_TOK_ENTITY_REF: {
3311 const XML_Char *name;
3312 ENTITY *entity;
3313 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3314 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3315 if (ch) {
3316 #if XML_GE == 1
3317 /* NOTE: We are replacing 4-6 characters original input for 1 character
3318 * so there is no amplification and hence recording without
3319 * protection. */
3320 accountingDiffTolerated(parser, tok, (char *)&ch,
3321 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3322 XML_ACCOUNT_ENTITY_EXPANSION);
3323 #endif /* XML_GE == 1 */
3324 if (parser->m_characterDataHandler)
3325 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3326 else if (parser->m_defaultHandler)
3327 reportDefault(parser, enc, s, next);
3328 break;
3329 }
3330 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3331 next - enc->minBytesPerChar);
3332 if (! name)
3333 return XML_ERROR_NO_MEMORY;
3334 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3335 poolDiscard(&dtd->pool);
3336 /* First, determine if a check for an existing declaration is needed;
3337 if yes, check that the entity exists, and that it is internal,
3338 otherwise call the skipped entity or default handler.
3339 */
3340 if (! dtd->hasParamEntityRefs || dtd->standalone) {
3341 if (! entity)
3342 return XML_ERROR_UNDEFINED_ENTITY;
3343 else if (! entity->is_internal)
3344 return XML_ERROR_ENTITY_DECLARED_IN_PE;
3345 } else if (! entity) {
3346 if (parser->m_skippedEntityHandler)
3347 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3348 else if (parser->m_defaultHandler)
3349 reportDefault(parser, enc, s, next);
3350 break;
3351 }
3352 if (entity->open)
3353 return XML_ERROR_RECURSIVE_ENTITY_REF;
3354 if (entity->notation)
3355 return XML_ERROR_BINARY_ENTITY_REF;
3356 if (entity->textPtr) {
3357 enum XML_Error result;
3358 if (! parser->m_defaultExpandInternalEntities) {
3359 if (parser->m_skippedEntityHandler)
3360 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3361 0);
3362 else if (parser->m_defaultHandler)
3363 reportDefault(parser, enc, s, next);
3364 break;
3365 }
3366 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3367 if (result != XML_ERROR_NONE)
3368 return result;
3369 } else if (parser->m_externalEntityRefHandler) {
3370 const XML_Char *context;
3371 entity->open = XML_TRUE;
3372 context = getContext(parser);
3373 entity->open = XML_FALSE;
3374 if (! context)
3375 return XML_ERROR_NO_MEMORY;
3376 if (! parser->m_externalEntityRefHandler(
3377 parser->m_externalEntityRefHandlerArg, context, entity->base,
3378 entity->systemId, entity->publicId))
3379 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3380 poolDiscard(&parser->m_tempPool);
3381 } else if (parser->m_defaultHandler)
3382 reportDefault(parser, enc, s, next);
3383 break;
3384 }
3385 case XML_TOK_START_TAG_NO_ATTS:
3386 /* fall through */
3387 case XML_TOK_START_TAG_WITH_ATTS: {
3388 TAG *tag;
3389 enum XML_Error result;
3390 XML_Char *toPtr;
3391 if (parser->m_freeTagList) {
3392 tag = parser->m_freeTagList;
3393 parser->m_freeTagList = parser->m_freeTagList->parent;
3394 } else {
3395 tag = MALLOC(parser, sizeof(TAG));
3396 if (! tag)
3397 return XML_ERROR_NO_MEMORY;
3398 tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE);
3399 if (! tag->buf.raw) {
3400 FREE(parser, tag);
3401 return XML_ERROR_NO_MEMORY;
3402 }
3403 tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE;
3404 }
3405 tag->bindings = NULL;
3406 tag->parent = parser->m_tagStack;
3407 parser->m_tagStack = tag;
3408 tag->name.localPart = NULL;
3409 tag->name.prefix = NULL;
3410 tag->rawName = s + enc->minBytesPerChar;
3411 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3412 ++parser->m_tagLevel;
3413 {
3414 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3415 const char *fromPtr = tag->rawName;
3416 toPtr = tag->buf.str;
3417 for (;;) {
3418 int convLen;
3419 const enum XML_Convert_Result convert_res
3420 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3421 (ICHAR *)tag->bufEnd - 1);
3422 convLen = (int)(toPtr - tag->buf.str);
3423 if ((fromPtr >= rawNameEnd)
3424 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3425 tag->name.strLen = convLen;
3426 break;
3427 }
3428 if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw))
3429 return XML_ERROR_NO_MEMORY;
3430 const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2;
3431 {
3432 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3433 if (temp == NULL)
3434 return XML_ERROR_NO_MEMORY;
3435 tag->buf.raw = temp;
3436 tag->bufEnd = temp + bufSize;
3437 toPtr = (XML_Char *)temp + convLen;
3438 }
3439 }
3440 }
3441 tag->name.str = tag->buf.str;
3442 *toPtr = XML_T('\0');
3443 result
3444 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3445 if (result)
3446 return result;
3447 if (parser->m_startElementHandler)
3448 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3449 (const XML_Char **)parser->m_atts);
3450 else if (parser->m_defaultHandler)
3451 reportDefault(parser, enc, s, next);
3452 poolClear(&parser->m_tempPool);
3453 break;
3454 }
3455 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3456 /* fall through */
3457 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3458 const char *rawName = s + enc->minBytesPerChar;
3459 enum XML_Error result;
3460 BINDING *bindings = NULL;
3461 XML_Bool noElmHandlers = XML_TRUE;
3462 TAG_NAME name;
3463 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3464 rawName + XmlNameLength(enc, rawName));
3465 if (! name.str)
3466 return XML_ERROR_NO_MEMORY;
3467 poolFinish(&parser->m_tempPool);
3468 result = storeAtts(parser, enc, s, &name, &bindings,
3469 XML_ACCOUNT_NONE /* token spans whole start tag */);
3470 if (result != XML_ERROR_NONE) {
3471 freeBindings(parser, bindings);
3472 return result;
3473 }
3474 poolFinish(&parser->m_tempPool);
3475 if (parser->m_startElementHandler) {
3476 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3477 (const XML_Char **)parser->m_atts);
3478 noElmHandlers = XML_FALSE;
3479 }
3480 if (parser->m_endElementHandler) {
3481 if (parser->m_startElementHandler)
3482 *eventPP = *eventEndPP;
3483 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3484 noElmHandlers = XML_FALSE;
3485 }
3486 if (noElmHandlers && parser->m_defaultHandler)
3487 reportDefault(parser, enc, s, next);
3488 poolClear(&parser->m_tempPool);
3489 freeBindings(parser, bindings);
3490 }
3491 if ((parser->m_tagLevel == 0)
3492 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3493 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3494 || (parser->m_parsingStatus.parsing == XML_PARSING
3495 && parser->m_reenter))
3496 parser->m_processor = epilogProcessor;
3497 else
3498 return epilogProcessor(parser, next, end, nextPtr);
3499 }
3500 break;
3501 case XML_TOK_END_TAG:
3502 if (parser->m_tagLevel == startTagLevel)
3503 return XML_ERROR_ASYNC_ENTITY;
3504 else {
3505 int len;
3506 const char *rawName;
3507 TAG *tag = parser->m_tagStack;
3508 rawName = s + enc->minBytesPerChar * 2;
3509 len = XmlNameLength(enc, rawName);
3510 if (len != tag->rawNameLength
3511 || memcmp(tag->rawName, rawName, len) != 0) {
3512 *eventPP = rawName;
3513 return XML_ERROR_TAG_MISMATCH;
3514 }
3515 parser->m_tagStack = tag->parent;
3516 tag->parent = parser->m_freeTagList;
3517 parser->m_freeTagList = tag;
3518 --parser->m_tagLevel;
3519 if (parser->m_endElementHandler) {
3520 const XML_Char *localPart;
3521 const XML_Char *prefix;
3522 XML_Char *uri;
3523 localPart = tag->name.localPart;
3524 if (parser->m_ns && localPart) {
3525 /* localPart and prefix may have been overwritten in
3526 tag->name.str, since this points to the binding->uri
3527 buffer which gets reused; so we have to add them again
3528 */
3529 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3530 /* don't need to check for space - already done in storeAtts() */
3531 while (*localPart)
3532 *uri++ = *localPart++;
3533 prefix = tag->name.prefix;
3534 if (parser->m_ns_triplets && prefix) {
3535 *uri++ = parser->m_namespaceSeparator;
3536 while (*prefix)
3537 *uri++ = *prefix++;
3538 }
3539 *uri = XML_T('\0');
3540 }
3541 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3542 } else if (parser->m_defaultHandler)
3543 reportDefault(parser, enc, s, next);
3544 while (tag->bindings) {
3545 BINDING *b = tag->bindings;
3546 if (parser->m_endNamespaceDeclHandler)
3547 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3548 b->prefix->name);
3549 tag->bindings = tag->bindings->nextTagBinding;
3550 b->nextTagBinding = parser->m_freeBindingList;
3551 parser->m_freeBindingList = b;
3552 b->prefix->binding = b->prevPrefixBinding;
3553 }
3554 if ((parser->m_tagLevel == 0)
3555 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3556 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3557 || (parser->m_parsingStatus.parsing == XML_PARSING
3558 && parser->m_reenter))
3559 parser->m_processor = epilogProcessor;
3560 else
3561 return epilogProcessor(parser, next, end, nextPtr);
3562 }
3563 }
3564 break;
3565 case XML_TOK_CHAR_REF: {
3566 int n = XmlCharRefNumber(enc, s);
3567 if (n < 0)
3568 return XML_ERROR_BAD_CHAR_REF;
3569 if (parser->m_characterDataHandler) {
3570 XML_Char buf[XML_ENCODE_MAX];
3571 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3572 XmlEncode(n, (ICHAR *)buf));
3573 } else if (parser->m_defaultHandler)
3574 reportDefault(parser, enc, s, next);
3575 } break;
3576 case XML_TOK_XML_DECL:
3577 return XML_ERROR_MISPLACED_XML_PI;
3578 case XML_TOK_DATA_NEWLINE:
3579 if (parser->m_characterDataHandler) {
3580 XML_Char c = 0xA;
3581 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3582 } else if (parser->m_defaultHandler)
3583 reportDefault(parser, enc, s, next);
3584 break;
3585 case XML_TOK_CDATA_SECT_OPEN: {
3586 enum XML_Error result;
3587 if (parser->m_startCdataSectionHandler)
3588 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3589 /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
3591 changing only the character data. You set up a defaultHandler
3592 and a characterDataHandler. The defaultHandler simply copies
3593 characters through. The characterDataHandler does the
3594 transformation and writes the characters out escaping them as
3595 necessary. This case will fail to work if we leave out the
3596 following two lines (because & and < inside CDATA sections will
3597 be incorrectly escaped).
3598
3599 However, now we have a start/endCdataSectionHandler, so it seems
3600 easier to let the user deal with this.
3601 */
3602 else if ((0) && parser->m_characterDataHandler)
3603 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3604 0);
3605 /* END disabled code */
3606 else if (parser->m_defaultHandler)
3607 reportDefault(parser, enc, s, next);
3608 result
3609 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3610 if (result != XML_ERROR_NONE)
3611 return result;
3612 else if (! next) {
3613 parser->m_processor = cdataSectionProcessor;
3614 return result;
3615 }
3616 } break;
3617 case XML_TOK_TRAILING_RSQB:
3618 if (haveMore) {
3619 *nextPtr = s;
3620 return XML_ERROR_NONE;
3621 }
3622 if (parser->m_characterDataHandler) {
3623 if (MUST_CONVERT(enc, s)) {
3624 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3625 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3626 parser->m_characterDataHandler(
3627 parser->m_handlerArg, parser->m_dataBuf,
3628 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3629 } else
3630 parser->m_characterDataHandler(
3631 parser->m_handlerArg, (const XML_Char *)s,
3632 (int)((const XML_Char *)end - (const XML_Char *)s));
3633 } else if (parser->m_defaultHandler)
3634 reportDefault(parser, enc, s, end);
3635 /* We are at the end of the final buffer, should we check for
3636 XML_SUSPENDED, XML_FINISHED?
3637 */
3638 if (startTagLevel == 0) {
3639 *eventPP = end;
3640 return XML_ERROR_NO_ELEMENTS;
3641 }
3642 if (parser->m_tagLevel != startTagLevel) {
3643 *eventPP = end;
3644 return XML_ERROR_ASYNC_ENTITY;
3645 }
3646 *nextPtr = end;
3647 return XML_ERROR_NONE;
3648 case XML_TOK_DATA_CHARS: {
3649 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3650 if (charDataHandler) {
3651 if (MUST_CONVERT(enc, s)) {
3652 for (;;) {
3653 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3654 const enum XML_Convert_Result convert_res = XmlConvert(
3655 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3656 *eventEndPP = s;
3657 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3658 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3659 if ((convert_res == XML_CONVERT_COMPLETED)
3660 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3661 break;
3662 *eventPP = s;
3663 }
3664 } else
3665 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3666 (int)((const XML_Char *)next - (const XML_Char *)s));
3667 } else if (parser->m_defaultHandler)
3668 reportDefault(parser, enc, s, next);
3669 } break;
3670 case XML_TOK_PI:
3671 if (! reportProcessingInstruction(parser, enc, s, next))
3672 return XML_ERROR_NO_MEMORY;
3673 break;
3674 case XML_TOK_COMMENT:
3675 if (! reportComment(parser, enc, s, next))
3676 return XML_ERROR_NO_MEMORY;
3677 break;
3678 default:
3679 /* All of the tokens produced by XmlContentTok() have their own
3680 * explicit cases, so this default is not strictly necessary.
3681 * However it is a useful safety net, so we retain the code and
3682 * simply exclude it from the coverage tests.
3683 *
3684 * LCOV_EXCL_START
3685 */
3686 if (parser->m_defaultHandler)
3687 reportDefault(parser, enc, s, next);
3688 break;
3689 /* LCOV_EXCL_STOP */
3690 }
3691 switch (parser->m_parsingStatus.parsing) {
3692 case XML_SUSPENDED:
3693 *eventPP = next;
3694 *nextPtr = next;
3695 return XML_ERROR_NONE;
3696 case XML_FINISHED:
3697 *eventPP = next;
3698 return XML_ERROR_ABORTED;
3699 case XML_PARSING:
3700 if (parser->m_reenter) {
3701 *nextPtr = next;
3702 return XML_ERROR_NONE;
3703 }
3704 /* Fall through */
3705 default:;
3706 *eventPP = s = next;
3707 }
3708 }
3709 /* not reached */
3710 }
3711
3712 /* This function does not call free() on the allocated memory, merely
3713 * moving it to the parser's m_freeBindingList where it can be freed or
3714 * reused as appropriate.
3715 */
3716 static void
3717 freeBindings(XML_Parser parser, BINDING *bindings) {
3718 while (bindings) {
3719 BINDING *b = bindings;
3720
3721 /* m_startNamespaceDeclHandler will have been called for this
3722 * binding in addBindings(), so call the end handler now.
3723 */
3724 if (parser->m_endNamespaceDeclHandler)
3725 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3726
3727 bindings = bindings->nextTagBinding;
3728 b->nextTagBinding = parser->m_freeBindingList;
3729 parser->m_freeBindingList = b;
3730 b->prefix->binding = b->prevPrefixBinding;
3731 }
3732 }
3733
/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   The resulting name/value pairs are written over parser->m_atts
   (viewed as an array of XML_Char pointers) and terminated with a null
   entry.  Namespace bindings created along the way are pushed onto
   *bindingsPtr.  With namespace processing enabled, tagNamePtr is updated
   to describe the expanded element name.

   While working, the XML_Char stored immediately in front of each
   attribute name (name[-1]) is used as a scratch flag:
     0 - attribute not seen for this element
     1 - attribute present (specified or defaulted)
     2 - attribute present and prefixed, i.e. its name still has to be
         expanded
   All flags are cleared again before a successful return.

   Returns XML_ERROR_NONE on success.  Failure codes produced directly
   here are XML_ERROR_NO_MEMORY, XML_ERROR_DUPLICATE_ATTRIBUTE and
   XML_ERROR_UNBOUND_PREFIX; errors reported by storeAttributeValue() and
   addBinding() are passed through unchanged.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;         /* next free slot in appAtts */
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0; /* number of prefixed attributes awaiting expansion */
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name; register it in the DTD if it is new */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* Grow the attribute buffer(s) when the specified plus the declared
     default attributes do not fit into the current allocation. */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = REALLOC(parser, parser->m_atts,
                   parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      /* keep the recorded size in step with the unchanged buffer */
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = REALLOC(parser, parser->m_attInfo,
                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* The first XmlGetAttributes() call was limited to oldAttsSize
       entries; fetch again now that all n attributes fit. */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* The application's name/value pointer list is built in place over the
     m_atts buffer. */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* translate buffer pointers into offsets relative to the byte index
       of the end of the current parse buffer */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
     */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1; /* mark as seen for this element */
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* step back over the name slot: the declaration is not kept in
           the application's attribute list */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    /* the ID attribute was specified: find its name slot */
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting: append every declared default whose
     attribute was not seen above (flag still 0) and that has a value */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          /* a defaulted namespace declaration */
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0; /* terminate the list */

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    unsigned int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* a slot counts as occupied only if it carries the new version */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* start the expanded name in the temp pool with the bound URI
           (b->uriLen characters of b->uri) */
        for (j = 0; j < (unsigned int)b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* skip past the prefix and its colon to reach the local part */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* the probe step is computed on the first collision only */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            /* step backwards through the table, wrapping at index 0 */
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* overwrite the terminator appended above with the separator */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* stop early once the last prefixed name has been expanded; the
           loop below clears the flags of whatever remains */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations are not in appAtts, so clear their flags via
     the binding list */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE; /* unprefixed and no default namespace bound */
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    while (binding->prefix->name[prefixLen++])
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* total length of the expanded name: URI + local part (+ prefix) */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* open tags whose name points at the old buffer must follow the move */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  /* the tag name now lives in the binding's URI buffer */
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
4211
4212 static XML_Bool
4213 is_rfc3986_uri_char(XML_Char candidate) {
4214 // For the RFC 3986 ANBF grammar see
4215 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4216
4217 switch (candidate) {
4218 // From rule "ALPHA" (uppercase half)
4219 case 'A':
4220 case 'B':
4221 case 'C':
4222 case 'D':
4223 case 'E':
4224 case 'F':
4225 case 'G':
4226 case 'H':
4227 case 'I':
4228 case 'J':
4229 case 'K':
4230 case 'L':
4231 case 'M':
4232 case 'N':
4233 case 'O':
4234 case 'P':
4235 case 'Q':
4236 case 'R':
4237 case 'S':
4238 case 'T':
4239 case 'U':
4240 case 'V':
4241 case 'W':
4242 case 'X':
4243 case 'Y':
4244 case 'Z':
4245
4246 // From rule "ALPHA" (lowercase half)
4247 case 'a':
4248 case 'b':
4249 case 'c':
4250 case 'd':
4251 case 'e':
4252 case 'f':
4253 case 'g':
4254 case 'h':
4255 case 'i':
4256 case 'j':
4257 case 'k':
4258 case 'l':
4259 case 'm':
4260 case 'n':
4261 case 'o':
4262 case 'p':
4263 case 'q':
4264 case 'r':
4265 case 's':
4266 case 't':
4267 case 'u':
4268 case 'v':
4269 case 'w':
4270 case 'x':
4271 case 'y':
4272 case 'z':
4273
4274 // From rule "DIGIT"
4275 case '0':
4276 case '1':
4277 case '2':
4278 case '3':
4279 case '4':
4280 case '5':
4281 case '6':
4282 case '7':
4283 case '8':
4284 case '9':
4285
4286 // From rule "pct-encoded"
4287 case '%':
4288
4289 // From rule "unreserved"
4290 case '-':
4291 case '.':
4292 case '_':
4293 case '~':
4294
4295 // From rule "gen-delims"
4296 case ':':
4297 case '/':
4298 case '?':
4299 case '#':
4300 case '[':
4301 case ']':
4302 case '@':
4303
4304 // From rule "sub-delims"
4305 case '!':
4306 case '$':
4307 case '&':
4308 case '\'':
4309 case '(':
4310 case ')':
4311 case '*':
4312 case '+':
4313 case ',':
4314 case ';':
4315 case '=':
4316 return XML_TRUE;
4317
4318 default:
4319 return XML_FALSE;
4320 }
4321 }
4322
4323 /* addBinding() overwrites the value of prefix->binding without checking.
4324 Therefore one must keep track of the old value outside of addBinding().
4325 */
4326 static enum XML_Error
4327 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4328 const XML_Char *uri, BINDING **bindingsPtr) {
4329 // "http://www.w3.org/XML/1998/namespace"
4330 static const XML_Char xmlNamespace[]
4331 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4332 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4333 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4334 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4335 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4336 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4337 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4338 ASCII_e, '\0'};
4339 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4340 // "http://www.w3.org/2000/xmlns/"
4341 static const XML_Char xmlnsNamespace[]
4342 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4343 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4344 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4345 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4346 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4347 static const int xmlnsLen
4348 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4349
4350 XML_Bool mustBeXML = XML_FALSE;
4351 XML_Bool isXML = XML_TRUE;
4352 XML_Bool isXMLNS = XML_TRUE;
4353
4354 BINDING *b;
4355 int len;
4356
4357 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4358 if (*uri == XML_T('\0') && prefix->name)
4359 return XML_ERROR_UNDECLARING_PREFIX;
4360
4361 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4362 && prefix->name[1] == XML_T(ASCII_m)
4363 && prefix->name[2] == XML_T(ASCII_l)) {
4364 /* Not allowed to bind xmlns */
4365 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4366 && prefix->name[5] == XML_T('\0'))
4367 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4368
4369 if (prefix->name[3] == XML_T('\0'))
4370 mustBeXML = XML_TRUE;
4371 }
4372
4373 for (len = 0; uri[len]; len++) {
4374 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4375 isXML = XML_FALSE;
4376
4377 if (! mustBeXML && isXMLNS
4378 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4379 isXMLNS = XML_FALSE;
4380
4381 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4382 // today (and is not REQUIRED to do so with regard to the XML 1.0
4383 // namespaces specification) we have to at least make sure, that
4384 // the application on top of Expat (that is likely splitting expanded
4385 // element names ("qualified names") of form
4386 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4387 // in its element handler code) cannot be confused by an attacker
4388 // putting additional namespace separator characters into namespace
4389 // declarations. That would be ambiguous and not to be expected.
4390 //
4391 // While the HTML API docs of function XML_ParserCreateNS have been
4392 // advising against use of a namespace separator character that can
4393 // appear in a URI for >20 years now, some widespread applications
4394 // are using URI characters (':' (colon) in particular) for a
4395 // namespace separator, in practice. To keep these applications
4396 // functional, we only reject namespaces URIs containing the
4397 // application-chosen namespace separator if the chosen separator
4398 // is a non-URI character with regard to RFC 3986.
4399 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4400 && ! is_rfc3986_uri_char(uri[len])) {
4401 return XML_ERROR_SYNTAX;
4402 }
4403 }
4404 isXML = isXML && len == xmlLen;
4405 isXMLNS = isXMLNS && len == xmlnsLen;
4406
4407 if (mustBeXML != isXML)
4408 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4409 : XML_ERROR_RESERVED_NAMESPACE_URI;
4410
4411 if (isXMLNS)
4412 return XML_ERROR_RESERVED_NAMESPACE_URI;
4413
4414 if (parser->m_namespaceSeparator)
4415 len++;
4416 if (parser->m_freeBindingList) {
4417 b = parser->m_freeBindingList;
4418 if (len > b->uriAlloc) {
4419 /* Detect and prevent integer overflow */
4420 if (len > INT_MAX - EXPAND_SPARE) {
4421 return XML_ERROR_NO_MEMORY;
4422 }
4423
4424 /* Detect and prevent integer overflow.
4425 * The preprocessor guard addresses the "always false" warning
4426 * from -Wtype-limits on platforms where
4427 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4428 #if UINT_MAX >= SIZE_MAX
4429 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4430 return XML_ERROR_NO_MEMORY;
4431 }
4432 #endif
4433
4434 XML_Char *temp
4435 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4436 if (temp == NULL)
4437 return XML_ERROR_NO_MEMORY;
4438 b->uri = temp;
4439 b->uriAlloc = len + EXPAND_SPARE;
4440 }
4441 parser->m_freeBindingList = b->nextTagBinding;
4442 } else {
4443 b = MALLOC(parser, sizeof(BINDING));
4444 if (! b)
4445 return XML_ERROR_NO_MEMORY;
4446
4447 /* Detect and prevent integer overflow */
4448 if (len > INT_MAX - EXPAND_SPARE) {
4449 return XML_ERROR_NO_MEMORY;
4450 }
4451 /* Detect and prevent integer overflow.
4452 * The preprocessor guard addresses the "always false" warning
4453 * from -Wtype-limits on platforms where
4454 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4455 #if UINT_MAX >= SIZE_MAX
4456 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4457 return XML_ERROR_NO_MEMORY;
4458 }
4459 #endif
4460
4461 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4462 if (! b->uri) {
4463 FREE(parser, b);
4464 return XML_ERROR_NO_MEMORY;
4465 }
4466 b->uriAlloc = len + EXPAND_SPARE;
4467 }
4468 b->uriLen = len;
4469 memcpy(b->uri, uri, len * sizeof(XML_Char));
4470 if (parser->m_namespaceSeparator)
4471 b->uri[len - 1] = parser->m_namespaceSeparator;
4472 b->prefix = prefix;
4473 b->attId = attId;
4474 b->prevPrefixBinding = prefix->binding;
4475 /* NULL binding when default namespace undeclared */
4476 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4477 prefix->binding = NULL;
4478 else
4479 prefix->binding = b;
4480 b->nextTagBinding = *bindingsPtr;
4481 *bindingsPtr = b;
4482 /* if attId == NULL then we are not starting a namespace scope */
4483 if (attId && parser->m_startNamespaceDeclHandler)
4484 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4485 prefix->binding ? uri : 0);
4486 return XML_ERROR_NONE;
4487 }
4488
4489 /* The idea here is to avoid using stack for each CDATA section when
4490 the whole file is parsed with one call.
4491 */
4492 static enum XML_Error PTRCALL
4493 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4494 const char **endPtr) {
4495 enum XML_Error result = doCdataSection(
4496 parser, parser->m_encoding, &start, end, endPtr,
4497 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4498 if (result != XML_ERROR_NONE)
4499 return result;
4500 if (start) {
4501 if (parser->m_parentParser) { /* we are parsing an external entity */
4502 parser->m_processor = externalEntityContentProcessor;
4503 return externalEntityContentProcessor(parser, start, end, endPtr);
4504 } else {
4505 parser->m_processor = contentProcessor;
4506 return contentProcessor(parser, start, end, endPtr);
4507 }
4508 }
4509 return result;
4510 }
4511
4512 /* startPtr gets set to non-null if the section is closed, and to null if
4513 the section is not yet closed.
4514 */
4515 static enum XML_Error
4516 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4517 const char *end, const char **nextPtr, XML_Bool haveMore,
4518 enum XML_Account account) {
4519 const char *s = *startPtr;
4520 const char **eventPP;
4521 const char **eventEndPP;
4522 if (enc == parser->m_encoding) {
4523 eventPP = &parser->m_eventPtr;
4524 *eventPP = s;
4525 eventEndPP = &parser->m_eventEndPtr;
4526 } else {
4527 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4528 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4529 }
4530 *eventPP = s;
4531 *startPtr = NULL;
4532
4533 for (;;) {
4534 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4535 int tok = XmlCdataSectionTok(enc, s, end, &next);
4536 #if XML_GE == 1
4537 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4538 accountingOnAbort(parser);
4539 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4540 }
4541 #else
4542 UNUSED_P(account);
4543 #endif
4544 *eventEndPP = next;
4545 switch (tok) {
4546 case XML_TOK_CDATA_SECT_CLOSE:
4547 if (parser->m_endCdataSectionHandler)
4548 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4549 /* BEGIN disabled code */
4550 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4551 else if ((0) && parser->m_characterDataHandler)
4552 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4553 0);
4554 /* END disabled code */
4555 else if (parser->m_defaultHandler)
4556 reportDefault(parser, enc, s, next);
4557 *startPtr = next;
4558 *nextPtr = next;
4559 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4560 return XML_ERROR_ABORTED;
4561 else
4562 return XML_ERROR_NONE;
4563 case XML_TOK_DATA_NEWLINE:
4564 if (parser->m_characterDataHandler) {
4565 XML_Char c = 0xA;
4566 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4567 } else if (parser->m_defaultHandler)
4568 reportDefault(parser, enc, s, next);
4569 break;
4570 case XML_TOK_DATA_CHARS: {
4571 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4572 if (charDataHandler) {
4573 if (MUST_CONVERT(enc, s)) {
4574 for (;;) {
4575 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4576 const enum XML_Convert_Result convert_res = XmlConvert(
4577 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4578 *eventEndPP = next;
4579 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4580 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4581 if ((convert_res == XML_CONVERT_COMPLETED)
4582 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4583 break;
4584 *eventPP = s;
4585 }
4586 } else
4587 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4588 (int)((const XML_Char *)next - (const XML_Char *)s));
4589 } else if (parser->m_defaultHandler)
4590 reportDefault(parser, enc, s, next);
4591 } break;
4592 case XML_TOK_INVALID:
4593 *eventPP = next;
4594 return XML_ERROR_INVALID_TOKEN;
4595 case XML_TOK_PARTIAL_CHAR:
4596 if (haveMore) {
4597 *nextPtr = s;
4598 return XML_ERROR_NONE;
4599 }
4600 return XML_ERROR_PARTIAL_CHAR;
4601 case XML_TOK_PARTIAL:
4602 case XML_TOK_NONE:
4603 if (haveMore) {
4604 *nextPtr = s;
4605 return XML_ERROR_NONE;
4606 }
4607 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4608 default:
4609 /* Every token returned by XmlCdataSectionTok() has its own
4610 * explicit case, so this default case will never be executed.
4611 * We retain it as a safety net and exclude it from the coverage
4612 * statistics.
4613 *
4614 * LCOV_EXCL_START
4615 */
4616 *eventPP = next;
4617 return XML_ERROR_UNEXPECTED_STATE;
4618 /* LCOV_EXCL_STOP */
4619 }
4620
4621 switch (parser->m_parsingStatus.parsing) {
4622 case XML_SUSPENDED:
4623 *eventPP = next;
4624 *nextPtr = next;
4625 return XML_ERROR_NONE;
4626 case XML_FINISHED:
4627 *eventPP = next;
4628 return XML_ERROR_ABORTED;
4629 case XML_PARSING:
4630 if (parser->m_reenter) {
4631 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
4632 }
4633 /* Fall through */
4634 default:;
4635 *eventPP = s = next;
4636 }
4637 }
4638 /* not reached */
4639 }
4640
4641 #ifdef XML_DTD
4642
4643 /* The idea here is to avoid using stack for each IGNORE section when
4644 the whole file is parsed with one call.
4645 */
4646 static enum XML_Error PTRCALL
4647 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4648 const char **endPtr) {
4649 enum XML_Error result
4650 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4651 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4652 if (result != XML_ERROR_NONE)
4653 return result;
4654 if (start) {
4655 parser->m_processor = prologProcessor;
4656 return prologProcessor(parser, start, end, endPtr);
4657 }
4658 return result;
4659 }
4660
4661 /* startPtr gets set to non-null is the section is closed, and to null
4662 if the section is not yet closed.
4663 */
4664 static enum XML_Error
4665 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4666 const char *end, const char **nextPtr, XML_Bool haveMore) {
4667 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4668 int tok;
4669 const char *s = *startPtr;
4670 const char **eventPP;
4671 const char **eventEndPP;
4672 if (enc == parser->m_encoding) {
4673 eventPP = &parser->m_eventPtr;
4674 *eventPP = s;
4675 eventEndPP = &parser->m_eventEndPtr;
4676 } else {
4677 /* It's not entirely clear, but it seems the following two lines
4678 * of code cannot be executed. The only occasions on which 'enc'
4679 * is not 'encoding' are when this function is called
4680 * from the internal entity processing, and IGNORE sections are an
4681 * error in internal entities.
4682 *
4683 * Since it really isn't clear that this is true, we keep the code
4684 * and just remove it from our coverage tests.
4685 *
4686 * LCOV_EXCL_START
4687 */
4688 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4689 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4690 /* LCOV_EXCL_STOP */
4691 }
4692 *eventPP = s;
4693 *startPtr = NULL;
4694 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4695 # if XML_GE == 1
4696 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4697 XML_ACCOUNT_DIRECT)) {
4698 accountingOnAbort(parser);
4699 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4700 }
4701 # endif
4702 *eventEndPP = next;
4703 switch (tok) {
4704 case XML_TOK_IGNORE_SECT:
4705 if (parser->m_defaultHandler)
4706 reportDefault(parser, enc, s, next);
4707 *startPtr = next;
4708 *nextPtr = next;
4709 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4710 return XML_ERROR_ABORTED;
4711 else
4712 return XML_ERROR_NONE;
4713 case XML_TOK_INVALID:
4714 *eventPP = next;
4715 return XML_ERROR_INVALID_TOKEN;
4716 case XML_TOK_PARTIAL_CHAR:
4717 if (haveMore) {
4718 *nextPtr = s;
4719 return XML_ERROR_NONE;
4720 }
4721 return XML_ERROR_PARTIAL_CHAR;
4722 case XML_TOK_PARTIAL:
4723 case XML_TOK_NONE:
4724 if (haveMore) {
4725 *nextPtr = s;
4726 return XML_ERROR_NONE;
4727 }
4728 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4729 default:
4730 /* All of the tokens that XmlIgnoreSectionTok() returns have
4731 * explicit cases to handle them, so this default case is never
4732 * executed. We keep it as a safety net anyway, and remove it
4733 * from our test coverage statistics.
4734 *
4735 * LCOV_EXCL_START
4736 */
4737 *eventPP = next;
4738 return XML_ERROR_UNEXPECTED_STATE;
4739 /* LCOV_EXCL_STOP */
4740 }
4741 /* not reached */
4742 }
4743
4744 #endif /* XML_DTD */
4745
4746 static enum XML_Error
4747 initializeEncoding(XML_Parser parser) {
4748 const char *s;
4749 #ifdef XML_UNICODE
4750 char encodingBuf[128];
4751 /* See comments about `protocolEncodingName` in parserInit() */
4752 if (! parser->m_protocolEncodingName)
4753 s = NULL;
4754 else {
4755 int i;
4756 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4757 if (i == sizeof(encodingBuf) - 1
4758 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4759 encodingBuf[0] = '\0';
4760 break;
4761 }
4762 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4763 }
4764 encodingBuf[i] = '\0';
4765 s = encodingBuf;
4766 }
4767 #else
4768 s = parser->m_protocolEncodingName;
4769 #endif
4770 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4771 &parser->m_initEncoding, &parser->m_encoding, s))
4772 return XML_ERROR_NONE;
4773 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4774 }
4775
4776 static enum XML_Error
4777 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4778 const char *next) {
4779 const char *encodingName = NULL;
4780 const XML_Char *storedEncName = NULL;
4781 const ENCODING *newEncoding = NULL;
4782 const char *version = NULL;
4783 const char *versionend = NULL;
4784 const XML_Char *storedversion = NULL;
4785 int standalone = -1;
4786
4787 #if XML_GE == 1
4788 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4789 XML_ACCOUNT_DIRECT)) {
4790 accountingOnAbort(parser);
4791 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4792 }
4793 #endif
4794
4795 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4796 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4797 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4798 if (isGeneralTextEntity)
4799 return XML_ERROR_TEXT_DECL;
4800 else
4801 return XML_ERROR_XML_DECL;
4802 }
4803 if (! isGeneralTextEntity && standalone == 1) {
4804 parser->m_dtd->standalone = XML_TRUE;
4805 #ifdef XML_DTD
4806 if (parser->m_paramEntityParsing
4807 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4808 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4809 #endif /* XML_DTD */
4810 }
4811 if (parser->m_xmlDeclHandler) {
4812 if (encodingName != NULL) {
4813 storedEncName = poolStoreString(
4814 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4815 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4816 if (! storedEncName)
4817 return XML_ERROR_NO_MEMORY;
4818 poolFinish(&parser->m_temp2Pool);
4819 }
4820 if (version) {
4821 storedversion
4822 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4823 versionend - parser->m_encoding->minBytesPerChar);
4824 if (! storedversion)
4825 return XML_ERROR_NO_MEMORY;
4826 }
4827 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4828 standalone);
4829 } else if (parser->m_defaultHandler)
4830 reportDefault(parser, parser->m_encoding, s, next);
4831 if (parser->m_protocolEncodingName == NULL) {
4832 if (newEncoding) {
4833 /* Check that the specified encoding does not conflict with what
4834 * the parser has already deduced. Do we have the same number
4835 * of bytes in the smallest representation of a character? If
4836 * this is UTF-16, is it the same endianness?
4837 */
4838 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4839 || (newEncoding->minBytesPerChar == 2
4840 && newEncoding != parser->m_encoding)) {
4841 parser->m_eventPtr = encodingName;
4842 return XML_ERROR_INCORRECT_ENCODING;
4843 }
4844 parser->m_encoding = newEncoding;
4845 } else if (encodingName) {
4846 enum XML_Error result;
4847 if (! storedEncName) {
4848 storedEncName = poolStoreString(
4849 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4850 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4851 if (! storedEncName)
4852 return XML_ERROR_NO_MEMORY;
4853 }
4854 result = handleUnknownEncoding(parser, storedEncName);
4855 poolClear(&parser->m_temp2Pool);
4856 if (result == XML_ERROR_UNKNOWN_ENCODING)
4857 parser->m_eventPtr = encodingName;
4858 return result;
4859 }
4860 }
4861
4862 if (storedEncName || storedversion)
4863 poolClear(&parser->m_temp2Pool);
4864
4865 return XML_ERROR_NONE;
4866 }
4867
4868 static enum XML_Error
4869 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4870 if (parser->m_unknownEncodingHandler) {
4871 XML_Encoding info;
4872 int i;
4873 for (i = 0; i < 256; i++)
4874 info.map[i] = -1;
4875 info.convert = NULL;
4876 info.data = NULL;
4877 info.release = NULL;
4878 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4879 encodingName, &info)) {
4880 ENCODING *enc;
4881 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4882 if (! parser->m_unknownEncodingMem) {
4883 if (info.release)
4884 info.release(info.data);
4885 return XML_ERROR_NO_MEMORY;
4886 }
4887 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4888 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4889 if (enc) {
4890 parser->m_unknownEncodingData = info.data;
4891 parser->m_unknownEncodingRelease = info.release;
4892 parser->m_encoding = enc;
4893 return XML_ERROR_NONE;
4894 }
4895 }
4896 if (info.release != NULL)
4897 info.release(info.data);
4898 }
4899 return XML_ERROR_UNKNOWN_ENCODING;
4900 }
4901
4902 static enum XML_Error PTRCALL
4903 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4904 const char **nextPtr) {
4905 enum XML_Error result = initializeEncoding(parser);
4906 if (result != XML_ERROR_NONE)
4907 return result;
4908 parser->m_processor = prologProcessor;
4909 return prologProcessor(parser, s, end, nextPtr);
4910 }
4911
4912 #ifdef XML_DTD
4913
4914 static enum XML_Error PTRCALL
4915 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4916 const char **nextPtr) {
4917 enum XML_Error result = initializeEncoding(parser);
4918 if (result != XML_ERROR_NONE)
4919 return result;
4920
4921 /* we know now that XML_Parse(Buffer) has been called,
4922 so we consider the external parameter entity read */
4923 parser->m_dtd->paramEntityRead = XML_TRUE;
4924
4925 if (parser->m_prologState.inEntityValue) {
4926 parser->m_processor = entityValueInitProcessor;
4927 return entityValueInitProcessor(parser, s, end, nextPtr);
4928 } else {
4929 parser->m_processor = externalParEntProcessor;
4930 return externalParEntProcessor(parser, s, end, nextPtr);
4931 }
4932 }
4933
4934 static enum XML_Error PTRCALL
4935 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4936 const char **nextPtr) {
4937 int tok;
4938 const char *start = s;
4939 const char *next = start;
4940 parser->m_eventPtr = start;
4941
4942 for (;;) {
4943 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4944 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4945 - storeEntityValue
4946 - processXmlDecl
4947 */
4948 parser->m_eventEndPtr = next;
4949 if (tok <= 0) {
4950 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4951 *nextPtr = s;
4952 return XML_ERROR_NONE;
4953 }
4954 switch (tok) {
4955 case XML_TOK_INVALID:
4956 return XML_ERROR_INVALID_TOKEN;
4957 case XML_TOK_PARTIAL:
4958 return XML_ERROR_UNCLOSED_TOKEN;
4959 case XML_TOK_PARTIAL_CHAR:
4960 return XML_ERROR_PARTIAL_CHAR;
4961 case XML_TOK_NONE: /* start == end */
4962 default:
4963 break;
4964 }
4965 /* found end of entity value - can store it now */
4966 return storeEntityValue(parser, parser->m_encoding, s, end,
4967 XML_ACCOUNT_DIRECT, NULL);
4968 } else if (tok == XML_TOK_XML_DECL) {
4969 enum XML_Error result;
4970 result = processXmlDecl(parser, 0, start, next);
4971 if (result != XML_ERROR_NONE)
4972 return result;
4973 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4974 * that to happen, a parameter entity parsing handler must have attempted
4975 * to suspend the parser, which fails and raises an error. The parser can
4976 * be aborted, but can't be suspended.
4977 */
4978 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4979 return XML_ERROR_ABORTED;
4980 *nextPtr = next;
4981 /* stop scanning for text declaration - we found one */
4982 parser->m_processor = entityValueProcessor;
4983 return entityValueProcessor(parser, next, end, nextPtr);
4984 }
4985 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4986 must move s and nextPtr forward to consume the BOM.
4987
4988 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4989 would leave the BOM in the buffer and return. On the next call to this
4990 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4991 is not valid to have multiple BOMs.
4992 */
4993 else if (tok == XML_TOK_BOM) {
4994 # if XML_GE == 1
4995 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4996 XML_ACCOUNT_DIRECT)) {
4997 accountingOnAbort(parser);
4998 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4999 }
5000 # endif
5001
5002 *nextPtr = next;
5003 s = next;
5004 }
5005 /* If we get this token, we have the start of what might be a
5006 normal tag, but not a declaration (i.e. it doesn't begin with
5007 "<!" or "<?"). In a DTD context, that isn't legal.
5008 */
5009 else if (tok == XML_TOK_INSTANCE_START) {
5010 *nextPtr = next;
5011 return XML_ERROR_SYNTAX;
5012 }
5013 start = next;
5014 parser->m_eventPtr = start;
5015 }
5016 }
5017
5018 static enum XML_Error PTRCALL
5019 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
5020 const char **nextPtr) {
5021 const char *next = s;
5022 int tok;
5023
5024 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5025 if (tok <= 0) {
5026 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5027 *nextPtr = s;
5028 return XML_ERROR_NONE;
5029 }
5030 switch (tok) {
5031 case XML_TOK_INVALID:
5032 return XML_ERROR_INVALID_TOKEN;
5033 case XML_TOK_PARTIAL:
5034 return XML_ERROR_UNCLOSED_TOKEN;
5035 case XML_TOK_PARTIAL_CHAR:
5036 return XML_ERROR_PARTIAL_CHAR;
5037 case XML_TOK_NONE: /* start == end */
5038 default:
5039 break;
5040 }
5041 }
5042 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
5043 However, when parsing an external subset, doProlog will not accept a BOM
5044 as valid, and report a syntax error, so we have to skip the BOM, and
5045 account for the BOM bytes.
5046 */
5047 else if (tok == XML_TOK_BOM) {
5048 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5049 XML_ACCOUNT_DIRECT)) {
5050 accountingOnAbort(parser);
5051 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5052 }
5053
5054 s = next;
5055 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5056 }
5057
5058 parser->m_processor = prologProcessor;
5059 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5060 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5061 XML_ACCOUNT_DIRECT);
5062 }
5063
5064 static enum XML_Error PTRCALL
5065 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
5066 const char **nextPtr) {
5067 const char *start = s;
5068 const char *next = s;
5069 const ENCODING *enc = parser->m_encoding;
5070 int tok;
5071
5072 for (;;) {
5073 tok = XmlPrologTok(enc, start, end, &next);
5074 /* Note: These bytes are accounted later in:
5075 - storeEntityValue
5076 */
5077 if (tok <= 0) {
5078 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5079 *nextPtr = s;
5080 return XML_ERROR_NONE;
5081 }
5082 switch (tok) {
5083 case XML_TOK_INVALID:
5084 return XML_ERROR_INVALID_TOKEN;
5085 case XML_TOK_PARTIAL:
5086 return XML_ERROR_UNCLOSED_TOKEN;
5087 case XML_TOK_PARTIAL_CHAR:
5088 return XML_ERROR_PARTIAL_CHAR;
5089 case XML_TOK_NONE: /* start == end */
5090 default:
5091 break;
5092 }
5093 /* found end of entity value - can store it now */
5094 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
5095 }
5096 /* If we get this token, we have the start of what might be a
5097 normal tag, but not a declaration (i.e. it doesn't begin with
5098 "<!" or "<?"). In a DTD context, that isn't legal.
5099 */
5100 else if (tok == XML_TOK_INSTANCE_START) {
5101 *nextPtr = next;
5102 return XML_ERROR_SYNTAX;
5103 }
5104
5105 start = next;
5106 }
5107 }
5108
5109 #endif /* XML_DTD */
5110
5111 static enum XML_Error PTRCALL
5112 prologProcessor(XML_Parser parser, const char *s, const char *end,
5113 const char **nextPtr) {
5114 const char *next = s;
5115 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5116 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5117 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5118 XML_ACCOUNT_DIRECT);
5119 }
5120
5121 static enum XML_Error
5122 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
5123 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
5124 XML_Bool allowClosingDoctype, enum XML_Account account) {
5125 #ifdef XML_DTD
5126 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
5127 #endif /* XML_DTD */
5128 static const XML_Char atypeCDATA[]
5129 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
5130 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
5131 static const XML_Char atypeIDREF[]
5132 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
5133 static const XML_Char atypeIDREFS[]
5134 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
5135 static const XML_Char atypeENTITY[]
5136 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
5137 static const XML_Char atypeENTITIES[]
5138 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
5139 ASCII_I, ASCII_E, ASCII_S, '\0'};
5140 static const XML_Char atypeNMTOKEN[]
5141 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
5142 static const XML_Char atypeNMTOKENS[]
5143 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
5144 ASCII_E, ASCII_N, ASCII_S, '\0'};
5145 static const XML_Char notationPrefix[]
5146 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
5147 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
5148 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
5149 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
5150
5151 #ifndef XML_DTD
5152 UNUSED_P(account);
5153 #endif
5154
5155 /* save one level of indirection */
5156 DTD *const dtd = parser->m_dtd;
5157
5158 const char **eventPP;
5159 const char **eventEndPP;
5160 enum XML_Content_Quant quant;
5161
5162 if (enc == parser->m_encoding) {
5163 eventPP = &parser->m_eventPtr;
5164 eventEndPP = &parser->m_eventEndPtr;
5165 } else {
5166 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5167 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5168 }
5169
5170 for (;;) {
5171 int role;
5172 XML_Bool handleDefault = XML_TRUE;
5173 *eventPP = s;
5174 *eventEndPP = next;
5175 if (tok <= 0) {
5176 if (haveMore && tok != XML_TOK_INVALID) {
5177 *nextPtr = s;
5178 return XML_ERROR_NONE;
5179 }
5180 switch (tok) {
5181 case XML_TOK_INVALID:
5182 *eventPP = next;
5183 return XML_ERROR_INVALID_TOKEN;
5184 case XML_TOK_PARTIAL:
5185 return XML_ERROR_UNCLOSED_TOKEN;
5186 case XML_TOK_PARTIAL_CHAR:
5187 return XML_ERROR_PARTIAL_CHAR;
5188 case -XML_TOK_PROLOG_S:
5189 tok = -tok;
5190 break;
5191 case XML_TOK_NONE:
5192 #ifdef XML_DTD
5193 /* for internal PE NOT referenced between declarations */
5194 if (enc != parser->m_encoding
5195 && ! parser->m_openInternalEntities->betweenDecl) {
5196 *nextPtr = s;
5197 return XML_ERROR_NONE;
5198 }
5199 /* WFC: PE Between Declarations - must check that PE contains
5200 complete markup, not only for external PEs, but also for
5201 internal PEs if the reference occurs between declarations.
5202 */
5203 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5204 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
5205 == XML_ROLE_ERROR)
5206 return XML_ERROR_INCOMPLETE_PE;
5207 *nextPtr = s;
5208 return XML_ERROR_NONE;
5209 }
5210 #endif /* XML_DTD */
5211 return XML_ERROR_NO_ELEMENTS;
5212 default:
5213 tok = -tok;
5214 next = end;
5215 break;
5216 }
5217 }
5218 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
5219 #if XML_GE == 1
5220 switch (role) {
5221 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
5222 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
5223 # ifdef XML_DTD
5224 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
5225 # endif
5226 break;
5227 default:
5228 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
5229 accountingOnAbort(parser);
5230 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5231 }
5232 }
5233 #endif
5234 switch (role) {
5235 case XML_ROLE_XML_DECL: {
5236 enum XML_Error result = processXmlDecl(parser, 0, s, next);
5237 if (result != XML_ERROR_NONE)
5238 return result;
5239 enc = parser->m_encoding;
5240 handleDefault = XML_FALSE;
5241 } break;
5242 case XML_ROLE_DOCTYPE_NAME:
5243 if (parser->m_startDoctypeDeclHandler) {
5244 parser->m_doctypeName
5245 = poolStoreString(&parser->m_tempPool, enc, s, next);
5246 if (! parser->m_doctypeName)
5247 return XML_ERROR_NO_MEMORY;
5248 poolFinish(&parser->m_tempPool);
5249 parser->m_doctypePubid = NULL;
5250 handleDefault = XML_FALSE;
5251 }
5252 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
5253 break;
5254 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
5255 if (parser->m_startDoctypeDeclHandler) {
5256 parser->m_startDoctypeDeclHandler(
5257 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5258 parser->m_doctypePubid, 1);
5259 parser->m_doctypeName = NULL;
5260 poolClear(&parser->m_tempPool);
5261 handleDefault = XML_FALSE;
5262 }
5263 break;
5264 #ifdef XML_DTD
5265 case XML_ROLE_TEXT_DECL: {
5266 enum XML_Error result = processXmlDecl(parser, 1, s, next);
5267 if (result != XML_ERROR_NONE)
5268 return result;
5269 enc = parser->m_encoding;
5270 handleDefault = XML_FALSE;
5271 } break;
5272 #endif /* XML_DTD */
5273 case XML_ROLE_DOCTYPE_PUBLIC_ID:
5274 #ifdef XML_DTD
5275 parser->m_useForeignDTD = XML_FALSE;
5276 parser->m_declEntity = (ENTITY *)lookup(
5277 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5278 if (! parser->m_declEntity)
5279 return XML_ERROR_NO_MEMORY;
5280 #endif /* XML_DTD */
5281 dtd->hasParamEntityRefs = XML_TRUE;
5282 if (parser->m_startDoctypeDeclHandler) {
5283 XML_Char *pubId;
5284 if (! XmlIsPublicId(enc, s, next, eventPP))
5285 return XML_ERROR_PUBLICID;
5286 pubId = poolStoreString(&parser->m_tempPool, enc,
5287 s + enc->minBytesPerChar,
5288 next - enc->minBytesPerChar);
5289 if (! pubId)
5290 return XML_ERROR_NO_MEMORY;
5291 normalizePublicId(pubId);
5292 poolFinish(&parser->m_tempPool);
5293 parser->m_doctypePubid = pubId;
5294 handleDefault = XML_FALSE;
5295 goto alreadyChecked;
5296 }
5297 /* fall through */
5298 case XML_ROLE_ENTITY_PUBLIC_ID:
5299 if (! XmlIsPublicId(enc, s, next, eventPP))
5300 return XML_ERROR_PUBLICID;
5301 alreadyChecked:
5302 if (dtd->keepProcessing && parser->m_declEntity) {
5303 XML_Char *tem
5304 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5305 next - enc->minBytesPerChar);
5306 if (! tem)
5307 return XML_ERROR_NO_MEMORY;
5308 normalizePublicId(tem);
5309 parser->m_declEntity->publicId = tem;
5310 poolFinish(&dtd->pool);
5311 /* Don't suppress the default handler if we fell through from
5312 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5313 */
5314 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5315 handleDefault = XML_FALSE;
5316 }
5317 break;
5318 case XML_ROLE_DOCTYPE_CLOSE:
5319 if (allowClosingDoctype != XML_TRUE) {
5320 /* Must not close doctype from within expanded parameter entities */
5321 return XML_ERROR_INVALID_TOKEN;
5322 }
5323
5324 if (parser->m_doctypeName) {
5325 parser->m_startDoctypeDeclHandler(
5326 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5327 parser->m_doctypePubid, 0);
5328 poolClear(&parser->m_tempPool);
5329 handleDefault = XML_FALSE;
5330 }
5331 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5332 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5333 was not set, indicating an external subset
5334 */
5335 #ifdef XML_DTD
5336 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5337 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5338 dtd->hasParamEntityRefs = XML_TRUE;
5339 if (parser->m_paramEntityParsing
5340 && parser->m_externalEntityRefHandler) {
5341 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5342 externalSubsetName, sizeof(ENTITY));
5343 if (! entity) {
5344 /* The external subset name "#" will have already been
5345 * inserted into the hash table at the start of the
5346 * external entity parsing, so no allocation will happen
5347 * and lookup() cannot fail.
5348 */
5349 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5350 }
5351 if (parser->m_useForeignDTD)
5352 entity->base = parser->m_curBase;
5353 dtd->paramEntityRead = XML_FALSE;
5354 if (! parser->m_externalEntityRefHandler(
5355 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5356 entity->systemId, entity->publicId))
5357 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5358 if (dtd->paramEntityRead) {
5359 if (! dtd->standalone && parser->m_notStandaloneHandler
5360 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5361 return XML_ERROR_NOT_STANDALONE;
5362 }
5363 /* if we didn't read the foreign DTD then this means that there
5364 is no external subset and we must reset dtd->hasParamEntityRefs
5365 */
5366 else if (! parser->m_doctypeSysid)
5367 dtd->hasParamEntityRefs = hadParamEntityRefs;
5368 /* end of DTD - no need to update dtd->keepProcessing */
5369 }
5370 parser->m_useForeignDTD = XML_FALSE;
5371 }
5372 #endif /* XML_DTD */
5373 if (parser->m_endDoctypeDeclHandler) {
5374 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5375 handleDefault = XML_FALSE;
5376 }
5377 break;
5378 case XML_ROLE_INSTANCE_START:
5379 #ifdef XML_DTD
5380 /* if there is no DOCTYPE declaration then now is the
5381 last chance to read the foreign DTD
5382 */
5383 if (parser->m_useForeignDTD) {
5384 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5385 dtd->hasParamEntityRefs = XML_TRUE;
5386 if (parser->m_paramEntityParsing
5387 && parser->m_externalEntityRefHandler) {
5388 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5389 externalSubsetName, sizeof(ENTITY));
5390 if (! entity)
5391 return XML_ERROR_NO_MEMORY;
5392 entity->base = parser->m_curBase;
5393 dtd->paramEntityRead = XML_FALSE;
5394 if (! parser->m_externalEntityRefHandler(
5395 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5396 entity->systemId, entity->publicId))
5397 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5398 if (dtd->paramEntityRead) {
5399 if (! dtd->standalone && parser->m_notStandaloneHandler
5400 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5401 return XML_ERROR_NOT_STANDALONE;
5402 }
5403 /* if we didn't read the foreign DTD then this means that there
5404 is no external subset and we must reset dtd->hasParamEntityRefs
5405 */
5406 else
5407 dtd->hasParamEntityRefs = hadParamEntityRefs;
5408 /* end of DTD - no need to update dtd->keepProcessing */
5409 }
5410 }
5411 #endif /* XML_DTD */
5412 parser->m_processor = contentProcessor;
5413 return contentProcessor(parser, s, end, nextPtr);
5414 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5415 parser->m_declElementType = getElementType(parser, enc, s, next);
5416 if (! parser->m_declElementType)
5417 return XML_ERROR_NO_MEMORY;
5418 goto checkAttListDeclHandler;
5419 case XML_ROLE_ATTRIBUTE_NAME:
5420 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5421 if (! parser->m_declAttributeId)
5422 return XML_ERROR_NO_MEMORY;
5423 parser->m_declAttributeIsCdata = XML_FALSE;
5424 parser->m_declAttributeType = NULL;
5425 parser->m_declAttributeIsId = XML_FALSE;
5426 goto checkAttListDeclHandler;
5427 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5428 parser->m_declAttributeIsCdata = XML_TRUE;
5429 parser->m_declAttributeType = atypeCDATA;
5430 goto checkAttListDeclHandler;
5431 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5432 parser->m_declAttributeIsId = XML_TRUE;
5433 parser->m_declAttributeType = atypeID;
5434 goto checkAttListDeclHandler;
5435 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5436 parser->m_declAttributeType = atypeIDREF;
5437 goto checkAttListDeclHandler;
5438 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5439 parser->m_declAttributeType = atypeIDREFS;
5440 goto checkAttListDeclHandler;
5441 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5442 parser->m_declAttributeType = atypeENTITY;
5443 goto checkAttListDeclHandler;
5444 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5445 parser->m_declAttributeType = atypeENTITIES;
5446 goto checkAttListDeclHandler;
5447 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5448 parser->m_declAttributeType = atypeNMTOKEN;
5449 goto checkAttListDeclHandler;
5450 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5451 parser->m_declAttributeType = atypeNMTOKENS;
5452 checkAttListDeclHandler:
5453 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5454 handleDefault = XML_FALSE;
5455 break;
5456 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5457 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5458 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5459 const XML_Char *prefix;
5460 if (parser->m_declAttributeType) {
5461 prefix = enumValueSep;
5462 } else {
5463 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5464 : enumValueStart);
5465 }
5466 if (! poolAppendString(&parser->m_tempPool, prefix))
5467 return XML_ERROR_NO_MEMORY;
5468 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5469 return XML_ERROR_NO_MEMORY;
5470 parser->m_declAttributeType = parser->m_tempPool.start;
5471 handleDefault = XML_FALSE;
5472 }
5473 break;
5474 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5475 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5476 if (dtd->keepProcessing) {
5477 if (! defineAttribute(parser->m_declElementType,
5478 parser->m_declAttributeId,
5479 parser->m_declAttributeIsCdata,
5480 parser->m_declAttributeIsId, 0, parser))
5481 return XML_ERROR_NO_MEMORY;
5482 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5483 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5484 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5485 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5486 /* Enumerated or Notation type */
5487 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5488 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5489 return XML_ERROR_NO_MEMORY;
5490 parser->m_declAttributeType = parser->m_tempPool.start;
5491 poolFinish(&parser->m_tempPool);
5492 }
5493 *eventEndPP = s;
5494 parser->m_attlistDeclHandler(
5495 parser->m_handlerArg, parser->m_declElementType->name,
5496 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5497 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5498 handleDefault = XML_FALSE;
5499 }
5500 }
5501 poolClear(&parser->m_tempPool);
5502 break;
5503 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5504 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5505 if (dtd->keepProcessing) {
5506 const XML_Char *attVal;
5507 enum XML_Error result = storeAttributeValue(
5508 parser, enc, parser->m_declAttributeIsCdata,
5509 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5510 XML_ACCOUNT_NONE);
5511 if (result)
5512 return result;
5513 attVal = poolStart(&dtd->pool);
5514 poolFinish(&dtd->pool);
5515 /* ID attributes aren't allowed to have a default */
5516 if (! defineAttribute(
5517 parser->m_declElementType, parser->m_declAttributeId,
5518 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5519 return XML_ERROR_NO_MEMORY;
5520 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5521 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5522 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5523 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5524 /* Enumerated or Notation type */
5525 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5526 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5527 return XML_ERROR_NO_MEMORY;
5528 parser->m_declAttributeType = parser->m_tempPool.start;
5529 poolFinish(&parser->m_tempPool);
5530 }
5531 *eventEndPP = s;
5532 parser->m_attlistDeclHandler(
5533 parser->m_handlerArg, parser->m_declElementType->name,
5534 parser->m_declAttributeId->name, parser->m_declAttributeType,
5535 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5536 poolClear(&parser->m_tempPool);
5537 handleDefault = XML_FALSE;
5538 }
5539 }
5540 break;
5541 case XML_ROLE_ENTITY_VALUE:
5542 if (dtd->keepProcessing) {
5543 #if XML_GE == 1
5544 // This will store the given replacement text in
5545 // parser->m_declEntity->textPtr.
5546 enum XML_Error result = callStoreEntityValue(
5547 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5548 XML_ACCOUNT_NONE);
5549 if (parser->m_declEntity) {
5550 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5551 parser->m_declEntity->textLen
5552 = (int)(poolLength(&dtd->entityValuePool));
5553 poolFinish(&dtd->entityValuePool);
5554 if (parser->m_entityDeclHandler) {
5555 *eventEndPP = s;
5556 parser->m_entityDeclHandler(
5557 parser->m_handlerArg, parser->m_declEntity->name,
5558 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5559 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5560 handleDefault = XML_FALSE;
5561 }
5562 } else
5563 poolDiscard(&dtd->entityValuePool);
5564 if (result != XML_ERROR_NONE)
5565 return result;
5566 #else
5567 // This will store "&entity123;" in parser->m_declEntity->textPtr
5568 // to end up as "&entity123;" in the handler.
5569 if (parser->m_declEntity != NULL) {
5570 const enum XML_Error result
5571 = storeSelfEntityValue(parser, parser->m_declEntity);
5572 if (result != XML_ERROR_NONE)
5573 return result;
5574
5575 if (parser->m_entityDeclHandler) {
5576 *eventEndPP = s;
5577 parser->m_entityDeclHandler(
5578 parser->m_handlerArg, parser->m_declEntity->name,
5579 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5580 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5581 handleDefault = XML_FALSE;
5582 }
5583 }
5584 #endif
5585 }
5586 break;
5587 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5588 #ifdef XML_DTD
5589 parser->m_useForeignDTD = XML_FALSE;
5590 #endif /* XML_DTD */
5591 dtd->hasParamEntityRefs = XML_TRUE;
5592 if (parser->m_startDoctypeDeclHandler) {
5593 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5594 s + enc->minBytesPerChar,
5595 next - enc->minBytesPerChar);
5596 if (parser->m_doctypeSysid == NULL)
5597 return XML_ERROR_NO_MEMORY;
5598 poolFinish(&parser->m_tempPool);
5599 handleDefault = XML_FALSE;
5600 }
5601 #ifdef XML_DTD
5602 else
5603 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5604 for the case where no parser->m_startDoctypeDeclHandler is set */
5605 parser->m_doctypeSysid = externalSubsetName;
5606 #endif /* XML_DTD */
5607 if (! dtd->standalone
5608 #ifdef XML_DTD
5609 && ! parser->m_paramEntityParsing
5610 #endif /* XML_DTD */
5611 && parser->m_notStandaloneHandler
5612 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5613 return XML_ERROR_NOT_STANDALONE;
5614 #ifndef XML_DTD
5615 break;
5616 #else /* XML_DTD */
5617 if (! parser->m_declEntity) {
5618 parser->m_declEntity = (ENTITY *)lookup(
5619 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5620 if (! parser->m_declEntity)
5621 return XML_ERROR_NO_MEMORY;
5622 parser->m_declEntity->publicId = NULL;
5623 }
5624 #endif /* XML_DTD */
5625 /* fall through */
5626 case XML_ROLE_ENTITY_SYSTEM_ID:
5627 if (dtd->keepProcessing && parser->m_declEntity) {
5628 parser->m_declEntity->systemId
5629 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5630 next - enc->minBytesPerChar);
5631 if (! parser->m_declEntity->systemId)
5632 return XML_ERROR_NO_MEMORY;
5633 parser->m_declEntity->base = parser->m_curBase;
5634 poolFinish(&dtd->pool);
5635 /* Don't suppress the default handler if we fell through from
5636 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5637 */
5638 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5639 handleDefault = XML_FALSE;
5640 }
5641 break;
5642 case XML_ROLE_ENTITY_COMPLETE:
5643 #if XML_GE == 0
5644 // This will store "&entity123;" in entity->textPtr
5645 // to end up as "&entity123;" in the handler.
5646 if (parser->m_declEntity != NULL) {
5647 const enum XML_Error result
5648 = storeSelfEntityValue(parser, parser->m_declEntity);
5649 if (result != XML_ERROR_NONE)
5650 return result;
5651 }
5652 #endif
5653 if (dtd->keepProcessing && parser->m_declEntity
5654 && parser->m_entityDeclHandler) {
5655 *eventEndPP = s;
5656 parser->m_entityDeclHandler(
5657 parser->m_handlerArg, parser->m_declEntity->name,
5658 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5659 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5660 handleDefault = XML_FALSE;
5661 }
5662 break;
5663 case XML_ROLE_ENTITY_NOTATION_NAME:
5664 if (dtd->keepProcessing && parser->m_declEntity) {
5665 parser->m_declEntity->notation
5666 = poolStoreString(&dtd->pool, enc, s, next);
5667 if (! parser->m_declEntity->notation)
5668 return XML_ERROR_NO_MEMORY;
5669 poolFinish(&dtd->pool);
5670 if (parser->m_unparsedEntityDeclHandler) {
5671 *eventEndPP = s;
5672 parser->m_unparsedEntityDeclHandler(
5673 parser->m_handlerArg, parser->m_declEntity->name,
5674 parser->m_declEntity->base, parser->m_declEntity->systemId,
5675 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5676 handleDefault = XML_FALSE;
5677 } else if (parser->m_entityDeclHandler) {
5678 *eventEndPP = s;
5679 parser->m_entityDeclHandler(
5680 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5681 parser->m_declEntity->base, parser->m_declEntity->systemId,
5682 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5683 handleDefault = XML_FALSE;
5684 }
5685 }
5686 break;
5687 case XML_ROLE_GENERAL_ENTITY_NAME: {
5688 if (XmlPredefinedEntityName(enc, s, next)) {
5689 parser->m_declEntity = NULL;
5690 break;
5691 }
5692 if (dtd->keepProcessing) {
5693 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5694 if (! name)
5695 return XML_ERROR_NO_MEMORY;
5696 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5697 name, sizeof(ENTITY));
5698 if (! parser->m_declEntity)
5699 return XML_ERROR_NO_MEMORY;
5700 if (parser->m_declEntity->name != name) {
5701 poolDiscard(&dtd->pool);
5702 parser->m_declEntity = NULL;
5703 } else {
5704 poolFinish(&dtd->pool);
5705 parser->m_declEntity->publicId = NULL;
5706 parser->m_declEntity->is_param = XML_FALSE;
5707 /* if we have a parent parser or are reading an internal parameter
5708 entity, then the entity declaration is not considered "internal"
5709 */
5710 parser->m_declEntity->is_internal
5711 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5712 if (parser->m_entityDeclHandler)
5713 handleDefault = XML_FALSE;
5714 }
5715 } else {
5716 poolDiscard(&dtd->pool);
5717 parser->m_declEntity = NULL;
5718 }
5719 } break;
5720 case XML_ROLE_PARAM_ENTITY_NAME:
5721 #ifdef XML_DTD
5722 if (dtd->keepProcessing) {
5723 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5724 if (! name)
5725 return XML_ERROR_NO_MEMORY;
5726 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5727 name, sizeof(ENTITY));
5728 if (! parser->m_declEntity)
5729 return XML_ERROR_NO_MEMORY;
5730 if (parser->m_declEntity->name != name) {
5731 poolDiscard(&dtd->pool);
5732 parser->m_declEntity = NULL;
5733 } else {
5734 poolFinish(&dtd->pool);
5735 parser->m_declEntity->publicId = NULL;
5736 parser->m_declEntity->is_param = XML_TRUE;
5737 /* if we have a parent parser or are reading an internal parameter
5738 entity, then the entity declaration is not considered "internal"
5739 */
5740 parser->m_declEntity->is_internal
5741 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5742 if (parser->m_entityDeclHandler)
5743 handleDefault = XML_FALSE;
5744 }
5745 } else {
5746 poolDiscard(&dtd->pool);
5747 parser->m_declEntity = NULL;
5748 }
5749 #else /* not XML_DTD */
5750 parser->m_declEntity = NULL;
5751 #endif /* XML_DTD */
5752 break;
5753 case XML_ROLE_NOTATION_NAME:
5754 parser->m_declNotationPublicId = NULL;
5755 parser->m_declNotationName = NULL;
5756 if (parser->m_notationDeclHandler) {
5757 parser->m_declNotationName
5758 = poolStoreString(&parser->m_tempPool, enc, s, next);
5759 if (! parser->m_declNotationName)
5760 return XML_ERROR_NO_MEMORY;
5761 poolFinish(&parser->m_tempPool);
5762 handleDefault = XML_FALSE;
5763 }
5764 break;
5765 case XML_ROLE_NOTATION_PUBLIC_ID:
5766 if (! XmlIsPublicId(enc, s, next, eventPP))
5767 return XML_ERROR_PUBLICID;
5768 if (parser
5769 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5770 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5771 s + enc->minBytesPerChar,
5772 next - enc->minBytesPerChar);
5773 if (! tem)
5774 return XML_ERROR_NO_MEMORY;
5775 normalizePublicId(tem);
5776 parser->m_declNotationPublicId = tem;
5777 poolFinish(&parser->m_tempPool);
5778 handleDefault = XML_FALSE;
5779 }
5780 break;
5781 case XML_ROLE_NOTATION_SYSTEM_ID:
5782 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5783 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5784 s + enc->minBytesPerChar,
5785 next - enc->minBytesPerChar);
5786 if (! systemId)
5787 return XML_ERROR_NO_MEMORY;
5788 *eventEndPP = s;
5789 parser->m_notationDeclHandler(
5790 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5791 systemId, parser->m_declNotationPublicId);
5792 handleDefault = XML_FALSE;
5793 }
5794 poolClear(&parser->m_tempPool);
5795 break;
5796 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5797 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5798 *eventEndPP = s;
5799 parser->m_notationDeclHandler(
5800 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5801 0, parser->m_declNotationPublicId);
5802 handleDefault = XML_FALSE;
5803 }
5804 poolClear(&parser->m_tempPool);
5805 break;
5806 case XML_ROLE_ERROR:
5807 switch (tok) {
5808 case XML_TOK_PARAM_ENTITY_REF:
5809 /* PE references in internal subset are
5810 not allowed within declarations. */
5811 return XML_ERROR_PARAM_ENTITY_REF;
5812 case XML_TOK_XML_DECL:
5813 return XML_ERROR_MISPLACED_XML_PI;
5814 default:
5815 return XML_ERROR_SYNTAX;
5816 }
5817 #ifdef XML_DTD
5818 case XML_ROLE_IGNORE_SECT: {
5819 enum XML_Error result;
5820 if (parser->m_defaultHandler)
5821 reportDefault(parser, enc, s, next);
5822 handleDefault = XML_FALSE;
5823 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5824 if (result != XML_ERROR_NONE)
5825 return result;
5826 else if (! next) {
5827 parser->m_processor = ignoreSectionProcessor;
5828 return result;
5829 }
5830 } break;
5831 #endif /* XML_DTD */
5832 case XML_ROLE_GROUP_OPEN:
5833 if (parser->m_prologState.level >= parser->m_groupSize) {
5834 if (parser->m_groupSize) {
5835 {
5836 /* Detect and prevent integer overflow */
5837 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5838 return XML_ERROR_NO_MEMORY;
5839 }
5840
5841 char *const new_connector = REALLOC(
5842 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5843 if (new_connector == NULL) {
5844 parser->m_groupSize /= 2;
5845 return XML_ERROR_NO_MEMORY;
5846 }
5847 parser->m_groupConnector = new_connector;
5848 }
5849
5850 if (dtd->scaffIndex) {
5851 /* Detect and prevent integer overflow.
5852 * The preprocessor guard addresses the "always false" warning
5853 * from -Wtype-limits on platforms where
5854 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5855 #if UINT_MAX >= SIZE_MAX
5856 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
5857 parser->m_groupSize /= 2;
5858 return XML_ERROR_NO_MEMORY;
5859 }
5860 #endif
5861
5862 int *const new_scaff_index = REALLOC(
5863 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5864 if (new_scaff_index == NULL) {
5865 parser->m_groupSize /= 2;
5866 return XML_ERROR_NO_MEMORY;
5867 }
5868 dtd->scaffIndex = new_scaff_index;
5869 }
5870 } else {
5871 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32);
5872 if (! parser->m_groupConnector) {
5873 parser->m_groupSize = 0;
5874 return XML_ERROR_NO_MEMORY;
5875 }
5876 }
5877 }
5878 parser->m_groupConnector[parser->m_prologState.level] = 0;
5879 if (dtd->in_eldecl) {
5880 int myindex = nextScaffoldPart(parser);
5881 if (myindex < 0)
5882 return XML_ERROR_NO_MEMORY;
5883 assert(dtd->scaffIndex != NULL);
5884 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5885 dtd->scaffLevel++;
5886 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5887 if (parser->m_elementDeclHandler)
5888 handleDefault = XML_FALSE;
5889 }
5890 break;
5891 case XML_ROLE_GROUP_SEQUENCE:
5892 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5893 return XML_ERROR_SYNTAX;
5894 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5895 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5896 handleDefault = XML_FALSE;
5897 break;
5898 case XML_ROLE_GROUP_CHOICE:
5899 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5900 return XML_ERROR_SYNTAX;
5901 if (dtd->in_eldecl
5902 && ! parser->m_groupConnector[parser->m_prologState.level]
5903 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5904 != XML_CTYPE_MIXED)) {
5905 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5906 = XML_CTYPE_CHOICE;
5907 if (parser->m_elementDeclHandler)
5908 handleDefault = XML_FALSE;
5909 }
5910 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5911 break;
5912 case XML_ROLE_PARAM_ENTITY_REF:
5913 #ifdef XML_DTD
5914 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5915 dtd->hasParamEntityRefs = XML_TRUE;
5916 if (! parser->m_paramEntityParsing)
5917 dtd->keepProcessing = dtd->standalone;
5918 else {
5919 const XML_Char *name;
5920 ENTITY *entity;
5921 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5922 next - enc->minBytesPerChar);
5923 if (! name)
5924 return XML_ERROR_NO_MEMORY;
5925 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5926 poolDiscard(&dtd->pool);
5927 /* first, determine if a check for an existing declaration is needed;
5928 if yes, check that the entity exists, and that it is internal,
5929 otherwise call the skipped entity handler
5930 */
5931 if (parser->m_prologState.documentEntity
5932 && (dtd->standalone ? ! parser->m_openInternalEntities
5933 : ! dtd->hasParamEntityRefs)) {
5934 if (! entity)
5935 return XML_ERROR_UNDEFINED_ENTITY;
5936 else if (! entity->is_internal) {
5937 /* It's hard to exhaustively search the code to be sure,
5938 * but there doesn't seem to be a way of executing the
5939 * following line. There are two cases:
5940 *
5941 * If 'standalone' is false, the DTD must have no
5942 * parameter entities or we wouldn't have passed the outer
5943 * 'if' statement. That means the only entity in the hash
5944 * table is the external subset name "#" which cannot be
5945 * given as a parameter entity name in XML syntax, so the
5946 * lookup must have returned NULL and we don't even reach
5947 * the test for an internal entity.
5948 *
5949 * If 'standalone' is true, it does not seem to be
5950 * possible to create entities taking this code path that
5951 * are not internal entities, so fail the test above.
5952 *
5953 * Because this analysis is very uncertain, the code is
5954 * being left in place and merely removed from the
5955 * coverage test statistics.
5956 */
5957 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5958 }
5959 } else if (! entity) {
5960 dtd->keepProcessing = dtd->standalone;
5961 /* cannot report skipped entities in declarations */
5962 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5963 && parser->m_skippedEntityHandler) {
5964 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5965 handleDefault = XML_FALSE;
5966 }
5967 break;
5968 }
5969 if (entity->open)
5970 return XML_ERROR_RECURSIVE_ENTITY_REF;
5971 if (entity->textPtr) {
5972 enum XML_Error result;
5973 XML_Bool betweenDecl
5974 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5975 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
5976 if (result != XML_ERROR_NONE)
5977 return result;
5978 handleDefault = XML_FALSE;
5979 break;
5980 }
5981 if (parser->m_externalEntityRefHandler) {
5982 dtd->paramEntityRead = XML_FALSE;
5983 entity->open = XML_TRUE;
5984 entityTrackingOnOpen(parser, entity, __LINE__);
5985 if (! parser->m_externalEntityRefHandler(
5986 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5987 entity->systemId, entity->publicId)) {
5988 entityTrackingOnClose(parser, entity, __LINE__);
5989 entity->open = XML_FALSE;
5990 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5991 }
5992 entityTrackingOnClose(parser, entity, __LINE__);
5993 entity->open = XML_FALSE;
5994 handleDefault = XML_FALSE;
5995 if (! dtd->paramEntityRead) {
5996 dtd->keepProcessing = dtd->standalone;
5997 break;
5998 }
5999 } else {
6000 dtd->keepProcessing = dtd->standalone;
6001 break;
6002 }
6003 }
6004 #endif /* XML_DTD */
6005 if (! dtd->standalone && parser->m_notStandaloneHandler
6006 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
6007 return XML_ERROR_NOT_STANDALONE;
6008 break;
6009
6010 /* Element declaration stuff */
6011
6012 case XML_ROLE_ELEMENT_NAME:
6013 if (parser->m_elementDeclHandler) {
6014 parser->m_declElementType = getElementType(parser, enc, s, next);
6015 if (! parser->m_declElementType)
6016 return XML_ERROR_NO_MEMORY;
6017 dtd->scaffLevel = 0;
6018 dtd->scaffCount = 0;
6019 dtd->in_eldecl = XML_TRUE;
6020 handleDefault = XML_FALSE;
6021 }
6022 break;
6023
6024 case XML_ROLE_CONTENT_ANY:
6025 case XML_ROLE_CONTENT_EMPTY:
6026 if (dtd->in_eldecl) {
6027 if (parser->m_elementDeclHandler) {
6028 // NOTE: We are avoiding MALLOC(..) here to so that
6029 // applications that are not using XML_FreeContentModel but
6030 // plain free(..) or .free_fcn() to free the content model's
6031 // memory are safe.
6032 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content));
6033 if (! content)
6034 return XML_ERROR_NO_MEMORY;
6035 content->quant = XML_CQUANT_NONE;
6036 content->name = NULL;
6037 content->numchildren = 0;
6038 content->children = NULL;
6039 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
6040 : XML_CTYPE_EMPTY);
6041 *eventEndPP = s;
6042 parser->m_elementDeclHandler(
6043 parser->m_handlerArg, parser->m_declElementType->name, content);
6044 handleDefault = XML_FALSE;
6045 }
6046 dtd->in_eldecl = XML_FALSE;
6047 }
6048 break;
6049
6050 case XML_ROLE_CONTENT_PCDATA:
6051 if (dtd->in_eldecl) {
6052 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
6053 = XML_CTYPE_MIXED;
6054 if (parser->m_elementDeclHandler)
6055 handleDefault = XML_FALSE;
6056 }
6057 break;
6058
6059 case XML_ROLE_CONTENT_ELEMENT:
6060 quant = XML_CQUANT_NONE;
6061 goto elementContent;
6062 case XML_ROLE_CONTENT_ELEMENT_OPT:
6063 quant = XML_CQUANT_OPT;
6064 goto elementContent;
6065 case XML_ROLE_CONTENT_ELEMENT_REP:
6066 quant = XML_CQUANT_REP;
6067 goto elementContent;
6068 case XML_ROLE_CONTENT_ELEMENT_PLUS:
6069 quant = XML_CQUANT_PLUS;
6070 elementContent:
6071 if (dtd->in_eldecl) {
6072 ELEMENT_TYPE *el;
6073 const XML_Char *name;
6074 size_t nameLen;
6075 const char *nxt
6076 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
6077 int myindex = nextScaffoldPart(parser);
6078 if (myindex < 0)
6079 return XML_ERROR_NO_MEMORY;
6080 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
6081 dtd->scaffold[myindex].quant = quant;
6082 el = getElementType(parser, enc, s, nxt);
6083 if (! el)
6084 return XML_ERROR_NO_MEMORY;
6085 name = el->name;
6086 dtd->scaffold[myindex].name = name;
6087 nameLen = 0;
6088 while (name[nameLen++])
6089 ;
6090
6091 /* Detect and prevent integer overflow */
6092 if (nameLen > UINT_MAX - dtd->contentStringLen) {
6093 return XML_ERROR_NO_MEMORY;
6094 }
6095
6096 dtd->contentStringLen += (unsigned)nameLen;
6097 if (parser->m_elementDeclHandler)
6098 handleDefault = XML_FALSE;
6099 }
6100 break;
6101
6102 case XML_ROLE_GROUP_CLOSE:
6103 quant = XML_CQUANT_NONE;
6104 goto closeGroup;
6105 case XML_ROLE_GROUP_CLOSE_OPT:
6106 quant = XML_CQUANT_OPT;
6107 goto closeGroup;
6108 case XML_ROLE_GROUP_CLOSE_REP:
6109 quant = XML_CQUANT_REP;
6110 goto closeGroup;
6111 case XML_ROLE_GROUP_CLOSE_PLUS:
6112 quant = XML_CQUANT_PLUS;
6113 closeGroup:
6114 if (dtd->in_eldecl) {
6115 if (parser->m_elementDeclHandler)
6116 handleDefault = XML_FALSE;
6117 dtd->scaffLevel--;
6118 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
6119 if (dtd->scaffLevel == 0) {
6120 if (! handleDefault) {
6121 XML_Content *model = build_model(parser);
6122 if (! model)
6123 return XML_ERROR_NO_MEMORY;
6124 *eventEndPP = s;
6125 parser->m_elementDeclHandler(
6126 parser->m_handlerArg, parser->m_declElementType->name, model);
6127 }
6128 dtd->in_eldecl = XML_FALSE;
6129 dtd->contentStringLen = 0;
6130 }
6131 }
6132 break;
6133 /* End element declaration stuff */
6134
6135 case XML_ROLE_PI:
6136 if (! reportProcessingInstruction(parser, enc, s, next))
6137 return XML_ERROR_NO_MEMORY;
6138 handleDefault = XML_FALSE;
6139 break;
6140 case XML_ROLE_COMMENT:
6141 if (! reportComment(parser, enc, s, next))
6142 return XML_ERROR_NO_MEMORY;
6143 handleDefault = XML_FALSE;
6144 break;
6145 case XML_ROLE_NONE:
6146 switch (tok) {
6147 case XML_TOK_BOM:
6148 handleDefault = XML_FALSE;
6149 break;
6150 }
6151 break;
6152 case XML_ROLE_DOCTYPE_NONE:
6153 if (parser->m_startDoctypeDeclHandler)
6154 handleDefault = XML_FALSE;
6155 break;
6156 case XML_ROLE_ENTITY_NONE:
6157 if (dtd->keepProcessing && parser->m_entityDeclHandler)
6158 handleDefault = XML_FALSE;
6159 break;
6160 case XML_ROLE_NOTATION_NONE:
6161 if (parser->m_notationDeclHandler)
6162 handleDefault = XML_FALSE;
6163 break;
6164 case XML_ROLE_ATTLIST_NONE:
6165 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
6166 handleDefault = XML_FALSE;
6167 break;
6168 case XML_ROLE_ELEMENT_NONE:
6169 if (parser->m_elementDeclHandler)
6170 handleDefault = XML_FALSE;
6171 break;
6172 } /* end of big switch */
6173
6174 if (handleDefault && parser->m_defaultHandler)
6175 reportDefault(parser, enc, s, next);
6176
6177 switch (parser->m_parsingStatus.parsing) {
6178 case XML_SUSPENDED:
6179 *nextPtr = next;
6180 return XML_ERROR_NONE;
6181 case XML_FINISHED:
6182 return XML_ERROR_ABORTED;
6183 case XML_PARSING:
6184 if (parser->m_reenter) {
6185 *nextPtr = next;
6186 return XML_ERROR_NONE;
6187 }
6188 /* Fall through */
6189 default:
6190 s = next;
6191 tok = XmlPrologTok(enc, s, end, &next);
6192 }
6193 }
6194 /* not reached */
6195 }
6196
6197 static enum XML_Error PTRCALL
6198 epilogProcessor(XML_Parser parser, const char *s, const char *end,
6199 const char **nextPtr) {
6200 parser->m_processor = epilogProcessor;
6201 parser->m_eventPtr = s;
6202 for (;;) {
6203 const char *next = NULL;
6204 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
6205 #if XML_GE == 1
6206 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
6207 XML_ACCOUNT_DIRECT)) {
6208 accountingOnAbort(parser);
6209 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6210 }
6211 #endif
6212 parser->m_eventEndPtr = next;
6213 switch (tok) {
6214 /* report partial linebreak - it might be the last token */
6215 case -XML_TOK_PROLOG_S:
6216 if (parser->m_defaultHandler) {
6217 reportDefault(parser, parser->m_encoding, s, next);
6218 if (parser->m_parsingStatus.parsing == XML_FINISHED)
6219 return XML_ERROR_ABORTED;
6220 }
6221 *nextPtr = next;
6222 return XML_ERROR_NONE;
6223 case XML_TOK_NONE:
6224 *nextPtr = s;
6225 return XML_ERROR_NONE;
6226 case XML_TOK_PROLOG_S:
6227 if (parser->m_defaultHandler)
6228 reportDefault(parser, parser->m_encoding, s, next);
6229 break;
6230 case XML_TOK_PI:
6231 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
6232 return XML_ERROR_NO_MEMORY;
6233 break;
6234 case XML_TOK_COMMENT:
6235 if (! reportComment(parser, parser->m_encoding, s, next))
6236 return XML_ERROR_NO_MEMORY;
6237 break;
6238 case XML_TOK_INVALID:
6239 parser->m_eventPtr = next;
6240 return XML_ERROR_INVALID_TOKEN;
6241 case XML_TOK_PARTIAL:
6242 if (! parser->m_parsingStatus.finalBuffer) {
6243 *nextPtr = s;
6244 return XML_ERROR_NONE;
6245 }
6246 return XML_ERROR_UNCLOSED_TOKEN;
6247 case XML_TOK_PARTIAL_CHAR:
6248 if (! parser->m_parsingStatus.finalBuffer) {
6249 *nextPtr = s;
6250 return XML_ERROR_NONE;
6251 }
6252 return XML_ERROR_PARTIAL_CHAR;
6253 default:
6254 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
6255 }
6256 switch (parser->m_parsingStatus.parsing) {
6257 case XML_SUSPENDED:
6258 parser->m_eventPtr = next;
6259 *nextPtr = next;
6260 return XML_ERROR_NONE;
6261 case XML_FINISHED:
6262 parser->m_eventPtr = next;
6263 return XML_ERROR_ABORTED;
6264 case XML_PARSING:
6265 if (parser->m_reenter) {
6266 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
6267 }
6268 /* Fall through */
6269 default:;
6270 parser->m_eventPtr = s = next;
6271 }
6272 }
6273 }
6274
6275 static enum XML_Error
6276 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
6277 enum EntityType type) {
6278 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
6279 switch (type) {
6280 case ENTITY_INTERNAL:
6281 parser->m_processor = internalEntityProcessor;
6282 openEntityList = &parser->m_openInternalEntities;
6283 freeEntityList = &parser->m_freeInternalEntities;
6284 break;
6285 case ENTITY_ATTRIBUTE:
6286 openEntityList = &parser->m_openAttributeEntities;
6287 freeEntityList = &parser->m_freeAttributeEntities;
6288 break;
6289 case ENTITY_VALUE:
6290 openEntityList = &parser->m_openValueEntities;
6291 freeEntityList = &parser->m_freeValueEntities;
6292 break;
6293 /* default case serves merely as a safety net in case of a
6294 * wrong entityType. Therefore we exclude the following lines
6295 * from the test coverage.
6296 *
6297 * LCOV_EXCL_START
6298 */
6299 default:
6300 // Should not reach here
6301 assert(0);
6302 /* LCOV_EXCL_STOP */
6303 }
6304
6305 if (*freeEntityList) {
6306 openEntity = *freeEntityList;
6307 *freeEntityList = openEntity->next;
6308 } else {
6309 openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6310 if (! openEntity)
6311 return XML_ERROR_NO_MEMORY;
6312 }
6313 entity->open = XML_TRUE;
6314 entity->hasMore = XML_TRUE;
6315 #if XML_GE == 1
6316 entityTrackingOnOpen(parser, entity, __LINE__);
6317 #endif
6318 entity->processed = 0;
6319 openEntity->next = *openEntityList;
6320 *openEntityList = openEntity;
6321 openEntity->entity = entity;
6322 openEntity->type = type;
6323 openEntity->startTagLevel = parser->m_tagLevel;
6324 openEntity->betweenDecl = betweenDecl;
6325 openEntity->internalEventPtr = NULL;
6326 openEntity->internalEventEndPtr = NULL;
6327
6328 // Only internal entities make use of the reenter flag
6329 // therefore no need to set it for other entity types
6330 if (type == ENTITY_INTERNAL) {
6331 triggerReenter(parser);
6332 }
6333 return XML_ERROR_NONE;
6334 }
6335
/* Processor that advances the internal entity at the head of
 * parser->m_openInternalEntities by one step per call.
 *
 * - While the head entity still has unparsed text (entity->hasMore), the text
 *   from offset entity->processed onwards is parsed with doProlog() for
 *   parameter entities or doContent() for general entities, and the function
 *   returns right afterwards.
 * - Once hasMore has been cleared, the next call closes the entity, moves its
 *   OPEN_INTERNAL_ENTITY record to parser->m_freeInternalEntities and, if no
 *   open entity is left, switches parser->m_processor to prologProcessor or
 *   contentProcessor.
 *
 * The arguments s, end and nextPtr are not used; the text being parsed comes
 * from the entity itself.
 *
 * Returns XML_ERROR_UNEXPECTED_STATE when no internal entity is open, the
 * error reported by doProlog()/doContent(), XML_ERROR_ASYNC_ENTITY when a
 * general entity ends at a different tag level than it started at, and
 * XML_ERROR_NONE otherwise.
 */
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
                        const char **nextPtr) {
  UNUSED_P(s);
  UNUSED_P(end);
  UNUSED_P(nextPtr);
  ENTITY *entity;
  const char *textStart, *textEnd;
  const char *next;
  enum XML_Error result;
  OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
  if (! openEntity)
    return XML_ERROR_UNEXPECTED_STATE;

  entity = openEntity->entity;

  // This will return early
  if (entity->hasMore) {
    /* Resume where the previous call stopped */
    textStart = ((const char *)entity->textPtr) + entity->processed;
    textEnd = (const char *)(entity->textPtr + entity->textLen);
    /* Set a safe default value in case 'next' does not get set */
    next = textStart;

    if (entity->is_param) {
      /* Parameter entity: its text is prolog markup */
      int tok
          = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
      result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                        tok, next, &next, XML_FALSE, XML_FALSE,
                        XML_ACCOUNT_ENTITY_EXPANSION);
    } else {
      /* General entity: its text is element content */
      result = doContent(parser, openEntity->startTagLevel,
                         parser->m_internalEncoding, textStart, textEnd, &next,
                         XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
    }

    if (result != XML_ERROR_NONE)
      return result;
    // Check if entity is complete, if not, mark down how much of it is
    // processed
    if (textEnd != next
        && (parser->m_parsingStatus.parsing == XML_SUSPENDED
            || (parser->m_parsingStatus.parsing == XML_PARSING
                && parser->m_reenter))) {
      /* Stopped part-way (suspension or pending re-entry): remember the
         offset so that the next call continues from 'next' */
      entity->processed = (int)(next - (const char *)entity->textPtr);
      return result;
    }

    // Entity is complete. We cannot close it here since we need to first
    // process its possible inner entities (which are added to the
    // m_openInternalEntities during doProlog or doContent calls above)
    entity->hasMore = XML_FALSE;
    /* A general entity must leave the element nesting depth unchanged */
    if (! entity->is_param
        && (openEntity->startTagLevel != parser->m_tagLevel)) {
      return XML_ERROR_ASYNC_ENTITY;
    }
    /* NOTE(review): presumably requests another processor invocation so that
       the close step below runs - confirm against triggerReenter() */
    triggerReenter(parser);
    return result;
  } // End of entity processing, "if" block will return here

  // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
  entityTrackingOnClose(parser, entity, __LINE__);
#endif
  // openEntity is m_openInternalEntities' head, as we set it at the start of
  // this function and we skipped doProlog and doContent calls with hasMore set
  // to false. This means we can directly remove the head of
  // m_openInternalEntities
  assert(parser->m_openInternalEntities == openEntity);
  entity->open = XML_FALSE;
  parser->m_openInternalEntities = parser->m_openInternalEntities->next;

  /* put openEntity back in list of free instances */
  openEntity->next = parser->m_freeInternalEntities;
  parser->m_freeInternalEntities = openEntity;

  /* Last open entity gone: hand control back to the regular processor that
     matches the kind of entity just closed */
  if (parser->m_openInternalEntities == NULL) {
    parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
  }
  triggerReenter(parser);
  return XML_ERROR_NONE;
}
6417
6418 static enum XML_Error PTRCALL
6419 errorProcessor(XML_Parser parser, const char *s, const char *end,
6420 const char **nextPtr) {
6421 UNUSED_P(s);
6422 UNUSED_P(end);
6423 UNUSED_P(nextPtr);
6424 return parser->m_errorCode;
6425 }
6426
6427 static enum XML_Error
6428 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6429 const char *ptr, const char *end, STRING_POOL *pool,
6430 enum XML_Account account) {
6431 const char *next = ptr;
6432 enum XML_Error result = XML_ERROR_NONE;
6433
6434 while (1) {
6435 if (! parser->m_openAttributeEntities) {
6436 result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6437 account, &next);
6438 } else {
6439 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6440 if (! openEntity)
6441 return XML_ERROR_UNEXPECTED_STATE;
6442
6443 ENTITY *const entity = openEntity->entity;
6444 const char *const textStart
6445 = ((const char *)entity->textPtr) + entity->processed;
6446 const char *const textEnd
6447 = (const char *)(entity->textPtr + entity->textLen);
6448 /* Set a safe default value in case 'next' does not get set */
6449 const char *nextInEntity = textStart;
6450 if (entity->hasMore) {
6451 result = appendAttributeValue(
6452 parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6453 pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6454 if (result != XML_ERROR_NONE)
6455 break;
6456 // Check if entity is complete, if not, mark down how much of it is
6457 // processed. A XML_SUSPENDED check here is not required as
6458 // appendAttributeValue will never suspend the parser.
6459 if (textEnd != nextInEntity) {
6460 entity->processed
6461 = (int)(nextInEntity - (const char *)entity->textPtr);
6462 continue;
6463 }
6464
6465 // Entity is complete. We cannot close it here since we need to first
6466 // process its possible inner entities (which are added to the
6467 // m_openAttributeEntities during appendAttributeValue)
6468 entity->hasMore = XML_FALSE;
6469 continue;
6470 } // End of entity processing, "if" block skips the rest
6471
6472 // Remove fully processed openEntity from open entity list.
6473 #if XML_GE == 1
6474 entityTrackingOnClose(parser, entity, __LINE__);
6475 #endif
6476 // openEntity is m_openAttributeEntities' head, since we set it at the
6477 // start of this function and because we skipped appendAttributeValue call
6478 // with hasMore set to false. This means we can directly remove the head
6479 // of m_openAttributeEntities
6480 assert(parser->m_openAttributeEntities == openEntity);
6481 entity->open = XML_FALSE;
6482 parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6483
6484 /* put openEntity back in list of free instances */
6485 openEntity->next = parser->m_freeAttributeEntities;
6486 parser->m_freeAttributeEntities = openEntity;
6487 }
6488
6489 // Break if an error occurred or there is nothing left to process
6490 if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6491 break;
6492 }
6493 }
6494
6495 if (result)
6496 return result;
6497 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6498 poolChop(pool);
6499 if (! poolAppendChar(pool, XML_T('\0')))
6500 return XML_ERROR_NO_MEMORY;
6501 return XML_ERROR_NONE;
6502 }
6503
/* Tokenizes attribute value text [ptr, end) with XmlAttributeValueTok() and
 * appends the resulting characters to 'pool'.
 *
 * - Character references are decoded and appended.
 * - Plain data is appended as is.
 * - Whitespace tokens become a single 0x20; for non-CDATA attributes (isCdata
 *   false) a space is dropped when the pool is empty or already ends in 0x20.
 * - Predefined entity references are replaced by their character.
 * - Any other entity reference is looked up in dtd->generalEntities. If it
 *   refers to a usable internal entity, processEntity() is called with
 *   ENTITY_ATTRIBUTE and the function returns with *nextPtr set just past the
 *   reference; the entity text itself is not appended here.
 *
 * 'account' is only forwarded to the accounting check, which is compiled in
 * when XML_GE == 1. nextPtr may be NULL; when given it is written on
 * XML_TOK_NONE and after a successful processEntity() call.
 *
 * Returns XML_ERROR_NONE when the input is exhausted or an entity was handed
 * to processEntity() successfully, otherwise the specific error code.
 */
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                     const char *ptr, const char *end, STRING_POOL *pool,
                     enum XML_Account account, const char **nextPtr) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
  UNUSED_P(account);
#endif

  for (;;) {
    const char *next
        = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    switch (tok) {
    case XML_TOK_NONE:
      /* Input exhausted: report the final position and finish */
      if (nextPtr) {
        *nextPtr = next;
      }
      return XML_ERROR_NONE;
    case XML_TOK_INVALID:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = ptr;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_CHAR_REF: {
      XML_Char buf[XML_ENCODE_MAX];
      int i;
      int n = XmlCharRefNumber(enc, ptr);
      if (n < 0) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BAD_CHAR_REF;
      }
      /* A referenced space is subject to the same collapsing rule as a
         literal one for non-CDATA attributes */
      if (! isCdata && n == 0x20 /* space */
          && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      n = XmlEncode(n, (ICHAR *)buf);
      /* The XmlEncode() functions can never return 0 here.  That
       * error return happens if the code point passed in is either
       * negative or greater than or equal to 0x110000.  The
       * XmlCharRefNumber() functions will all return a number
       * strictly less than 0x110000 or a negative value if an error
       * occurred.  The negative value is intercepted above, so
       * XmlEncode() is never passed a value it might return an
       * error for.
       */
      for (i = 0; i < n; i++) {
        if (! poolAppendChar(pool, buf[i]))
          return XML_ERROR_NO_MEMORY;
      }
    } break;
    case XML_TOK_DATA_CHARS:
      if (! poolAppend(pool, enc, ptr, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_TRAILING_CR:
      /* Step over the carriage return only, then treat it as whitespace */
      next = ptr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_ATTRIBUTE_VALUE_S:
    case XML_TOK_DATA_NEWLINE:
      if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      if (! poolAppendChar(pool, 0x20))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      bool checkEntityDecl;
      /* The name lies between the leading '&' and the trailing ';' */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (! poolAppendChar(pool, ch))
          return XML_ERROR_NO_MEMORY;
        break;
      }
      /* The name is only needed for the lookup, so it is built in the
         scratch pool and discarded right away */
      name = poolStoreString(&parser->m_temp2Pool, enc,
                             ptr + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&parser->m_temp2Pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal.
      */
      if (pool == &dtd->pool) /* are we called from prolog? */
        checkEntityDecl =
#ifdef XML_DTD
            parser->m_prologState.documentEntity &&
#endif /* XML_DTD */
            (dtd->standalone ? ! parser->m_openInternalEntities
                             : ! dtd->hasParamEntityRefs);
      else /* if (pool == &parser->m_tempPool): we are called from content */
        checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
      if (checkEntityDecl) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        /* Cannot report skipped entity here - see comments on
           parser->m_skippedEntityHandler.
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        */
        /* Cannot call the default handler because this would be
           out of sync with the call to the startElementHandler.
        if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
          reportDefault(parser, enc, ptr, next);
        */
        break;
      }
      if (entity->open) {
        if (enc == parser->m_encoding) {
          /* It does not appear that this line can be executed.
           *
           * The "if (entity->open)" check catches recursive entity
           * definitions.  In order to be called with an open
           * entity, it must have gone through this code before and
           * been through the recursive call to
           * appendAttributeValue() some lines below.  That call
           * sets the local encoding ("enc") to the parser's
           * internal encoding (internal_utf8 or internal_utf16),
           * which can never be the same as the principle encoding.
           * It doesn't appear there is another code path that gets
           * here with entity->open being TRUE.
           *
           * Since it is not certain that this logic is watertight,
           * we keep the line and merely exclude it from coverage
           * tests.
           *
           * NOTE(review): this function no longer calls itself; the
           * entity is passed to processEntity() below instead, so the
           * "recursive call" wording above looks stale - confirm and
           * reword.
           */
          parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
        }
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      }
      if (entity->notation) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BINARY_ENTITY_REF;
      }
      if (! entity->textPtr) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
      } else {
        /* Hand the entity over and return: the caller sees it through
           *nextPtr pointing just past the reference */
        enum XML_Error result;
        result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
        if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
          *nextPtr = next;
        }
        return result;
      }
    } break;
    default:
      /* The only token returned by XmlAttributeValueTok() that does
       * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
       * Getting that would require an entity name to contain an
       * incomplete XML character (e.g. \xE2\x82); however previous
       * tokenisers will have already recognised and rejected such
       * names before XmlAttributeValueTok() gets a look-in.  This
       * default case should be retained as a safety net, but the code
       * excluded from coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = ptr;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }
    ptr = next;
  }
  /* not reached */
}
6696
6697 #if XML_GE == 1
6698 static enum XML_Error
6699 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6700 const char *entityTextPtr, const char *entityTextEnd,
6701 enum XML_Account account, const char **nextPtr) {
6702 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6703 STRING_POOL *pool = &(dtd->entityValuePool);
6704 enum XML_Error result = XML_ERROR_NONE;
6705 # ifdef XML_DTD
6706 int oldInEntityValue = parser->m_prologState.inEntityValue;
6707 parser->m_prologState.inEntityValue = 1;
6708 # else
6709 UNUSED_P(account);
6710 # endif /* XML_DTD */
6711 /* never return Null for the value argument in EntityDeclHandler,
6712 since this would indicate an external entity; therefore we
6713 have to make sure that entityValuePool.start is not null */
6714 if (! pool->blocks) {
6715 if (! poolGrow(pool))
6716 return XML_ERROR_NO_MEMORY;
6717 }
6718
6719 const char *next = entityTextPtr;
6720
6721 /* Nothing to tokenize. */
6722 if (entityTextPtr >= entityTextEnd) {
6723 result = XML_ERROR_NONE;
6724 goto endEntityValue;
6725 }
6726
6727 for (;;) {
6728 next
6729 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6730 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6731
6732 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6733 account)) {
6734 accountingOnAbort(parser);
6735 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6736 goto endEntityValue;
6737 }
6738
6739 switch (tok) {
6740 case XML_TOK_PARAM_ENTITY_REF:
6741 # ifdef XML_DTD
6742 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6743 const XML_Char *name;
6744 ENTITY *entity;
6745 name = poolStoreString(&parser->m_tempPool, enc,
6746 entityTextPtr + enc->minBytesPerChar,
6747 next - enc->minBytesPerChar);
6748 if (! name) {
6749 result = XML_ERROR_NO_MEMORY;
6750 goto endEntityValue;
6751 }
6752 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6753 poolDiscard(&parser->m_tempPool);
6754 if (! entity) {
6755 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6756 /* cannot report skipped entity here - see comments on
6757 parser->m_skippedEntityHandler
6758 if (parser->m_skippedEntityHandler)
6759 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6760 */
6761 dtd->keepProcessing = dtd->standalone;
6762 goto endEntityValue;
6763 }
6764 if (entity->open || (entity == parser->m_declEntity)) {
6765 if (enc == parser->m_encoding)
6766 parser->m_eventPtr = entityTextPtr;
6767 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6768 goto endEntityValue;
6769 }
6770 if (entity->systemId) {
6771 if (parser->m_externalEntityRefHandler) {
6772 dtd->paramEntityRead = XML_FALSE;
6773 entity->open = XML_TRUE;
6774 entityTrackingOnOpen(parser, entity, __LINE__);
6775 if (! parser->m_externalEntityRefHandler(
6776 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6777 entity->systemId, entity->publicId)) {
6778 entityTrackingOnClose(parser, entity, __LINE__);
6779 entity->open = XML_FALSE;
6780 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6781 goto endEntityValue;
6782 }
6783 entityTrackingOnClose(parser, entity, __LINE__);
6784 entity->open = XML_FALSE;
6785 if (! dtd->paramEntityRead)
6786 dtd->keepProcessing = dtd->standalone;
6787 } else
6788 dtd->keepProcessing = dtd->standalone;
6789 } else {
6790 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6791 goto endEntityValue;
6792 }
6793 break;
6794 }
6795 # endif /* XML_DTD */
6796 /* In the internal subset, PE references are not legal
6797 within markup declarations, e.g entity values in this case. */
6798 parser->m_eventPtr = entityTextPtr;
6799 result = XML_ERROR_PARAM_ENTITY_REF;
6800 goto endEntityValue;
6801 case XML_TOK_NONE:
6802 result = XML_ERROR_NONE;
6803 goto endEntityValue;
6804 case XML_TOK_ENTITY_REF:
6805 case XML_TOK_DATA_CHARS:
6806 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6807 result = XML_ERROR_NO_MEMORY;
6808 goto endEntityValue;
6809 }
6810 break;
6811 case XML_TOK_TRAILING_CR:
6812 next = entityTextPtr + enc->minBytesPerChar;
6813 /* fall through */
6814 case XML_TOK_DATA_NEWLINE:
6815 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6816 result = XML_ERROR_NO_MEMORY;
6817 goto endEntityValue;
6818 }
6819 *(pool->ptr)++ = 0xA;
6820 break;
6821 case XML_TOK_CHAR_REF: {
6822 XML_Char buf[XML_ENCODE_MAX];
6823 int i;
6824 int n = XmlCharRefNumber(enc, entityTextPtr);
6825 if (n < 0) {
6826 if (enc == parser->m_encoding)
6827 parser->m_eventPtr = entityTextPtr;
6828 result = XML_ERROR_BAD_CHAR_REF;
6829 goto endEntityValue;
6830 }
6831 n = XmlEncode(n, (ICHAR *)buf);
6832 /* The XmlEncode() functions can never return 0 here. That
6833 * error return happens if the code point passed in is either
6834 * negative or greater than or equal to 0x110000. The
6835 * XmlCharRefNumber() functions will all return a number
6836 * strictly less than 0x110000 or a negative value if an error
6837 * occurred. The negative value is intercepted above, so
6838 * XmlEncode() is never passed a value it might return an
6839 * error for.
6840 */
6841 for (i = 0; i < n; i++) {
6842 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6843 result = XML_ERROR_NO_MEMORY;
6844 goto endEntityValue;
6845 }
6846 *(pool->ptr)++ = buf[i];
6847 }
6848 } break;
6849 case XML_TOK_PARTIAL:
6850 if (enc == parser->m_encoding)
6851 parser->m_eventPtr = entityTextPtr;
6852 result = XML_ERROR_INVALID_TOKEN;
6853 goto endEntityValue;
6854 case XML_TOK_INVALID:
6855 if (enc == parser->m_encoding)
6856 parser->m_eventPtr = next;
6857 result = XML_ERROR_INVALID_TOKEN;
6858 goto endEntityValue;
6859 default:
6860 /* This default case should be unnecessary -- all the tokens
6861 * that XmlEntityValueTok() can return have their own explicit
6862 * cases -- but should be retained for safety. We do however
6863 * exclude it from the coverage statistics.
6864 *
6865 * LCOV_EXCL_START
6866 */
6867 if (enc == parser->m_encoding)
6868 parser->m_eventPtr = entityTextPtr;
6869 result = XML_ERROR_UNEXPECTED_STATE;
6870 goto endEntityValue;
6871 /* LCOV_EXCL_STOP */
6872 }
6873 entityTextPtr = next;
6874 }
6875 endEntityValue:
6876 # ifdef XML_DTD
6877 parser->m_prologState.inEntityValue = oldInEntityValue;
6878 # endif /* XML_DTD */
6879 // If 'nextPtr' is given, it should be updated during the processing
6880 if (nextPtr != NULL) {
6881 *nextPtr = next;
6882 }
6883 return result;
6884 }
6885
6886 static enum XML_Error
6887 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6888 const char *entityTextPtr, const char *entityTextEnd,
6889 enum XML_Account account) {
6890 const char *next = entityTextPtr;
6891 enum XML_Error result = XML_ERROR_NONE;
6892 while (1) {
6893 if (! parser->m_openValueEntities) {
6894 result
6895 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6896 } else {
6897 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6898 if (! openEntity)
6899 return XML_ERROR_UNEXPECTED_STATE;
6900
6901 ENTITY *const entity = openEntity->entity;
6902 const char *const textStart
6903 = ((const char *)entity->textPtr) + entity->processed;
6904 const char *const textEnd
6905 = (const char *)(entity->textPtr + entity->textLen);
6906 /* Set a safe default value in case 'next' does not get set */
6907 const char *nextInEntity = textStart;
6908 if (entity->hasMore) {
6909 result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6910 textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6911 &nextInEntity);
6912 if (result != XML_ERROR_NONE)
6913 break;
6914 // Check if entity is complete, if not, mark down how much of it is
6915 // processed. A XML_SUSPENDED check here is not required as
6916 // appendAttributeValue will never suspend the parser.
6917 if (textEnd != nextInEntity) {
6918 entity->processed
6919 = (int)(nextInEntity - (const char *)entity->textPtr);
6920 continue;
6921 }
6922
6923 // Entity is complete. We cannot close it here since we need to first
6924 // process its possible inner entities (which are added to the
6925 // m_openValueEntities during storeEntityValue)
6926 entity->hasMore = XML_FALSE;
6927 continue;
6928 } // End of entity processing, "if" block skips the rest
6929
6930 // Remove fully processed openEntity from open entity list.
6931 # if XML_GE == 1
6932 entityTrackingOnClose(parser, entity, __LINE__);
6933 # endif
6934 // openEntity is m_openValueEntities' head, since we set it at the
6935 // start of this function and because we skipped storeEntityValue call
6936 // with hasMore set to false. This means we can directly remove the head
6937 // of m_openValueEntities
6938 assert(parser->m_openValueEntities == openEntity);
6939 entity->open = XML_FALSE;
6940 parser->m_openValueEntities = parser->m_openValueEntities->next;
6941
6942 /* put openEntity back in list of free instances */
6943 openEntity->next = parser->m_freeValueEntities;
6944 parser->m_freeValueEntities = openEntity;
6945 }
6946
6947 // Break if an error occurred or there is nothing left to process
6948 if (result
6949 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
6950 break;
6951 }
6952 }
6953
6954 return result;
6955 }
6956
6957 #else /* XML_GE == 0 */
6958
6959 static enum XML_Error
6960 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6961 // This will store "&entity123;" in entity->textPtr
6962 // to end up as "&entity123;" in the handler.
6963 const char *const entity_start = "&";
6964 const char *const entity_end = ";";
6965
6966 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6967 if (! poolAppendString(pool, entity_start)
6968 || ! poolAppendString(pool, entity->name)
6969 || ! poolAppendString(pool, entity_end)) {
6970 poolDiscard(pool);
6971 return XML_ERROR_NO_MEMORY;
6972 }
6973
6974 entity->textPtr = poolStart(pool);
6975 entity->textLen = (int)(poolLength(pool));
6976 poolFinish(pool);
6977
6978 return XML_ERROR_NONE;
6979 }
6980
6981 #endif /* XML_GE == 0 */
6982
6983 static void FASTCALL
6984 normalizeLines(XML_Char *s) {
6985 XML_Char *p;
6986 for (;; s++) {
6987 if (*s == XML_T('\0'))
6988 return;
6989 if (*s == 0xD)
6990 break;
6991 }
6992 p = s;
6993 do {
6994 if (*s == 0xD) {
6995 *p++ = 0xA;
6996 if (*++s == 0xA)
6997 s++;
6998 } else
6999 *p++ = *s++;
7000 } while (*s);
7001 *p = XML_T('\0');
7002 }
7003
7004 static int
7005 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
7006 const char *start, const char *end) {
7007 const XML_Char *target;
7008 XML_Char *data;
7009 const char *tem;
7010 if (! parser->m_processingInstructionHandler) {
7011 if (parser->m_defaultHandler)
7012 reportDefault(parser, enc, start, end);
7013 return 1;
7014 }
7015 start += enc->minBytesPerChar * 2;
7016 tem = start + XmlNameLength(enc, start);
7017 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
7018 if (! target)
7019 return 0;
7020 poolFinish(&parser->m_tempPool);
7021 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
7022 end - enc->minBytesPerChar * 2);
7023 if (! data)
7024 return 0;
7025 normalizeLines(data);
7026 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
7027 poolClear(&parser->m_tempPool);
7028 return 1;
7029 }
7030
7031 static int
7032 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
7033 const char *end) {
7034 XML_Char *data;
7035 if (! parser->m_commentHandler) {
7036 if (parser->m_defaultHandler)
7037 reportDefault(parser, enc, start, end);
7038 return 1;
7039 }
7040 data = poolStoreString(&parser->m_tempPool, enc,
7041 start + enc->minBytesPerChar * 4,
7042 end - enc->minBytesPerChar * 3);
7043 if (! data)
7044 return 0;
7045 normalizeLines(data);
7046 parser->m_commentHandler(parser->m_handlerArg, data);
7047 poolClear(&parser->m_tempPool);
7048 return 1;
7049 }
7050
7051 static void
7052 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
7053 const char *end) {
7054 if (MUST_CONVERT(enc, s)) {
7055 enum XML_Convert_Result convert_res;
7056 const char **eventPP;
7057 const char **eventEndPP;
7058 if (enc == parser->m_encoding) {
7059 eventPP = &parser->m_eventPtr;
7060 eventEndPP = &parser->m_eventEndPtr;
7061 } else {
7062 /* To get here, two things must be true; the parser must be
7063 * using a character encoding that is not the same as the
7064 * encoding passed in, and the encoding passed in must need
7065 * conversion to the internal format (UTF-8 unless XML_UNICODE
7066 * is defined). The only occasions on which the encoding passed
7067 * in is not the same as the parser's encoding are when it is
7068 * the internal encoding (e.g. a previously defined parameter
7069 * entity, already converted to internal format). This by
7070 * definition doesn't need conversion, so the whole branch never
7071 * gets executed.
7072 *
7073 * For safety's sake we don't delete these lines and merely
7074 * exclude them from coverage statistics.
7075 *
7076 * LCOV_EXCL_START
7077 */
7078 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
7079 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
7080 /* LCOV_EXCL_STOP */
7081 }
7082 do {
7083 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
7084 convert_res
7085 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
7086 *eventEndPP = s;
7087 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
7088 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
7089 *eventPP = s;
7090 } while ((convert_res != XML_CONVERT_COMPLETED)
7091 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
7092 } else
7093 parser->m_defaultHandler(
7094 parser->m_handlerArg, (const XML_Char *)s,
7095 (int)((const XML_Char *)end - (const XML_Char *)s));
7096 }
7097
7098 static int
7099 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
7100 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
7101 DEFAULT_ATTRIBUTE *att;
7102 if (value || isId) {
7103 /* The handling of default attributes gets messed up if we have
7104 a default which duplicates a non-default. */
7105 int i;
7106 for (i = 0; i < type->nDefaultAtts; i++)
7107 if (attId == type->defaultAtts[i].id)
7108 return 1;
7109 if (isId && ! type->idAtt && ! attId->xmlns)
7110 type->idAtt = attId;
7111 }
7112 if (type->nDefaultAtts == type->allocDefaultAtts) {
7113 if (type->allocDefaultAtts == 0) {
7114 type->allocDefaultAtts = 8;
7115 type->defaultAtts
7116 = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7117 if (! type->defaultAtts) {
7118 type->allocDefaultAtts = 0;
7119 return 0;
7120 }
7121 } else {
7122 DEFAULT_ATTRIBUTE *temp;
7123
7124 /* Detect and prevent integer overflow */
7125 if (type->allocDefaultAtts > INT_MAX / 2) {
7126 return 0;
7127 }
7128
7129 int count = type->allocDefaultAtts * 2;
7130
7131 /* Detect and prevent integer overflow.
7132 * The preprocessor guard addresses the "always false" warning
7133 * from -Wtype-limits on platforms where
7134 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7135 #if UINT_MAX >= SIZE_MAX
7136 if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
7137 return 0;
7138 }
7139 #endif
7140
7141 temp = REALLOC(parser, type->defaultAtts,
7142 (count * sizeof(DEFAULT_ATTRIBUTE)));
7143 if (temp == NULL)
7144 return 0;
7145 type->allocDefaultAtts = count;
7146 type->defaultAtts = temp;
7147 }
7148 }
7149 att = type->defaultAtts + type->nDefaultAtts;
7150 att->id = attId;
7151 att->value = value;
7152 att->isCdata = isCdata;
7153 if (! isCdata)
7154 attId->maybeTokenized = XML_TRUE;
7155 type->nDefaultAtts += 1;
7156 return 1;
7157 }
7158
7159 static int
7160 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
7161 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7162 const XML_Char *name;
7163 for (name = elementType->name; *name; name++) {
7164 if (*name == XML_T(ASCII_COLON)) {
7165 PREFIX *prefix;
7166 const XML_Char *s;
7167 for (s = elementType->name; s != name; s++) {
7168 if (! poolAppendChar(&dtd->pool, *s))
7169 return 0;
7170 }
7171 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7172 return 0;
7173 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
7174 sizeof(PREFIX));
7175 if (! prefix)
7176 return 0;
7177 if (prefix->name == poolStart(&dtd->pool))
7178 poolFinish(&dtd->pool);
7179 else
7180 poolDiscard(&dtd->pool);
7181 elementType->prefix = prefix;
7182 break;
7183 }
7184 }
7185 return 1;
7186 }
7187
7188 static ATTRIBUTE_ID *
7189 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
7190 const char *end) {
7191 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7192 ATTRIBUTE_ID *id;
7193 const XML_Char *name;
7194 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7195 return NULL;
7196 name = poolStoreString(&dtd->pool, enc, start, end);
7197 if (! name)
7198 return NULL;
7199 /* skip quotation mark - its storage will be reused (like in name[-1]) */
7200 ++name;
7201 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
7202 sizeof(ATTRIBUTE_ID));
7203 if (! id)
7204 return NULL;
7205 if (id->name != name)
7206 poolDiscard(&dtd->pool);
7207 else {
7208 poolFinish(&dtd->pool);
7209 if (! parser->m_ns)
7210 ;
7211 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
7212 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
7213 && name[4] == XML_T(ASCII_s)
7214 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
7215 if (name[5] == XML_T('\0'))
7216 id->prefix = &dtd->defaultPrefix;
7217 else
7218 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
7219 sizeof(PREFIX));
7220 id->xmlns = XML_TRUE;
7221 } else {
7222 int i;
7223 for (i = 0; name[i]; i++) {
7224 /* attributes without prefix are *not* in the default namespace */
7225 if (name[i] == XML_T(ASCII_COLON)) {
7226 int j;
7227 for (j = 0; j < i; j++) {
7228 if (! poolAppendChar(&dtd->pool, name[j]))
7229 return NULL;
7230 }
7231 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7232 return NULL;
7233 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
7234 poolStart(&dtd->pool), sizeof(PREFIX));
7235 if (! id->prefix)
7236 return NULL;
7237 if (id->prefix->name == poolStart(&dtd->pool))
7238 poolFinish(&dtd->pool);
7239 else
7240 poolDiscard(&dtd->pool);
7241 break;
7242 }
7243 }
7244 }
7245 }
7246 return id;
7247 }
7248
7249 #define CONTEXT_SEP XML_T(ASCII_FF)
7250
7251 static const XML_Char *
7252 getContext(XML_Parser parser) {
7253 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7254 HASH_TABLE_ITER iter;
7255 XML_Bool needSep = XML_FALSE;
7256
7257 if (dtd->defaultPrefix.binding) {
7258 int i;
7259 int len;
7260 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7261 return NULL;
7262 len = dtd->defaultPrefix.binding->uriLen;
7263 if (parser->m_namespaceSeparator)
7264 len--;
7265 for (i = 0; i < len; i++) {
7266 if (! poolAppendChar(&parser->m_tempPool,
7267 dtd->defaultPrefix.binding->uri[i])) {
7268 /* Because of memory caching, I don't believe this line can be
7269 * executed.
7270 *
7271 * This is part of a loop copying the default prefix binding
7272 * URI into the parser's temporary string pool. Previously,
7273 * that URI was copied into the same string pool, with a
7274 * terminating NUL character, as part of setContext(). When
7275 * the pool was cleared, that leaves a block definitely big
7276 * enough to hold the URI on the free block list of the pool.
7277 * The URI copy in getContext() therefore cannot run out of
7278 * memory.
7279 *
7280 * If the pool is used between the setContext() and
7281 * getContext() calls, the worst it can do is leave a bigger
7282 * block on the front of the free list. Given that this is
7283 * all somewhat inobvious and program logic can be changed, we
7284 * don't delete the line but we do exclude it from the test
7285 * coverage statistics.
7286 */
7287 return NULL; /* LCOV_EXCL_LINE */
7288 }
7289 }
7290 needSep = XML_TRUE;
7291 }
7292
7293 hashTableIterInit(&iter, &(dtd->prefixes));
7294 for (;;) {
7295 int i;
7296 int len;
7297 const XML_Char *s;
7298 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
7299 if (! prefix)
7300 break;
7301 if (! prefix->binding) {
7302 /* This test appears to be (justifiable) paranoia. There does
7303 * not seem to be a way of injecting a prefix without a binding
7304 * that doesn't get errored long before this function is called.
7305 * The test should remain for safety's sake, so we instead
7306 * exclude the following line from the coverage statistics.
7307 */
7308 continue; /* LCOV_EXCL_LINE */
7309 }
7310 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7311 return NULL;
7312 for (s = prefix->name; *s; s++)
7313 if (! poolAppendChar(&parser->m_tempPool, *s))
7314 return NULL;
7315 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7316 return NULL;
7317 len = prefix->binding->uriLen;
7318 if (parser->m_namespaceSeparator)
7319 len--;
7320 for (i = 0; i < len; i++)
7321 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7322 return NULL;
7323 needSep = XML_TRUE;
7324 }
7325
7326 hashTableIterInit(&iter, &(dtd->generalEntities));
7327 for (;;) {
7328 const XML_Char *s;
7329 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7330 if (! e)
7331 break;
7332 if (! e->open)
7333 continue;
7334 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7335 return NULL;
7336 for (s = e->name; *s; s++)
7337 if (! poolAppendChar(&parser->m_tempPool, *s))
7338 return 0;
7339 needSep = XML_TRUE;
7340 }
7341
7342 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7343 return NULL;
7344 return parser->m_tempPool.start;
7345 }
7346
7347 static XML_Bool
7348 setContext(XML_Parser parser, const XML_Char *context) {
7349 if (context == NULL) {
7350 return XML_FALSE;
7351 }
7352
7353 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7354 const XML_Char *s = context;
7355
7356 while (*context != XML_T('\0')) {
7357 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7358 ENTITY *e;
7359 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7360 return XML_FALSE;
7361 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7362 poolStart(&parser->m_tempPool), 0);
7363 if (e)
7364 e->open = XML_TRUE;
7365 if (*s != XML_T('\0'))
7366 s++;
7367 context = s;
7368 poolDiscard(&parser->m_tempPool);
7369 } else if (*s == XML_T(ASCII_EQUALS)) {
7370 PREFIX *prefix;
7371 if (poolLength(&parser->m_tempPool) == 0)
7372 prefix = &dtd->defaultPrefix;
7373 else {
7374 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7375 return XML_FALSE;
7376 const XML_Char *const prefixName = poolCopyStringNoFinish(
7377 &dtd->pool, poolStart(&parser->m_tempPool));
7378 if (! prefixName) {
7379 return XML_FALSE;
7380 }
7381
7382 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName,
7383 sizeof(PREFIX));
7384
7385 const bool prefixNameUsed = prefix && prefix->name == prefixName;
7386 if (prefixNameUsed)
7387 poolFinish(&dtd->pool);
7388 else
7389 poolDiscard(&dtd->pool);
7390
7391 if (! prefix)
7392 return XML_FALSE;
7393
7394 poolDiscard(&parser->m_tempPool);
7395 }
7396 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7397 context++)
7398 if (! poolAppendChar(&parser->m_tempPool, *context))
7399 return XML_FALSE;
7400 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7401 return XML_FALSE;
7402 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7403 &parser->m_inheritedBindings)
7404 != XML_ERROR_NONE)
7405 return XML_FALSE;
7406 poolDiscard(&parser->m_tempPool);
7407 if (*context != XML_T('\0'))
7408 ++context;
7409 s = context;
7410 } else {
7411 if (! poolAppendChar(&parser->m_tempPool, *s))
7412 return XML_FALSE;
7413 s++;
7414 }
7415 }
7416 return XML_TRUE;
7417 }
7418
7419 static void FASTCALL
7420 normalizePublicId(XML_Char *publicId) {
7421 XML_Char *p = publicId;
7422 XML_Char *s;
7423 for (s = publicId; *s; s++) {
7424 switch (*s) {
7425 case 0x20:
7426 case 0xD:
7427 case 0xA:
7428 if (p != publicId && p[-1] != 0x20)
7429 *p++ = 0x20;
7430 break;
7431 default:
7432 *p++ = *s;
7433 }
7434 }
7435 if (p != publicId && p[-1] == 0x20)
7436 --p;
7437 *p = XML_T('\0');
7438 }
7439
7440 static DTD *
7441 dtdCreate(XML_Parser parser) {
7442 DTD *p = MALLOC(parser, sizeof(DTD));
7443 if (p == NULL)
7444 return p;
7445 poolInit(&(p->pool), parser);
7446 poolInit(&(p->entityValuePool), parser);
7447 hashTableInit(&(p->generalEntities), parser);
7448 hashTableInit(&(p->elementTypes), parser);
7449 hashTableInit(&(p->attributeIds), parser);
7450 hashTableInit(&(p->prefixes), parser);
7451 #ifdef XML_DTD
7452 p->paramEntityRead = XML_FALSE;
7453 hashTableInit(&(p->paramEntities), parser);
7454 #endif /* XML_DTD */
7455 p->defaultPrefix.name = NULL;
7456 p->defaultPrefix.binding = NULL;
7457
7458 p->in_eldecl = XML_FALSE;
7459 p->scaffIndex = NULL;
7460 p->scaffold = NULL;
7461 p->scaffLevel = 0;
7462 p->scaffSize = 0;
7463 p->scaffCount = 0;
7464 p->contentStringLen = 0;
7465
7466 p->keepProcessing = XML_TRUE;
7467 p->hasParamEntityRefs = XML_FALSE;
7468 p->standalone = XML_FALSE;
7469 return p;
7470 }
7471
7472 static void
7473 dtdReset(DTD *p, XML_Parser parser) {
7474 HASH_TABLE_ITER iter;
7475 hashTableIterInit(&iter, &(p->elementTypes));
7476 for (;;) {
7477 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7478 if (! e)
7479 break;
7480 if (e->allocDefaultAtts != 0)
7481 FREE(parser, e->defaultAtts);
7482 }
7483 hashTableClear(&(p->generalEntities));
7484 #ifdef XML_DTD
7485 p->paramEntityRead = XML_FALSE;
7486 hashTableClear(&(p->paramEntities));
7487 #endif /* XML_DTD */
7488 hashTableClear(&(p->elementTypes));
7489 hashTableClear(&(p->attributeIds));
7490 hashTableClear(&(p->prefixes));
7491 poolClear(&(p->pool));
7492 poolClear(&(p->entityValuePool));
7493 p->defaultPrefix.name = NULL;
7494 p->defaultPrefix.binding = NULL;
7495
7496 p->in_eldecl = XML_FALSE;
7497
7498 FREE(parser, p->scaffIndex);
7499 p->scaffIndex = NULL;
7500 FREE(parser, p->scaffold);
7501 p->scaffold = NULL;
7502
7503 p->scaffLevel = 0;
7504 p->scaffSize = 0;
7505 p->scaffCount = 0;
7506 p->contentStringLen = 0;
7507
7508 p->keepProcessing = XML_TRUE;
7509 p->hasParamEntityRefs = XML_FALSE;
7510 p->standalone = XML_FALSE;
7511 }
7512
7513 static void
7514 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) {
7515 HASH_TABLE_ITER iter;
7516 hashTableIterInit(&iter, &(p->elementTypes));
7517 for (;;) {
7518 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7519 if (! e)
7520 break;
7521 if (e->allocDefaultAtts != 0)
7522 FREE(parser, e->defaultAtts);
7523 }
7524 hashTableDestroy(&(p->generalEntities));
7525 #ifdef XML_DTD
7526 hashTableDestroy(&(p->paramEntities));
7527 #endif /* XML_DTD */
7528 hashTableDestroy(&(p->elementTypes));
7529 hashTableDestroy(&(p->attributeIds));
7530 hashTableDestroy(&(p->prefixes));
7531 poolDestroy(&(p->pool));
7532 poolDestroy(&(p->entityValuePool));
7533 if (isDocEntity) {
7534 FREE(parser, p->scaffIndex);
7535 FREE(parser, p->scaffold);
7536 }
7537 FREE(parser, p);
7538 }
7539
7540 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7541 The new DTD has already been initialized.
7542 */
7543 static int
7544 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7545 XML_Parser parser) {
7546 HASH_TABLE_ITER iter;
7547
7548 /* Copy the prefix table. */
7549
7550 hashTableIterInit(&iter, &(oldDtd->prefixes));
7551 for (;;) {
7552 const XML_Char *name;
7553 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7554 if (! oldP)
7555 break;
7556 name = poolCopyString(&(newDtd->pool), oldP->name);
7557 if (! name)
7558 return 0;
7559 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7560 return 0;
7561 }
7562
7563 hashTableIterInit(&iter, &(oldDtd->attributeIds));
7564
7565 /* Copy the attribute id table. */
7566
7567 for (;;) {
7568 ATTRIBUTE_ID *newA;
7569 const XML_Char *name;
7570 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7571
7572 if (! oldA)
7573 break;
7574 /* Remember to allocate the scratch byte before the name. */
7575 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7576 return 0;
7577 name = poolCopyString(&(newDtd->pool), oldA->name);
7578 if (! name)
7579 return 0;
7580 ++name;
7581 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7582 sizeof(ATTRIBUTE_ID));
7583 if (! newA)
7584 return 0;
7585 newA->maybeTokenized = oldA->maybeTokenized;
7586 if (oldA->prefix) {
7587 newA->xmlns = oldA->xmlns;
7588 if (oldA->prefix == &oldDtd->defaultPrefix)
7589 newA->prefix = &newDtd->defaultPrefix;
7590 else
7591 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7592 oldA->prefix->name, 0);
7593 }
7594 }
7595
7596 /* Copy the element type table. */
7597
7598 hashTableIterInit(&iter, &(oldDtd->elementTypes));
7599
7600 for (;;) {
7601 int i;
7602 ELEMENT_TYPE *newE;
7603 const XML_Char *name;
7604 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7605 if (! oldE)
7606 break;
7607 name = poolCopyString(&(newDtd->pool), oldE->name);
7608 if (! name)
7609 return 0;
7610 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7611 sizeof(ELEMENT_TYPE));
7612 if (! newE)
7613 return 0;
7614 if (oldE->nDefaultAtts) {
7615 /* Detect and prevent integer overflow.
7616 * The preprocessor guard addresses the "always false" warning
7617 * from -Wtype-limits on platforms where
7618 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7619 #if UINT_MAX >= SIZE_MAX
7620 if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
7621 return 0;
7622 }
7623 #endif
7624 newE->defaultAtts
7625 = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7626 if (! newE->defaultAtts) {
7627 return 0;
7628 }
7629 }
7630 if (oldE->idAtt)
7631 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7632 oldE->idAtt->name, 0);
7633 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7634 if (oldE->prefix)
7635 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7636 oldE->prefix->name, 0);
7637 for (i = 0; i < newE->nDefaultAtts; i++) {
7638 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7639 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7640 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7641 if (oldE->defaultAtts[i].value) {
7642 newE->defaultAtts[i].value
7643 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7644 if (! newE->defaultAtts[i].value)
7645 return 0;
7646 } else
7647 newE->defaultAtts[i].value = NULL;
7648 }
7649 }
7650
7651 /* Copy the entity tables. */
7652 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7653 &(oldDtd->generalEntities)))
7654 return 0;
7655
7656 #ifdef XML_DTD
7657 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7658 &(oldDtd->paramEntities)))
7659 return 0;
7660 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7661 #endif /* XML_DTD */
7662
7663 newDtd->keepProcessing = oldDtd->keepProcessing;
7664 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7665 newDtd->standalone = oldDtd->standalone;
7666
7667 /* Don't want deep copying for scaffolding */
7668 newDtd->in_eldecl = oldDtd->in_eldecl;
7669 newDtd->scaffold = oldDtd->scaffold;
7670 newDtd->contentStringLen = oldDtd->contentStringLen;
7671 newDtd->scaffSize = oldDtd->scaffSize;
7672 newDtd->scaffLevel = oldDtd->scaffLevel;
7673 newDtd->scaffIndex = oldDtd->scaffIndex;
7674
7675 return 1;
7676 } /* End dtdCopy */
7677
7678 static int
7679 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7680 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7681 HASH_TABLE_ITER iter;
7682 const XML_Char *cachedOldBase = NULL;
7683 const XML_Char *cachedNewBase = NULL;
7684
7685 hashTableIterInit(&iter, oldTable);
7686
7687 for (;;) {
7688 ENTITY *newE;
7689 const XML_Char *name;
7690 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7691 if (! oldE)
7692 break;
7693 name = poolCopyString(newPool, oldE->name);
7694 if (! name)
7695 return 0;
7696 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7697 if (! newE)
7698 return 0;
7699 if (oldE->systemId) {
7700 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7701 if (! tem)
7702 return 0;
7703 newE->systemId = tem;
7704 if (oldE->base) {
7705 if (oldE->base == cachedOldBase)
7706 newE->base = cachedNewBase;
7707 else {
7708 cachedOldBase = oldE->base;
7709 tem = poolCopyString(newPool, cachedOldBase);
7710 if (! tem)
7711 return 0;
7712 cachedNewBase = newE->base = tem;
7713 }
7714 }
7715 if (oldE->publicId) {
7716 tem = poolCopyString(newPool, oldE->publicId);
7717 if (! tem)
7718 return 0;
7719 newE->publicId = tem;
7720 }
7721 } else {
7722 const XML_Char *tem
7723 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7724 if (! tem)
7725 return 0;
7726 newE->textPtr = tem;
7727 newE->textLen = oldE->textLen;
7728 }
7729 if (oldE->notation) {
7730 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7731 if (! tem)
7732 return 0;
7733 newE->notation = tem;
7734 }
7735 newE->is_param = oldE->is_param;
7736 newE->is_internal = oldE->is_internal;
7737 }
7738 return 1;
7739 }
7740
/* Base-2 logarithm of the number of slots a hash table receives when
 * lookup() allocates it for the first time (1 << INIT_POWER slots). */
#define INIT_POWER 6
7742
7743 static XML_Bool FASTCALL
7744 keyeq(KEY s1, KEY s2) {
7745 for (; *s1 == *s2; s1++, s2++)
7746 if (*s1 == 0)
7747 return XML_TRUE;
7748 return XML_FALSE;
7749 }
7750
7751 static size_t
7752 keylen(KEY s) {
7753 size_t len = 0;
7754 for (; *s; s++, len++)
7755 ;
7756 return len;
7757 }
7758
/* Copy the hash salt (m_hash_secret_salt_128) of the parser that
 * getRootParserOf() returns for `parser` into *key. */
static void
copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  /* Sanity check: the parser returned must not have a parent itself. */
  assert(! rootParser->m_parentParser);

  *key = rootParser->m_hash_secret_salt_128;
}
7766
7767 static unsigned long FASTCALL
7768 hash(XML_Parser parser, KEY s) {
7769 struct siphash state;
7770 struct sipkey key;
7771 (void)sip24_valid;
7772 copy_salt_to_sipkey(parser, &key);
7773 sip24_init(&state, &key);
7774 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7775 return (unsigned long)sip24_final(&state);
7776 }
7777
7778 static NAMED *
7779 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7780 size_t i;
7781 if (table->size == 0) {
7782 size_t tsize;
7783 if (! createSize)
7784 return NULL;
7785 table->power = INIT_POWER;
7786 /* table->size is a power of 2 */
7787 table->size = (size_t)1 << INIT_POWER;
7788 tsize = table->size * sizeof(NAMED *);
7789 table->v = MALLOC(table->parser, tsize);
7790 if (! table->v) {
7791 table->size = 0;
7792 return NULL;
7793 }
7794 memset(table->v, 0, tsize);
7795 i = hash(parser, name) & ((unsigned long)table->size - 1);
7796 } else {
7797 unsigned long h = hash(parser, name);
7798 unsigned long mask = (unsigned long)table->size - 1;
7799 unsigned char step = 0;
7800 i = h & mask;
7801 while (table->v[i]) {
7802 if (keyeq(name, table->v[i]->name))
7803 return table->v[i];
7804 if (! step)
7805 step = PROBE_STEP(h, mask, table->power);
7806 i < step ? (i += table->size - step) : (i -= step);
7807 }
7808 if (! createSize)
7809 return NULL;
7810
7811 /* check for overflow (table is half full) */
7812 if (table->used >> (table->power - 1)) {
7813 unsigned char newPower = table->power + 1;
7814
7815 /* Detect and prevent invalid shift */
7816 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7817 return NULL;
7818 }
7819
7820 size_t newSize = (size_t)1 << newPower;
7821 unsigned long newMask = (unsigned long)newSize - 1;
7822
7823 /* Detect and prevent integer overflow */
7824 if (newSize > SIZE_MAX / sizeof(NAMED *)) {
7825 return NULL;
7826 }
7827
7828 size_t tsize = newSize * sizeof(NAMED *);
7829 NAMED **newV = MALLOC(table->parser, tsize);
7830 if (! newV)
7831 return NULL;
7832 memset(newV, 0, tsize);
7833 for (i = 0; i < table->size; i++)
7834 if (table->v[i]) {
7835 unsigned long newHash = hash(parser, table->v[i]->name);
7836 size_t j = newHash & newMask;
7837 step = 0;
7838 while (newV[j]) {
7839 if (! step)
7840 step = PROBE_STEP(newHash, newMask, newPower);
7841 j < step ? (j += newSize - step) : (j -= step);
7842 }
7843 newV[j] = table->v[i];
7844 }
7845 FREE(table->parser, table->v);
7846 table->v = newV;
7847 table->power = newPower;
7848 table->size = newSize;
7849 i = h & newMask;
7850 step = 0;
7851 while (table->v[i]) {
7852 if (! step)
7853 step = PROBE_STEP(h, newMask, newPower);
7854 i < step ? (i += newSize - step) : (i -= step);
7855 }
7856 }
7857 }
7858 table->v[i] = MALLOC(table->parser, createSize);
7859 if (! table->v[i])
7860 return NULL;
7861 memset(table->v[i], 0, createSize);
7862 table->v[i]->name = name;
7863 (table->used)++;
7864 return table->v[i];
7865 }
7866
7867 static void FASTCALL
7868 hashTableClear(HASH_TABLE *table) {
7869 size_t i;
7870 for (i = 0; i < table->size; i++) {
7871 FREE(table->parser, table->v[i]);
7872 table->v[i] = NULL;
7873 }
7874 table->used = 0;
7875 }
7876
7877 static void FASTCALL
7878 hashTableDestroy(HASH_TABLE *table) {
7879 size_t i;
7880 for (i = 0; i < table->size; i++)
7881 FREE(table->parser, table->v[i]);
7882 FREE(table->parser, table->v);
7883 }
7884
7885 static void FASTCALL
7886 hashTableInit(HASH_TABLE *p, XML_Parser parser) {
7887 p->power = 0;
7888 p->size = 0;
7889 p->used = 0;
7890 p->v = NULL;
7891 p->parser = parser;
7892 }
7893
7894 static void FASTCALL
7895 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7896 iter->p = table->v;
7897 iter->end = iter->p ? iter->p + table->size : NULL;
7898 }
7899
7900 static NAMED *FASTCALL
7901 hashTableIterNext(HASH_TABLE_ITER *iter) {
7902 while (iter->p != iter->end) {
7903 NAMED *tem = *(iter->p)++;
7904 if (tem)
7905 return tem;
7906 }
7907 return NULL;
7908 }
7909
7910 static void FASTCALL
7911 poolInit(STRING_POOL *pool, XML_Parser parser) {
7912 pool->blocks = NULL;
7913 pool->freeBlocks = NULL;
7914 pool->start = NULL;
7915 pool->ptr = NULL;
7916 pool->end = NULL;
7917 pool->parser = parser;
7918 }
7919
7920 static void FASTCALL
7921 poolClear(STRING_POOL *pool) {
7922 if (! pool->freeBlocks)
7923 pool->freeBlocks = pool->blocks;
7924 else {
7925 BLOCK *p = pool->blocks;
7926 while (p) {
7927 BLOCK *tem = p->next;
7928 p->next = pool->freeBlocks;
7929 pool->freeBlocks = p;
7930 p = tem;
7931 }
7932 }
7933 pool->blocks = NULL;
7934 pool->start = NULL;
7935 pool->ptr = NULL;
7936 pool->end = NULL;
7937 }
7938
7939 static void FASTCALL
7940 poolDestroy(STRING_POOL *pool) {
7941 BLOCK *p = pool->blocks;
7942 while (p) {
7943 BLOCK *tem = p->next;
7944 FREE(pool->parser, p);
7945 p = tem;
7946 }
7947 p = pool->freeBlocks;
7948 while (p) {
7949 BLOCK *tem = p->next;
7950 FREE(pool->parser, p);
7951 p = tem;
7952 }
7953 }
7954
7955 static XML_Char *
7956 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7957 const char *end) {
7958 if (! pool->ptr && ! poolGrow(pool))
7959 return NULL;
7960 for (;;) {
7961 const enum XML_Convert_Result convert_res = XmlConvert(
7962 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7963 if ((convert_res == XML_CONVERT_COMPLETED)
7964 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7965 break;
7966 if (! poolGrow(pool))
7967 return NULL;
7968 }
7969 return pool->start;
7970 }
7971
7972 static const XML_Char *FASTCALL
7973 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7974 do {
7975 if (! poolAppendChar(pool, *s))
7976 return NULL;
7977 } while (*s++);
7978 s = pool->start;
7979 poolFinish(pool);
7980 return s;
7981 }
7982
7983 // A version of `poolCopyString` that does not call `poolFinish`
7984 // and reverts any partial advancement upon failure.
7985 static const XML_Char *FASTCALL
7986 poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) {
7987 const XML_Char *const original = s;
7988 do {
7989 if (! poolAppendChar(pool, *s)) {
7990 // Revert any previously successful advancement
7991 const ptrdiff_t advancedBy = s - original;
7992 if (advancedBy > 0)
7993 pool->ptr -= advancedBy;
7994 return NULL;
7995 }
7996 } while (*s++);
7997 return pool->start;
7998 }
7999
8000 static const XML_Char *
8001 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
8002 if (! pool->ptr && ! poolGrow(pool)) {
8003 /* The following line is unreachable given the current usage of
8004 * poolCopyStringN(). Currently it is called from exactly one
8005 * place to copy the text of a simple general entity. By that
8006 * point, the name of the entity is already stored in the pool, so
8007 * pool->ptr cannot be NULL.
8008 *
8009 * If poolCopyStringN() is used elsewhere as it well might be,
8010 * this line may well become executable again. Regardless, this
8011 * sort of check shouldn't be removed lightly, so we just exclude
8012 * it from the coverage statistics.
8013 */
8014 return NULL; /* LCOV_EXCL_LINE */
8015 }
8016 for (; n > 0; --n, s++) {
8017 if (! poolAppendChar(pool, *s))
8018 return NULL;
8019 }
8020 s = pool->start;
8021 poolFinish(pool);
8022 return s;
8023 }
8024
8025 static const XML_Char *FASTCALL
8026 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
8027 while (*s) {
8028 if (! poolAppendChar(pool, *s))
8029 return NULL;
8030 s++;
8031 }
8032 return pool->start;
8033 }
8034
8035 static XML_Char *
8036 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8037 const char *end) {
8038 if (! poolAppend(pool, enc, ptr, end))
8039 return NULL;
8040 if (pool->ptr == pool->end && ! poolGrow(pool))
8041 return NULL;
8042 *(pool->ptr)++ = 0;
8043 return pool->start;
8044 }
8045
8046 static size_t
8047 poolBytesToAllocateFor(int blockSize) {
8048 /* Unprotected math would be:
8049 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
8050 **
8051 ** Detect overflow, avoiding _signed_ overflow undefined behavior
8052 ** For a + b * c we check b * c in isolation first, so that addition of a
8053 ** on top has no chance of making us accept a small non-negative number
8054 */
8055 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
8056
8057 if (blockSize <= 0)
8058 return 0;
8059
8060 if (blockSize > (int)(INT_MAX / stretch))
8061 return 0;
8062
8063 {
8064 const int stretchedBlockSize = blockSize * (int)stretch;
8065 const int bytesToAllocate
8066 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
8067 if (bytesToAllocate < 0)
8068 return 0;
8069
8070 return (size_t)bytesToAllocate;
8071 }
8072 }
8073
8074 static XML_Bool FASTCALL
8075 poolGrow(STRING_POOL *pool) {
8076 if (pool->freeBlocks) {
8077 if (pool->start == NULL) {
8078 pool->blocks = pool->freeBlocks;
8079 pool->freeBlocks = pool->freeBlocks->next;
8080 pool->blocks->next = NULL;
8081 pool->start = pool->blocks->s;
8082 pool->end = pool->start + pool->blocks->size;
8083 pool->ptr = pool->start;
8084 return XML_TRUE;
8085 }
8086 if (pool->end - pool->start < pool->freeBlocks->size) {
8087 BLOCK *tem = pool->freeBlocks->next;
8088 pool->freeBlocks->next = pool->blocks;
8089 pool->blocks = pool->freeBlocks;
8090 pool->freeBlocks = tem;
8091 memcpy(pool->blocks->s, pool->start,
8092 (pool->end - pool->start) * sizeof(XML_Char));
8093 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
8094 pool->start = pool->blocks->s;
8095 pool->end = pool->start + pool->blocks->size;
8096 return XML_TRUE;
8097 }
8098 }
8099 if (pool->blocks && pool->start == pool->blocks->s) {
8100 BLOCK *temp;
8101 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
8102 size_t bytesToAllocate;
8103
8104 /* NOTE: Needs to be calculated prior to calling `realloc`
8105 to avoid dangling pointers: */
8106 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
8107
8108 if (blockSize < 0) {
8109 /* This condition traps a situation where either more than
8110 * INT_MAX/2 bytes have already been allocated. This isn't
8111 * readily testable, since it is unlikely that an average
8112 * machine will have that much memory, so we exclude it from the
8113 * coverage statistics.
8114 */
8115 return XML_FALSE; /* LCOV_EXCL_LINE */
8116 }
8117
8118 bytesToAllocate = poolBytesToAllocateFor(blockSize);
8119 if (bytesToAllocate == 0)
8120 return XML_FALSE;
8121
8122 temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate);
8123 if (temp == NULL)
8124 return XML_FALSE;
8125 pool->blocks = temp;
8126 pool->blocks->size = blockSize;
8127 pool->ptr = pool->blocks->s + offsetInsideBlock;
8128 pool->start = pool->blocks->s;
8129 pool->end = pool->start + blockSize;
8130 } else {
8131 BLOCK *tem;
8132 int blockSize = (int)(pool->end - pool->start);
8133 size_t bytesToAllocate;
8134
8135 if (blockSize < 0) {
8136 /* This condition traps a situation where either more than
8137 * INT_MAX bytes have already been allocated (which is prevented
8138 * by various pieces of program logic, not least this one, never
8139 * mind the unlikelihood of actually having that much memory) or
8140 * the pool control fields have been corrupted (which could
8141 * conceivably happen in an extremely buggy user handler
8142 * function). Either way it isn't readily testable, so we
8143 * exclude it from the coverage statistics.
8144 */
8145 return XML_FALSE; /* LCOV_EXCL_LINE */
8146 }
8147
8148 if (blockSize < INIT_BLOCK_SIZE)
8149 blockSize = INIT_BLOCK_SIZE;
8150 else {
8151 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
8152 if ((int)((unsigned)blockSize * 2U) < 0) {
8153 return XML_FALSE;
8154 }
8155 blockSize *= 2;
8156 }
8157
8158 bytesToAllocate = poolBytesToAllocateFor(blockSize);
8159 if (bytesToAllocate == 0)
8160 return XML_FALSE;
8161
8162 tem = MALLOC(pool->parser, bytesToAllocate);
8163 if (! tem)
8164 return XML_FALSE;
8165 tem->size = blockSize;
8166 tem->next = pool->blocks;
8167 pool->blocks = tem;
8168 if (pool->ptr != pool->start)
8169 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
8170 pool->ptr = tem->s + (pool->ptr - pool->start);
8171 pool->start = tem->s;
8172 pool->end = tem->s + blockSize;
8173 }
8174 return XML_TRUE;
8175 }
8176
8177 static int FASTCALL
8178 nextScaffoldPart(XML_Parser parser) {
8179 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8180 CONTENT_SCAFFOLD *me;
8181 int next;
8182
8183 if (! dtd->scaffIndex) {
8184 /* Detect and prevent integer overflow.
8185 * The preprocessor guard addresses the "always false" warning
8186 * from -Wtype-limits on platforms where
8187 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8188 #if UINT_MAX >= SIZE_MAX
8189 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
8190 return -1;
8191 }
8192 #endif
8193 dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int));
8194 if (! dtd->scaffIndex)
8195 return -1;
8196 dtd->scaffIndex[0] = 0;
8197 }
8198
8199 // Will casting to int be safe further down?
8200 if (dtd->scaffCount > INT_MAX) {
8201 return -1;
8202 }
8203
8204 if (dtd->scaffCount >= dtd->scaffSize) {
8205 CONTENT_SCAFFOLD *temp;
8206 if (dtd->scaffold) {
8207 /* Detect and prevent integer overflow */
8208 if (dtd->scaffSize > UINT_MAX / 2u) {
8209 return -1;
8210 }
8211 /* Detect and prevent integer overflow.
8212 * The preprocessor guard addresses the "always false" warning
8213 * from -Wtype-limits on platforms where
8214 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8215 #if UINT_MAX >= SIZE_MAX
8216 if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) {
8217 return -1;
8218 }
8219 #endif
8220
8221 temp = REALLOC(parser, dtd->scaffold,
8222 dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
8223 if (temp == NULL)
8224 return -1;
8225 dtd->scaffSize *= 2;
8226 } else {
8227 temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD));
8228 if (temp == NULL)
8229 return -1;
8230 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
8231 }
8232 dtd->scaffold = temp;
8233 }
8234 next = (int)dtd->scaffCount++;
8235 me = &dtd->scaffold[next];
8236 if (dtd->scaffLevel) {
8237 CONTENT_SCAFFOLD *parent
8238 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
8239 if (parent->lastchild) {
8240 dtd->scaffold[parent->lastchild].nextsib = next;
8241 }
8242 if (! parent->childcnt)
8243 parent->firstchild = next;
8244 parent->lastchild = next;
8245 parent->childcnt++;
8246 }
8247 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
8248 return next;
8249 }
8250
8251 static XML_Content *
8252 build_model(XML_Parser parser) {
8253 /* Function build_model transforms the existing parser->m_dtd->scaffold
8254 * array of CONTENT_SCAFFOLD tree nodes into a new array of
8255 * XML_Content tree nodes followed by a gapless list of zero-terminated
8256 * strings. */
8257 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8258 XML_Content *ret;
8259 XML_Char *str; /* the current string writing location */
8260
8261 /* Detect and prevent integer overflow.
8262 * The preprocessor guard addresses the "always false" warning
8263 * from -Wtype-limits on platforms where
8264 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8265 #if UINT_MAX >= SIZE_MAX
8266 if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) {
8267 return NULL;
8268 }
8269 if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) {
8270 return NULL;
8271 }
8272 #endif
8273 if (dtd->scaffCount * sizeof(XML_Content)
8274 > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) {
8275 return NULL;
8276 }
8277
8278 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
8279 + (dtd->contentStringLen * sizeof(XML_Char)));
8280
8281 // NOTE: We are avoiding MALLOC(..) here to so that
8282 // applications that are not using XML_FreeContentModel but plain
8283 // free(..) or .free_fcn() to free the content model's memory are safe.
8284 ret = parser->m_mem.malloc_fcn(allocsize);
8285 if (! ret)
8286 return NULL;
8287
8288 /* What follows is an iterative implementation (of what was previously done
8289 * recursively in a dedicated function called "build_node". The old recursive
8290 * build_node could be forced into stack exhaustion from input as small as a
8291 * few megabyte, and so that was a security issue. Hence, a function call
8292 * stack is avoided now by resolving recursion.)
8293 *
8294 * The iterative approach works as follows:
8295 *
8296 * - We have two writing pointers, both walking up the result array; one does
8297 * the work, the other creates "jobs" for its colleague to do, and leads
8298 * the way:
8299 *
8300 * - The faster one, pointer jobDest, always leads and writes "what job
8301 * to do" by the other, once they reach that place in the
8302 * array: leader "jobDest" stores the source node array index (relative
8303 * to array dtd->scaffold) in field "numchildren".
8304 *
8305 * - The slower one, pointer dest, looks at the value stored in the
8306 * "numchildren" field (which actually holds a source node array index
8307 * at that time) and puts the real data from dtd->scaffold in.
8308 *
8309 * - Before the loop starts, jobDest writes source array index 0
8310 * (where the root node is located) so that dest will have something to do
8311 * when it starts operation.
8312 *
8313 * - Whenever nodes with children are encountered, jobDest appends
8314 * them as new jobs, in order. As a result, tree node siblings are
8315 * adjacent in the resulting array, for example:
8316 *
8317 * [0] root, has two children
8318 * [1] first child of 0, has three children
8319 * [3] first child of 1, does not have children
8320 * [4] second child of 1, does not have children
8321 * [5] third child of 1, does not have children
8322 * [2] second child of 0, does not have children
8323 *
8324 * Or (the same data) presented in flat array view:
8325 *
8326 * [0] root, has two children
8327 *
8328 * [1] first child of 0, has three children
8329 * [2] second child of 0, does not have children
8330 *
8331 * [3] first child of 1, does not have children
8332 * [4] second child of 1, does not have children
8333 * [5] third child of 1, does not have children
8334 *
8335 * - The algorithm repeats until all target array indices have been processed.
8336 */
8337 XML_Content *dest = ret; /* tree node writing location, moves upwards */
8338 XML_Content *const destLimit = &ret[dtd->scaffCount];
8339 XML_Content *jobDest = ret; /* next free writing location in target array */
8340 str = (XML_Char *)&ret[dtd->scaffCount];
8341
8342 /* Add the starting job, the root node (index 0) of the source tree */
8343 (jobDest++)->numchildren = 0;
8344
8345 for (; dest < destLimit; dest++) {
8346 /* Retrieve source tree array index from job storage */
8347 const int src_node = (int)dest->numchildren;
8348
8349 /* Convert item */
8350 dest->type = dtd->scaffold[src_node].type;
8351 dest->quant = dtd->scaffold[src_node].quant;
8352 if (dest->type == XML_CTYPE_NAME) {
8353 const XML_Char *src;
8354 dest->name = str;
8355 src = dtd->scaffold[src_node].name;
8356 for (;;) {
8357 *str++ = *src;
8358 if (! *src)
8359 break;
8360 src++;
8361 }
8362 dest->numchildren = 0;
8363 dest->children = NULL;
8364 } else {
8365 unsigned int i;
8366 int cn;
8367 dest->name = NULL;
8368 dest->numchildren = dtd->scaffold[src_node].childcnt;
8369 dest->children = jobDest;
8370
8371 /* Append scaffold indices of children to array */
8372 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
8373 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
8374 (jobDest++)->numchildren = (unsigned int)cn;
8375 }
8376 }
8377
8378 return ret;
8379 }
8380
8381 static ELEMENT_TYPE *
8382 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8383 const char *end) {
8384 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8385 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8386 ELEMENT_TYPE *ret;
8387
8388 if (! name)
8389 return NULL;
8390 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8391 sizeof(ELEMENT_TYPE));
8392 if (! ret)
8393 return NULL;
8394 if (ret->name != name)
8395 poolDiscard(&dtd->pool);
8396 else {
8397 poolFinish(&dtd->pool);
8398 if (! setElementTypePrefix(parser, ret))
8399 return NULL;
8400 }
8401 return ret;
8402 }
8403
8404 static XML_Char *
8405 copyString(const XML_Char *s, XML_Parser parser) {
8406 size_t charsRequired = 0;
8407 XML_Char *result;
8408
8409 /* First determine how long the string is */
8410 while (s[charsRequired] != 0) {
8411 charsRequired++;
8412 }
8413 /* Include the terminator */
8414 charsRequired++;
8415
8416 /* Now allocate space for the copy */
8417 result = MALLOC(parser, charsRequired * sizeof(XML_Char));
8418 if (result == NULL)
8419 return NULL;
8420 /* Copy the original into place */
8421 memcpy(result, s, charsRequired * sizeof(XML_Char));
8422 return result;
8423 }
8424
8425 #if XML_GE == 1
8426
8427 static float
8428 accountingGetCurrentAmplification(XML_Parser rootParser) {
8429 // 1.........1.........12 => 22
8430 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8431 const XmlBigCount countBytesOutput
8432 = rootParser->m_accounting.countBytesDirect
8433 + rootParser->m_accounting.countBytesIndirect;
8434 const float amplificationFactor
8435 = rootParser->m_accounting.countBytesDirect
8436 ? ((float)countBytesOutput
8437 / (float)(rootParser->m_accounting.countBytesDirect))
8438 : ((float)(lenOfShortestInclude
8439 + rootParser->m_accounting.countBytesIndirect)
8440 / (float)lenOfShortestInclude);
8441 assert(! rootParser->m_parentParser);
8442 return amplificationFactor;
8443 }
8444
8445 static void
8446 accountingReportStats(XML_Parser originParser, const char *epilog) {
8447 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8448 assert(! rootParser->m_parentParser);
8449
8450 if (rootParser->m_accounting.debugLevel == 0u) {
8451 return;
8452 }
8453
8454 const float amplificationFactor
8455 = accountingGetCurrentAmplification(rootParser);
8456 fprintf(stderr,
8457 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8458 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8459 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8460 rootParser->m_accounting.countBytesIndirect,
8461 (double)amplificationFactor, epilog);
8462 }
8463
8464 static void
8465 accountingOnAbort(XML_Parser originParser) {
8466 accountingReportStats(originParser, " ABORTING\n");
8467 }
8468
8469 static void
8470 accountingReportDiff(XML_Parser rootParser,
8471 unsigned int levelsAwayFromRootParser, const char *before,
8472 const char *after, ptrdiff_t bytesMore, int source_line,
8473 enum XML_Account account) {
8474 assert(! rootParser->m_parentParser);
8475
8476 fprintf(stderr,
8477 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8478 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8479 levelsAwayFromRootParser, source_line, 10, "");
8480
8481 const char ellipis[] = "[..]";
8482 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8483 const unsigned int contextLength = 10;
8484
8485 /* Note: Performance is of no concern here */
8486 const char *walker = before;
8487 if ((rootParser->m_accounting.debugLevel >= 3u)
8488 || (after - before)
8489 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8490 for (; walker < after; walker++) {
8491 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8492 }
8493 } else {
8494 for (; walker < before + contextLength; walker++) {
8495 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8496 }
8497 fprintf(stderr, ellipis);
8498 walker = after - contextLength;
8499 for (; walker < after; walker++) {
8500 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8501 }
8502 }
8503 fprintf(stderr, "\"\n");
8504 }
8505
8506 static XML_Bool
8507 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8508 const char *after, int source_line,
8509 enum XML_Account account) {
8510 /* Note: We need to check the token type *first* to be sure that
8511 * we can even access variable <after>, safely.
8512 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8513 switch (tok) {
8514 case XML_TOK_INVALID:
8515 case XML_TOK_PARTIAL:
8516 case XML_TOK_PARTIAL_CHAR:
8517 case XML_TOK_NONE:
8518 return XML_TRUE;
8519 }
8520
8521 if (account == XML_ACCOUNT_NONE)
8522 return XML_TRUE; /* because these bytes have been accounted for, already */
8523
8524 unsigned int levelsAwayFromRootParser;
8525 const XML_Parser rootParser
8526 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8527 assert(! rootParser->m_parentParser);
8528
8529 const int isDirect
8530 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8531 const ptrdiff_t bytesMore = after - before;
8532
8533 XmlBigCount *const additionTarget
8534 = isDirect ? &rootParser->m_accounting.countBytesDirect
8535 : &rootParser->m_accounting.countBytesIndirect;
8536
8537 /* Detect and avoid integer overflow */
8538 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8539 return XML_FALSE;
8540 *additionTarget += bytesMore;
8541
8542 const XmlBigCount countBytesOutput
8543 = rootParser->m_accounting.countBytesDirect
8544 + rootParser->m_accounting.countBytesIndirect;
8545 const float amplificationFactor
8546 = accountingGetCurrentAmplification(rootParser);
8547 const XML_Bool tolerated
8548 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8549 || (amplificationFactor
8550 <= rootParser->m_accounting.maximumAmplificationFactor);
8551
8552 if (rootParser->m_accounting.debugLevel >= 2u) {
8553 accountingReportStats(rootParser, "");
8554 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8555 bytesMore, source_line, account);
8556 }
8557
8558 return tolerated;
8559 }
8560
8561 unsigned long long
8562 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8563 if (! parser)
8564 return 0;
8565 return parser->m_accounting.countBytesDirect;
8566 }
8567
8568 unsigned long long
8569 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8570 if (! parser)
8571 return 0;
8572 return parser->m_accounting.countBytesIndirect;
8573 }
8574
8575 static void
8576 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8577 const char *action, int sourceLine) {
8578 assert(! rootParser->m_parentParser);
8579 if (rootParser->m_entity_stats.debugLevel == 0u)
8580 return;
8581
8582 # if defined(XML_UNICODE)
8583 const char *const entityName = "[..]";
8584 # else
8585 const char *const entityName = entity->name;
8586 # endif
8587
8588 fprintf(
8589 stderr,
8590 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8591 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8592 rootParser->m_entity_stats.currentDepth,
8593 rootParser->m_entity_stats.maximumDepthSeen,
8594 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8595 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8596 sourceLine);
8597 }
8598
8599 static void
8600 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8601 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8602 assert(! rootParser->m_parentParser);
8603
8604 rootParser->m_entity_stats.countEverOpened++;
8605 rootParser->m_entity_stats.currentDepth++;
8606 if (rootParser->m_entity_stats.currentDepth
8607 > rootParser->m_entity_stats.maximumDepthSeen) {
8608 rootParser->m_entity_stats.maximumDepthSeen++;
8609 }
8610
8611 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8612 }
8613
8614 static void
8615 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8616 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8617 assert(! rootParser->m_parentParser);
8618
8619 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8620 rootParser->m_entity_stats.currentDepth--;
8621 }
8622
8623 #endif /* XML_GE == 1 */
8624
8625 static XML_Parser
8626 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8627 XML_Parser rootParser = parser;
8628 unsigned int stepsTakenUpwards = 0;
8629 while (rootParser->m_parentParser) {
8630 rootParser = rootParser->m_parentParser;
8631 stepsTakenUpwards++;
8632 }
8633 assert(! rootParser->m_parentParser);
8634 if (outLevelDiff != NULL) {
8635 *outLevelDiff = stepsTakenUpwards;
8636 }
8637 return rootParser;
8638 }
8639
8640 #if XML_GE == 1
8641
/* Maps a byte value to a constant, printable C string:
 *
 *   - 0, 9, 10 and 13 become "\0", "\t", "\n" and "\r"
 *   - the double quote and the backslash are preceded by a backslash
 *   - all other values from 32 to 126 are returned as the character itself
 *   - everything else becomes "\x" followed by the value in uppercase
 *     hexadecimal without zero padding (e.g. "\x1", "\x7F", "\xFF")
 *
 * The returned string has static storage duration and must not be modified.
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int index = c;

  // LCOV_EXCL_START
  if (index >= sizeof(printable) / sizeof(printable[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP

  return printable[index];
}
9165
9166 #endif /* XML_GE == 1 */
9167
/* Reads a debug level from the environment variable named variableName.
 *
 * Returns defaultDebugLevel when the variable is not set, when strtoul()
 * reports an error through errno, when no digits were consumed, or when
 * anything follows the parsed number.  Otherwise returns the value parsed
 * as a base-10 unsigned long.
 *
 * errno is reset to 0 before parsing and again after a rejected value.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *parseEnd = NULL;
  errno = 0;
  const unsigned long parsedLevel = strtoul(text, &parseEnd, 10);

  /* Accept only a clean conversion that consumed the whole string */
  const int wellFormed
      = (errno == 0) && (parseEnd != text) && (parseEnd[0] == '\0');
  if (! wellFormed) {
    errno = 0;
    return defaultDebugLevel;
  }

  return parsedLevel;
}
9186