1 /* 75ef4224f81c052e9e5aeea2ac7de75357d2169ff9908e39edc08b9dc3052513 (2.8.1+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Copyright (c) 2025-2026 Matthew Fernandez <matthew.fernandez@gmail.com>
45 Copyright (c) 2025 Atrem Borovik <polzovatellllk@gmail.com>
46 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com>
47 Copyright (c) 2026 Rosen Penev <rosenp@gmail.com>
48 Copyright (c) 2026 Francesco Bertolaccini
49 Copyright (c) 2026 Christian Ng <christianrng@berkeley.edu>
50 Licensed under the MIT license:
51
52 Permission is hereby granted, free of charge, to any person obtaining
53 a copy of this software and associated documentation files (the
54 "Software"), to deal in the Software without restriction, including
55 without limitation the rights to use, copy, modify, merge, publish,
56 distribute, sublicense, and/or sell copies of the Software, and to permit
57 persons to whom the Software is furnished to do so, subject to the
58 following conditions:
59
60 The above copyright notice and this permission notice shall be included
61 in all copies or substantial portions of the Software.
62
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
66 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
67 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
68 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
69 USE OR OTHER DEALINGS IN THE SOFTWARE.
70 */
71
72 #define XML_BUILDING_EXPAT 1
73
74 #include "expat_config.h"
75
76 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
77 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
78 #endif
79
80 #if defined(XML_DTD) && XML_GE == 0
81 # error Either undefine XML_DTD or define XML_GE to 1.
82 #endif
83
84 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
85 || (XML_CONTEXT_BYTES + 0 < 0)
86 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
87 #endif
88
89 #include <stdbool.h>
90 #include <stddef.h>
91 #include <string.h> /* memset(), memcpy() */
92 #include <assert.h>
93 #include <limits.h> /* INT_MAX, UINT_MAX */
94 #include <stdio.h> /* fprintf */
95 #include <stdlib.h> /* getenv */
96 #include <stdint.h> /* SIZE_MAX, uintptr_t */
97 #include <math.h> /* isnan */
98 #include <errno.h>
99
100 #ifdef _WIN32
101 # define getpid GetCurrentProcessId
102 #else
103 # include <sys/time.h> /* gettimeofday() */
104 # include <sys/types.h> /* getpid() */
105 # include <unistd.h> /* getpid() */
106 # include <fcntl.h> /* O_RDONLY */
107 # include <errno.h>
108 #endif
109
110 #ifdef _WIN32
111 # include "winconfig.h"
112 #endif
113
114 #include "ascii.h"
115 #include "expat.h"
116 #include "siphash.h"
117
118 #if defined(HAVE_ARC4RANDOM)
119 # include "random_arc4random.h"
120 #endif /* defined(HAVE_ARC4RANDOM) */
121
122 #if defined(HAVE_ARC4RANDOM_BUF)
123 # include "random_arc4random_buf.h"
124 #endif // defined(HAVE_ARC4RANDOM_BUF)
125
126 #if defined(XML_DEV_URANDOM)
127 # include "random_dev_urandom.h"
128 #endif /* defined(XML_DEV_URANDOM) */
129
130 #if defined(HAVE_GETENTROPY)
131 # include "random_getentropy.h"
132 #endif // defined(HAVE_GETENTROPY)
133
134 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
135 # include "random_getrandom.h"
136 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
137
138 #if defined(_WIN32)
139 # include "random_rand_s.h"
140 #endif /* defined(_WIN32) */
141
142 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
143 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
144 && ! defined(HAVE_GETENTROPY) && ! defined(XML_DEV_URANDOM) \
145 && ! defined(_WIN32) && ! defined(XML_POOR_ENTROPY)
146 # error You do not have support for any sources of high quality entropy \
147 enabled. For end user security, that is probably not what you want. \
148 \
149 Your options include: \
150 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
151 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
152 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
153 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
154 * BSD / macOS >=10.12 / glibc >=2.25 (getentropy): HAVE_GETENTROPY, \
155 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
156 * Windows >=Vista (rand_s): _WIN32. \
157 \
158 If you insist on not using any of these, bypass this error by defining \
159 XML_POOR_ENTROPY; you have been warned. \
160 \
161 If you have reasons to patch this detection code away or need changes \
162 to the build system, please open a bug. Thank you!
163 #endif
164
165 #ifdef XML_UNICODE
166 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
167 # define XmlConvert XmlUtf16Convert
168 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
169 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
170 # define XmlEncode XmlUtf16Encode
171 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
172 typedef unsigned short ICHAR;
173 #else
174 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
175 # define XmlConvert XmlUtf8Convert
176 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
177 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
178 # define XmlEncode XmlUtf8Encode
179 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
180 typedef char ICHAR;
181 #endif
182
183 #ifndef XML_NS
184
185 # define XmlInitEncodingNS XmlInitEncoding
186 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
187 # undef XmlGetInternalEncodingNS
188 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
189 # define XmlParseXmlDeclNS XmlParseXmlDecl
190
191 #endif
192
193 #ifdef XML_UNICODE
194
195 # ifdef XML_UNICODE_WCHAR_T
196 # define XML_T(x) (const wchar_t) x
197 # define XML_L(x) L##x
198 # else
199 # define XML_T(x) (const unsigned short)x
200 # define XML_L(x) x
201 # endif
202
203 #else
204
205 # define XML_T(x) x
206 # define XML_L(x) x
207
208 #endif
209
/* Round up n to be a multiple of sz, where sz is a power of 2.
   NOTE: sz is expanded twice, so it must be free of side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: yields (p - q) when both
   pointers are non-NULL and 0 otherwise.
   NOTE: p and q are each expanded twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* Yields a when a < b and b otherwise.
   NOTE: the selected argument is evaluated twice, so avoid arguments with
   side effects. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
217
218 #include "internal.h"
219 #include "xmltok.h"
220 #include "xmlrole.h"
221
222 typedef const XML_Char *KEY;
223
224 typedef struct {
225 KEY name;
226 } NAMED;
227
228 typedef struct {
229 NAMED **v;
230 unsigned char power;
231 size_t size;
232 size_t used;
233 XML_Parser parser;
234 } HASH_TABLE;
235
236 static size_t keylen(KEY s);
237
238 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
239
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
/* Second hash: the bits of `hash` outside `mask`, shifted right by
   (power - 1) and then limited to (mask >> 2). */
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
/* Probe step: the second hash with its lowest bit forced to 1 (i.e. odd).
   NOTE: the result is cast to unsigned char, so only the bits that fit an
   unsigned char survive and the step never exceeds UCHAR_MAX. */
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
252
253 typedef struct {
254 NAMED **p;
255 NAMED **end;
256 } HASH_TABLE_ITER;
257
258 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
259 #define INIT_DATA_BUF_SIZE 1024
260 #define INIT_ATTS_SIZE 16
261 #define INIT_ATTS_VERSION 0xFFFFFFFF
262 #define INIT_BLOCK_SIZE 1024
263 #define INIT_BUFFER_SIZE 1024
264
265 #define EXPAND_SPARE 24
266
267 typedef struct binding {
268 struct prefix *prefix;
269 struct binding *nextTagBinding;
270 struct binding *prevPrefixBinding;
271 const struct attribute_id *attId;
272 XML_Char *uri;
273 int uriLen;
274 int uriAlloc;
275 } BINDING;
276
277 typedef struct prefix {
278 const XML_Char *name;
279 BINDING *binding;
280 } PREFIX;
281
282 typedef struct {
283 const XML_Char *str;
284 const XML_Char *localPart;
285 const XML_Char *prefix;
286 int strLen;
287 int uriLen;
288 int prefixLen;
289 } TAG_NAME;
290
291 /* TAG represents an open element.
292 The name of the element is stored in both the document and API
293 encodings. The memory buffer 'buf' is a separately-allocated
294 memory area which stores the name. During the XML_Parse()/
295 XML_ParseBuffer() when the element is open, the memory for the 'raw'
296 version of the name (in the document encoding) is shared with the
297 document buffer. If the element is open across calls to
298 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
299 contain the 'raw' name as well.
300
301 A parser reuses these structures, maintaining a list of allocated
302 TAG objects in a free list.
303 */
304 typedef struct tag {
305 struct tag *parent; /* parent of this element */
306 const char *rawName; /* tagName in the original encoding */
307 int rawNameLength;
308 TAG_NAME name; /* tagName in the API encoding */
309 union {
310 char *raw; /* for byte-level access (rawName storage) */
311 XML_Char *str; /* for character-level access (converted name) */
312 } buf; /* buffer for name components */
313 char *bufEnd; /* end of the buffer */
314 BINDING *bindings;
315 } TAG;
316
317 typedef struct {
318 const XML_Char *name;
319 const XML_Char *textPtr;
320 int textLen; /* length in XML_Chars */
321 int processed; /* # of processed bytes - when suspended */
322 const XML_Char *systemId;
323 const XML_Char *base;
324 const XML_Char *publicId;
325 const XML_Char *notation;
326 XML_Bool open;
327 XML_Bool hasMore; /* true if entity has not been completely processed */
328 /* An entity can be open while being already completely processed (hasMore ==
329 XML_FALSE). The reason is the delayed closing of entities until their inner
330 entities are processed and closed */
331 XML_Bool is_param;
332 XML_Bool is_internal; /* true if declared in internal subset outside PE */
333 } ENTITY;
334
335 typedef struct {
336 enum XML_Content_Type type;
337 enum XML_Content_Quant quant;
338 const XML_Char *name;
339 int firstchild;
340 int lastchild;
341 int childcnt;
342 int nextsib;
343 } CONTENT_SCAFFOLD;
344
345 #define INIT_SCAFFOLD_ELEMENTS 32
346
347 typedef struct block {
348 struct block *next;
349 int size;
350 XML_Char s[];
351 } BLOCK;
352
/* A pool of XML_Char strings. The string currently being built occupies
   [start, ptr); it is manipulated through the pool* macros and functions
   declared further down in this file. */
typedef struct {
  /* NOTE(review): presumably the list of blocks currently holding strings
     -- confirm against poolGrow()/poolClear(). */
  BLOCK *blocks;
  /* NOTE(review): presumably blocks kept around for reuse -- confirm
     against poolClear(). */
  BLOCK *freeBlocks;
  /* poolAppendChar() calls poolGrow() once ptr has reached end */
  const XML_Char *end;
  /* next write position: poolAppendChar() stores here and advances it,
     poolChop() steps it back by one, poolDiscard() resets it to start */
  XML_Char *ptr;
  /* start of the string being built; poolFinish() moves it up to ptr */
  XML_Char *start;
  XML_Parser parser;
} STRING_POOL;
361
362 /* The XML_Char before the name is used to determine whether
363 an attribute has been specified. */
364 typedef struct attribute_id {
365 XML_Char *name;
366 PREFIX *prefix;
367 XML_Bool maybeTokenized;
368 XML_Bool xmlns;
369 } ATTRIBUTE_ID;
370
371 typedef struct {
372 const ATTRIBUTE_ID *id;
373 XML_Bool isCdata;
374 const XML_Char *value;
375 } DEFAULT_ATTRIBUTE;
376
377 typedef struct {
378 unsigned long version;
379 unsigned long hash;
380 const XML_Char *uriName;
381 } NS_ATT;
382
383 typedef struct {
384 const XML_Char *name;
385 PREFIX *prefix;
386 const ATTRIBUTE_ID *idAtt;
387 int nDefaultAtts;
388 int allocDefaultAtts;
389 DEFAULT_ATTRIBUTE *defaultAtts;
390 HASH_TABLE defaultAttsNames;
391 } ELEMENT_TYPE;
392
393 typedef struct {
394 HASH_TABLE generalEntities;
395 HASH_TABLE elementTypes;
396 HASH_TABLE attributeIds;
397 HASH_TABLE prefixes;
398 STRING_POOL pool;
399 STRING_POOL entityValuePool;
400 /* false once a parameter entity reference has been skipped */
401 XML_Bool keepProcessing;
402 /* true once an internal or external PE reference has been encountered;
403 this includes the reference to an external subset */
404 XML_Bool hasParamEntityRefs;
405 XML_Bool standalone;
406 #ifdef XML_DTD
407 /* indicates if external PE has been read */
408 XML_Bool paramEntityRead;
409 HASH_TABLE paramEntities;
410 #endif /* XML_DTD */
411 PREFIX defaultPrefix;
412 /* === scaffolding for building content model === */
413 XML_Bool in_eldecl;
414 CONTENT_SCAFFOLD *scaffold;
415 unsigned contentStringLen;
416 unsigned scaffSize;
417 unsigned scaffCount;
418 int scaffLevel;
419 int *scaffIndex;
420 } DTD;
421
422 enum EntityType {
423 ENTITY_INTERNAL,
424 ENTITY_ATTRIBUTE,
425 ENTITY_VALUE,
426 };
427
428 typedef struct open_internal_entity {
429 const char *internalEventPtr;
430 const char *internalEventEndPtr;
431 struct open_internal_entity *next;
432 ENTITY *entity;
433 int startTagLevel;
434 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
435 enum EntityType type;
436 } OPEN_INTERNAL_ENTITY;
437
438 enum XML_Account {
439 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
440 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
441 expansion */
442 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
443 };
444
445 #if XML_GE == 1
446 typedef unsigned long long XmlBigCount;
447 typedef struct accounting {
448 XmlBigCount countBytesDirect;
449 XmlBigCount countBytesIndirect;
450 unsigned long debugLevel;
451 float maximumAmplificationFactor; // >=1.0
452 unsigned long long activationThresholdBytes;
453 } ACCOUNTING;
454
/* Heap allocation bookkeeping. expat_malloc() reads and updates the instance
   stored in the root parser (m_alloc_tracker). */
typedef struct MALLOC_TRACKER {
  // Running total of tracked bytes; expat_malloc() adds the requested size
  // plus sizeof(size_t) + EXPAT_MALLOC_PADDING for every allocation.
  XmlBigCount bytesAllocated;
  XmlBigCount peakBytesAllocated; // updated live only for debug level >=2
  // >=1: rejected increases are reported to stderr;
  // >=2: every successful expat_malloc() is reported as well.
  unsigned long debugLevel;
  // Upper bound for (would-be total / m_accounting.countBytesDirect).
  float maximumAmplificationFactor; // >=1.0
  // The amplification limit is only checked once the would-be total is
  // greater than or equal to this value.
  XmlBigCount activationThresholdBytes;
} MALLOC_TRACKER;
462
463 typedef struct entity_stats {
464 unsigned int countEverOpened;
465 unsigned int currentDepth;
466 unsigned int maximumDepthSeen;
467 unsigned long debugLevel;
468 } ENTITY_STATS;
469 #endif /* XML_GE == 1 */
470
471 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
472 const char *end, const char **endPtr);
473
474 static Processor prologProcessor;
475 static Processor prologInitProcessor;
476 static Processor contentProcessor;
477 static Processor cdataSectionProcessor;
478 #ifdef XML_DTD
479 static Processor ignoreSectionProcessor;
480 static Processor externalParEntProcessor;
481 static Processor externalParEntInitProcessor;
482 static Processor entityValueProcessor;
483 static Processor entityValueInitProcessor;
484 #endif /* XML_DTD */
485 static Processor epilogProcessor;
486 static Processor errorProcessor;
487 static Processor externalEntityInitProcessor;
488 static Processor externalEntityInitProcessor2;
489 static Processor externalEntityInitProcessor3;
490 static Processor externalEntityContentProcessor;
491 static Processor internalEntityProcessor;
492
493 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
494 const XML_Char *encodingName);
495 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
496 const char *s, const char *next);
497 static enum XML_Error initializeEncoding(XML_Parser parser);
498 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
499 const char *s, const char *end, int tok,
500 const char *next, const char **nextPtr,
501 XML_Bool haveMore, XML_Bool allowClosingDoctype,
502 enum XML_Account account);
503 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
504 XML_Bool betweenDecl, enum EntityType type);
505 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
506 const ENCODING *enc, const char *start,
507 const char *end, const char **endPtr,
508 XML_Bool haveMore, enum XML_Account account);
509 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
510 const char **startPtr, const char *end,
511 const char **nextPtr, XML_Bool haveMore,
512 enum XML_Account account);
513 #ifdef XML_DTD
514 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
515 const char **startPtr, const char *end,
516 const char **nextPtr, XML_Bool haveMore);
517 #endif /* XML_DTD */
518
519 static void freeBindings(XML_Parser parser, BINDING *bindings);
520 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
521 const char *attStr, TAG_NAME *tagNamePtr,
522 BINDING **bindingsPtr,
523 enum XML_Account account);
524 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
525 const ATTRIBUTE_ID *attId, const XML_Char *uri,
526 BINDING **bindingsPtr);
527 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
528 XML_Bool isCdata, XML_Bool isId,
529 const XML_Char *value, XML_Parser parser);
530 static enum XML_Error storeAttributeValue(XML_Parser parser,
531 const ENCODING *enc, XML_Bool isCdata,
532 const char *ptr, const char *end,
533 STRING_POOL *pool,
534 enum XML_Account account);
535 static enum XML_Error
536 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
537 const char *ptr, const char *end, STRING_POOL *pool,
538 enum XML_Account account, const char **nextPtr);
539 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
540 const char *start, const char *end);
541 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
542 #if XML_GE == 1
543 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
544 const char *start, const char *end,
545 enum XML_Account account,
546 const char **nextPtr);
547 static enum XML_Error callStoreEntityValue(XML_Parser parser,
548 const ENCODING *enc,
549 const char *start, const char *end,
550 enum XML_Account account);
551 #else
552 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
553 #endif
554 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
555 const char *start, const char *end);
556 static int reportComment(XML_Parser parser, const ENCODING *enc,
557 const char *start, const char *end);
558 static void reportDefault(XML_Parser parser, const ENCODING *enc,
559 const char *start, const char *end);
560
561 static const XML_Char *getContext(XML_Parser parser);
562 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
563
564 static void FASTCALL normalizePublicId(XML_Char *s);
565
566 static DTD *dtdCreate(XML_Parser parser);
567 /* do not call if m_parentParser != NULL */
568 static void dtdReset(DTD *p, XML_Parser parser);
569 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser);
570 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
571 XML_Parser parser);
572 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
573 STRING_POOL *newPool, const HASH_TABLE *oldTable);
574 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
575 size_t createSize);
576 static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser);
577 static void FASTCALL hashTableClear(HASH_TABLE *table);
578 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
579 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
580 const HASH_TABLE *table);
581 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
582
583 static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser);
584 static void FASTCALL poolClear(STRING_POOL *pool);
585 static void FASTCALL poolDestroy(STRING_POOL *pool);
586 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
587 const char *ptr, const char *end);
588 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
589 const char *ptr, const char *end);
590 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
591 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
592 const XML_Char *s);
593 static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool,
594 const XML_Char *s);
595 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
596 int n);
597 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
598 const XML_Char *s);
599
600 static int FASTCALL nextScaffoldPart(XML_Parser parser);
601 static XML_Content *build_model(XML_Parser parser);
602 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
603 const char *ptr, const char *end);
604
605 static XML_Char *copyString(const XML_Char *s, XML_Parser parser);
606
607 static struct sipkey generate_hash_secret_salt(void);
608 static XML_Bool startParsing(XML_Parser parser);
609
610 static XML_Parser parserCreate(const XML_Char *encodingName,
611 const XML_Memory_Handling_Suite *memsuite,
612 const XML_Char *nameSep, DTD *dtd,
613 XML_Parser parentParser);
614
615 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
616
617 #if XML_GE == 1
618 static float accountingGetCurrentAmplification(XML_Parser rootParser);
619 static void accountingReportStats(XML_Parser originParser, const char *epilog);
620 static void accountingOnAbort(XML_Parser originParser);
621 static void accountingReportDiff(XML_Parser rootParser,
622 unsigned int levelsAwayFromRootParser,
623 const char *before, const char *after,
624 ptrdiff_t bytesMore, int source_line,
625 enum XML_Account account);
626 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
627 const char *before, const char *after,
628 int source_line,
629 enum XML_Account account);
630
631 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
632 const char *action, int sourceLine);
633 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
634 int sourceLine);
635 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
636 int sourceLine);
637 #endif /* XML_GE == 1 */
638
639 static XML_Parser getRootParserOf(XML_Parser parser,
640 unsigned int *outLevelDiff);
641
642 static unsigned long getDebugLevel(const char *variableName,
643 unsigned long defaultDebugLevel);
644
/* Small helpers operating on a STRING_POOL. Every macro takes a pointer to
   the pool; the pointer expression may be expanded more than once, so it
   must be free of side effects. All arguments are fully parenthesized so
   that expressions such as `&pool` can be passed safely. */

/* Start of the string currently being built. */
#define poolStart(pool) ((pool)->start)
/* Length (in XML_Chars) of the string currently being built. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the most recently appended XML_Char. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended XML_Char (the element just before ptr). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string being built: rewind the write position to its start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the string being built: the next string starts at the write
   position. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c to the string being built. When the pool is full (ptr == end),
   poolGrow() is called first. Yields 1 on success and 0 if poolGrow()
   failed, in which case nothing is appended. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
655
656 #if ! defined(XML_TESTING)
657 const
658 #endif
659 XML_Bool g_reparseDeferralEnabledDefault
660 = XML_TRUE; // write ONLY in runtests.c
661 #if defined(XML_TESTING)
662 unsigned int g_bytesScanned = 0; // used for testing only
663 #endif
664
665 struct XML_ParserStruct {
666 /* The first member must be m_userData so that the XML_GetUserData
667 macro works. */
668 void *m_userData;
669 void *m_handlerArg;
670
671 // How the four parse buffer pointers below relate in time and space:
672 //
673 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
674 // | | | |
675 // <--parsed-->| | |
676 // <---parsing--->| |
677 // <--unoccupied-->|
678 // <---------total-malloced/realloced-------->|
679
680 char *m_buffer; // malloc/realloc base pointer of parse buffer
681 const XML_Memory_Handling_Suite m_mem;
682 const char *m_bufferPtr; // first character to be parsed
683 char *m_bufferEnd; // past last character to be parsed
684 const char *m_bufferLim; // allocated end of m_buffer
685
686 XML_Index m_parseEndByteIndex;
687 const char *m_parseEndPtr;
688 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
689 XML_Bool m_reparseDeferralEnabled;
690 int m_lastBufferRequestSize;
691 XML_Char *m_dataBuf;
692 XML_Char *m_dataBufEnd;
693 XML_StartElementHandler m_startElementHandler;
694 XML_EndElementHandler m_endElementHandler;
695 XML_CharacterDataHandler m_characterDataHandler;
696 XML_ProcessingInstructionHandler m_processingInstructionHandler;
697 XML_CommentHandler m_commentHandler;
698 XML_StartCdataSectionHandler m_startCdataSectionHandler;
699 XML_EndCdataSectionHandler m_endCdataSectionHandler;
700 XML_DefaultHandler m_defaultHandler;
701 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
702 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
703 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
704 XML_NotationDeclHandler m_notationDeclHandler;
705 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
706 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
707 XML_NotStandaloneHandler m_notStandaloneHandler;
708 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
709 XML_Parser m_externalEntityRefHandlerArg;
710 XML_SkippedEntityHandler m_skippedEntityHandler;
711 XML_UnknownEncodingHandler m_unknownEncodingHandler;
712 XML_ElementDeclHandler m_elementDeclHandler;
713 XML_AttlistDeclHandler m_attlistDeclHandler;
714 XML_EntityDeclHandler m_entityDeclHandler;
715 XML_XmlDeclHandler m_xmlDeclHandler;
716 const ENCODING *m_encoding;
717 INIT_ENCODING m_initEncoding;
718 const ENCODING *m_internalEncoding;
719 const XML_Char *m_protocolEncodingName;
720 XML_Bool m_ns;
721 XML_Bool m_ns_triplets;
722 void *m_unknownEncodingMem;
723 void *m_unknownEncodingData;
724 void *m_unknownEncodingHandlerData;
725 void(XMLCALL *m_unknownEncodingRelease)(void *);
726 PROLOG_STATE m_prologState;
727 Processor *m_processor;
728 enum XML_Error m_errorCode;
729 const char *m_eventPtr;
730 const char *m_eventEndPtr;
731 const char *m_positionPtr;
732 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
733 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
734 OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
735 OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
736 OPEN_INTERNAL_ENTITY *m_openValueEntities;
737 OPEN_INTERNAL_ENTITY *m_freeValueEntities;
738 XML_Bool m_defaultExpandInternalEntities;
739 int m_tagLevel;
740 ENTITY *m_declEntity;
741 const XML_Char *m_doctypeName;
742 const XML_Char *m_doctypeSysid;
743 const XML_Char *m_doctypePubid;
744 const XML_Char *m_declAttributeType;
745 const XML_Char *m_declNotationName;
746 const XML_Char *m_declNotationPublicId;
747 ELEMENT_TYPE *m_declElementType;
748 ATTRIBUTE_ID *m_declAttributeId;
749 XML_Bool m_declAttributeIsCdata;
750 XML_Bool m_declAttributeIsId;
751 DTD *m_dtd;
752 const XML_Char *m_curBase;
753 TAG *m_tagStack;
754 TAG *m_freeTagList;
755 BINDING *m_inheritedBindings;
756 BINDING *m_freeBindingList;
757 int m_attsSize;
758 int m_nSpecifiedAtts;
759 int m_idAttIndex;
760 ATTRIBUTE *m_atts;
761 NS_ATT *m_nsAtts;
762 unsigned long m_nsAttsVersion;
763 unsigned char m_nsAttsPower;
764 #ifdef XML_ATTR_INFO
765 XML_AttrInfo *m_attInfo;
766 #endif
767 POSITION m_position;
768 STRING_POOL m_tempPool;
769 STRING_POOL m_temp2Pool;
770 char *m_groupConnector;
771 unsigned int m_groupSize;
772 XML_Char m_namespaceSeparator;
773 XML_Parser m_parentParser;
774 XML_ParsingStatus m_parsingStatus;
775 #ifdef XML_DTD
776 XML_Bool m_isParamEntity;
777 XML_Bool m_useForeignDTD;
778 enum XML_ParamEntityParsing m_paramEntityParsing;
779 #endif
780 struct sipkey m_hash_secret_salt_128;
781 XML_Bool m_hash_secret_salt_set;
782 #if XML_GE == 1
783 ACCOUNTING m_accounting;
784 MALLOC_TRACKER m_alloc_tracker;
785 ENTITY_STATS m_entity_stats;
786 #endif
787 XML_Bool m_reenter;
788 };
789
/* Allocation wrappers used throughout this file.
   - With XML_GE == 1, requests are routed through expat_malloc(),
     expat_realloc() and expat_free(), passing the call site's __LINE__.
   - Otherwise the parser's memory suite (m_mem) is called directly.
   `parser` must be a pointer through which m_mem is reachable; it is fully
   parenthesized so that any pointer expression (e.g. `&obj`) can be
   passed. */
#if XML_GE == 1
#  define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__))
#  define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__))
#  define FREE(parser, p) (expat_free((parser), (p), __LINE__))
#else
#  define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#  define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#  define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
#endif
799
800 #if XML_GE == 1
801 static void
expat_heap_stat(XML_Parser rootParser,char operator,XmlBigCount absDiff,XmlBigCount newTotal,XmlBigCount peakTotal,int sourceLine)802 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff,
803 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) {
804 // NOTE: This can be +infinity or -nan
805 const float amplification
806 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
807 fprintf(
808 stderr,
809 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL(
810 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n",
811 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator,
812 absDiff, newTotal, peakTotal, (double)amplification, sourceLine);
813 }
814
815 static bool
expat_heap_increase_tolerable(XML_Parser rootParser,XmlBigCount increase,int sourceLine)816 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase,
817 int sourceLine) {
818 assert(rootParser != NULL);
819 assert(increase > 0);
820
821 XmlBigCount newTotal = 0;
822 bool tolerable = true;
823
824 // Detect integer overflow
825 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) {
826 tolerable = false;
827 } else {
828 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase;
829
830 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) {
831 assert(newTotal > 0);
832 // NOTE: This can be +infinity when dividing by zero but not -nan
833 const float amplification
834 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
835 if (amplification
836 > rootParser->m_alloc_tracker.maximumAmplificationFactor) {
837 tolerable = false;
838 }
839 }
840 }
841
842 if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) {
843 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine);
844 }
845
846 return tolerable;
847 }
848
849 # if defined(XML_TESTING)
850 void *
851 # else
852 static void *
853 # endif
expat_malloc(XML_Parser parser,size_t size,int sourceLine)854 expat_malloc(XML_Parser parser, size_t size, int sourceLine) {
855 // Detect integer overflow
856 if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) {
857 return NULL;
858 }
859
860 const XML_Parser rootParser = getRootParserOf(parser, NULL);
861 assert(rootParser->m_parentParser == NULL);
862
863 const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size;
864
865 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
866 < bytesToAllocate) {
867 return NULL; // i.e. signal integer overflow as out-of-memory
868 }
869
870 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate,
871 sourceLine)) {
872 return NULL; // i.e. signal violation as out-of-memory
873 }
874
875 // Actually allocate
876 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate);
877
878 if (mallocedPtr == NULL) {
879 return NULL;
880 }
881
882 // Update in-block recorded size
883 *(size_t *)mallocedPtr = size;
884
885 // Update accounting
886 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate;
887
888 // Report as needed
889 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
890 if (rootParser->m_alloc_tracker.bytesAllocated
891 > rootParser->m_alloc_tracker.peakBytesAllocated) {
892 rootParser->m_alloc_tracker.peakBytesAllocated
893 = rootParser->m_alloc_tracker.bytesAllocated;
894 }
895 expat_heap_stat(rootParser, '+', bytesToAllocate,
896 rootParser->m_alloc_tracker.bytesAllocated,
897 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
898 }
899
900 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
901 }
902
903 # if defined(XML_TESTING)
904 void
905 # else
906 static void
907 # endif
expat_free(XML_Parser parser,void * ptr,int sourceLine)908 expat_free(XML_Parser parser, void *ptr, int sourceLine) {
909 assert(parser != NULL);
910
911 if (ptr == NULL) {
912 return;
913 }
914
915 const XML_Parser rootParser = getRootParserOf(parser, NULL);
916 assert(rootParser->m_parentParser == NULL);
917
918 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and
919 // the original pointer returned by malloc/realloc
920 void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
921 const size_t bytesAllocated
922 = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr;
923
924 // Update accounting
925 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated);
926 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated;
927
928 // Report as needed
929 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
930 expat_heap_stat(rootParser, '-', bytesAllocated,
931 rootParser->m_alloc_tracker.bytesAllocated,
932 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
933 }
934
935 // NOTE: This may be freeing rootParser, so freeing has to come last
936 parser->m_mem.free_fcn(mallocedPtr);
937 }
938
939 # if defined(XML_TESTING)
940 void *
941 # else
942 static void *
943 # endif
expat_realloc(XML_Parser parser,void * ptr,size_t size,int sourceLine)944 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) {
945 assert(parser != NULL);
946
947 if (ptr == NULL) {
948 return expat_malloc(parser, size, sourceLine);
949 }
950
951 if (size == 0) {
952 expat_free(parser, ptr, sourceLine);
953 return NULL;
954 }
955
956 const XML_Parser rootParser = getRootParserOf(parser, NULL);
957 assert(rootParser->m_parentParser == NULL);
958
959 // Extract original size (to the eyes of the caller) and the original
960 // pointer returned by malloc/realloc
961 void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
962 const size_t prevSize = *(size_t *)mallocedPtr;
963
964 // Classify upcoming change
965 const bool isIncrease = (size > prevSize);
966 const size_t absDiff
967 = (size > prevSize) ? (size - prevSize) : (prevSize - size);
968
969 // Ask for permission from accounting
970 if (isIncrease) {
971 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) {
972 return NULL; // i.e. signal violation as out-of-memory
973 }
974 }
975
976 // NOTE: Integer overflow detection has already been done for us
977 // by expat_heap_increase_tolerable(..) above
978 assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size);
979
980 // Actually allocate
981 mallocedPtr = parser->m_mem.realloc_fcn(
982 mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size);
983
984 if (mallocedPtr == NULL) {
985 return NULL;
986 }
987
988 // Update accounting
989 if (isIncrease) {
990 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
991 >= absDiff);
992 rootParser->m_alloc_tracker.bytesAllocated += absDiff;
993 } else { // i.e. decrease
994 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff);
995 rootParser->m_alloc_tracker.bytesAllocated -= absDiff;
996 }
997
998 // Report as needed
999 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
1000 if (rootParser->m_alloc_tracker.bytesAllocated
1001 > rootParser->m_alloc_tracker.peakBytesAllocated) {
1002 rootParser->m_alloc_tracker.peakBytesAllocated
1003 = rootParser->m_alloc_tracker.bytesAllocated;
1004 }
1005 expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff,
1006 rootParser->m_alloc_tracker.bytesAllocated,
1007 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
1008 }
1009
1010 // Update in-block recorded size
1011 *(size_t *)mallocedPtr = size;
1012
1013 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
1014 }
1015 #endif // XML_GE == 1
1016
1017 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)1018 XML_ParserCreate(const XML_Char *encodingName) {
1019 return XML_ParserCreate_MM(encodingName, NULL, NULL);
1020 }
1021
1022 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)1023 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
1024 XML_Char tmp[2] = {nsSep, 0};
1025 return XML_ParserCreate_MM(encodingName, NULL, tmp);
1026 }
1027
/* Zero-terminated context string that startParsing() hands to setContext()
   for parsers with m_ns set. It is spelled out one character at a time
   using the ASCII_* constants and reads: */
// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
    = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
       ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
       ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
       ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
       ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
       ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
       ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
       ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
       '\0'};
1039
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)

/* Derive a small amount of entropy from the current time. Only compiled
   when neither arc4random provider is available; used by the low quality
   fallback of generate_hash_secret_salt(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME fileTime;

  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#  else
  struct timeval now;
  const int status = gettimeofday(&now, NULL);

  assert(status == 0);
  (void)status; /* assert() expands to nothing when NDEBUG is defined */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}

#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1066
1067 static struct sipkey
ENTROPY_DEBUG(const char * label,struct sipkey entropy_128)1068 ENTROPY_DEBUG(const char *label, struct sipkey entropy_128) {
1069 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
1070 fprintf(stderr,
1071 "expat: Entropy: %s --> [0x" EXPAT_FMT_LLX(
1072 "016") ", 0x" EXPAT_FMT_LLX("016") "] (16 bytes)\n",
1073 label, (unsigned long long)entropy_128.k[0],
1074 (unsigned long long)entropy_128.k[1]);
1075 }
1076 return entropy_128;
1077 }
1078
/* Produce the 128-bit salt used for the parser's hash functions.
 *
 * Which providers are attempted is decided at compile time:
 *  - with HAVE_ARC4RANDOM_BUF or HAVE_ARC4RANDOM, that provider is used
 *    unconditionally and nothing else is compiled in;
 *  - otherwise one of rand_s (Windows), getentropy or getrandom is tried
 *    (whichever the build selected), then /dev/urandom when XML_DEV_URANDOM
 *    is defined on non-Windows, and finally a low quality fallback built
 *    from the time and, except on WASI, the process ID.
 *
 * Every return goes through ENTROPY_DEBUG() with a label naming the
 * provider that supplied the bytes.
 */
static struct sipkey
generate_hash_secret_salt(void) {
  struct sipkey entropy;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  writeRandomBytes_arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s(&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETENTROPY)
  if (writeRandomBytes_getentropy(&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getentropy", entropy);
  }
  /* NOTE(review): presumably clears an errno value left behind by the
     failed getentropy attempt -- confirm against the helper */
  errno = 0;
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock(&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom(&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Only the second half of the key carries any entropy in the fallback */
  entropy.k[0] = 0;
  entropy.k[1] = gather_time_entropy();
#  if ! defined(__wasi__)
  /* Process ID is 0 bits entropy if attacker has local access */
  entropy.k[1] ^= getpid();
#  endif

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
  if (sizeof(unsigned long) == 4) {
    entropy.k[1] *= 2147483647;
    return ENTROPY_DEBUG("fallback(4)", entropy);
  } else {
    entropy.k[1] *= 2305843009213693951ULL;
    return ENTROPY_DEBUG("fallback(8)", entropy);
  }
#endif
}
1130
1131 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1132 callProcessor(XML_Parser parser, const char *start, const char *end,
1133 const char **endPtr) {
1134 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1135
1136 if (parser->m_reparseDeferralEnabled
1137 && ! parser->m_parsingStatus.finalBuffer) {
1138 // Heuristic: don't try to parse a partial token again until the amount of
1139 // available data has increased significantly.
1140 const size_t had_before = parser->m_partialTokenBytesBefore;
1141 // ...but *do* try anyway if we're close to causing a reallocation.
1142 size_t available_buffer
1143 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1144 #if XML_CONTEXT_BYTES > 0
1145 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1146 #endif
1147 available_buffer
1148 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1149 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1150 const bool enough
1151 = (have_now >= 2 * had_before)
1152 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1153
1154 if (! enough) {
1155 *endPtr = start; // callers may expect this to be set
1156 return XML_ERROR_NONE;
1157 }
1158 }
1159 #if defined(XML_TESTING)
1160 g_bytesScanned += (unsigned)have_now;
1161 #endif
1162 // Run in a loop to eliminate dangerous recursion depths
1163 enum XML_Error ret;
1164 *endPtr = start;
1165 while (1) {
1166 // Use endPtr as the new start in each iteration, since it will
1167 // be set to the next start point by m_processor.
1168 ret = parser->m_processor(parser, *endPtr, end, endPtr);
1169
1170 // Make parsing status (and in particular XML_SUSPENDED) take
1171 // precedence over re-enter flag when they disagree
1172 if (parser->m_parsingStatus.parsing != XML_PARSING) {
1173 parser->m_reenter = XML_FALSE;
1174 }
1175
1176 if (! parser->m_reenter) {
1177 break;
1178 }
1179
1180 parser->m_reenter = XML_FALSE;
1181 if (ret != XML_ERROR_NONE)
1182 return ret;
1183 }
1184
1185 if (ret == XML_ERROR_NONE) {
1186 // if we consumed nothing, remember what we had on this parse attempt.
1187 if (*endPtr == start) {
1188 parser->m_partialTokenBytesBefore = have_now;
1189 } else {
1190 parser->m_partialTokenBytesBefore = 0;
1191 }
1192 }
1193 return ret;
1194 }
1195
1196 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1197 startParsing(XML_Parser parser) {
1198 /* hash functions must be initialized before setContext() is called */
1199 if (parser->m_hash_secret_salt_set != XML_TRUE) {
1200 parser->m_hash_secret_salt_128 = generate_hash_secret_salt();
1201 parser->m_hash_secret_salt_set = XML_TRUE;
1202 }
1203 if (parser->m_ns) {
1204 /* implicit context only set for root parser, since child
1205 parsers (i.e. external entity parsers) will inherit it
1206 */
1207 return setContext(parser, implicitContext);
1208 }
1209 return XML_TRUE;
1210 }
1211
1212 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1213 XML_ParserCreate_MM(const XML_Char *encodingName,
1214 const XML_Memory_Handling_Suite *memsuite,
1215 const XML_Char *nameSep) {
1216 return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
1217 }
1218
/* Allocate and initialize a parser structure.
 *
 * encodingName  - optional encoding name; copied by parserInit().
 * memsuite      - optional allocator functions; when NULL the C library's
 *                 malloc, realloc and free are installed instead.
 * nameSep       - when non-NULL, namespace processing is switched on and
 *                 *nameSep becomes the namespace separator.
 * dtd           - when non-NULL, used as the parser's DTD as is; otherwise
 *                 a fresh one is obtained from dtdCreate().
 * parentParser  - when non-NULL and XML_GE == 1, the new parser is linked
 *                 to it and its memory is accounted for on the root parser.
 *
 * Returns the new parser, or NULL when the allocation tracker refuses the
 * allocation, when any allocation fails, or when encodingName could not be
 * copied.  Everything allocated up to the point of failure is released.
 */
static XML_Parser
parserCreate(const XML_Char *encodingName,
             const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
             DTD *dtd, XML_Parser parentParser) {
  XML_Parser parser = NULL;

#if XML_GE == 1
  // Size of the parser block including the header that expat_free() and
  // expat_realloc() expect in front of every tracked allocation
  const size_t increase
      = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);

  // A child parser must get clearance from the root parser's tracker
  // before its structure is allocated
  if (parentParser != NULL) {
    const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
    if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
      return NULL;
    }
  }
#else
  UNUSED_P(parentParser);
#endif

  // The structure itself cannot be allocated through MALLOC() because that
  // macro reads parser->m_mem, which does not exist yet; the block layout
  // of expat_malloc() (size header, padding, payload) is replicated by hand.
  if (memsuite) {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser
        = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
                               + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      // The cast allows writing to m_mem through a non-const pointer
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = memsuite->malloc_fcn;
      mtemp->realloc_fcn = memsuite->realloc_fcn;
      mtemp->free_fcn = memsuite->free_fcn;
    }
  } else {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
                                       + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = malloc(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = malloc;
      mtemp->realloc_fcn = realloc;
      mtemp->free_fcn = free;
    }
  } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0

  if (! parser)
    return parser;

#if XML_GE == 1
  // Initialize .m_alloc_tracker
  memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
  if (parentParser == NULL) {
    // Only a root parser gets tracker settings of its own
    parser->m_alloc_tracker.debugLevel
        = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
    parser->m_alloc_tracker.maximumAmplificationFactor
        = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
    parser->m_alloc_tracker.activationThresholdBytes
        = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;

    // NOTE: This initialization needs to come this early because these fields
    //       are read by allocation tracking code
    parser->m_parentParser = NULL;
    parser->m_accounting.countBytesDirect = 0;
  } else {
    parser->m_parentParser = parentParser;
  }

  // Record XML_ParserStruct allocation we did a few lines up before
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(rootParser->m_parentParser == NULL);
  assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
  rootParser->m_alloc_tracker.bytesAllocated += increase;

  // Report on allocation
  if (rootParser->m_alloc_tracker.debugLevel >= 2) {
    if (rootParser->m_alloc_tracker.bytesAllocated
        > rootParser->m_alloc_tracker.peakBytesAllocated) {
      rootParser->m_alloc_tracker.peakBytesAllocated
          = rootParser->m_alloc_tracker.bytesAllocated;
    }

    expat_heap_stat(rootParser, '+', increase,
                    rootParser->m_alloc_tracker.bytesAllocated,
                    rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
  }
#else
  parser->m_parentParser = NULL;
#endif // XML_GE == 1

  // parserInit() further down copies m_buffer into m_bufferPtr and
  // m_bufferEnd, so m_buffer has to hold a defined value by then
  parser->m_buffer = NULL;
  parser->m_bufferLim = NULL;

  // From here on every failure path releases, by hand, exactly the
  // allocations made so far plus the parser structure itself
  parser->m_attsSize = INIT_ATTS_SIZE;
  parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
  if (parser->m_atts == NULL) {
    FREE(parser, parser);
    return NULL;
  }
#ifdef XML_ATTR_INFO
  parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
  if (parser->m_attInfo == NULL) {
    FREE(parser, parser->m_atts);
    FREE(parser, parser);
    return NULL;
  }
#endif
  parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
  if (parser->m_dataBuf == NULL) {
    FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
    FREE(parser, parser->m_attInfo);
#endif
    FREE(parser, parser);
    return NULL;
  }
  parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;

  // Use the DTD handed in by the caller, or create a fresh one
  if (dtd)
    parser->m_dtd = dtd;
  else {
    parser->m_dtd = dtdCreate(parser);
    if (parser->m_dtd == NULL) {
      FREE(parser, parser->m_dataBuf);
      FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
      FREE(parser, parser->m_attInfo);
#endif
      FREE(parser, parser);
      return NULL;
    }
  }

  // Fields below are set here only; parserInit() does not touch them
  parser->m_freeBindingList = NULL;
  parser->m_freeTagList = NULL;
  parser->m_freeInternalEntities = NULL;
  parser->m_freeAttributeEntities = NULL;
  parser->m_freeValueEntities = NULL;

  parser->m_groupSize = 0;
  parser->m_groupConnector = NULL;

  parser->m_unknownEncodingHandler = NULL;
  parser->m_unknownEncodingHandlerData = NULL;

  parser->m_namespaceSeparator = ASCII_EXCL;
  parser->m_ns = XML_FALSE;
  parser->m_ns_triplets = XML_FALSE;

  parser->m_nsAtts = NULL;
  parser->m_nsAttsVersion = 0;
  parser->m_nsAttsPower = 0;

  parser->m_protocolEncodingName = NULL;

  poolInit(&parser->m_tempPool, parser);
  poolInit(&parser->m_temp2Pool, parser);
  parserInit(parser, encodingName);

  // parserInit() leaves m_protocolEncodingName at NULL when copying a
  // non-NULL encodingName failed
  if (encodingName && ! parser->m_protocolEncodingName) {
    if (dtd) {
      // We need to stop the upcoming call to XML_ParserFree from happily
      // destroying parser->m_dtd because the DTD is shared with the parent
      // parser and the only guard that keeps XML_ParserFree from destroying
      // parser->m_dtd is parser->m_isParamEntity but it will be set to
      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
      parser->m_dtd = NULL;
    }
    XML_ParserFree(parser);
    return NULL;
  }

  // Namespace processing is enabled by the mere presence of a separator
  if (nameSep) {
    parser->m_ns = XML_TRUE;
    parser->m_internalEncoding = XmlGetInternalEncodingNS();
    parser->m_namespaceSeparator = *nameSep;
  } else {
    parser->m_internalEncoding = XmlGetInternalEncoding();
  }

  return parser;
}
1414
/* Put the per-parse state of parser into its initial condition. Called by
 * parserCreate() for a new parser and by XML_ParserReset() for reuse.
 *
 * When encodingName is non-NULL a copy made by copyString() is stored in
 * m_protocolEncodingName; the field is not written otherwise, so callers
 * set it to NULL beforehand.  A failed copy is not reported from here:
 * callers that care compare the field against NULL afterwards (as
 * parserCreate() does).
 *
 * m_buffer must hold a defined value on entry because m_bufferPtr and
 * m_bufferEnd are reset to it.
 */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, parser);
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);

  /* User data and all handlers start out unset */
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  /* The external entity ref handler argument defaults to the parser itself */
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;

  /* Input buffer is treated as empty; the buffer itself is kept */
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;
  parser->m_partialTokenBytesBefore = 0;
  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
  parser->m_lastBufferRequestSize = 0;

  /* Declaration and doctype bookkeeping */
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;

  /* Position, error and event state */
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;

  /* Lists of open entities and tags start out empty */
  parser->m_openInternalEntities = NULL;
  parser->m_openAttributeEntities = NULL;
  parser->m_openValueEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;
  parser->m_parsingStatus.parsing = XML_INITIALIZED;
  // Reentry can only be triggered inside m_processor calls
  parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  /* The hash salt is marked as unset; startParsing() generates one when
     it finds m_hash_secret_salt_set not equal to XML_TRUE */
  parser->m_hash_secret_salt_128.k[0] = 0;
  parser->m_hash_secret_salt_128.k[1] = 0;
  parser->m_hash_secret_salt_set = XML_FALSE;

#if XML_GE == 1
  /* Accounting and entity statistics restart from zero, with the debug
     levels taken from getDebugLevel() and the limits from their defaults */
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}
1506
1507 /* moves list of bindings to m_freeBindingList */
1508 static void FASTCALL
1509 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1510 while (bindings) {
1511 BINDING *b = bindings;
1512 bindings = bindings->nextTagBinding;
1513 b->nextTagBinding = parser->m_freeBindingList;
1514 parser->m_freeBindingList = b;
1515 }
1516 }
1517
1518 XML_Bool XMLCALL
1519 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1520 TAG *tStk;
1521 OPEN_INTERNAL_ENTITY *openEntityList;
1522
1523 if (parser == NULL)
1524 return XML_FALSE;
1525
1526 if (parser->m_parentParser)
1527 return XML_FALSE;
1528 /* move m_tagStack to m_freeTagList */
1529 tStk = parser->m_tagStack;
1530 while (tStk) {
1531 TAG *tag = tStk;
1532 tStk = tStk->parent;
1533 tag->parent = parser->m_freeTagList;
1534 moveToFreeBindingList(parser, tag->bindings);
1535 tag->bindings = NULL;
1536 parser->m_freeTagList = tag;
1537 }
1538 /* move m_openInternalEntities to m_freeInternalEntities */
1539 openEntityList = parser->m_openInternalEntities;
1540 while (openEntityList) {
1541 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1542 openEntityList = openEntity->next;
1543 openEntity->next = parser->m_freeInternalEntities;
1544 parser->m_freeInternalEntities = openEntity;
1545 }
1546 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1547 * for attributes) */
1548 openEntityList = parser->m_openAttributeEntities;
1549 while (openEntityList) {
1550 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1551 openEntityList = openEntity->next;
1552 openEntity->next = parser->m_freeAttributeEntities;
1553 parser->m_freeAttributeEntities = openEntity;
1554 }
1555 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1556 * for value entities) */
1557 openEntityList = parser->m_openValueEntities;
1558 while (openEntityList) {
1559 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1560 openEntityList = openEntity->next;
1561 openEntity->next = parser->m_freeValueEntities;
1562 parser->m_freeValueEntities = openEntity;
1563 }
1564 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1565 FREE(parser, parser->m_unknownEncodingMem);
1566 if (parser->m_unknownEncodingRelease)
1567 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1568 poolClear(&parser->m_tempPool);
1569 poolClear(&parser->m_temp2Pool);
1570 FREE(parser, (void *)parser->m_protocolEncodingName);
1571 parser->m_protocolEncodingName = NULL;
1572 parserInit(parser, encodingName);
1573 dtdReset(parser->m_dtd, parser);
1574 return XML_TRUE;
1575 }
1576
1577 static XML_Bool
1578 parserBusy(XML_Parser parser) {
1579 switch (parser->m_parsingStatus.parsing) {
1580 case XML_PARSING:
1581 case XML_SUSPENDED:
1582 return XML_TRUE;
1583 case XML_INITIALIZED:
1584 case XML_FINISHED:
1585 default:
1586 return XML_FALSE;
1587 }
1588 }
1589
1590 enum XML_Status XMLCALL
1591 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1592 if (parser == NULL)
1593 return XML_STATUS_ERROR;
1594 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1595 XXX There's no way for the caller to determine which of the
1596 XXX possible error cases caused the XML_STATUS_ERROR return.
1597 */
1598 if (parserBusy(parser))
1599 return XML_STATUS_ERROR;
1600
1601 /* Get rid of any previous encoding name */
1602 FREE(parser, (void *)parser->m_protocolEncodingName);
1603
1604 if (encodingName == NULL)
1605 /* No new encoding name */
1606 parser->m_protocolEncodingName = NULL;
1607 else {
1608 /* Copy the new encoding name into allocated memory */
1609 parser->m_protocolEncodingName = copyString(encodingName, parser);
1610 if (! parser->m_protocolEncodingName)
1611 return XML_STATUS_ERROR;
1612 }
1613 return XML_STATUS_OK;
1614 }
1615
1616 XML_Parser XMLCALL
1617 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1618 const XML_Char *encodingName) {
1619 XML_Parser parser = oldParser;
1620 DTD *newDtd = NULL;
1621 DTD *oldDtd;
1622 XML_StartElementHandler oldStartElementHandler;
1623 XML_EndElementHandler oldEndElementHandler;
1624 XML_CharacterDataHandler oldCharacterDataHandler;
1625 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1626 XML_CommentHandler oldCommentHandler;
1627 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1628 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1629 XML_DefaultHandler oldDefaultHandler;
1630 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1631 XML_NotationDeclHandler oldNotationDeclHandler;
1632 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1633 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1634 XML_NotStandaloneHandler oldNotStandaloneHandler;
1635 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1636 XML_SkippedEntityHandler oldSkippedEntityHandler;
1637 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1638 void *oldUnknownEncodingHandlerData;
1639 XML_ElementDeclHandler oldElementDeclHandler;
1640 XML_AttlistDeclHandler oldAttlistDeclHandler;
1641 XML_EntityDeclHandler oldEntityDeclHandler;
1642 XML_XmlDeclHandler oldXmlDeclHandler;
1643 ELEMENT_TYPE *oldDeclElementType;
1644
1645 void *oldUserData;
1646 void *oldHandlerArg;
1647 XML_Bool oldDefaultExpandInternalEntities;
1648 XML_Parser oldExternalEntityRefHandlerArg;
1649 #ifdef XML_DTD
1650 enum XML_ParamEntityParsing oldParamEntityParsing;
1651 int oldInEntityValue;
1652 #endif
1653 XML_Bool oldns_triplets;
1654 /* Note that the new parser shares the same hash secret as the old
1655 parser, so that dtdCopy and copyEntityTable can lookup values
1656 from hash tables associated with either parser without us having
1657 to worry which hash secrets each table has.
1658 */
1659 struct sipkey oldhash_secret_salt_128;
1660 XML_Bool oldhash_secret_salt_set;
1661 XML_Bool oldReparseDeferralEnabled;
1662
1663 /* Validate the oldParser parameter before we pull everything out of it */
1664 if (oldParser == NULL)
1665 return NULL;
1666
1667 /* Stash the original parser contents on the stack */
1668 oldDtd = parser->m_dtd;
1669 oldStartElementHandler = parser->m_startElementHandler;
1670 oldEndElementHandler = parser->m_endElementHandler;
1671 oldCharacterDataHandler = parser->m_characterDataHandler;
1672 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1673 oldCommentHandler = parser->m_commentHandler;
1674 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1675 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1676 oldDefaultHandler = parser->m_defaultHandler;
1677 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1678 oldNotationDeclHandler = parser->m_notationDeclHandler;
1679 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1680 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1681 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1682 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1683 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1684 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1685 oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData;
1686 oldElementDeclHandler = parser->m_elementDeclHandler;
1687 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1688 oldEntityDeclHandler = parser->m_entityDeclHandler;
1689 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1690 oldDeclElementType = parser->m_declElementType;
1691
1692 oldUserData = parser->m_userData;
1693 oldHandlerArg = parser->m_handlerArg;
1694 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1695 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1696 #ifdef XML_DTD
1697 oldParamEntityParsing = parser->m_paramEntityParsing;
1698 oldInEntityValue = parser->m_prologState.inEntityValue;
1699 #endif
1700 oldns_triplets = parser->m_ns_triplets;
1701 /* Note that the new parser shares the same hash secret as the old
1702 parser, so that dtdCopy and copyEntityTable can lookup values
1703 from hash tables associated with either parser without us having
1704 to worry which hash secrets each table has.
1705 */
1706 oldhash_secret_salt_128 = parser->m_hash_secret_salt_128;
1707 oldhash_secret_salt_set = parser->m_hash_secret_salt_set;
1708 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1709
1710 #ifdef XML_DTD
1711 if (! context)
1712 newDtd = oldDtd;
1713 #endif /* XML_DTD */
1714
1715 /* Note that the magical uses of the pre-processor to make field
1716 access look more like C++ require that `parser' be overwritten
1717 here. This makes this function more painful to follow than it
1718 would be otherwise.
1719 */
1720 if (parser->m_ns) {
1721 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1722 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
1723 } else {
1724 parser
1725 = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
1726 }
1727
1728 if (! parser)
1729 return NULL;
1730
1731 parser->m_startElementHandler = oldStartElementHandler;
1732 parser->m_endElementHandler = oldEndElementHandler;
1733 parser->m_characterDataHandler = oldCharacterDataHandler;
1734 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1735 parser->m_commentHandler = oldCommentHandler;
1736 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1737 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1738 parser->m_defaultHandler = oldDefaultHandler;
1739 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1740 parser->m_notationDeclHandler = oldNotationDeclHandler;
1741 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1742 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1743 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1744 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1745 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1746 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1747 parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData;
1748 parser->m_elementDeclHandler = oldElementDeclHandler;
1749 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1750 parser->m_entityDeclHandler = oldEntityDeclHandler;
1751 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1752 parser->m_declElementType = oldDeclElementType;
1753 parser->m_userData = oldUserData;
1754 if (oldUserData == oldHandlerArg)
1755 parser->m_handlerArg = parser->m_userData;
1756 else
1757 parser->m_handlerArg = parser;
1758 if (oldExternalEntityRefHandlerArg != oldParser)
1759 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1760 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1761 parser->m_ns_triplets = oldns_triplets;
1762 parser->m_hash_secret_salt_128 = oldhash_secret_salt_128;
1763 parser->m_hash_secret_salt_set = oldhash_secret_salt_set;
1764 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1765 parser->m_parentParser = oldParser;
1766 #ifdef XML_DTD
1767 parser->m_paramEntityParsing = oldParamEntityParsing;
1768 parser->m_prologState.inEntityValue = oldInEntityValue;
1769 if (context) {
1770 #endif /* XML_DTD */
1771 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
1772 || ! setContext(parser, context)) {
1773 XML_ParserFree(parser);
1774 return NULL;
1775 }
1776 parser->m_processor = externalEntityInitProcessor;
1777 #ifdef XML_DTD
1778 } else {
1779 /* The DTD instance referenced by parser->m_dtd is shared between the
1780 document's root parser and external PE parsers, therefore one does not
1781 need to call setContext. In addition, one also *must* not call
1782 setContext, because this would overwrite existing prefix->binding
1783 pointers in parser->m_dtd with ones that get destroyed with the external
1784 PE parser. This would leave those prefixes with dangling pointers.
1785 */
1786 parser->m_isParamEntity = XML_TRUE;
1787 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1788 parser->m_processor = externalParEntInitProcessor;
1789 }
1790 #endif /* XML_DTD */
1791 return parser;
1792 }
1793
1794 static void FASTCALL
1795 destroyBindings(BINDING *bindings, XML_Parser parser) {
1796 for (;;) {
1797 BINDING *b = bindings;
1798 if (! b)
1799 break;
1800 bindings = b->nextTagBinding;
1801 FREE(parser, b->uri);
1802 FREE(parser, b);
1803 }
1804 }
1805
1806 void XMLCALL
1807 XML_ParserFree(XML_Parser parser) {
1808 TAG *tagList;
1809 OPEN_INTERNAL_ENTITY *entityList;
1810 if (parser == NULL)
1811 return;
1812 /* free m_tagStack and m_freeTagList */
1813 tagList = parser->m_tagStack;
1814 for (;;) {
1815 TAG *p;
1816 if (tagList == NULL) {
1817 if (parser->m_freeTagList == NULL)
1818 break;
1819 tagList = parser->m_freeTagList;
1820 parser->m_freeTagList = NULL;
1821 }
1822 p = tagList;
1823 tagList = tagList->parent;
1824 FREE(parser, p->buf.raw);
1825 destroyBindings(p->bindings, parser);
1826 FREE(parser, p);
1827 }
1828 /* free m_openInternalEntities and m_freeInternalEntities */
1829 entityList = parser->m_openInternalEntities;
1830 for (;;) {
1831 OPEN_INTERNAL_ENTITY *openEntity;
1832 if (entityList == NULL) {
1833 if (parser->m_freeInternalEntities == NULL)
1834 break;
1835 entityList = parser->m_freeInternalEntities;
1836 parser->m_freeInternalEntities = NULL;
1837 }
1838 openEntity = entityList;
1839 entityList = entityList->next;
1840 FREE(parser, openEntity);
1841 }
1842 /* free m_openAttributeEntities and m_freeAttributeEntities */
1843 entityList = parser->m_openAttributeEntities;
1844 for (;;) {
1845 OPEN_INTERNAL_ENTITY *openEntity;
1846 if (entityList == NULL) {
1847 if (parser->m_freeAttributeEntities == NULL)
1848 break;
1849 entityList = parser->m_freeAttributeEntities;
1850 parser->m_freeAttributeEntities = NULL;
1851 }
1852 openEntity = entityList;
1853 entityList = entityList->next;
1854 FREE(parser, openEntity);
1855 }
1856 /* free m_openValueEntities and m_freeValueEntities */
1857 entityList = parser->m_openValueEntities;
1858 for (;;) {
1859 OPEN_INTERNAL_ENTITY *openEntity;
1860 if (entityList == NULL) {
1861 if (parser->m_freeValueEntities == NULL)
1862 break;
1863 entityList = parser->m_freeValueEntities;
1864 parser->m_freeValueEntities = NULL;
1865 }
1866 openEntity = entityList;
1867 entityList = entityList->next;
1868 FREE(parser, openEntity);
1869 }
1870 destroyBindings(parser->m_freeBindingList, parser);
1871 destroyBindings(parser->m_inheritedBindings, parser);
1872 poolDestroy(&parser->m_tempPool);
1873 poolDestroy(&parser->m_temp2Pool);
1874 FREE(parser, (void *)parser->m_protocolEncodingName);
1875 #ifdef XML_DTD
1876 /* external parameter entity parsers share the DTD structure
1877 parser->m_dtd with the root parser, so we must not destroy it
1878 */
1879 if (! parser->m_isParamEntity && parser->m_dtd)
1880 #else
1881 if (parser->m_dtd)
1882 #endif /* XML_DTD */
1883 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser);
1884 FREE(parser, parser->m_atts);
1885 #ifdef XML_ATTR_INFO
1886 FREE(parser, parser->m_attInfo);
1887 #endif
1888 FREE(parser, parser->m_groupConnector);
1889 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
1890 // is not being allocated with MALLOC(..) but with plain
1891 // .malloc_fcn(..).
1892 parser->m_mem.free_fcn(parser->m_buffer);
1893 FREE(parser, parser->m_dataBuf);
1894 FREE(parser, parser->m_nsAtts);
1895 FREE(parser, parser->m_unknownEncodingMem);
1896 if (parser->m_unknownEncodingRelease)
1897 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1898 FREE(parser, parser);
1899 }
1900
1901 void XMLCALL
1902 XML_UseParserAsHandlerArg(XML_Parser parser) {
1903 if (parser != NULL)
1904 parser->m_handlerArg = parser;
1905 }
1906
1907 enum XML_Error XMLCALL
1908 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1909 if (parser == NULL)
1910 return XML_ERROR_INVALID_ARGUMENT;
1911 #ifdef XML_DTD
1912 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1913 if (parserBusy(parser))
1914 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1915 parser->m_useForeignDTD = useDTD;
1916 return XML_ERROR_NONE;
1917 #else
1918 UNUSED_P(useDTD);
1919 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1920 #endif
1921 }
1922
1923 void XMLCALL
1924 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1925 if (parser == NULL)
1926 return;
1927 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1928 if (parserBusy(parser))
1929 return;
1930 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1931 }
1932
1933 void XMLCALL
1934 XML_SetUserData(XML_Parser parser, void *p) {
1935 if (parser == NULL)
1936 return;
1937 if (parser->m_handlerArg == parser->m_userData)
1938 parser->m_handlerArg = parser->m_userData = p;
1939 else
1940 parser->m_userData = p;
1941 }
1942
1943 enum XML_Status XMLCALL
1944 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1945 if (parser == NULL)
1946 return XML_STATUS_ERROR;
1947 if (p) {
1948 p = poolCopyString(&parser->m_dtd->pool, p);
1949 if (! p)
1950 return XML_STATUS_ERROR;
1951 parser->m_curBase = p;
1952 } else
1953 parser->m_curBase = NULL;
1954 return XML_STATUS_OK;
1955 }
1956
1957 const XML_Char *XMLCALL
1958 XML_GetBase(XML_Parser parser) {
1959 if (parser == NULL)
1960 return NULL;
1961 return parser->m_curBase;
1962 }
1963
1964 int XMLCALL
1965 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1966 if (parser == NULL)
1967 return -1;
1968 return parser->m_nSpecifiedAtts;
1969 }
1970
1971 int XMLCALL
1972 XML_GetIdAttributeIndex(XML_Parser parser) {
1973 if (parser == NULL)
1974 return -1;
1975 return parser->m_idAttIndex;
1976 }
1977
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo pointer, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1986
1987 void XMLCALL
1988 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1989 XML_EndElementHandler end) {
1990 if (parser == NULL)
1991 return;
1992 parser->m_startElementHandler = start;
1993 parser->m_endElementHandler = end;
1994 }
1995
1996 void XMLCALL
1997 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1998 if (parser != NULL)
1999 parser->m_startElementHandler = start;
2000 }
2001
2002 void XMLCALL
2003 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
2004 if (parser != NULL)
2005 parser->m_endElementHandler = end;
2006 }
2007
2008 void XMLCALL
2009 XML_SetCharacterDataHandler(XML_Parser parser,
2010 XML_CharacterDataHandler handler) {
2011 if (parser != NULL)
2012 parser->m_characterDataHandler = handler;
2013 }
2014
2015 void XMLCALL
2016 XML_SetProcessingInstructionHandler(XML_Parser parser,
2017 XML_ProcessingInstructionHandler handler) {
2018 if (parser != NULL)
2019 parser->m_processingInstructionHandler = handler;
2020 }
2021
2022 void XMLCALL
2023 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
2024 if (parser != NULL)
2025 parser->m_commentHandler = handler;
2026 }
2027
2028 void XMLCALL
2029 XML_SetCdataSectionHandler(XML_Parser parser,
2030 XML_StartCdataSectionHandler start,
2031 XML_EndCdataSectionHandler end) {
2032 if (parser == NULL)
2033 return;
2034 parser->m_startCdataSectionHandler = start;
2035 parser->m_endCdataSectionHandler = end;
2036 }
2037
2038 void XMLCALL
2039 XML_SetStartCdataSectionHandler(XML_Parser parser,
2040 XML_StartCdataSectionHandler start) {
2041 if (parser != NULL)
2042 parser->m_startCdataSectionHandler = start;
2043 }
2044
2045 void XMLCALL
2046 XML_SetEndCdataSectionHandler(XML_Parser parser,
2047 XML_EndCdataSectionHandler end) {
2048 if (parser != NULL)
2049 parser->m_endCdataSectionHandler = end;
2050 }
2051
2052 void XMLCALL
2053 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
2054 if (parser == NULL)
2055 return;
2056 parser->m_defaultHandler = handler;
2057 parser->m_defaultExpandInternalEntities = XML_FALSE;
2058 }
2059
2060 void XMLCALL
2061 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
2062 if (parser == NULL)
2063 return;
2064 parser->m_defaultHandler = handler;
2065 parser->m_defaultExpandInternalEntities = XML_TRUE;
2066 }
2067
2068 void XMLCALL
2069 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
2070 XML_EndDoctypeDeclHandler end) {
2071 if (parser == NULL)
2072 return;
2073 parser->m_startDoctypeDeclHandler = start;
2074 parser->m_endDoctypeDeclHandler = end;
2075 }
2076
2077 void XMLCALL
2078 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
2079 XML_StartDoctypeDeclHandler start) {
2080 if (parser != NULL)
2081 parser->m_startDoctypeDeclHandler = start;
2082 }
2083
2084 void XMLCALL
2085 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
2086 if (parser != NULL)
2087 parser->m_endDoctypeDeclHandler = end;
2088 }
2089
2090 void XMLCALL
2091 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
2092 XML_UnparsedEntityDeclHandler handler) {
2093 if (parser != NULL)
2094 parser->m_unparsedEntityDeclHandler = handler;
2095 }
2096
2097 void XMLCALL
2098 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
2099 if (parser != NULL)
2100 parser->m_notationDeclHandler = handler;
2101 }
2102
2103 void XMLCALL
2104 XML_SetNamespaceDeclHandler(XML_Parser parser,
2105 XML_StartNamespaceDeclHandler start,
2106 XML_EndNamespaceDeclHandler end) {
2107 if (parser == NULL)
2108 return;
2109 parser->m_startNamespaceDeclHandler = start;
2110 parser->m_endNamespaceDeclHandler = end;
2111 }
2112
2113 void XMLCALL
2114 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
2115 XML_StartNamespaceDeclHandler start) {
2116 if (parser != NULL)
2117 parser->m_startNamespaceDeclHandler = start;
2118 }
2119
2120 void XMLCALL
2121 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
2122 XML_EndNamespaceDeclHandler end) {
2123 if (parser != NULL)
2124 parser->m_endNamespaceDeclHandler = end;
2125 }
2126
2127 void XMLCALL
2128 XML_SetNotStandaloneHandler(XML_Parser parser,
2129 XML_NotStandaloneHandler handler) {
2130 if (parser != NULL)
2131 parser->m_notStandaloneHandler = handler;
2132 }
2133
2134 void XMLCALL
2135 XML_SetExternalEntityRefHandler(XML_Parser parser,
2136 XML_ExternalEntityRefHandler handler) {
2137 if (parser != NULL)
2138 parser->m_externalEntityRefHandler = handler;
2139 }
2140
2141 void XMLCALL
2142 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
2143 if (parser == NULL)
2144 return;
2145 if (arg)
2146 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
2147 else
2148 parser->m_externalEntityRefHandlerArg = parser;
2149 }
2150
2151 void XMLCALL
2152 XML_SetSkippedEntityHandler(XML_Parser parser,
2153 XML_SkippedEntityHandler handler) {
2154 if (parser != NULL)
2155 parser->m_skippedEntityHandler = handler;
2156 }
2157
2158 void XMLCALL
2159 XML_SetUnknownEncodingHandler(XML_Parser parser,
2160 XML_UnknownEncodingHandler handler, void *data) {
2161 if (parser == NULL)
2162 return;
2163 parser->m_unknownEncodingHandler = handler;
2164 parser->m_unknownEncodingHandlerData = data;
2165 }
2166
2167 void XMLCALL
2168 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
2169 if (parser != NULL)
2170 parser->m_elementDeclHandler = eldecl;
2171 }
2172
2173 void XMLCALL
2174 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
2175 if (parser != NULL)
2176 parser->m_attlistDeclHandler = attdecl;
2177 }
2178
2179 void XMLCALL
2180 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
2181 if (parser != NULL)
2182 parser->m_entityDeclHandler = handler;
2183 }
2184
2185 void XMLCALL
2186 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
2187 if (parser != NULL)
2188 parser->m_xmlDeclHandler = handler;
2189 }
2190
2191 int XMLCALL
2192 XML_SetParamEntityParsing(XML_Parser parser,
2193 enum XML_ParamEntityParsing peParsing) {
2194 if (parser == NULL)
2195 return 0;
2196 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2197 if (parserBusy(parser))
2198 return 0;
2199 #ifdef XML_DTD
2200 parser->m_paramEntityParsing = peParsing;
2201 return 1;
2202 #else
2203 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2204 #endif
2205 }
2206
2207 // DEPRECATED since Expat 2.8.0.
2208 int XMLCALL
2209 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2210 if (parser == NULL)
2211 return 0;
2212
2213 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2214 assert(! rootParser->m_parentParser);
2215
2216 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2217 if (parserBusy(rootParser))
2218 return 0;
2219
2220 rootParser->m_hash_secret_salt_128.k[0] = 0;
2221 rootParser->m_hash_secret_salt_128.k[1] = hash_salt;
2222
2223 if (hash_salt != 0) { // to remain backwards compatible
2224 rootParser->m_hash_secret_salt_set = XML_TRUE;
2225
2226 if (sizeof(unsigned long) == 4)
2227 ENTROPY_DEBUG("explicit(4)", rootParser->m_hash_secret_salt_128);
2228 else
2229 ENTROPY_DEBUG("explicit(8)", rootParser->m_hash_secret_salt_128);
2230 }
2231
2232 return 1;
2233 }
2234
2235 XML_Bool XMLCALL
2236 XML_SetHashSalt16Bytes(XML_Parser parser, const uint8_t entropy[16]) {
2237 if (parser == NULL)
2238 return XML_FALSE;
2239
2240 if (entropy == NULL)
2241 return XML_FALSE;
2242
2243 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2244 assert(! rootParser->m_parentParser);
2245
2246 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2247 if (parserBusy(rootParser))
2248 return XML_FALSE;
2249
2250 sip_tokey(&(rootParser->m_hash_secret_salt_128), entropy);
2251
2252 rootParser->m_hash_secret_salt_set = XML_TRUE;
2253
2254 ENTROPY_DEBUG("explicit(16)", rootParser->m_hash_secret_salt_128);
2255
2256 return XML_TRUE;
2257 }
2258
/* Parse `len` bytes of input starting at `s`; a non-zero `isFinal` marks the
   last piece of the document.

   Returns XML_STATUS_ERROR (with m_errorCode set whenever `parser` is
   non-NULL), XML_STATUS_SUSPENDED, or XML_STATUS_OK.

   When XML_CONTEXT_BYTES == 0 and the internal buffer holds no pending data
   (m_bufferPtr == m_bufferEnd), the caller's memory is parsed in place and
   only the bytes the processor did not consume are copied into the parser's
   own buffer. In every other case the input is copied into a buffer obtained
   from XML_GetBuffer() and handed to XML_ParseBuffer(). */
enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
  /* Reject a NULL parser, a negative length, and a NULL pointer paired with
     a non-zero length. */
  if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
    if (parser != NULL)
      parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
  case XML_INITIALIZED:
    /* startParsing() is only invoked for a parser without a parent */
    if (parser->m_parentParser == NULL && ! startParsing(parser)) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return XML_STATUS_ERROR;
    }
    /* fall through */
  default:
    parser->m_parsingStatus.parsing = XML_PARSING;
  }

#if XML_CONTEXT_BYTES == 0
  /* In-place path: nothing is pending in the internal buffer. */
  if (parser->m_bufferPtr == parser->m_bufferEnd) {
    const char *end;
    int nLeftOver;
    enum XML_Status result;
    /* Detect overflow (a+b > MAX <==> b > MAX-a) */
    if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      parser->m_eventPtr = parser->m_eventEndPtr = NULL;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    }
    // though this isn't a buffer request, we assume that `len` is the app's
    // preferred buffer fill size, and therefore save it here.
    parser->m_lastBufferRequestSize = len;
    parser->m_parseEndByteIndex += len;
    parser->m_positionPtr = s;
    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;

    /* Run the processor directly over the caller's memory; `end` receives
       the position up to which input was consumed. */
    parser->m_errorCode
        = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);

    if (parser->m_errorCode != XML_ERROR_NONE) {
      parser->m_eventEndPtr = parser->m_eventPtr;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    } else {
      switch (parser->m_parsingStatus.parsing) {
      case XML_SUSPENDED:
        result = XML_STATUS_SUSPENDED;
        break;
      case XML_INITIALIZED:
      case XML_PARSING:
        if (isFinal) {
          parser->m_parsingStatus.parsing = XML_FINISHED;
          return XML_STATUS_OK;
        }
      /* fall through */
      default:
        result = XML_STATUS_OK;
      }
    }

    XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
                      &parser->m_position);
    /* Bytes between `end` and the end of the caller's input must survive
       this call, so they are moved into the parser's own buffer. */
    nLeftOver = s + len - end;
    if (nLeftOver) {
      // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
      // (and XML_ERROR_FINISHED) from XML_GetBuffer.
      const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
      parser->m_parsingStatus.parsing = XML_PARSING;
      void *const temp = XML_GetBuffer(parser, nLeftOver);
      parser->m_parsingStatus.parsing = originalStatus;
      // GetBuffer may have overwritten this, but we want to remember what the
      // app requested, not how many bytes were left over after parsing.
      parser->m_lastBufferRequestSize = len;
      if (temp == NULL) {
        // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
        parser->m_eventPtr = parser->m_eventEndPtr = NULL;
        parser->m_processor = errorProcessor;
        return XML_STATUS_ERROR;
      }
      // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
      // don't have any data to preserve, and can copy straight into the start
      // of the buffer rather than the GetBuffer return pointer (which may be
      // pointing further into the allocated buffer).
      memcpy(parser->m_buffer, end, nLeftOver);
    }
    /* Re-anchor every buffer-relative pointer on the internal buffer, which
       now holds exactly the nLeftOver unconsumed bytes. */
    parser->m_bufferPtr = parser->m_buffer;
    parser->m_bufferEnd = parser->m_buffer + nLeftOver;
    parser->m_positionPtr = parser->m_bufferPtr;
    parser->m_parseEndPtr = parser->m_bufferEnd;
    parser->m_eventPtr = parser->m_bufferPtr;
    parser->m_eventEndPtr = parser->m_bufferPtr;
    return result;
  }
#endif /* XML_CONTEXT_BYTES == 0 */
  /* Copying path: append the input to the internal buffer and let
     XML_ParseBuffer() do the parsing. */
  void *buff = XML_GetBuffer(parser, len);
  if (buff == NULL)
    return XML_STATUS_ERROR;
  if (len > 0) {
    assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
    memcpy(buff, s, len);
  }
  return XML_ParseBuffer(parser, len, isFinal);
}
2369
2370 enum XML_Status XMLCALL
2371 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2372 const char *start;
2373 enum XML_Status result = XML_STATUS_OK;
2374
2375 if (parser == NULL)
2376 return XML_STATUS_ERROR;
2377
2378 if (len < 0) {
2379 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2380 return XML_STATUS_ERROR;
2381 }
2382
2383 switch (parser->m_parsingStatus.parsing) {
2384 case XML_SUSPENDED:
2385 parser->m_errorCode = XML_ERROR_SUSPENDED;
2386 return XML_STATUS_ERROR;
2387 case XML_FINISHED:
2388 parser->m_errorCode = XML_ERROR_FINISHED;
2389 return XML_STATUS_ERROR;
2390 case XML_INITIALIZED:
2391 /* Has someone called XML_GetBuffer successfully before? */
2392 if (! parser->m_bufferPtr) {
2393 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2394 return XML_STATUS_ERROR;
2395 }
2396
2397 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2398 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2399 return XML_STATUS_ERROR;
2400 }
2401 /* fall through */
2402 default:
2403 parser->m_parsingStatus.parsing = XML_PARSING;
2404 }
2405
2406 start = parser->m_bufferPtr;
2407 parser->m_positionPtr = start;
2408 parser->m_bufferEnd += len;
2409 parser->m_parseEndPtr = parser->m_bufferEnd;
2410 parser->m_parseEndByteIndex += len;
2411 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2412
2413 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2414 &parser->m_bufferPtr);
2415
2416 if (parser->m_errorCode != XML_ERROR_NONE) {
2417 parser->m_eventEndPtr = parser->m_eventPtr;
2418 parser->m_processor = errorProcessor;
2419 return XML_STATUS_ERROR;
2420 } else {
2421 switch (parser->m_parsingStatus.parsing) {
2422 case XML_SUSPENDED:
2423 result = XML_STATUS_SUSPENDED;
2424 break;
2425 case XML_INITIALIZED:
2426 case XML_PARSING:
2427 if (isFinal) {
2428 parser->m_parsingStatus.parsing = XML_FINISHED;
2429 return result;
2430 }
2431 default:; /* should not happen */
2432 }
2433 }
2434
2435 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2436 parser->m_bufferPtr, &parser->m_position);
2437 parser->m_positionPtr = parser->m_bufferPtr;
2438 return result;
2439 }
2440
/* Make room for `len` more bytes of input and return the address
   (m_bufferEnd) at which the caller should write them.

   Returns NULL for a NULL parser; otherwise NULL with m_errorCode set when
   `len` is negative, the parser is suspended or finished, a size computation
   would overflow, or allocation fails.

   When the free tail of the current buffer is too small (or there is no
   buffer yet), the pending data is either shifted towards the front of the
   existing buffer, or moved into a newly allocated one. The new capacity
   starts from the current capacity (INIT_BUFFER_SIZE if there is none) and
   is doubled at least once, until it covers the needed size. With
   XML_CONTEXT_BYTES > 0, up to XML_CONTEXT_BYTES bytes located before
   m_bufferPtr are retained as well. */
void *XMLCALL
XML_GetBuffer(XML_Parser parser, int len) {
  if (parser == NULL)
    return NULL;
  if (len < 0) {
    parser->m_errorCode = XML_ERROR_NO_MEMORY;
    return NULL;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return NULL;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return NULL;
  default:;
  }

  // whether or not the request succeeds, `len` seems to be the app's preferred
  // buffer fill size; remember it.
  parser->m_lastBufferRequestSize = len;
  /* Only reorganise when the space after m_bufferEnd cannot hold `len`
     bytes, or when no buffer has been allocated yet. */
  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
      || parser->m_buffer == NULL) {
#if XML_CONTEXT_BYTES > 0
    int keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* Do not invoke signed arithmetic overflow: */
    int neededSize = (int)((unsigned)len
                           + (unsigned)EXPAT_SAFE_PTR_DIFF(
                               parser->m_bufferEnd, parser->m_bufferPtr));
    /* A negative value after the cast back to int means the unsigned sum
       exceeded INT_MAX. */
    if (neededSize < 0) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
#if XML_CONTEXT_BYTES > 0
    /* `keep` is the number of bytes before m_bufferPtr to retain, capped at
       XML_CONTEXT_BYTES. */
    keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
    if (keep > XML_CONTEXT_BYTES)
      keep = XML_CONTEXT_BYTES;
    /* Detect and prevent integer overflow */
    if (keep > INT_MAX - neededSize) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
    neededSize += keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* Case 1: the existing buffer is large enough overall; slide the data
       towards its start instead of allocating. */
    if (parser->m_buffer && parser->m_bufferPtr
        && neededSize
               <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
#if XML_CONTEXT_BYTES > 0
      if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
        int offset
            = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
              - keep;
        /* The buffer pointers cannot be NULL here; we have at least some bytes
         * in the buffer */
        memmove(parser->m_buffer, &parser->m_buffer[offset],
                parser->m_bufferEnd - parser->m_bufferPtr + keep);
        parser->m_bufferEnd -= offset;
        parser->m_bufferPtr -= offset;
      }
#else
      memmove(parser->m_buffer, parser->m_bufferPtr,
              EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
      parser->m_bufferEnd
          = parser->m_buffer
            + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      parser->m_bufferPtr = parser->m_buffer;
#endif /* XML_CONTEXT_BYTES > 0 */
    } else {
      /* Case 2: allocate a larger buffer and move the pending data over. */
      char *newBuf;
      int bufferSize
          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      do {
        /* Do not invoke signed arithmetic overflow: */
        bufferSize = (int)(2U * (unsigned)bufferSize);
      } while (bufferSize < neededSize && bufferSize > 0);
      /* A non-positive size means the doubling wrapped past INT_MAX. */
      if (bufferSize <= 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      // NOTE: We are avoiding MALLOC(..) here to leave limiting
      //       the input size to the application using Expat.
      newBuf = parser->m_mem.malloc_fcn(bufferSize);
      if (newBuf == NULL) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      parser->m_bufferLim = newBuf + bufferSize;
#if XML_CONTEXT_BYTES > 0
      if (parser->m_bufferPtr) {
        /* Copy the retained context bytes plus the pending data. */
        memcpy(newBuf, &parser->m_bufferPtr[-keep],
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
                   + keep);
        // NOTE: We are avoiding FREE(..) here because parser->m_buffer
        //       is not being allocated with MALLOC(..) but with plain
        //       .malloc_fcn(..).
        parser->m_mem.free_fcn(parser->m_buffer);
        parser->m_buffer = newBuf;
        /* m_bufferEnd must be recomputed before m_bufferPtr is changed,
           because the expression still reads the old m_bufferPtr. */
        parser->m_bufferEnd
            = parser->m_buffer
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
              + keep;
        parser->m_bufferPtr = parser->m_buffer + keep;
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
        parser->m_bufferPtr = parser->m_buffer = newBuf;
      }
#else
      if (parser->m_bufferPtr) {
        memcpy(newBuf, parser->m_bufferPtr,
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
        // NOTE: We are avoiding FREE(..) here because parser->m_buffer
        //       is not being allocated with MALLOC(..) but with plain
        //       .malloc_fcn(..).
        parser->m_mem.free_fcn(parser->m_buffer);
        parser->m_bufferEnd
            = newBuf
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
      }
      parser->m_bufferPtr = parser->m_buffer = newBuf;
#endif /* XML_CONTEXT_BYTES > 0 */
    }
    /* The data has moved, so the event and position pointers into the old
       layout are cleared. */
    parser->m_eventPtr = parser->m_eventEndPtr = NULL;
    parser->m_positionPtr = NULL;
  }
  return parser->m_bufferEnd;
}
2574
2575 static void
2576 triggerReenter(XML_Parser parser) {
2577 parser->m_reenter = XML_TRUE;
2578 }
2579
2580 enum XML_Status XMLCALL
2581 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2582 if (parser == NULL)
2583 return XML_STATUS_ERROR;
2584 switch (parser->m_parsingStatus.parsing) {
2585 case XML_INITIALIZED:
2586 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2587 return XML_STATUS_ERROR;
2588 case XML_SUSPENDED:
2589 if (resumable) {
2590 parser->m_errorCode = XML_ERROR_SUSPENDED;
2591 return XML_STATUS_ERROR;
2592 }
2593 parser->m_parsingStatus.parsing = XML_FINISHED;
2594 break;
2595 case XML_FINISHED:
2596 parser->m_errorCode = XML_ERROR_FINISHED;
2597 return XML_STATUS_ERROR;
2598 case XML_PARSING:
2599 if (resumable) {
2600 #ifdef XML_DTD
2601 if (parser->m_isParamEntity) {
2602 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2603 return XML_STATUS_ERROR;
2604 }
2605 #endif
2606 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2607 } else
2608 parser->m_parsingStatus.parsing = XML_FINISHED;
2609 break;
2610 default:
2611 assert(0);
2612 }
2613 return XML_STATUS_OK;
2614 }
2615
2616 enum XML_Status XMLCALL
2617 XML_ResumeParser(XML_Parser parser) {
2618 enum XML_Status result = XML_STATUS_OK;
2619
2620 if (parser == NULL)
2621 return XML_STATUS_ERROR;
2622 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2623 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2624 return XML_STATUS_ERROR;
2625 }
2626 parser->m_parsingStatus.parsing = XML_PARSING;
2627
2628 parser->m_errorCode = callProcessor(
2629 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2630
2631 if (parser->m_errorCode != XML_ERROR_NONE) {
2632 parser->m_eventEndPtr = parser->m_eventPtr;
2633 parser->m_processor = errorProcessor;
2634 return XML_STATUS_ERROR;
2635 } else {
2636 switch (parser->m_parsingStatus.parsing) {
2637 case XML_SUSPENDED:
2638 result = XML_STATUS_SUSPENDED;
2639 break;
2640 case XML_INITIALIZED:
2641 case XML_PARSING:
2642 if (parser->m_parsingStatus.finalBuffer) {
2643 parser->m_parsingStatus.parsing = XML_FINISHED;
2644 return result;
2645 }
2646 default:;
2647 }
2648 }
2649
2650 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2651 parser->m_bufferPtr, &parser->m_position);
2652 parser->m_positionPtr = parser->m_bufferPtr;
2653 return result;
2654 }
2655
2656 void XMLCALL
2657 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2658 if (parser == NULL)
2659 return;
2660 assert(status != NULL);
2661 *status = parser->m_parsingStatus;
2662 }
2663
2664 enum XML_Error XMLCALL
2665 XML_GetErrorCode(XML_Parser parser) {
2666 if (parser == NULL)
2667 return XML_ERROR_INVALID_ARGUMENT;
2668 return parser->m_errorCode;
2669 }
2670
2671 XML_Index XMLCALL
2672 XML_GetCurrentByteIndex(XML_Parser parser) {
2673 if (parser == NULL)
2674 return -1;
2675 if (parser->m_eventPtr)
2676 return (XML_Index)(parser->m_parseEndByteIndex
2677 - (parser->m_parseEndPtr - parser->m_eventPtr));
2678 return -1;
2679 }
2680
2681 int XMLCALL
2682 XML_GetCurrentByteCount(XML_Parser parser) {
2683 if (parser == NULL)
2684 return 0;
2685 if (parser->m_eventEndPtr && parser->m_eventPtr)
2686 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2687 return 0;
2688 }
2689
2690 const char *XMLCALL
2691 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2692 #if XML_CONTEXT_BYTES > 0
2693 if (parser == NULL)
2694 return NULL;
2695 if (parser->m_eventPtr && parser->m_buffer) {
2696 if (offset != NULL)
2697 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2698 if (size != NULL)
2699 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2700 return parser->m_buffer;
2701 }
2702 #else
2703 (void)parser;
2704 (void)offset;
2705 (void)size;
2706 #endif /* XML_CONTEXT_BYTES > 0 */
2707 return (const char *)0;
2708 }
2709
2710 XML_Size XMLCALL
2711 XML_GetCurrentLineNumber(XML_Parser parser) {
2712 if (parser == NULL)
2713 return 0;
2714 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2715 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2716 parser->m_eventPtr, &parser->m_position);
2717 parser->m_positionPtr = parser->m_eventPtr;
2718 }
2719 return parser->m_position.lineNumber + 1;
2720 }
2721
2722 XML_Size XMLCALL
2723 XML_GetCurrentColumnNumber(XML_Parser parser) {
2724 if (parser == NULL)
2725 return 0;
2726 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2727 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2728 parser->m_eventPtr, &parser->m_position);
2729 parser->m_positionPtr = parser->m_eventPtr;
2730 }
2731 return parser->m_position.columnNumber;
2732 }
2733
2734 void XMLCALL
2735 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2736 if (parser == NULL)
2737 return;
2738
2739 // NOTE: We are avoiding FREE(..) here because the content model
2740 // has been created using plain .malloc_fcn(..) rather than MALLOC(..).
2741 parser->m_mem.free_fcn(model);
2742 }
2743
2744 void *XMLCALL
2745 XML_MemMalloc(XML_Parser parser, size_t size) {
2746 if (parser == NULL)
2747 return NULL;
2748
2749 // NOTE: We are avoiding MALLOC(..) here to not include
2750 // user allocations with allocation tracking and limiting.
2751 return parser->m_mem.malloc_fcn(size);
2752 }
2753
2754 void *XMLCALL
2755 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2756 if (parser == NULL)
2757 return NULL;
2758
2759 // NOTE: We are avoiding REALLOC(..) here to not include
2760 // user allocations with allocation tracking and limiting.
2761 return parser->m_mem.realloc_fcn(ptr, size);
2762 }
2763
2764 void XMLCALL
2765 XML_MemFree(XML_Parser parser, void *ptr) {
2766 if (parser == NULL)
2767 return;
2768
2769 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and
2770 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..)
2771 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally.
2772 parser->m_mem.free_fcn(ptr);
2773 }
2774
2775 void XMLCALL
2776 XML_DefaultCurrent(XML_Parser parser) {
2777 if (parser == NULL)
2778 return;
2779 if (parser->m_defaultHandler) {
2780 if (parser->m_openInternalEntities)
2781 reportDefault(parser, parser->m_internalEncoding,
2782 parser->m_openInternalEntities->internalEventPtr,
2783 parser->m_openInternalEntities->internalEventEndPtr);
2784 else
2785 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2786 parser->m_eventEndPtr);
2787 }
2788 }
2789
2790 const XML_LChar *XMLCALL
2791 XML_ErrorString(enum XML_Error code) {
2792 switch (code) {
2793 case XML_ERROR_NONE:
2794 return NULL;
2795 case XML_ERROR_NO_MEMORY:
2796 return XML_L("out of memory");
2797 case XML_ERROR_SYNTAX:
2798 return XML_L("syntax error");
2799 case XML_ERROR_NO_ELEMENTS:
2800 return XML_L("no element found");
2801 case XML_ERROR_INVALID_TOKEN:
2802 return XML_L("not well-formed (invalid token)");
2803 case XML_ERROR_UNCLOSED_TOKEN:
2804 return XML_L("unclosed token");
2805 case XML_ERROR_PARTIAL_CHAR:
2806 return XML_L("partial character");
2807 case XML_ERROR_TAG_MISMATCH:
2808 return XML_L("mismatched tag");
2809 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2810 return XML_L("duplicate attribute");
2811 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2812 return XML_L("junk after document element");
2813 case XML_ERROR_PARAM_ENTITY_REF:
2814 return XML_L("illegal parameter entity reference");
2815 case XML_ERROR_UNDEFINED_ENTITY:
2816 return XML_L("undefined entity");
2817 case XML_ERROR_RECURSIVE_ENTITY_REF:
2818 return XML_L("recursive entity reference");
2819 case XML_ERROR_ASYNC_ENTITY:
2820 return XML_L("asynchronous entity");
2821 case XML_ERROR_BAD_CHAR_REF:
2822 return XML_L("reference to invalid character number");
2823 case XML_ERROR_BINARY_ENTITY_REF:
2824 return XML_L("reference to binary entity");
2825 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2826 return XML_L("reference to external entity in attribute");
2827 case XML_ERROR_MISPLACED_XML_PI:
2828 return XML_L("XML or text declaration not at start of entity");
2829 case XML_ERROR_UNKNOWN_ENCODING:
2830 return XML_L("unknown encoding");
2831 case XML_ERROR_INCORRECT_ENCODING:
2832 return XML_L("encoding specified in XML declaration is incorrect");
2833 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2834 return XML_L("unclosed CDATA section");
2835 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2836 return XML_L("error in processing external entity reference");
2837 case XML_ERROR_NOT_STANDALONE:
2838 return XML_L("document is not standalone");
2839 case XML_ERROR_UNEXPECTED_STATE:
2840 return XML_L("unexpected parser state - please send a bug report");
2841 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2842 return XML_L("entity declared in parameter entity");
2843 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2844 return XML_L("requested feature requires XML_DTD support in Expat");
2845 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2846 return XML_L("cannot change setting once parsing has begun");
2847 /* Added in 1.95.7. */
2848 case XML_ERROR_UNBOUND_PREFIX:
2849 return XML_L("unbound prefix");
2850 /* Added in 1.95.8. */
2851 case XML_ERROR_UNDECLARING_PREFIX:
2852 return XML_L("must not undeclare prefix");
2853 case XML_ERROR_INCOMPLETE_PE:
2854 return XML_L("incomplete markup in parameter entity");
2855 case XML_ERROR_XML_DECL:
2856 return XML_L("XML declaration not well-formed");
2857 case XML_ERROR_TEXT_DECL:
2858 return XML_L("text declaration not well-formed");
2859 case XML_ERROR_PUBLICID:
2860 return XML_L("illegal character(s) in public id");
2861 case XML_ERROR_SUSPENDED:
2862 return XML_L("parser suspended");
2863 case XML_ERROR_NOT_SUSPENDED:
2864 return XML_L("parser not suspended");
2865 case XML_ERROR_ABORTED:
2866 return XML_L("parsing aborted");
2867 case XML_ERROR_FINISHED:
2868 return XML_L("parsing finished");
2869 case XML_ERROR_SUSPEND_PE:
2870 return XML_L("cannot suspend in external parameter entity");
2871 /* Added in 2.0.0. */
2872 case XML_ERROR_RESERVED_PREFIX_XML:
2873 return XML_L(
2874 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2875 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2876 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2877 case XML_ERROR_RESERVED_NAMESPACE_URI:
2878 return XML_L(
2879 "prefix must not be bound to one of the reserved namespace names");
2880 /* Added in 2.2.5. */
2881 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2882 return XML_L("invalid argument");
2883 /* Added in 2.3.0. */
2884 case XML_ERROR_NO_BUFFER:
2885 return XML_L(
2886 "a successful prior call to function XML_GetBuffer is required");
2887 /* Added in 2.4.0. */
2888 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2889 return XML_L(
2890 "limit on input amplification factor (from DTD and entities) breached");
2891 /* Added in 2.6.4. */
2892 case XML_ERROR_NOT_STARTED:
2893 return XML_L("parser not started");
2894 }
2895 return NULL;
2896 }
2897
2898 const XML_LChar *XMLCALL
2899 XML_ExpatVersion(void) {
2900 /* V1 is used to string-ize the version number. However, it would
2901 string-ize the actual version macro *names* unless we get them
2902 substituted before being passed to V1. CPP is defined to expand
2903 a macro, then rescan for more expansions. Thus, we use V2 to expand
2904 the version macros, then CPP will expand the resulting V1() macro
2905 with the correct numerals. */
2906 /* ### I'm assuming cpp is portable in this respect... */
2907
2908 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2909 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2910
2911 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2912
2913 #undef V1
2914 #undef V2
2915 }
2916
2917 XML_Expat_Version XMLCALL
2918 XML_ExpatVersionInfo(void) {
2919 XML_Expat_Version version;
2920
2921 version.major = XML_MAJOR_VERSION;
2922 version.minor = XML_MINOR_VERSION;
2923 version.micro = XML_MICRO_VERSION;
2924
2925 return version;
2926 }
2927
2928 const XML_Feature *XMLCALL
2929 XML_GetFeatureList(void) {
2930 static const XML_Feature features[] = {
2931 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2932 sizeof(XML_Char)},
2933 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2934 sizeof(XML_LChar)},
2935 #ifdef XML_UNICODE
2936 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2937 #endif
2938 #ifdef XML_UNICODE_WCHAR_T
2939 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2940 #endif
2941 #ifdef XML_DTD
2942 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2943 #endif
2944 #if XML_CONTEXT_BYTES > 0
2945 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2946 XML_CONTEXT_BYTES},
2947 #endif
2948 #ifdef XML_MIN_SIZE
2949 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2950 #endif
2951 #ifdef XML_NS
2952 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2953 #endif
2954 #ifdef XML_LARGE_SIZE
2955 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2956 #endif
2957 #ifdef XML_ATTR_INFO
2958 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2959 #endif
2960 #if XML_GE == 1
2961 /* Added in Expat 2.4.0 for XML_DTD defined and
2962 * added in Expat 2.6.0 for XML_GE == 1. */
2963 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2964 XML_L("XML_BLAP_MAX_AMP"),
2965 (long int)
2966 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2967 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2968 XML_L("XML_BLAP_ACT_THRES"),
2969 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2970 /* Added in Expat 2.6.0. */
2971 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2972 /* Added in Expat 2.7.2. */
2973 {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
2974 XML_L("XML_AT_MAX_AMP"),
2975 (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
2976 {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
2977 XML_L("XML_AT_ACT_THRES"),
2978 (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
2979 #endif
2980 {XML_FEATURE_END, NULL, 0}};
2981
2982 return features;
2983 }
2984
2985 #if XML_GE == 1
2986 XML_Bool XMLCALL
2987 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2988 XML_Parser parser, float maximumAmplificationFactor) {
2989 if ((parser == NULL) || (parser->m_parentParser != NULL)
2990 || isnan(maximumAmplificationFactor)
2991 || (maximumAmplificationFactor < 1.0f)) {
2992 return XML_FALSE;
2993 }
2994 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2995 return XML_TRUE;
2996 }
2997
2998 XML_Bool XMLCALL
2999 XML_SetBillionLaughsAttackProtectionActivationThreshold(
3000 XML_Parser parser, unsigned long long activationThresholdBytes) {
3001 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3002 return XML_FALSE;
3003 }
3004 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
3005 return XML_TRUE;
3006 }
3007
3008 XML_Bool XMLCALL
3009 XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
3010 float maximumAmplificationFactor) {
3011 if ((parser == NULL) || (parser->m_parentParser != NULL)
3012 || isnan(maximumAmplificationFactor)
3013 || (maximumAmplificationFactor < 1.0f)) {
3014 return XML_FALSE;
3015 }
3016 parser->m_alloc_tracker.maximumAmplificationFactor
3017 = maximumAmplificationFactor;
3018 return XML_TRUE;
3019 }
3020
3021 XML_Bool XMLCALL
3022 XML_SetAllocTrackerActivationThreshold(
3023 XML_Parser parser, unsigned long long activationThresholdBytes) {
3024 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3025 return XML_FALSE;
3026 }
3027 parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
3028 return XML_TRUE;
3029 }
3030 #endif /* XML_GE == 1 */
3031
3032 XML_Bool XMLCALL
3033 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
3034 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
3035 parser->m_reparseDeferralEnabled = enabled;
3036 return XML_TRUE;
3037 }
3038 return XML_FALSE;
3039 }
3040
3041 /* Initially tag->rawName always points into the parse buffer;
3042 for those TAG instances opened while the current parse buffer was
3043 processed, and not yet closed, we need to store tag->rawName in a more
3044 permanent location, since the parse buffer is about to be discarded.
3045 */
3046 static XML_Bool
3047 storeRawNames(XML_Parser parser) {
3048 TAG *tag = parser->m_tagStack;
3049 while (tag) {
3050 size_t bufSize;
3051 size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
3052 size_t rawNameLen;
3053 char *rawNameBuf = tag->buf.raw + nameLen;
3054 /* Stop if already stored. Since m_tagStack is a stack, we can stop
3055 at the first entry that has already been copied; everything
3056 below it in the stack is already been accounted for in a
3057 previous call to this function.
3058 */
3059 if (tag->rawName == rawNameBuf)
3060 break;
3061 /* For reuse purposes we need to ensure that the
3062 size of tag->buf is a multiple of sizeof(XML_Char).
3063 */
3064 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
3065 /* Detect and prevent integer overflow. */
3066 if (rawNameLen > (size_t)INT_MAX - nameLen)
3067 return XML_FALSE;
3068 bufSize = nameLen + rawNameLen;
3069 if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) {
3070 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3071 if (temp == NULL)
3072 return XML_FALSE;
3073 /* if tag->name.str points to tag->buf.str (only when namespace
3074 processing is off) then we have to update it
3075 */
3076 if (tag->name.str == tag->buf.str)
3077 tag->name.str = (XML_Char *)temp;
3078 /* if tag->name.localPart is set (when namespace processing is on)
3079 then update it as well, since it will always point into tag->buf
3080 */
3081 if (tag->name.localPart)
3082 tag->name.localPart
3083 = (XML_Char *)temp + (tag->name.localPart - tag->buf.str);
3084 tag->buf.raw = temp;
3085 tag->bufEnd = temp + bufSize;
3086 rawNameBuf = temp + nameLen;
3087 }
3088 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
3089 tag->rawName = rawNameBuf;
3090 tag = tag->parent;
3091 }
3092 return XML_TRUE;
3093 }
3094
3095 static enum XML_Error PTRCALL
3096 contentProcessor(XML_Parser parser, const char *start, const char *end,
3097 const char **endPtr) {
3098 enum XML_Error result = doContent(
3099 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
3100 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3101 XML_ACCOUNT_DIRECT);
3102 if (result == XML_ERROR_NONE) {
3103 if (! storeRawNames(parser))
3104 return XML_ERROR_NO_MEMORY;
3105 }
3106 return result;
3107 }
3108
3109 static enum XML_Error PTRCALL
3110 externalEntityInitProcessor(XML_Parser parser, const char *start,
3111 const char *end, const char **endPtr) {
3112 enum XML_Error result = initializeEncoding(parser);
3113 if (result != XML_ERROR_NONE)
3114 return result;
3115 parser->m_processor = externalEntityInitProcessor2;
3116 return externalEntityInitProcessor2(parser, start, end, endPtr);
3117 }
3118
3119 static enum XML_Error PTRCALL
3120 externalEntityInitProcessor2(XML_Parser parser, const char *start,
3121 const char *end, const char **endPtr) {
3122 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3123 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
3124 switch (tok) {
3125 case XML_TOK_BOM:
3126 #if XML_GE == 1
3127 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
3128 XML_ACCOUNT_DIRECT)) {
3129 accountingOnAbort(parser);
3130 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3131 }
3132 #endif /* XML_GE == 1 */
3133
3134 /* If we are at the end of the buffer, this would cause the next stage,
3135 i.e. externalEntityInitProcessor3, to pass control directly to
3136 doContent (by detecting XML_TOK_NONE) without processing any xml text
3137 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
3138 */
3139 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
3140 *endPtr = next;
3141 return XML_ERROR_NONE;
3142 }
3143 start = next;
3144 break;
3145 case XML_TOK_PARTIAL:
3146 if (! parser->m_parsingStatus.finalBuffer) {
3147 *endPtr = start;
3148 return XML_ERROR_NONE;
3149 }
3150 parser->m_eventPtr = start;
3151 return XML_ERROR_UNCLOSED_TOKEN;
3152 case XML_TOK_PARTIAL_CHAR:
3153 if (! parser->m_parsingStatus.finalBuffer) {
3154 *endPtr = start;
3155 return XML_ERROR_NONE;
3156 }
3157 parser->m_eventPtr = start;
3158 return XML_ERROR_PARTIAL_CHAR;
3159 }
3160 parser->m_processor = externalEntityInitProcessor3;
3161 return externalEntityInitProcessor3(parser, start, end, endPtr);
3162 }
3163
3164 static enum XML_Error PTRCALL
3165 externalEntityInitProcessor3(XML_Parser parser, const char *start,
3166 const char *end, const char **endPtr) {
3167 int tok;
3168 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3169 parser->m_eventPtr = start;
3170 tok = XmlContentTok(parser->m_encoding, start, end, &next);
3171 /* Note: These bytes are accounted later in:
3172 - processXmlDecl
3173 - externalEntityContentProcessor
3174 */
3175 parser->m_eventEndPtr = next;
3176
3177 switch (tok) {
3178 case XML_TOK_XML_DECL: {
3179 enum XML_Error result;
3180 result = processXmlDecl(parser, 1, start, next);
3181 if (result != XML_ERROR_NONE)
3182 return result;
3183 switch (parser->m_parsingStatus.parsing) {
3184 case XML_SUSPENDED:
3185 *endPtr = next;
3186 return XML_ERROR_NONE;
3187 case XML_FINISHED:
3188 return XML_ERROR_ABORTED;
3189 case XML_PARSING:
3190 if (parser->m_reenter) {
3191 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
3192 }
3193 /* Fall through */
3194 default:
3195 start = next;
3196 }
3197 } break;
3198 case XML_TOK_PARTIAL:
3199 if (! parser->m_parsingStatus.finalBuffer) {
3200 *endPtr = start;
3201 return XML_ERROR_NONE;
3202 }
3203 return XML_ERROR_UNCLOSED_TOKEN;
3204 case XML_TOK_PARTIAL_CHAR:
3205 if (! parser->m_parsingStatus.finalBuffer) {
3206 *endPtr = start;
3207 return XML_ERROR_NONE;
3208 }
3209 return XML_ERROR_PARTIAL_CHAR;
3210 }
3211 parser->m_processor = externalEntityContentProcessor;
3212 parser->m_tagLevel = 1;
3213 return externalEntityContentProcessor(parser, start, end, endPtr);
3214 }
3215
3216 static enum XML_Error PTRCALL
3217 externalEntityContentProcessor(XML_Parser parser, const char *start,
3218 const char *end, const char **endPtr) {
3219 enum XML_Error result
3220 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
3221 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3222 XML_ACCOUNT_ENTITY_EXPANSION);
3223 if (result == XML_ERROR_NONE) {
3224 if (! storeRawNames(parser))
3225 return XML_ERROR_NO_MEMORY;
3226 }
3227 return result;
3228 }
3229
3230 static enum XML_Error
3231 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
3232 const char *s, const char *end, const char **nextPtr,
3233 XML_Bool haveMore, enum XML_Account account) {
3234 /* save one level of indirection */
3235 DTD *const dtd = parser->m_dtd;
3236
3237 const char **eventPP;
3238 const char **eventEndPP;
3239 if (enc == parser->m_encoding) {
3240 eventPP = &parser->m_eventPtr;
3241 eventEndPP = &parser->m_eventEndPtr;
3242 } else {
3243 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3244 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3245 }
3246 *eventPP = s;
3247
3248 for (;;) {
3249 const char *next = s; /* XmlContentTok doesn't always set the last arg */
3250 int tok = XmlContentTok(enc, s, end, &next);
3251 #if XML_GE == 1
3252 const char *accountAfter
3253 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
3254 ? (haveMore ? s /* i.e. 0 bytes */ : end)
3255 : next;
3256 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
3257 account)) {
3258 accountingOnAbort(parser);
3259 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3260 }
3261 #endif
3262 *eventEndPP = next;
3263 switch (tok) {
3264 case XML_TOK_TRAILING_CR:
3265 if (haveMore) {
3266 *nextPtr = s;
3267 return XML_ERROR_NONE;
3268 }
3269 *eventEndPP = end;
3270 if (parser->m_characterDataHandler) {
3271 XML_Char c = 0xA;
3272 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3273 } else if (parser->m_defaultHandler)
3274 reportDefault(parser, enc, s, end);
3275 /* We are at the end of the final buffer, should we check for
3276 XML_SUSPENDED, XML_FINISHED?
3277 */
3278 if (startTagLevel == 0)
3279 return XML_ERROR_NO_ELEMENTS;
3280 if (parser->m_tagLevel != startTagLevel)
3281 return XML_ERROR_ASYNC_ENTITY;
3282 *nextPtr = end;
3283 return XML_ERROR_NONE;
3284 case XML_TOK_NONE:
3285 if (haveMore) {
3286 *nextPtr = s;
3287 return XML_ERROR_NONE;
3288 }
3289 if (startTagLevel > 0) {
3290 if (parser->m_tagLevel != startTagLevel)
3291 return XML_ERROR_ASYNC_ENTITY;
3292 *nextPtr = s;
3293 return XML_ERROR_NONE;
3294 }
3295 return XML_ERROR_NO_ELEMENTS;
3296 case XML_TOK_INVALID:
3297 *eventPP = next;
3298 return XML_ERROR_INVALID_TOKEN;
3299 case XML_TOK_PARTIAL:
3300 if (haveMore) {
3301 *nextPtr = s;
3302 return XML_ERROR_NONE;
3303 }
3304 return XML_ERROR_UNCLOSED_TOKEN;
3305 case XML_TOK_PARTIAL_CHAR:
3306 if (haveMore) {
3307 *nextPtr = s;
3308 return XML_ERROR_NONE;
3309 }
3310 return XML_ERROR_PARTIAL_CHAR;
3311 case XML_TOK_ENTITY_REF: {
3312 const XML_Char *name;
3313 ENTITY *entity;
3314 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3315 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3316 if (ch) {
3317 #if XML_GE == 1
3318 /* NOTE: We are replacing 4-6 characters original input for 1 character
3319 * so there is no amplification and hence recording without
3320 * protection. */
3321 accountingDiffTolerated(parser, tok, (char *)&ch,
3322 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3323 XML_ACCOUNT_ENTITY_EXPANSION);
3324 #endif /* XML_GE == 1 */
3325 if (parser->m_characterDataHandler)
3326 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3327 else if (parser->m_defaultHandler)
3328 reportDefault(parser, enc, s, next);
3329 break;
3330 }
3331 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3332 next - enc->minBytesPerChar);
3333 if (! name)
3334 return XML_ERROR_NO_MEMORY;
3335 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3336 poolDiscard(&dtd->pool);
3337 /* First, determine if a check for an existing declaration is needed;
3338 if yes, check that the entity exists, and that it is internal,
3339 otherwise call the skipped entity or default handler.
3340 */
3341 if (! dtd->hasParamEntityRefs || dtd->standalone) {
3342 if (! entity)
3343 return XML_ERROR_UNDEFINED_ENTITY;
3344 else if (! entity->is_internal)
3345 return XML_ERROR_ENTITY_DECLARED_IN_PE;
3346 } else if (! entity) {
3347 if (parser->m_skippedEntityHandler)
3348 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3349 else if (parser->m_defaultHandler)
3350 reportDefault(parser, enc, s, next);
3351 break;
3352 }
3353 if (entity->open)
3354 return XML_ERROR_RECURSIVE_ENTITY_REF;
3355 if (entity->notation)
3356 return XML_ERROR_BINARY_ENTITY_REF;
3357 if (entity->textPtr) {
3358 enum XML_Error result;
3359 if (! parser->m_defaultExpandInternalEntities) {
3360 if (parser->m_skippedEntityHandler)
3361 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3362 0);
3363 else if (parser->m_defaultHandler)
3364 reportDefault(parser, enc, s, next);
3365 break;
3366 }
3367 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3368 if (result != XML_ERROR_NONE)
3369 return result;
3370 } else if (parser->m_externalEntityRefHandler) {
3371 const XML_Char *context;
3372 entity->open = XML_TRUE;
3373 context = getContext(parser);
3374 entity->open = XML_FALSE;
3375 if (! context)
3376 return XML_ERROR_NO_MEMORY;
3377 if (! parser->m_externalEntityRefHandler(
3378 parser->m_externalEntityRefHandlerArg, context, entity->base,
3379 entity->systemId, entity->publicId))
3380 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3381 poolDiscard(&parser->m_tempPool);
3382 } else if (parser->m_defaultHandler)
3383 reportDefault(parser, enc, s, next);
3384 break;
3385 }
3386 case XML_TOK_START_TAG_NO_ATTS:
3387 /* fall through */
3388 case XML_TOK_START_TAG_WITH_ATTS: {
3389 TAG *tag;
3390 enum XML_Error result;
3391 XML_Char *toPtr;
3392 if (parser->m_freeTagList) {
3393 tag = parser->m_freeTagList;
3394 parser->m_freeTagList = parser->m_freeTagList->parent;
3395 } else {
3396 tag = MALLOC(parser, sizeof(TAG));
3397 if (! tag)
3398 return XML_ERROR_NO_MEMORY;
3399 tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE);
3400 if (! tag->buf.raw) {
3401 FREE(parser, tag);
3402 return XML_ERROR_NO_MEMORY;
3403 }
3404 tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE;
3405 }
3406 tag->bindings = NULL;
3407 tag->parent = parser->m_tagStack;
3408 parser->m_tagStack = tag;
3409 tag->name.localPart = NULL;
3410 tag->name.prefix = NULL;
3411 tag->rawName = s + enc->minBytesPerChar;
3412 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3413 ++parser->m_tagLevel;
3414 {
3415 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3416 const char *fromPtr = tag->rawName;
3417 toPtr = tag->buf.str;
3418 for (;;) {
3419 int convLen;
3420 const enum XML_Convert_Result convert_res
3421 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3422 (ICHAR *)tag->bufEnd - 1);
3423 convLen = (int)(toPtr - tag->buf.str);
3424 if ((fromPtr >= rawNameEnd)
3425 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3426 tag->name.strLen = convLen;
3427 break;
3428 }
3429 if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw))
3430 return XML_ERROR_NO_MEMORY;
3431 const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2;
3432 {
3433 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3434 if (temp == NULL)
3435 return XML_ERROR_NO_MEMORY;
3436 tag->buf.raw = temp;
3437 tag->bufEnd = temp + bufSize;
3438 toPtr = (XML_Char *)temp + convLen;
3439 }
3440 }
3441 }
3442 tag->name.str = tag->buf.str;
3443 *toPtr = XML_T('\0');
3444 result
3445 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3446 if (result)
3447 return result;
3448 if (parser->m_startElementHandler)
3449 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3450 (const XML_Char **)parser->m_atts);
3451 else if (parser->m_defaultHandler)
3452 reportDefault(parser, enc, s, next);
3453 poolClear(&parser->m_tempPool);
3454 break;
3455 }
3456 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3457 /* fall through */
3458 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3459 const char *rawName = s + enc->minBytesPerChar;
3460 enum XML_Error result;
3461 BINDING *bindings = NULL;
3462 XML_Bool noElmHandlers = XML_TRUE;
3463 TAG_NAME name;
3464 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3465 rawName + XmlNameLength(enc, rawName));
3466 if (! name.str)
3467 return XML_ERROR_NO_MEMORY;
3468 poolFinish(&parser->m_tempPool);
3469 result = storeAtts(parser, enc, s, &name, &bindings,
3470 XML_ACCOUNT_NONE /* token spans whole start tag */);
3471 if (result != XML_ERROR_NONE) {
3472 freeBindings(parser, bindings);
3473 return result;
3474 }
3475 poolFinish(&parser->m_tempPool);
3476 if (parser->m_startElementHandler) {
3477 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3478 (const XML_Char **)parser->m_atts);
3479 noElmHandlers = XML_FALSE;
3480 }
3481 if (parser->m_endElementHandler) {
3482 if (parser->m_startElementHandler)
3483 *eventPP = *eventEndPP;
3484 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3485 noElmHandlers = XML_FALSE;
3486 }
3487 if (noElmHandlers && parser->m_defaultHandler)
3488 reportDefault(parser, enc, s, next);
3489 poolClear(&parser->m_tempPool);
3490 freeBindings(parser, bindings);
3491 }
3492 if ((parser->m_tagLevel == 0)
3493 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3494 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3495 || (parser->m_parsingStatus.parsing == XML_PARSING
3496 && parser->m_reenter))
3497 parser->m_processor = epilogProcessor;
3498 else
3499 return epilogProcessor(parser, next, end, nextPtr);
3500 }
3501 break;
3502 case XML_TOK_END_TAG:
3503 if (parser->m_tagLevel == startTagLevel)
3504 return XML_ERROR_ASYNC_ENTITY;
3505 else {
3506 int len;
3507 const char *rawName;
3508 TAG *tag = parser->m_tagStack;
3509 rawName = s + enc->minBytesPerChar * 2;
3510 len = XmlNameLength(enc, rawName);
3511 if (len != tag->rawNameLength
3512 || memcmp(tag->rawName, rawName, len) != 0) {
3513 *eventPP = rawName;
3514 return XML_ERROR_TAG_MISMATCH;
3515 }
3516 parser->m_tagStack = tag->parent;
3517 tag->parent = parser->m_freeTagList;
3518 parser->m_freeTagList = tag;
3519 --parser->m_tagLevel;
3520 if (parser->m_endElementHandler) {
3521 const XML_Char *localPart;
3522 const XML_Char *prefix;
3523 XML_Char *uri;
3524 localPart = tag->name.localPart;
3525 if (parser->m_ns && localPart) {
3526 /* localPart and prefix may have been overwritten in
3527 tag->name.str, since this points to the binding->uri
3528 buffer which gets reused; so we have to add them again
3529 */
3530 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3531 /* don't need to check for space - already done in storeAtts() */
3532 while (*localPart)
3533 *uri++ = *localPart++;
3534 prefix = tag->name.prefix;
3535 if (parser->m_ns_triplets && prefix) {
3536 *uri++ = parser->m_namespaceSeparator;
3537 while (*prefix)
3538 *uri++ = *prefix++;
3539 }
3540 *uri = XML_T('\0');
3541 }
3542 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3543 } else if (parser->m_defaultHandler)
3544 reportDefault(parser, enc, s, next);
3545 while (tag->bindings) {
3546 BINDING *b = tag->bindings;
3547 if (parser->m_endNamespaceDeclHandler)
3548 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3549 b->prefix->name);
3550 tag->bindings = tag->bindings->nextTagBinding;
3551 b->nextTagBinding = parser->m_freeBindingList;
3552 parser->m_freeBindingList = b;
3553 b->prefix->binding = b->prevPrefixBinding;
3554 }
3555 if ((parser->m_tagLevel == 0)
3556 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3557 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3558 || (parser->m_parsingStatus.parsing == XML_PARSING
3559 && parser->m_reenter))
3560 parser->m_processor = epilogProcessor;
3561 else
3562 return epilogProcessor(parser, next, end, nextPtr);
3563 }
3564 }
3565 break;
3566 case XML_TOK_CHAR_REF: {
3567 int n = XmlCharRefNumber(enc, s);
3568 if (n < 0)
3569 return XML_ERROR_BAD_CHAR_REF;
3570 if (parser->m_characterDataHandler) {
3571 XML_Char buf[XML_ENCODE_MAX];
3572 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3573 XmlEncode(n, (ICHAR *)buf));
3574 } else if (parser->m_defaultHandler)
3575 reportDefault(parser, enc, s, next);
3576 } break;
3577 case XML_TOK_XML_DECL:
3578 return XML_ERROR_MISPLACED_XML_PI;
3579 case XML_TOK_DATA_NEWLINE:
3580 if (parser->m_characterDataHandler) {
3581 XML_Char c = 0xA;
3582 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3583 } else if (parser->m_defaultHandler)
3584 reportDefault(parser, enc, s, next);
3585 break;
3586 case XML_TOK_CDATA_SECT_OPEN: {
3587 enum XML_Error result;
3588 if (parser->m_startCdataSectionHandler)
3589 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3590 /* BEGIN disabled code */
3591 /* Suppose you doing a transformation on a document that involves
3592 changing only the character data. You set up a defaultHandler
3593 and a characterDataHandler. The defaultHandler simply copies
3594 characters through. The characterDataHandler does the
3595 transformation and writes the characters out escaping them as
3596 necessary. This case will fail to work if we leave out the
3597 following two lines (because & and < inside CDATA sections will
3598 be incorrectly escaped).
3599
3600 However, now we have a start/endCdataSectionHandler, so it seems
3601 easier to let the user deal with this.
3602 */
3603 else if ((0) && parser->m_characterDataHandler)
3604 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3605 0);
3606 /* END disabled code */
3607 else if (parser->m_defaultHandler)
3608 reportDefault(parser, enc, s, next);
3609 result
3610 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3611 if (result != XML_ERROR_NONE)
3612 return result;
3613 else if (! next) {
3614 parser->m_processor = cdataSectionProcessor;
3615 return result;
3616 }
3617 } break;
3618 case XML_TOK_TRAILING_RSQB:
3619 if (haveMore) {
3620 *nextPtr = s;
3621 return XML_ERROR_NONE;
3622 }
3623 if (parser->m_characterDataHandler) {
3624 if (MUST_CONVERT(enc, s)) {
3625 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3626 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3627 parser->m_characterDataHandler(
3628 parser->m_handlerArg, parser->m_dataBuf,
3629 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3630 } else
3631 parser->m_characterDataHandler(
3632 parser->m_handlerArg, (const XML_Char *)s,
3633 (int)((const XML_Char *)end - (const XML_Char *)s));
3634 } else if (parser->m_defaultHandler)
3635 reportDefault(parser, enc, s, end);
3636 /* We are at the end of the final buffer, should we check for
3637 XML_SUSPENDED, XML_FINISHED?
3638 */
3639 if (startTagLevel == 0) {
3640 *eventPP = end;
3641 return XML_ERROR_NO_ELEMENTS;
3642 }
3643 if (parser->m_tagLevel != startTagLevel) {
3644 *eventPP = end;
3645 return XML_ERROR_ASYNC_ENTITY;
3646 }
3647 *nextPtr = end;
3648 return XML_ERROR_NONE;
3649 case XML_TOK_DATA_CHARS: {
3650 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3651 if (charDataHandler) {
3652 if (MUST_CONVERT(enc, s)) {
3653 for (;;) {
3654 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3655 const enum XML_Convert_Result convert_res = XmlConvert(
3656 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3657 *eventEndPP = s;
3658 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3659 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3660 if ((convert_res == XML_CONVERT_COMPLETED)
3661 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3662 break;
3663 *eventPP = s;
3664 }
3665 } else
3666 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3667 (int)((const XML_Char *)next - (const XML_Char *)s));
3668 } else if (parser->m_defaultHandler)
3669 reportDefault(parser, enc, s, next);
3670 } break;
3671 case XML_TOK_PI:
3672 if (! reportProcessingInstruction(parser, enc, s, next))
3673 return XML_ERROR_NO_MEMORY;
3674 break;
3675 case XML_TOK_COMMENT:
3676 if (! reportComment(parser, enc, s, next))
3677 return XML_ERROR_NO_MEMORY;
3678 break;
3679 default:
3680 /* All of the tokens produced by XmlContentTok() have their own
3681 * explicit cases, so this default is not strictly necessary.
3682 * However it is a useful safety net, so we retain the code and
3683 * simply exclude it from the coverage tests.
3684 *
3685 * LCOV_EXCL_START
3686 */
3687 if (parser->m_defaultHandler)
3688 reportDefault(parser, enc, s, next);
3689 break;
3690 /* LCOV_EXCL_STOP */
3691 }
3692 switch (parser->m_parsingStatus.parsing) {
3693 case XML_SUSPENDED:
3694 *eventPP = next;
3695 *nextPtr = next;
3696 return XML_ERROR_NONE;
3697 case XML_FINISHED:
3698 *eventPP = next;
3699 return XML_ERROR_ABORTED;
3700 case XML_PARSING:
3701 if (parser->m_reenter) {
3702 *nextPtr = next;
3703 return XML_ERROR_NONE;
3704 }
3705 /* Fall through */
3706 default:;
3707 *eventPP = s = next;
3708 }
3709 }
3710 /* not reached */
3711 }
3712
3713 /* This function does not call free() on the allocated memory, merely
3714 * moving it to the parser's m_freeBindingList where it can be freed or
3715 * reused as appropriate.
3716 */
3717 static void
3718 freeBindings(XML_Parser parser, BINDING *bindings) {
3719 while (bindings) {
3720 BINDING *b = bindings;
3721
3722 /* m_startNamespaceDeclHandler will have been called for this
3723 * binding in addBindings(), so call the end handler now.
3724 */
3725 if (parser->m_endNamespaceDeclHandler)
3726 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3727
3728 bindings = bindings->nextTagBinding;
3729 b->nextTagBinding = parser->m_freeBindingList;
3730 parser->m_freeBindingList = b;
3731 b->prefix->binding = b->prevPrefixBinding;
3732 }
3733 }
3734
/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts / m_tempPool / m_nsAtts state is used
   - enc:         encoding of the raw attribute text at attStr
   - attStr:      raw start-tag text handed to XmlGetAttributes()
   - tagNamePtr:  element name; str is used for the element type lookup and,
                  with namespace processing on, str/localPart/uriLen/prefix/
                  prefixLen are rewritten to describe the expanded name
   - bindingsPtr: head of the list that receives bindings created via
                  addBinding() for namespace declarations on this tag
   - account:     passed through unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success, otherwise XML_ERROR_NO_MEMORY,
   XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_UNBOUND_PREFIX, or whatever
   storeAttributeValue() / addBinding() reported.

   Bookkeeping used throughout: the XML_Char stored immediately before an
   attribute name (name[-1]) serves as a per-call flag: 0 = not seen,
   1 = present on this element, 2 = present and prefixed (still needs its
   name expanded). All flags are cleared again before a successful return.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;         /* next free slot in appAtts (name/value pairs) */
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0; /* number of prefixed, non-xmlns attributes collected */
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  /* First query with size 0; only when that yields nothing is the name
     copied into the DTD pool and looked up again with a real entry size
     (presumably the size-0 form never inserts -- confirm in lookup()). */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (! elementType->defaultAttsNames.parser)
      hashTableInit(&(elementType->defaultAttsNames), parser);
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* Grow m_atts (and m_attInfo) when specified plus defaulted attributes
     do not fit; on any failure m_attsSize is rolled back to its old value
     so that it keeps describing the memory actually allocated. */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = REALLOC(parser, parser->m_atts,
                   parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
# if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
# endif

    temp2 = REALLOC(parser, parser->m_attInfo,
                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* The first tokenizer pass could only fill oldAttsSize entries; run it
       again now that the array can hold all n of them. */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* appAtts reuses the m_atts storage as a flat array of name/value
     pointer pairs that is terminated with a null pointer further down. */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* Offsets are computed backwards from the end of the parse buffer:
       m_parseEndByteIndex minus the distance from m_parseEndPtr. */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
     */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1; /* flag: attribute seen on this element */
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* Step back over the name slot written above: a namespace
           declaration is not kept in the application's attribute list. */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2; /* flag: prefixed, expand name below */
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  /* A non-zero flag on the ID attribute's name means it was specified
     above; find the index of its name slot in the pair list. */
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  /* Only defaults that carry a value and whose attribute was not already
     flagged as present are applied; they follow the same three-way split
     (xmlns declaration / prefixed / plain) as specified attributes. */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0; /* null pointer terminates the pair list */

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    unsigned int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* A slot counts as occupied only while its stored version equals the
       value chosen here, so decrementing empties the table logically. */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* Build the expanded name in m_tempPool: first the bound URI ... */
        for (j = 0; j < (unsigned int)b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* ... skip the "prefix:" part of the qualified name ... */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        /* ... then append the local part. */
        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        /* The hash covers URI + local part only, never the prefix. */
        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* Probe step is computed lazily, on the first collision;
               the index then walks downwards and wraps around. */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* overwrite the terminator copied above with the separator */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* Once every prefixed name is expanded, leave the loop; the
           remaining flags are cleared by the cheaper loop below. */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations are not in appAtts, so clear their flags via
     the bindings list */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    /* step past "prefix:" so that localPart starts after the colon */
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE; /* no namespace applies; name stays as it is */
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    while (binding->prefix->name[prefixLen++])
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* n = XML_Chars needed for URI + local part + optional prefix; the
     expanded name is assembled in place inside the binding's uri buffer. */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* Open tags whose name still points at the old buffer must follow it
       to the new one before the old buffer is released. */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
4214
4215 static XML_Bool
4216 is_rfc3986_uri_char(XML_Char candidate) {
4217 // For the RFC 3986 ANBF grammar see
4218 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4219
4220 switch (candidate) {
4221 // From rule "ALPHA" (uppercase half)
4222 case 'A':
4223 case 'B':
4224 case 'C':
4225 case 'D':
4226 case 'E':
4227 case 'F':
4228 case 'G':
4229 case 'H':
4230 case 'I':
4231 case 'J':
4232 case 'K':
4233 case 'L':
4234 case 'M':
4235 case 'N':
4236 case 'O':
4237 case 'P':
4238 case 'Q':
4239 case 'R':
4240 case 'S':
4241 case 'T':
4242 case 'U':
4243 case 'V':
4244 case 'W':
4245 case 'X':
4246 case 'Y':
4247 case 'Z':
4248
4249 // From rule "ALPHA" (lowercase half)
4250 case 'a':
4251 case 'b':
4252 case 'c':
4253 case 'd':
4254 case 'e':
4255 case 'f':
4256 case 'g':
4257 case 'h':
4258 case 'i':
4259 case 'j':
4260 case 'k':
4261 case 'l':
4262 case 'm':
4263 case 'n':
4264 case 'o':
4265 case 'p':
4266 case 'q':
4267 case 'r':
4268 case 's':
4269 case 't':
4270 case 'u':
4271 case 'v':
4272 case 'w':
4273 case 'x':
4274 case 'y':
4275 case 'z':
4276
4277 // From rule "DIGIT"
4278 case '0':
4279 case '1':
4280 case '2':
4281 case '3':
4282 case '4':
4283 case '5':
4284 case '6':
4285 case '7':
4286 case '8':
4287 case '9':
4288
4289 // From rule "pct-encoded"
4290 case '%':
4291
4292 // From rule "unreserved"
4293 case '-':
4294 case '.':
4295 case '_':
4296 case '~':
4297
4298 // From rule "gen-delims"
4299 case ':':
4300 case '/':
4301 case '?':
4302 case '#':
4303 case '[':
4304 case ']':
4305 case '@':
4306
4307 // From rule "sub-delims"
4308 case '!':
4309 case '$':
4310 case '&':
4311 case '\'':
4312 case '(':
4313 case ')':
4314 case '*':
4315 case '+':
4316 case ',':
4317 case ';':
4318 case '=':
4319 return XML_TRUE;
4320
4321 default:
4322 return XML_FALSE;
4323 }
4324 }
4325
4326 /* addBinding() overwrites the value of prefix->binding without checking.
4327 Therefore one must keep track of the old value outside of addBinding().
4328 */
4329 static enum XML_Error
4330 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4331 const XML_Char *uri, BINDING **bindingsPtr) {
4332 // "http://www.w3.org/XML/1998/namespace"
4333 static const XML_Char xmlNamespace[]
4334 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4335 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4336 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4337 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4338 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4339 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4340 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4341 ASCII_e, '\0'};
4342 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4343 // "http://www.w3.org/2000/xmlns/"
4344 static const XML_Char xmlnsNamespace[]
4345 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4346 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4347 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4348 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4349 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4350 static const int xmlnsLen
4351 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4352
4353 XML_Bool mustBeXML = XML_FALSE;
4354 XML_Bool isXML = XML_TRUE;
4355 XML_Bool isXMLNS = XML_TRUE;
4356
4357 BINDING *b;
4358 int len;
4359
4360 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4361 if (*uri == XML_T('\0') && prefix->name)
4362 return XML_ERROR_UNDECLARING_PREFIX;
4363
4364 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4365 && prefix->name[1] == XML_T(ASCII_m)
4366 && prefix->name[2] == XML_T(ASCII_l)) {
4367 /* Not allowed to bind xmlns */
4368 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4369 && prefix->name[5] == XML_T('\0'))
4370 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4371
4372 if (prefix->name[3] == XML_T('\0'))
4373 mustBeXML = XML_TRUE;
4374 }
4375
4376 for (len = 0; uri[len]; len++) {
4377 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4378 isXML = XML_FALSE;
4379
4380 if (! mustBeXML && isXMLNS
4381 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4382 isXMLNS = XML_FALSE;
4383
4384 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4385 // today (and is not REQUIRED to do so with regard to the XML 1.0
4386 // namespaces specification) we have to at least make sure, that
4387 // the application on top of Expat (that is likely splitting expanded
4388 // element names ("qualified names") of form
4389 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4390 // in its element handler code) cannot be confused by an attacker
4391 // putting additional namespace separator characters into namespace
4392 // declarations. That would be ambiguous and not to be expected.
4393 //
4394 // While the HTML API docs of function XML_ParserCreateNS have been
4395 // advising against use of a namespace separator character that can
4396 // appear in a URI for >20 years now, some widespread applications
4397 // are using URI characters (':' (colon) in particular) for a
4398 // namespace separator, in practice. To keep these applications
4399 // functional, we only reject namespaces URIs containing the
4400 // application-chosen namespace separator if the chosen separator
4401 // is a non-URI character with regard to RFC 3986.
4402 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4403 && ! is_rfc3986_uri_char(uri[len])) {
4404 return XML_ERROR_SYNTAX;
4405 }
4406 }
4407 isXML = isXML && len == xmlLen;
4408 isXMLNS = isXMLNS && len == xmlnsLen;
4409
4410 if (mustBeXML != isXML)
4411 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4412 : XML_ERROR_RESERVED_NAMESPACE_URI;
4413
4414 if (isXMLNS)
4415 return XML_ERROR_RESERVED_NAMESPACE_URI;
4416
4417 if (parser->m_namespaceSeparator)
4418 len++;
4419 if (parser->m_freeBindingList) {
4420 b = parser->m_freeBindingList;
4421 if (len > b->uriAlloc) {
4422 /* Detect and prevent integer overflow */
4423 if (len > INT_MAX - EXPAND_SPARE) {
4424 return XML_ERROR_NO_MEMORY;
4425 }
4426
4427 /* Detect and prevent integer overflow.
4428 * The preprocessor guard addresses the "always false" warning
4429 * from -Wtype-limits on platforms where
4430 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4431 #if UINT_MAX >= SIZE_MAX
4432 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4433 return XML_ERROR_NO_MEMORY;
4434 }
4435 #endif
4436
4437 XML_Char *temp
4438 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4439 if (temp == NULL)
4440 return XML_ERROR_NO_MEMORY;
4441 b->uri = temp;
4442 b->uriAlloc = len + EXPAND_SPARE;
4443 }
4444 parser->m_freeBindingList = b->nextTagBinding;
4445 } else {
4446 b = MALLOC(parser, sizeof(BINDING));
4447 if (! b)
4448 return XML_ERROR_NO_MEMORY;
4449
4450 /* Detect and prevent integer overflow */
4451 if (len > INT_MAX - EXPAND_SPARE) {
4452 return XML_ERROR_NO_MEMORY;
4453 }
4454 /* Detect and prevent integer overflow.
4455 * The preprocessor guard addresses the "always false" warning
4456 * from -Wtype-limits on platforms where
4457 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4458 #if UINT_MAX >= SIZE_MAX
4459 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4460 return XML_ERROR_NO_MEMORY;
4461 }
4462 #endif
4463
4464 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4465 if (! b->uri) {
4466 FREE(parser, b);
4467 return XML_ERROR_NO_MEMORY;
4468 }
4469 b->uriAlloc = len + EXPAND_SPARE;
4470 }
4471 b->uriLen = len;
4472 memcpy(b->uri, uri, len * sizeof(XML_Char));
4473 if (parser->m_namespaceSeparator)
4474 b->uri[len - 1] = parser->m_namespaceSeparator;
4475 b->prefix = prefix;
4476 b->attId = attId;
4477 b->prevPrefixBinding = prefix->binding;
4478 /* NULL binding when default namespace undeclared */
4479 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4480 prefix->binding = NULL;
4481 else
4482 prefix->binding = b;
4483 b->nextTagBinding = *bindingsPtr;
4484 *bindingsPtr = b;
4485 /* if attId == NULL then we are not starting a namespace scope */
4486 if (attId && parser->m_startNamespaceDeclHandler)
4487 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4488 prefix->binding ? uri : 0);
4489 return XML_ERROR_NONE;
4490 }
4491
4492 /* The idea here is to avoid using stack for each CDATA section when
4493 the whole file is parsed with one call.
4494 */
4495 static enum XML_Error PTRCALL
4496 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4497 const char **endPtr) {
4498 enum XML_Error result = doCdataSection(
4499 parser, parser->m_encoding, &start, end, endPtr,
4500 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4501 if (result != XML_ERROR_NONE)
4502 return result;
4503 if (start) {
4504 if (parser->m_parentParser) { /* we are parsing an external entity */
4505 parser->m_processor = externalEntityContentProcessor;
4506 return externalEntityContentProcessor(parser, start, end, endPtr);
4507 } else {
4508 parser->m_processor = contentProcessor;
4509 return contentProcessor(parser, start, end, endPtr);
4510 }
4511 }
4512 return result;
4513 }
4514
/* Tokenizes the content of a CDATA section beginning at *startPtr and
   reports it through the parser's handlers.

   parser   - parser whose handlers, data buffer and event pointers are used
   enc      - encoding used for tokenizing; when it differs from
              parser->m_encoding the event pointers of the currently open
              internal entity are updated instead of the parser's own
   startPtr - in: start of the data to scan.  out: startPtr gets set to
              non-null if the section is closed, and to null if the section
              is not yet closed.
   end      - end of the available input
   nextPtr  - out: position up to which input has been consumed; written
              when the section closes, when parsing is suspended, and when
              an incomplete token is left for later (haveMore)
   haveMore - if true, an incomplete token at the end of the input is not an
              error: *nextPtr is set to the token start and XML_ERROR_NONE is
              returned
   account  - passed on to accountingDiffTolerated() (only if XML_GE == 1)

   Returns XML_ERROR_NONE or the error code describing why scanning stopped.
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  /* Select which pair of event pointers tracks the current token */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  /* Stays NULL unless XML_TOK_CDATA_SECT_CLOSE is seen below */
  *startPtr = NULL;

  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#if XML_GE == 1
    /* Account for the bytes of this token; abort if the limit is breached */
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      /* Section closed: report the position just past the closing token */
      *startPtr = next;
      *nextPtr = next;
      /* The parsing status is checked only after the handler call above */
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* The character data handler always receives a single 0xA here,
         regardless of the bytes between s and next */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert through m_dataBuf and call the handler once per
             XmlConvert() call, until XmlConvert() reports that it completed
             or that the remaining input is incomplete */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            /* XmlConvert() advanced s; the next piece starts there */
            *eventPP = s;
          }
        } else
          /* No conversion needed: pass the input bytes straight through */
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      /* Incomplete character: wait for more input if any is expected */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      /* Input ended inside the section: wait for more if any is expected */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* Reached only after the NEWLINE / DATA_CHARS cases: look at the
       parsing status before moving on to the next token */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *eventPP = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      *eventPP = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
      }
      /* Fall through */
    default:;
      /* Advance to the next token */
      *eventPP = s = next;
    }
  }
  /* not reached */
}
4643
4644 #ifdef XML_DTD
4645
4646 /* The idea here is to avoid using stack for each IGNORE section when
4647 the whole file is parsed with one call.
4648 */
4649 static enum XML_Error PTRCALL
4650 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4651 const char **endPtr) {
4652 enum XML_Error result
4653 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4654 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4655 if (result != XML_ERROR_NONE)
4656 return result;
4657 if (start) {
4658 parser->m_processor = prologProcessor;
4659 return prologProcessor(parser, start, end, endPtr);
4660 }
4661 return result;
4662 }
4663
4664 /* startPtr gets set to non-null is the section is closed, and to null
4665 if the section is not yet closed.
4666 */
4667 static enum XML_Error
4668 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4669 const char *end, const char **nextPtr, XML_Bool haveMore) {
4670 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4671 int tok;
4672 const char *s = *startPtr;
4673 const char **eventPP;
4674 const char **eventEndPP;
4675 if (enc == parser->m_encoding) {
4676 eventPP = &parser->m_eventPtr;
4677 *eventPP = s;
4678 eventEndPP = &parser->m_eventEndPtr;
4679 } else {
4680 /* It's not entirely clear, but it seems the following two lines
4681 * of code cannot be executed. The only occasions on which 'enc'
4682 * is not 'encoding' are when this function is called
4683 * from the internal entity processing, and IGNORE sections are an
4684 * error in internal entities.
4685 *
4686 * Since it really isn't clear that this is true, we keep the code
4687 * and just remove it from our coverage tests.
4688 *
4689 * LCOV_EXCL_START
4690 */
4691 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4692 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4693 /* LCOV_EXCL_STOP */
4694 }
4695 *eventPP = s;
4696 *startPtr = NULL;
4697 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4698 # if XML_GE == 1
4699 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4700 XML_ACCOUNT_DIRECT)) {
4701 accountingOnAbort(parser);
4702 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4703 }
4704 # endif
4705 *eventEndPP = next;
4706 switch (tok) {
4707 case XML_TOK_IGNORE_SECT:
4708 if (parser->m_defaultHandler)
4709 reportDefault(parser, enc, s, next);
4710 *startPtr = next;
4711 *nextPtr = next;
4712 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4713 return XML_ERROR_ABORTED;
4714 else
4715 return XML_ERROR_NONE;
4716 case XML_TOK_INVALID:
4717 *eventPP = next;
4718 return XML_ERROR_INVALID_TOKEN;
4719 case XML_TOK_PARTIAL_CHAR:
4720 if (haveMore) {
4721 *nextPtr = s;
4722 return XML_ERROR_NONE;
4723 }
4724 return XML_ERROR_PARTIAL_CHAR;
4725 case XML_TOK_PARTIAL:
4726 case XML_TOK_NONE:
4727 if (haveMore) {
4728 *nextPtr = s;
4729 return XML_ERROR_NONE;
4730 }
4731 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4732 default:
4733 /* All of the tokens that XmlIgnoreSectionTok() returns have
4734 * explicit cases to handle them, so this default case is never
4735 * executed. We keep it as a safety net anyway, and remove it
4736 * from our test coverage statistics.
4737 *
4738 * LCOV_EXCL_START
4739 */
4740 *eventPP = next;
4741 return XML_ERROR_UNEXPECTED_STATE;
4742 /* LCOV_EXCL_STOP */
4743 }
4744 /* not reached */
4745 }
4746
4747 #endif /* XML_DTD */
4748
4749 static enum XML_Error
4750 initializeEncoding(XML_Parser parser) {
4751 const char *s;
4752 #ifdef XML_UNICODE
4753 char encodingBuf[128];
4754 /* See comments about `protocolEncodingName` in parserInit() */
4755 if (! parser->m_protocolEncodingName)
4756 s = NULL;
4757 else {
4758 int i;
4759 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4760 if (i == sizeof(encodingBuf) - 1
4761 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4762 encodingBuf[0] = '\0';
4763 break;
4764 }
4765 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4766 }
4767 encodingBuf[i] = '\0';
4768 s = encodingBuf;
4769 }
4770 #else
4771 s = parser->m_protocolEncodingName;
4772 #endif
4773 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4774 &parser->m_initEncoding, &parser->m_encoding, s))
4775 return XML_ERROR_NONE;
4776 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4777 }
4778
/* Handles an XML declaration or text declaration located at [s, next).

   parser              - parser whose state and handlers are used
   isGeneralTextEntity - non-zero when the declaration is a text declaration;
                         a parse failure is then reported as
                         XML_ERROR_TEXT_DECL instead of XML_ERROR_XML_DECL,
                         and the standalone flag is not applied
   s, next             - start and end of the declaration in the input

   Steps: account for the bytes (XML_GE == 1 only), parse the declaration,
   record standalone="yes", report the declaration to the XML declaration
   handler (or the default handler), and - only if no protocol encoding name
   was set on the parser - switch to the encoding named in the declaration.

   Returns XML_ERROR_NONE on success or an error code otherwise.
*/
static enum XML_Error
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
               const char *next) {
  const char *encodingName = NULL;
  const XML_Char *storedEncName = NULL;
  const ENCODING *newEncoding = NULL;
  const char *version = NULL;
  const char *versionend = NULL;
  const XML_Char *storedversion = NULL;
  int standalone = -1;

#if XML_GE == 1
  if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
                                XML_ACCOUNT_DIRECT)) {
    accountingOnAbort(parser);
    return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
  }
#endif

  /* Parse the declaration; the out-parameters that are not filled in keep
     the initial values assigned above */
  if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
          isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
          &version, &versionend, &encodingName, &newEncoding, &standalone)) {
    if (isGeneralTextEntity)
      return XML_ERROR_TEXT_DECL;
    else
      return XML_ERROR_XML_DECL;
  }
  if (! isGeneralTextEntity && standalone == 1) {
    parser->m_dtd->standalone = XML_TRUE;
#ifdef XML_DTD
    /* A standalone document turns "unless standalone" into "never" */
    if (parser->m_paramEntityParsing
        == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
      parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif /* XML_DTD */
  }
  if (parser->m_xmlDeclHandler) {
    /* Copy the encoding name and version into m_temp2Pool so that they can
       be passed to the handler as XML_Char strings */
    if (encodingName != NULL) {
      storedEncName = poolStoreString(
          &parser->m_temp2Pool, parser->m_encoding, encodingName,
          encodingName + XmlNameLength(parser->m_encoding, encodingName));
      if (! storedEncName)
        return XML_ERROR_NO_MEMORY;
      /* Finish the string so the version can be stored after it */
      poolFinish(&parser->m_temp2Pool);
    }
    if (version) {
      /* The stored range stops one minimum-size character before
         versionend (presumably the closing quote - TODO confirm) */
      storedversion
          = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
                            versionend - parser->m_encoding->minBytesPerChar);
      if (! storedversion)
        return XML_ERROR_NO_MEMORY;
    }
    parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
                             standalone);
  } else if (parser->m_defaultHandler)
    reportDefault(parser, parser->m_encoding, s, next);
  /* The declared encoding is only acted upon when no protocol encoding name
     was set on the parser */
  if (parser->m_protocolEncodingName == NULL) {
    if (newEncoding) {
      /* Check that the specified encoding does not conflict with what
       * the parser has already deduced.  Do we have the same number
       * of bytes in the smallest representation of a character?  If
       * this is UTF-16, is it the same endianness?
       */
      if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
          || (newEncoding->minBytesPerChar == 2
              && newEncoding != parser->m_encoding)) {
        parser->m_eventPtr = encodingName;
        return XML_ERROR_INCORRECT_ENCODING;
      }
      parser->m_encoding = newEncoding;
    } else if (encodingName) {
      /* An encoding was named but newEncoding was not set: ask
         handleUnknownEncoding() to resolve it */
      enum XML_Error result;
      if (! storedEncName) {
        /* Not stored above because no XML declaration handler is set */
        storedEncName = poolStoreString(
            &parser->m_temp2Pool, parser->m_encoding, encodingName,
            encodingName + XmlNameLength(parser->m_encoding, encodingName));
        if (! storedEncName)
          return XML_ERROR_NO_MEMORY;
      }
      result = handleUnknownEncoding(parser, storedEncName);
      poolClear(&parser->m_temp2Pool);
      /* Point the error position at the encoding name */
      if (result == XML_ERROR_UNKNOWN_ENCODING)
        parser->m_eventPtr = encodingName;
      return result;
    }
  }

  /* Release the temporary copies made for the handler */
  if (storedEncName || storedversion)
    poolClear(&parser->m_temp2Pool);

  return XML_ERROR_NONE;
}
4870
4871 static enum XML_Error
4872 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4873 if (parser->m_unknownEncodingHandler) {
4874 XML_Encoding info;
4875 int i;
4876 for (i = 0; i < 256; i++)
4877 info.map[i] = -1;
4878 info.convert = NULL;
4879 info.data = NULL;
4880 info.release = NULL;
4881 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4882 encodingName, &info)) {
4883 ENCODING *enc;
4884 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4885 if (! parser->m_unknownEncodingMem) {
4886 if (info.release)
4887 info.release(info.data);
4888 return XML_ERROR_NO_MEMORY;
4889 }
4890 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4891 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4892 if (enc) {
4893 parser->m_unknownEncodingData = info.data;
4894 parser->m_unknownEncodingRelease = info.release;
4895 parser->m_encoding = enc;
4896 return XML_ERROR_NONE;
4897 }
4898 }
4899 if (info.release != NULL)
4900 info.release(info.data);
4901 }
4902 return XML_ERROR_UNKNOWN_ENCODING;
4903 }
4904
4905 static enum XML_Error PTRCALL
4906 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4907 const char **nextPtr) {
4908 enum XML_Error result = initializeEncoding(parser);
4909 if (result != XML_ERROR_NONE)
4910 return result;
4911 parser->m_processor = prologProcessor;
4912 return prologProcessor(parser, s, end, nextPtr);
4913 }
4914
4915 #ifdef XML_DTD
4916
4917 static enum XML_Error PTRCALL
4918 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4919 const char **nextPtr) {
4920 enum XML_Error result = initializeEncoding(parser);
4921 if (result != XML_ERROR_NONE)
4922 return result;
4923
4924 /* we know now that XML_Parse(Buffer) has been called,
4925 so we consider the external parameter entity read */
4926 parser->m_dtd->paramEntityRead = XML_TRUE;
4927
4928 if (parser->m_prologState.inEntityValue) {
4929 parser->m_processor = entityValueInitProcessor;
4930 return entityValueInitProcessor(parser, s, end, nextPtr);
4931 } else {
4932 parser->m_processor = externalParEntProcessor;
4933 return externalParEntProcessor(parser, s, end, nextPtr);
4934 }
4935 }
4936
/* Processor used for the start of an entity value: it scans prolog tokens
   from s until the input ends, consuming a leading BOM and handling a
   declaration token (XML_TOK_XML_DECL) if one is found.

   - If the input ends and more data may follow (not the final buffer),
     *nextPtr is set to s and XML_ERROR_NONE is returned so that scanning
     starts again from s on the next call.
   - If the input ends in the final buffer, the range [s, end) is passed to
     storeEntityValue().
   - After a declaration token, entityValueProcessor takes over for the
     remaining input.

   Throughout, `s` marks the start of the data that will be stored or
   re-scanned, while `start`/`next` delimit the token currently examined.
*/
static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
                         const char **nextPtr) {
  int tok;
  const char *start = s;
  const char *next = start;
  parser->m_eventPtr = start;

  for (;;) {
    tok = XmlPrologTok(parser->m_encoding, start, end, &next);
    /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
             - storeEntityValue
             - processXmlDecl
    */
    parser->m_eventEndPtr = next;
    if (tok <= 0) {
      /* Anything but an invalid token may be completed by later input */
      if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case XML_TOK_NONE: /* start == end */
      default:
        break;
      }
      /* found end of entity value - can store it now */
      return storeEntityValue(parser, parser->m_encoding, s, end,
                              XML_ACCOUNT_DIRECT, NULL);
    } else if (tok == XML_TOK_XML_DECL) {
      enum XML_Error result;
      result = processXmlDecl(parser, 0, start, next);
      if (result != XML_ERROR_NONE)
        return result;
      /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
       * that to happen, a parameter entity parsing handler must have attempted
       * to suspend the parser, which fails and raises an error.  The parser can
       * be aborted, but can't be suspended.
       */
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      /* The declaration is consumed; the entity value proper starts at next */
      *nextPtr = next;
      /* stop scanning for text declaration - we found one */
      parser->m_processor = entityValueProcessor;
      return entityValueProcessor(parser, next, end, nextPtr);
    }
    /* XmlPrologTok has now set the encoding based on the BOM it found, and we
       must move s and nextPtr forward to consume the BOM.

       If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
       would leave the BOM in the buffer and return. On the next call to this
       function, our XmlPrologTok call would return XML_TOK_INVALID, since it
       is not valid to have multiple BOMs.
    */
    else if (tok == XML_TOK_BOM) {
#  if XML_GE == 1
      /* The BOM bytes are skipped here, so they are accounted for here too */
      if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                    XML_ACCOUNT_DIRECT)) {
        accountingOnAbort(parser);
        return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      }
#  endif

      *nextPtr = next;
      s = next;
    }
    /* If we get this token, we have the start of what might be a
       normal tag, but not a declaration (i.e. it doesn't begin with
       "<!" or "<?").  In a DTD context, that isn't legal.
    */
    else if (tok == XML_TOK_INSTANCE_START) {
      *nextPtr = next;
      return XML_ERROR_SYNTAX;
    }
    /* Any other token: keep scanning from the end of this token */
    start = next;
    parser->m_eventPtr = start;
  }
}
5020
5021 static enum XML_Error PTRCALL
5022 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
5023 const char **nextPtr) {
5024 const char *next = s;
5025 int tok;
5026
5027 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5028 if (tok <= 0) {
5029 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5030 *nextPtr = s;
5031 return XML_ERROR_NONE;
5032 }
5033 switch (tok) {
5034 case XML_TOK_INVALID:
5035 return XML_ERROR_INVALID_TOKEN;
5036 case XML_TOK_PARTIAL:
5037 return XML_ERROR_UNCLOSED_TOKEN;
5038 case XML_TOK_PARTIAL_CHAR:
5039 return XML_ERROR_PARTIAL_CHAR;
5040 case XML_TOK_NONE: /* start == end */
5041 default:
5042 break;
5043 }
5044 }
5045 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
5046 However, when parsing an external subset, doProlog will not accept a BOM
5047 as valid, and report a syntax error, so we have to skip the BOM, and
5048 account for the BOM bytes.
5049 */
5050 else if (tok == XML_TOK_BOM) {
5051 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5052 XML_ACCOUNT_DIRECT)) {
5053 accountingOnAbort(parser);
5054 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5055 }
5056
5057 s = next;
5058 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5059 }
5060
5061 parser->m_processor = prologProcessor;
5062 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5063 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5064 XML_ACCOUNT_DIRECT);
5065 }
5066
5067 static enum XML_Error PTRCALL
5068 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
5069 const char **nextPtr) {
5070 const char *start = s;
5071 const char *next = s;
5072 const ENCODING *enc = parser->m_encoding;
5073 int tok;
5074
5075 for (;;) {
5076 tok = XmlPrologTok(enc, start, end, &next);
5077 /* Note: These bytes are accounted later in:
5078 - storeEntityValue
5079 */
5080 if (tok <= 0) {
5081 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5082 *nextPtr = s;
5083 return XML_ERROR_NONE;
5084 }
5085 switch (tok) {
5086 case XML_TOK_INVALID:
5087 return XML_ERROR_INVALID_TOKEN;
5088 case XML_TOK_PARTIAL:
5089 return XML_ERROR_UNCLOSED_TOKEN;
5090 case XML_TOK_PARTIAL_CHAR:
5091 return XML_ERROR_PARTIAL_CHAR;
5092 case XML_TOK_NONE: /* start == end */
5093 default:
5094 break;
5095 }
5096 /* found end of entity value - can store it now */
5097 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
5098 }
5099 /* If we get this token, we have the start of what might be a
5100 normal tag, but not a declaration (i.e. it doesn't begin with
5101 "<!" or "<?"). In a DTD context, that isn't legal.
5102 */
5103 else if (tok == XML_TOK_INSTANCE_START) {
5104 *nextPtr = next;
5105 return XML_ERROR_SYNTAX;
5106 }
5107
5108 start = next;
5109 }
5110 }
5111
5112 #endif /* XML_DTD */
5113
5114 static enum XML_Error PTRCALL
5115 prologProcessor(XML_Parser parser, const char *s, const char *end,
5116 const char **nextPtr) {
5117 const char *next = s;
5118 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5119 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5120 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5121 XML_ACCOUNT_DIRECT);
5122 }
5123
5124 static enum XML_Error
5125 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
5126 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
5127 XML_Bool allowClosingDoctype, enum XML_Account account) {
5128 #ifdef XML_DTD
5129 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
5130 #endif /* XML_DTD */
5131 static const XML_Char atypeCDATA[]
5132 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
5133 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
5134 static const XML_Char atypeIDREF[]
5135 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
5136 static const XML_Char atypeIDREFS[]
5137 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
5138 static const XML_Char atypeENTITY[]
5139 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
5140 static const XML_Char atypeENTITIES[]
5141 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
5142 ASCII_I, ASCII_E, ASCII_S, '\0'};
5143 static const XML_Char atypeNMTOKEN[]
5144 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
5145 static const XML_Char atypeNMTOKENS[]
5146 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
5147 ASCII_E, ASCII_N, ASCII_S, '\0'};
5148 static const XML_Char notationPrefix[]
5149 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
5150 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
5151 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
5152 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
5153
5154 #ifndef XML_DTD
5155 UNUSED_P(account);
5156 #endif
5157
5158 /* save one level of indirection */
5159 DTD *const dtd = parser->m_dtd;
5160
5161 const char **eventPP;
5162 const char **eventEndPP;
5163 enum XML_Content_Quant quant;
5164
5165 if (enc == parser->m_encoding) {
5166 eventPP = &parser->m_eventPtr;
5167 eventEndPP = &parser->m_eventEndPtr;
5168 } else {
5169 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5170 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5171 }
5172
5173 for (;;) {
5174 int role;
5175 XML_Bool handleDefault = XML_TRUE;
5176 *eventPP = s;
5177 *eventEndPP = next;
5178 if (tok <= 0) {
5179 if (haveMore && tok != XML_TOK_INVALID) {
5180 *nextPtr = s;
5181 return XML_ERROR_NONE;
5182 }
5183 switch (tok) {
5184 case XML_TOK_INVALID:
5185 *eventPP = next;
5186 return XML_ERROR_INVALID_TOKEN;
5187 case XML_TOK_PARTIAL:
5188 return XML_ERROR_UNCLOSED_TOKEN;
5189 case XML_TOK_PARTIAL_CHAR:
5190 return XML_ERROR_PARTIAL_CHAR;
5191 case -XML_TOK_PROLOG_S:
5192 tok = -tok;
5193 break;
5194 case XML_TOK_NONE:
5195 #ifdef XML_DTD
5196 /* for internal PE NOT referenced between declarations */
5197 if (enc != parser->m_encoding
5198 && ! parser->m_openInternalEntities->betweenDecl) {
5199 *nextPtr = s;
5200 return XML_ERROR_NONE;
5201 }
5202 /* WFC: PE Between Declarations - must check that PE contains
5203 complete markup, not only for external PEs, but also for
5204 internal PEs if the reference occurs between declarations.
5205 */
5206 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5207 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
5208 == XML_ROLE_ERROR)
5209 return XML_ERROR_INCOMPLETE_PE;
5210 *nextPtr = s;
5211 return XML_ERROR_NONE;
5212 }
5213 #endif /* XML_DTD */
5214 return XML_ERROR_NO_ELEMENTS;
5215 default:
5216 tok = -tok;
5217 next = end;
5218 break;
5219 }
5220 }
5221 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
5222 #if XML_GE == 1
5223 switch (role) {
5224 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
5225 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
5226 # ifdef XML_DTD
5227 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
5228 # endif
5229 break;
5230 default:
5231 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
5232 accountingOnAbort(parser);
5233 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5234 }
5235 }
5236 #endif
5237 switch (role) {
5238 case XML_ROLE_XML_DECL: {
5239 enum XML_Error result = processXmlDecl(parser, 0, s, next);
5240 if (result != XML_ERROR_NONE)
5241 return result;
5242 enc = parser->m_encoding;
5243 handleDefault = XML_FALSE;
5244 } break;
5245 case XML_ROLE_DOCTYPE_NAME:
5246 if (parser->m_startDoctypeDeclHandler) {
5247 parser->m_doctypeName
5248 = poolStoreString(&parser->m_tempPool, enc, s, next);
5249 if (! parser->m_doctypeName)
5250 return XML_ERROR_NO_MEMORY;
5251 poolFinish(&parser->m_tempPool);
5252 parser->m_doctypePubid = NULL;
5253 handleDefault = XML_FALSE;
5254 }
5255 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
5256 break;
5257 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
5258 if (parser->m_startDoctypeDeclHandler) {
5259 parser->m_startDoctypeDeclHandler(
5260 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5261 parser->m_doctypePubid, 1);
5262 parser->m_doctypeName = NULL;
5263 poolClear(&parser->m_tempPool);
5264 handleDefault = XML_FALSE;
5265 }
5266 break;
5267 #ifdef XML_DTD
5268 case XML_ROLE_TEXT_DECL: {
5269 enum XML_Error result = processXmlDecl(parser, 1, s, next);
5270 if (result != XML_ERROR_NONE)
5271 return result;
5272 enc = parser->m_encoding;
5273 handleDefault = XML_FALSE;
5274 } break;
5275 #endif /* XML_DTD */
5276 case XML_ROLE_DOCTYPE_PUBLIC_ID:
5277 #ifdef XML_DTD
5278 parser->m_useForeignDTD = XML_FALSE;
5279 parser->m_declEntity = (ENTITY *)lookup(
5280 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5281 if (! parser->m_declEntity)
5282 return XML_ERROR_NO_MEMORY;
5283 #endif /* XML_DTD */
5284 dtd->hasParamEntityRefs = XML_TRUE;
5285 if (parser->m_startDoctypeDeclHandler) {
5286 XML_Char *pubId;
5287 if (! XmlIsPublicId(enc, s, next, eventPP))
5288 return XML_ERROR_PUBLICID;
5289 pubId = poolStoreString(&parser->m_tempPool, enc,
5290 s + enc->minBytesPerChar,
5291 next - enc->minBytesPerChar);
5292 if (! pubId)
5293 return XML_ERROR_NO_MEMORY;
5294 normalizePublicId(pubId);
5295 poolFinish(&parser->m_tempPool);
5296 parser->m_doctypePubid = pubId;
5297 handleDefault = XML_FALSE;
5298 goto alreadyChecked;
5299 }
5300 /* fall through */
5301 case XML_ROLE_ENTITY_PUBLIC_ID:
5302 if (! XmlIsPublicId(enc, s, next, eventPP))
5303 return XML_ERROR_PUBLICID;
5304 alreadyChecked:
5305 if (dtd->keepProcessing && parser->m_declEntity) {
5306 XML_Char *tem
5307 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5308 next - enc->minBytesPerChar);
5309 if (! tem)
5310 return XML_ERROR_NO_MEMORY;
5311 normalizePublicId(tem);
5312 parser->m_declEntity->publicId = tem;
5313 poolFinish(&dtd->pool);
5314 /* Don't suppress the default handler if we fell through from
5315 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5316 */
5317 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5318 handleDefault = XML_FALSE;
5319 }
5320 break;
5321 case XML_ROLE_DOCTYPE_CLOSE:
5322 if (allowClosingDoctype != XML_TRUE) {
5323 /* Must not close doctype from within expanded parameter entities */
5324 return XML_ERROR_INVALID_TOKEN;
5325 }
5326
5327 if (parser->m_doctypeName) {
5328 parser->m_startDoctypeDeclHandler(
5329 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5330 parser->m_doctypePubid, 0);
5331 poolClear(&parser->m_tempPool);
5332 handleDefault = XML_FALSE;
5333 }
5334 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5335 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5336 was not set, indicating an external subset
5337 */
5338 #ifdef XML_DTD
5339 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5340 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5341 dtd->hasParamEntityRefs = XML_TRUE;
5342 if (parser->m_paramEntityParsing
5343 && parser->m_externalEntityRefHandler) {
5344 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5345 externalSubsetName, sizeof(ENTITY));
5346 if (! entity) {
5347 /* The external subset name "#" will have already been
5348 * inserted into the hash table at the start of the
5349 * external entity parsing, so no allocation will happen
5350 * and lookup() cannot fail.
5351 */
5352 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5353 }
5354 if (parser->m_useForeignDTD)
5355 entity->base = parser->m_curBase;
5356 dtd->paramEntityRead = XML_FALSE;
5357 if (! parser->m_externalEntityRefHandler(
5358 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5359 entity->systemId, entity->publicId))
5360 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5361 if (dtd->paramEntityRead) {
5362 if (! dtd->standalone && parser->m_notStandaloneHandler
5363 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5364 return XML_ERROR_NOT_STANDALONE;
5365 }
5366 /* if we didn't read the foreign DTD then this means that there
5367 is no external subset and we must reset dtd->hasParamEntityRefs
5368 */
5369 else if (! parser->m_doctypeSysid)
5370 dtd->hasParamEntityRefs = hadParamEntityRefs;
5371 /* end of DTD - no need to update dtd->keepProcessing */
5372 }
5373 parser->m_useForeignDTD = XML_FALSE;
5374 }
5375 #endif /* XML_DTD */
5376 if (parser->m_endDoctypeDeclHandler) {
5377 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5378 handleDefault = XML_FALSE;
5379 }
5380 break;
5381 case XML_ROLE_INSTANCE_START:
5382 #ifdef XML_DTD
5383 /* if there is no DOCTYPE declaration then now is the
5384 last chance to read the foreign DTD
5385 */
5386 if (parser->m_useForeignDTD) {
5387 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5388 dtd->hasParamEntityRefs = XML_TRUE;
5389 if (parser->m_paramEntityParsing
5390 && parser->m_externalEntityRefHandler) {
5391 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5392 externalSubsetName, sizeof(ENTITY));
5393 if (! entity)
5394 return XML_ERROR_NO_MEMORY;
5395 entity->base = parser->m_curBase;
5396 dtd->paramEntityRead = XML_FALSE;
5397 if (! parser->m_externalEntityRefHandler(
5398 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5399 entity->systemId, entity->publicId))
5400 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5401 if (dtd->paramEntityRead) {
5402 if (! dtd->standalone && parser->m_notStandaloneHandler
5403 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5404 return XML_ERROR_NOT_STANDALONE;
5405 }
5406 /* if we didn't read the foreign DTD then this means that there
5407 is no external subset and we must reset dtd->hasParamEntityRefs
5408 */
5409 else
5410 dtd->hasParamEntityRefs = hadParamEntityRefs;
5411 /* end of DTD - no need to update dtd->keepProcessing */
5412 }
5413 }
5414 #endif /* XML_DTD */
5415 parser->m_processor = contentProcessor;
5416 return contentProcessor(parser, s, end, nextPtr);
5417 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5418 parser->m_declElementType = getElementType(parser, enc, s, next);
5419 if (! parser->m_declElementType)
5420 return XML_ERROR_NO_MEMORY;
5421 goto checkAttListDeclHandler;
5422 case XML_ROLE_ATTRIBUTE_NAME:
5423 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5424 if (! parser->m_declAttributeId)
5425 return XML_ERROR_NO_MEMORY;
5426 parser->m_declAttributeIsCdata = XML_FALSE;
5427 parser->m_declAttributeType = NULL;
5428 parser->m_declAttributeIsId = XML_FALSE;
5429 goto checkAttListDeclHandler;
5430 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5431 parser->m_declAttributeIsCdata = XML_TRUE;
5432 parser->m_declAttributeType = atypeCDATA;
5433 goto checkAttListDeclHandler;
5434 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5435 parser->m_declAttributeIsId = XML_TRUE;
5436 parser->m_declAttributeType = atypeID;
5437 goto checkAttListDeclHandler;
5438 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5439 parser->m_declAttributeType = atypeIDREF;
5440 goto checkAttListDeclHandler;
5441 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5442 parser->m_declAttributeType = atypeIDREFS;
5443 goto checkAttListDeclHandler;
5444 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5445 parser->m_declAttributeType = atypeENTITY;
5446 goto checkAttListDeclHandler;
5447 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5448 parser->m_declAttributeType = atypeENTITIES;
5449 goto checkAttListDeclHandler;
5450 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5451 parser->m_declAttributeType = atypeNMTOKEN;
5452 goto checkAttListDeclHandler;
5453 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5454 parser->m_declAttributeType = atypeNMTOKENS;
5455 checkAttListDeclHandler:
5456 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5457 handleDefault = XML_FALSE;
5458 break;
5459 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5460 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5461 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5462 const XML_Char *prefix;
5463 if (parser->m_declAttributeType) {
5464 prefix = enumValueSep;
5465 } else {
5466 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5467 : enumValueStart);
5468 }
5469 if (! poolAppendString(&parser->m_tempPool, prefix))
5470 return XML_ERROR_NO_MEMORY;
5471 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5472 return XML_ERROR_NO_MEMORY;
5473 parser->m_declAttributeType = parser->m_tempPool.start;
5474 handleDefault = XML_FALSE;
5475 }
5476 break;
5477 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5478 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5479 if (dtd->keepProcessing) {
5480 if (! defineAttribute(parser->m_declElementType,
5481 parser->m_declAttributeId,
5482 parser->m_declAttributeIsCdata,
5483 parser->m_declAttributeIsId, 0, parser))
5484 return XML_ERROR_NO_MEMORY;
5485 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5486 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5487 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5488 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5489 /* Enumerated or Notation type */
5490 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5491 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5492 return XML_ERROR_NO_MEMORY;
5493 parser->m_declAttributeType = parser->m_tempPool.start;
5494 poolFinish(&parser->m_tempPool);
5495 }
5496 *eventEndPP = s;
5497 parser->m_attlistDeclHandler(
5498 parser->m_handlerArg, parser->m_declElementType->name,
5499 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5500 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5501 handleDefault = XML_FALSE;
5502 }
5503 }
5504 poolClear(&parser->m_tempPool);
5505 break;
5506 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5507 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5508 if (dtd->keepProcessing) {
5509 const XML_Char *attVal;
5510 enum XML_Error result = storeAttributeValue(
5511 parser, enc, parser->m_declAttributeIsCdata,
5512 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5513 XML_ACCOUNT_NONE);
5514 if (result)
5515 return result;
5516 attVal = poolStart(&dtd->pool);
5517 poolFinish(&dtd->pool);
5518 /* ID attributes aren't allowed to have a default */
5519 if (! defineAttribute(
5520 parser->m_declElementType, parser->m_declAttributeId,
5521 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5522 return XML_ERROR_NO_MEMORY;
5523 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5524 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5525 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5526 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5527 /* Enumerated or Notation type */
5528 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5529 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5530 return XML_ERROR_NO_MEMORY;
5531 parser->m_declAttributeType = parser->m_tempPool.start;
5532 poolFinish(&parser->m_tempPool);
5533 }
5534 *eventEndPP = s;
5535 parser->m_attlistDeclHandler(
5536 parser->m_handlerArg, parser->m_declElementType->name,
5537 parser->m_declAttributeId->name, parser->m_declAttributeType,
5538 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5539 poolClear(&parser->m_tempPool);
5540 handleDefault = XML_FALSE;
5541 }
5542 }
5543 break;
5544 case XML_ROLE_ENTITY_VALUE:
5545 if (dtd->keepProcessing) {
5546 #if XML_GE == 1
5547 // This will store the given replacement text in
5548 // parser->m_declEntity->textPtr.
5549 enum XML_Error result = callStoreEntityValue(
5550 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5551 XML_ACCOUNT_NONE);
5552 if (parser->m_declEntity) {
5553 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5554 parser->m_declEntity->textLen
5555 = (int)(poolLength(&dtd->entityValuePool));
5556 poolFinish(&dtd->entityValuePool);
5557 if (parser->m_entityDeclHandler) {
5558 *eventEndPP = s;
5559 parser->m_entityDeclHandler(
5560 parser->m_handlerArg, parser->m_declEntity->name,
5561 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5562 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5563 handleDefault = XML_FALSE;
5564 }
5565 } else
5566 poolDiscard(&dtd->entityValuePool);
5567 if (result != XML_ERROR_NONE)
5568 return result;
5569 #else
5570 // This will store "&entity123;" in parser->m_declEntity->textPtr
5571 // to end up as "&entity123;" in the handler.
5572 if (parser->m_declEntity != NULL) {
5573 const enum XML_Error result
5574 = storeSelfEntityValue(parser, parser->m_declEntity);
5575 if (result != XML_ERROR_NONE)
5576 return result;
5577
5578 if (parser->m_entityDeclHandler) {
5579 *eventEndPP = s;
5580 parser->m_entityDeclHandler(
5581 parser->m_handlerArg, parser->m_declEntity->name,
5582 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5583 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5584 handleDefault = XML_FALSE;
5585 }
5586 }
5587 #endif
5588 }
5589 break;
5590 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5591 #ifdef XML_DTD
5592 parser->m_useForeignDTD = XML_FALSE;
5593 #endif /* XML_DTD */
5594 dtd->hasParamEntityRefs = XML_TRUE;
5595 if (parser->m_startDoctypeDeclHandler) {
5596 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5597 s + enc->minBytesPerChar,
5598 next - enc->minBytesPerChar);
5599 if (parser->m_doctypeSysid == NULL)
5600 return XML_ERROR_NO_MEMORY;
5601 poolFinish(&parser->m_tempPool);
5602 handleDefault = XML_FALSE;
5603 }
5604 #ifdef XML_DTD
5605 else
5606 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5607 for the case where no parser->m_startDoctypeDeclHandler is set */
5608 parser->m_doctypeSysid = externalSubsetName;
5609 #endif /* XML_DTD */
5610 if (! dtd->standalone
5611 #ifdef XML_DTD
5612 && ! parser->m_paramEntityParsing
5613 #endif /* XML_DTD */
5614 && parser->m_notStandaloneHandler
5615 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5616 return XML_ERROR_NOT_STANDALONE;
5617 #ifndef XML_DTD
5618 break;
5619 #else /* XML_DTD */
5620 if (! parser->m_declEntity) {
5621 parser->m_declEntity = (ENTITY *)lookup(
5622 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5623 if (! parser->m_declEntity)
5624 return XML_ERROR_NO_MEMORY;
5625 parser->m_declEntity->publicId = NULL;
5626 }
5627 #endif /* XML_DTD */
5628 /* fall through */
5629 case XML_ROLE_ENTITY_SYSTEM_ID:
5630 if (dtd->keepProcessing && parser->m_declEntity) {
5631 parser->m_declEntity->systemId
5632 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5633 next - enc->minBytesPerChar);
5634 if (! parser->m_declEntity->systemId)
5635 return XML_ERROR_NO_MEMORY;
5636 parser->m_declEntity->base = parser->m_curBase;
5637 poolFinish(&dtd->pool);
5638 /* Don't suppress the default handler if we fell through from
5639 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5640 */
5641 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5642 handleDefault = XML_FALSE;
5643 }
5644 break;
5645 case XML_ROLE_ENTITY_COMPLETE:
5646 #if XML_GE == 0
5647 // This will store "&entity123;" in entity->textPtr
5648 // to end up as "&entity123;" in the handler.
5649 if (parser->m_declEntity != NULL) {
5650 const enum XML_Error result
5651 = storeSelfEntityValue(parser, parser->m_declEntity);
5652 if (result != XML_ERROR_NONE)
5653 return result;
5654 }
5655 #endif
5656 if (dtd->keepProcessing && parser->m_declEntity
5657 && parser->m_entityDeclHandler) {
5658 *eventEndPP = s;
5659 parser->m_entityDeclHandler(
5660 parser->m_handlerArg, parser->m_declEntity->name,
5661 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5662 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5663 handleDefault = XML_FALSE;
5664 }
5665 break;
5666 case XML_ROLE_ENTITY_NOTATION_NAME:
5667 if (dtd->keepProcessing && parser->m_declEntity) {
5668 parser->m_declEntity->notation
5669 = poolStoreString(&dtd->pool, enc, s, next);
5670 if (! parser->m_declEntity->notation)
5671 return XML_ERROR_NO_MEMORY;
5672 poolFinish(&dtd->pool);
5673 if (parser->m_unparsedEntityDeclHandler) {
5674 *eventEndPP = s;
5675 parser->m_unparsedEntityDeclHandler(
5676 parser->m_handlerArg, parser->m_declEntity->name,
5677 parser->m_declEntity->base, parser->m_declEntity->systemId,
5678 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5679 handleDefault = XML_FALSE;
5680 } else if (parser->m_entityDeclHandler) {
5681 *eventEndPP = s;
5682 parser->m_entityDeclHandler(
5683 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5684 parser->m_declEntity->base, parser->m_declEntity->systemId,
5685 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5686 handleDefault = XML_FALSE;
5687 }
5688 }
5689 break;
5690 case XML_ROLE_GENERAL_ENTITY_NAME: {
5691 if (XmlPredefinedEntityName(enc, s, next)) {
5692 parser->m_declEntity = NULL;
5693 break;
5694 }
5695 if (dtd->keepProcessing) {
5696 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5697 if (! name)
5698 return XML_ERROR_NO_MEMORY;
5699 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5700 name, sizeof(ENTITY));
5701 if (! parser->m_declEntity)
5702 return XML_ERROR_NO_MEMORY;
5703 if (parser->m_declEntity->name != name) {
5704 poolDiscard(&dtd->pool);
5705 parser->m_declEntity = NULL;
5706 } else {
5707 poolFinish(&dtd->pool);
5708 parser->m_declEntity->publicId = NULL;
5709 parser->m_declEntity->is_param = XML_FALSE;
5710 /* if we have a parent parser or are reading an internal parameter
5711 entity, then the entity declaration is not considered "internal"
5712 */
5713 parser->m_declEntity->is_internal
5714 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5715 if (parser->m_entityDeclHandler)
5716 handleDefault = XML_FALSE;
5717 }
5718 } else {
5719 poolDiscard(&dtd->pool);
5720 parser->m_declEntity = NULL;
5721 }
5722 } break;
5723 case XML_ROLE_PARAM_ENTITY_NAME:
5724 #ifdef XML_DTD
5725 if (dtd->keepProcessing) {
5726 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5727 if (! name)
5728 return XML_ERROR_NO_MEMORY;
5729 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5730 name, sizeof(ENTITY));
5731 if (! parser->m_declEntity)
5732 return XML_ERROR_NO_MEMORY;
5733 if (parser->m_declEntity->name != name) {
5734 poolDiscard(&dtd->pool);
5735 parser->m_declEntity = NULL;
5736 } else {
5737 poolFinish(&dtd->pool);
5738 parser->m_declEntity->publicId = NULL;
5739 parser->m_declEntity->is_param = XML_TRUE;
5740 /* if we have a parent parser or are reading an internal parameter
5741 entity, then the entity declaration is not considered "internal"
5742 */
5743 parser->m_declEntity->is_internal
5744 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5745 if (parser->m_entityDeclHandler)
5746 handleDefault = XML_FALSE;
5747 }
5748 } else {
5749 poolDiscard(&dtd->pool);
5750 parser->m_declEntity = NULL;
5751 }
5752 #else /* not XML_DTD */
5753 parser->m_declEntity = NULL;
5754 #endif /* XML_DTD */
5755 break;
5756 case XML_ROLE_NOTATION_NAME:
5757 parser->m_declNotationPublicId = NULL;
5758 parser->m_declNotationName = NULL;
5759 if (parser->m_notationDeclHandler) {
5760 parser->m_declNotationName
5761 = poolStoreString(&parser->m_tempPool, enc, s, next);
5762 if (! parser->m_declNotationName)
5763 return XML_ERROR_NO_MEMORY;
5764 poolFinish(&parser->m_tempPool);
5765 handleDefault = XML_FALSE;
5766 }
5767 break;
5768 case XML_ROLE_NOTATION_PUBLIC_ID:
5769 if (! XmlIsPublicId(enc, s, next, eventPP))
5770 return XML_ERROR_PUBLICID;
5771 if (parser
5772 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5773 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5774 s + enc->minBytesPerChar,
5775 next - enc->minBytesPerChar);
5776 if (! tem)
5777 return XML_ERROR_NO_MEMORY;
5778 normalizePublicId(tem);
5779 parser->m_declNotationPublicId = tem;
5780 poolFinish(&parser->m_tempPool);
5781 handleDefault = XML_FALSE;
5782 }
5783 break;
5784 case XML_ROLE_NOTATION_SYSTEM_ID:
5785 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5786 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5787 s + enc->minBytesPerChar,
5788 next - enc->minBytesPerChar);
5789 if (! systemId)
5790 return XML_ERROR_NO_MEMORY;
5791 *eventEndPP = s;
5792 parser->m_notationDeclHandler(
5793 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5794 systemId, parser->m_declNotationPublicId);
5795 handleDefault = XML_FALSE;
5796 }
5797 poolClear(&parser->m_tempPool);
5798 break;
5799 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5800 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5801 *eventEndPP = s;
5802 parser->m_notationDeclHandler(
5803 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5804 0, parser->m_declNotationPublicId);
5805 handleDefault = XML_FALSE;
5806 }
5807 poolClear(&parser->m_tempPool);
5808 break;
5809 case XML_ROLE_ERROR:
5810 switch (tok) {
5811 case XML_TOK_PARAM_ENTITY_REF:
5812 /* PE references in internal subset are
5813 not allowed within declarations. */
5814 return XML_ERROR_PARAM_ENTITY_REF;
5815 case XML_TOK_XML_DECL:
5816 return XML_ERROR_MISPLACED_XML_PI;
5817 default:
5818 return XML_ERROR_SYNTAX;
5819 }
5820 #ifdef XML_DTD
5821 case XML_ROLE_IGNORE_SECT: {
5822 enum XML_Error result;
5823 if (parser->m_defaultHandler)
5824 reportDefault(parser, enc, s, next);
5825 handleDefault = XML_FALSE;
5826 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5827 if (result != XML_ERROR_NONE)
5828 return result;
5829 else if (! next) {
5830 parser->m_processor = ignoreSectionProcessor;
5831 return result;
5832 }
5833 } break;
5834 #endif /* XML_DTD */
5835 case XML_ROLE_GROUP_OPEN:
5836 if (parser->m_prologState.level >= parser->m_groupSize) {
5837 if (parser->m_groupSize) {
5838 {
5839 /* Detect and prevent integer overflow */
5840 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5841 return XML_ERROR_NO_MEMORY;
5842 }
5843
5844 char *const new_connector = REALLOC(
5845 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5846 if (new_connector == NULL) {
5847 parser->m_groupSize /= 2;
5848 return XML_ERROR_NO_MEMORY;
5849 }
5850 parser->m_groupConnector = new_connector;
5851 }
5852
5853 if (dtd->scaffIndex) {
5854 /* Detect and prevent integer overflow.
5855 * The preprocessor guard addresses the "always false" warning
5856 * from -Wtype-limits on platforms where
5857 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5858 #if UINT_MAX >= SIZE_MAX
5859 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
5860 parser->m_groupSize /= 2;
5861 return XML_ERROR_NO_MEMORY;
5862 }
5863 #endif
5864
5865 int *const new_scaff_index = REALLOC(
5866 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5867 if (new_scaff_index == NULL) {
5868 parser->m_groupSize /= 2;
5869 return XML_ERROR_NO_MEMORY;
5870 }
5871 dtd->scaffIndex = new_scaff_index;
5872 }
5873 } else {
5874 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32);
5875 if (! parser->m_groupConnector) {
5876 parser->m_groupSize = 0;
5877 return XML_ERROR_NO_MEMORY;
5878 }
5879 }
5880 }
5881 parser->m_groupConnector[parser->m_prologState.level] = 0;
5882 if (dtd->in_eldecl) {
5883 int myindex = nextScaffoldPart(parser);
5884 if (myindex < 0)
5885 return XML_ERROR_NO_MEMORY;
5886 assert(dtd->scaffIndex != NULL);
5887 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5888 dtd->scaffLevel++;
5889 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5890 if (parser->m_elementDeclHandler)
5891 handleDefault = XML_FALSE;
5892 }
5893 break;
5894 case XML_ROLE_GROUP_SEQUENCE:
5895 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5896 return XML_ERROR_SYNTAX;
5897 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5898 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5899 handleDefault = XML_FALSE;
5900 break;
5901 case XML_ROLE_GROUP_CHOICE:
5902 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5903 return XML_ERROR_SYNTAX;
5904 if (dtd->in_eldecl
5905 && ! parser->m_groupConnector[parser->m_prologState.level]
5906 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5907 != XML_CTYPE_MIXED)) {
5908 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5909 = XML_CTYPE_CHOICE;
5910 if (parser->m_elementDeclHandler)
5911 handleDefault = XML_FALSE;
5912 }
5913 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5914 break;
5915 case XML_ROLE_PARAM_ENTITY_REF:
5916 #ifdef XML_DTD
5917 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5918 dtd->hasParamEntityRefs = XML_TRUE;
5919 if (! parser->m_paramEntityParsing)
5920 dtd->keepProcessing = dtd->standalone;
5921 else {
5922 const XML_Char *name;
5923 ENTITY *entity;
5924 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5925 next - enc->minBytesPerChar);
5926 if (! name)
5927 return XML_ERROR_NO_MEMORY;
5928 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5929 poolDiscard(&dtd->pool);
5930 /* first, determine if a check for an existing declaration is needed;
5931 if yes, check that the entity exists, and that it is internal,
5932 otherwise call the skipped entity handler
5933 */
5934 if (parser->m_prologState.documentEntity
5935 && (dtd->standalone ? ! parser->m_openInternalEntities
5936 : ! dtd->hasParamEntityRefs)) {
5937 if (! entity)
5938 return XML_ERROR_UNDEFINED_ENTITY;
5939 else if (! entity->is_internal) {
5940 /* It's hard to exhaustively search the code to be sure,
5941 * but there doesn't seem to be a way of executing the
5942 * following line. There are two cases:
5943 *
5944 * If 'standalone' is false, the DTD must have no
5945 * parameter entities or we wouldn't have passed the outer
5946 * 'if' statement. That means the only entity in the hash
5947 * table is the external subset name "#" which cannot be
5948 * given as a parameter entity name in XML syntax, so the
5949 * lookup must have returned NULL and we don't even reach
5950 * the test for an internal entity.
5951 *
5952 * If 'standalone' is true, it does not seem to be
5953 * possible to create entities taking this code path that
5954 * are not internal entities, so fail the test above.
5955 *
5956 * Because this analysis is very uncertain, the code is
5957 * being left in place and merely removed from the
5958 * coverage test statistics.
5959 */
5960 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5961 }
5962 } else if (! entity) {
5963 dtd->keepProcessing = dtd->standalone;
5964 /* cannot report skipped entities in declarations */
5965 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5966 && parser->m_skippedEntityHandler) {
5967 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5968 handleDefault = XML_FALSE;
5969 }
5970 break;
5971 }
5972 if (entity->open)
5973 return XML_ERROR_RECURSIVE_ENTITY_REF;
5974 if (entity->textPtr) {
5975 enum XML_Error result;
5976 XML_Bool betweenDecl
5977 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5978 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
5979 if (result != XML_ERROR_NONE)
5980 return result;
5981 handleDefault = XML_FALSE;
5982 break;
5983 }
5984 if (parser->m_externalEntityRefHandler) {
5985 dtd->paramEntityRead = XML_FALSE;
5986 entity->open = XML_TRUE;
5987 entityTrackingOnOpen(parser, entity, __LINE__);
5988 if (! parser->m_externalEntityRefHandler(
5989 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5990 entity->systemId, entity->publicId)) {
5991 entityTrackingOnClose(parser, entity, __LINE__);
5992 entity->open = XML_FALSE;
5993 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5994 }
5995 entityTrackingOnClose(parser, entity, __LINE__);
5996 entity->open = XML_FALSE;
5997 handleDefault = XML_FALSE;
5998 if (! dtd->paramEntityRead) {
5999 dtd->keepProcessing = dtd->standalone;
6000 break;
6001 }
6002 } else {
6003 dtd->keepProcessing = dtd->standalone;
6004 break;
6005 }
6006 }
6007 #endif /* XML_DTD */
6008 if (! dtd->standalone && parser->m_notStandaloneHandler
6009 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
6010 return XML_ERROR_NOT_STANDALONE;
6011 break;
6012
6013 /* Element declaration stuff */
6014
6015 case XML_ROLE_ELEMENT_NAME:
6016 if (parser->m_elementDeclHandler) {
6017 parser->m_declElementType = getElementType(parser, enc, s, next);
6018 if (! parser->m_declElementType)
6019 return XML_ERROR_NO_MEMORY;
6020 dtd->scaffLevel = 0;
6021 dtd->scaffCount = 0;
6022 dtd->in_eldecl = XML_TRUE;
6023 handleDefault = XML_FALSE;
6024 }
6025 break;
6026
6027 case XML_ROLE_CONTENT_ANY:
6028 case XML_ROLE_CONTENT_EMPTY:
6029 if (dtd->in_eldecl) {
6030 if (parser->m_elementDeclHandler) {
// NOTE: We are avoiding MALLOC(..) here so that
// applications that are not using XML_FreeContentModel but
// plain free(..) or .free_fcn() to free the content model's
// memory are safe.
6035 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content));
6036 if (! content)
6037 return XML_ERROR_NO_MEMORY;
6038 content->quant = XML_CQUANT_NONE;
6039 content->name = NULL;
6040 content->numchildren = 0;
6041 content->children = NULL;
6042 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
6043 : XML_CTYPE_EMPTY);
6044 *eventEndPP = s;
6045 parser->m_elementDeclHandler(
6046 parser->m_handlerArg, parser->m_declElementType->name, content);
6047 handleDefault = XML_FALSE;
6048 }
6049 dtd->in_eldecl = XML_FALSE;
6050 }
6051 break;
6052
6053 case XML_ROLE_CONTENT_PCDATA:
6054 if (dtd->in_eldecl) {
6055 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
6056 = XML_CTYPE_MIXED;
6057 if (parser->m_elementDeclHandler)
6058 handleDefault = XML_FALSE;
6059 }
6060 break;
6061
6062 case XML_ROLE_CONTENT_ELEMENT:
6063 quant = XML_CQUANT_NONE;
6064 goto elementContent;
6065 case XML_ROLE_CONTENT_ELEMENT_OPT:
6066 quant = XML_CQUANT_OPT;
6067 goto elementContent;
6068 case XML_ROLE_CONTENT_ELEMENT_REP:
6069 quant = XML_CQUANT_REP;
6070 goto elementContent;
6071 case XML_ROLE_CONTENT_ELEMENT_PLUS:
6072 quant = XML_CQUANT_PLUS;
6073 elementContent:
6074 if (dtd->in_eldecl) {
6075 ELEMENT_TYPE *el;
6076 const XML_Char *name;
6077 size_t nameLen;
6078 const char *nxt
6079 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
6080 int myindex = nextScaffoldPart(parser);
6081 if (myindex < 0)
6082 return XML_ERROR_NO_MEMORY;
6083 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
6084 dtd->scaffold[myindex].quant = quant;
6085 el = getElementType(parser, enc, s, nxt);
6086 if (! el)
6087 return XML_ERROR_NO_MEMORY;
6088 name = el->name;
6089 dtd->scaffold[myindex].name = name;
6090 nameLen = 0;
6091 while (name[nameLen++])
6092 ;
6093
6094 /* Detect and prevent integer overflow */
6095 if (nameLen > UINT_MAX - dtd->contentStringLen) {
6096 return XML_ERROR_NO_MEMORY;
6097 }
6098
6099 dtd->contentStringLen += (unsigned)nameLen;
6100 if (parser->m_elementDeclHandler)
6101 handleDefault = XML_FALSE;
6102 }
6103 break;
6104
6105 case XML_ROLE_GROUP_CLOSE:
6106 quant = XML_CQUANT_NONE;
6107 goto closeGroup;
6108 case XML_ROLE_GROUP_CLOSE_OPT:
6109 quant = XML_CQUANT_OPT;
6110 goto closeGroup;
6111 case XML_ROLE_GROUP_CLOSE_REP:
6112 quant = XML_CQUANT_REP;
6113 goto closeGroup;
6114 case XML_ROLE_GROUP_CLOSE_PLUS:
6115 quant = XML_CQUANT_PLUS;
6116 closeGroup:
6117 if (dtd->in_eldecl) {
6118 if (parser->m_elementDeclHandler)
6119 handleDefault = XML_FALSE;
6120 dtd->scaffLevel--;
6121 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
6122 if (dtd->scaffLevel == 0) {
6123 if (! handleDefault) {
6124 XML_Content *model = build_model(parser);
6125 if (! model)
6126 return XML_ERROR_NO_MEMORY;
6127 *eventEndPP = s;
6128 parser->m_elementDeclHandler(
6129 parser->m_handlerArg, parser->m_declElementType->name, model);
6130 }
6131 dtd->in_eldecl = XML_FALSE;
6132 dtd->contentStringLen = 0;
6133 }
6134 }
6135 break;
6136 /* End element declaration stuff */
6137
6138 case XML_ROLE_PI:
6139 if (! reportProcessingInstruction(parser, enc, s, next))
6140 return XML_ERROR_NO_MEMORY;
6141 handleDefault = XML_FALSE;
6142 break;
6143 case XML_ROLE_COMMENT:
6144 if (! reportComment(parser, enc, s, next))
6145 return XML_ERROR_NO_MEMORY;
6146 handleDefault = XML_FALSE;
6147 break;
6148 case XML_ROLE_NONE:
6149 switch (tok) {
6150 case XML_TOK_BOM:
6151 handleDefault = XML_FALSE;
6152 break;
6153 }
6154 break;
6155 case XML_ROLE_DOCTYPE_NONE:
6156 if (parser->m_startDoctypeDeclHandler)
6157 handleDefault = XML_FALSE;
6158 break;
6159 case XML_ROLE_ENTITY_NONE:
6160 if (dtd->keepProcessing && parser->m_entityDeclHandler)
6161 handleDefault = XML_FALSE;
6162 break;
6163 case XML_ROLE_NOTATION_NONE:
6164 if (parser->m_notationDeclHandler)
6165 handleDefault = XML_FALSE;
6166 break;
6167 case XML_ROLE_ATTLIST_NONE:
6168 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
6169 handleDefault = XML_FALSE;
6170 break;
6171 case XML_ROLE_ELEMENT_NONE:
6172 if (parser->m_elementDeclHandler)
6173 handleDefault = XML_FALSE;
6174 break;
6175 } /* end of big switch */
6176
6177 if (handleDefault && parser->m_defaultHandler)
6178 reportDefault(parser, enc, s, next);
6179
6180 switch (parser->m_parsingStatus.parsing) {
6181 case XML_SUSPENDED:
6182 *nextPtr = next;
6183 return XML_ERROR_NONE;
6184 case XML_FINISHED:
6185 return XML_ERROR_ABORTED;
6186 case XML_PARSING:
6187 if (parser->m_reenter) {
6188 *nextPtr = next;
6189 return XML_ERROR_NONE;
6190 }
6191 /* Fall through */
6192 default:
6193 s = next;
6194 tok = XmlPrologTok(enc, s, end, &next);
6195 }
6196 }
6197 /* not reached */
6198 }
6199
6200 static enum XML_Error PTRCALL
6201 epilogProcessor(XML_Parser parser, const char *s, const char *end,
6202 const char **nextPtr) {
6203 parser->m_processor = epilogProcessor;
6204 parser->m_eventPtr = s;
6205 for (;;) {
6206 const char *next = NULL;
6207 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
6208 #if XML_GE == 1
6209 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
6210 XML_ACCOUNT_DIRECT)) {
6211 accountingOnAbort(parser);
6212 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6213 }
6214 #endif
6215 parser->m_eventEndPtr = next;
6216 switch (tok) {
6217 /* report partial linebreak - it might be the last token */
6218 case -XML_TOK_PROLOG_S:
6219 if (parser->m_defaultHandler) {
6220 reportDefault(parser, parser->m_encoding, s, next);
6221 if (parser->m_parsingStatus.parsing == XML_FINISHED)
6222 return XML_ERROR_ABORTED;
6223 }
6224 *nextPtr = next;
6225 return XML_ERROR_NONE;
6226 case XML_TOK_NONE:
6227 *nextPtr = s;
6228 return XML_ERROR_NONE;
6229 case XML_TOK_PROLOG_S:
6230 if (parser->m_defaultHandler)
6231 reportDefault(parser, parser->m_encoding, s, next);
6232 break;
6233 case XML_TOK_PI:
6234 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
6235 return XML_ERROR_NO_MEMORY;
6236 break;
6237 case XML_TOK_COMMENT:
6238 if (! reportComment(parser, parser->m_encoding, s, next))
6239 return XML_ERROR_NO_MEMORY;
6240 break;
6241 case XML_TOK_INVALID:
6242 parser->m_eventPtr = next;
6243 return XML_ERROR_INVALID_TOKEN;
6244 case XML_TOK_PARTIAL:
6245 if (! parser->m_parsingStatus.finalBuffer) {
6246 *nextPtr = s;
6247 return XML_ERROR_NONE;
6248 }
6249 return XML_ERROR_UNCLOSED_TOKEN;
6250 case XML_TOK_PARTIAL_CHAR:
6251 if (! parser->m_parsingStatus.finalBuffer) {
6252 *nextPtr = s;
6253 return XML_ERROR_NONE;
6254 }
6255 return XML_ERROR_PARTIAL_CHAR;
6256 default:
6257 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
6258 }
6259 switch (parser->m_parsingStatus.parsing) {
6260 case XML_SUSPENDED:
6261 parser->m_eventPtr = next;
6262 *nextPtr = next;
6263 return XML_ERROR_NONE;
6264 case XML_FINISHED:
6265 parser->m_eventPtr = next;
6266 return XML_ERROR_ABORTED;
6267 case XML_PARSING:
6268 if (parser->m_reenter) {
6269 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
6270 }
6271 /* Fall through */
6272 default:;
6273 parser->m_eventPtr = s = next;
6274 }
6275 }
6276 }
6277
6278 static enum XML_Error
6279 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
6280 enum EntityType type) {
6281 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
6282 switch (type) {
6283 case ENTITY_INTERNAL:
6284 parser->m_processor = internalEntityProcessor;
6285 openEntityList = &parser->m_openInternalEntities;
6286 freeEntityList = &parser->m_freeInternalEntities;
6287 break;
6288 case ENTITY_ATTRIBUTE:
6289 openEntityList = &parser->m_openAttributeEntities;
6290 freeEntityList = &parser->m_freeAttributeEntities;
6291 break;
6292 case ENTITY_VALUE:
6293 openEntityList = &parser->m_openValueEntities;
6294 freeEntityList = &parser->m_freeValueEntities;
6295 break;
6296 /* default case serves merely as a safety net in case of a
6297 * wrong entityType. Therefore we exclude the following lines
6298 * from the test coverage.
6299 *
6300 * LCOV_EXCL_START
6301 */
6302 default:
6303 // Should not reach here
6304 assert(0);
6305 /* LCOV_EXCL_STOP */
6306 }
6307
6308 if (*freeEntityList) {
6309 openEntity = *freeEntityList;
6310 *freeEntityList = openEntity->next;
6311 } else {
6312 openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6313 if (! openEntity)
6314 return XML_ERROR_NO_MEMORY;
6315 }
6316 entity->open = XML_TRUE;
6317 entity->hasMore = XML_TRUE;
6318 #if XML_GE == 1
6319 entityTrackingOnOpen(parser, entity, __LINE__);
6320 #endif
6321 entity->processed = 0;
6322 openEntity->next = *openEntityList;
6323 *openEntityList = openEntity;
6324 openEntity->entity = entity;
6325 openEntity->type = type;
6326 openEntity->startTagLevel = parser->m_tagLevel;
6327 openEntity->betweenDecl = betweenDecl;
6328 openEntity->internalEventPtr = NULL;
6329 openEntity->internalEventEndPtr = NULL;
6330
6331 // Only internal entities make use of the reenter flag
6332 // therefore no need to set it for other entity types
6333 if (type == ENTITY_INTERNAL) {
6334 triggerReenter(parser);
6335 }
6336 return XML_ERROR_NONE;
6337 }
6338
6339 static enum XML_Error PTRCALL
6340 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6341 const char **nextPtr) {
6342 UNUSED_P(s);
6343 UNUSED_P(end);
6344 UNUSED_P(nextPtr);
6345 ENTITY *entity;
6346 const char *textStart, *textEnd;
6347 const char *next;
6348 enum XML_Error result;
6349 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6350 if (! openEntity)
6351 return XML_ERROR_UNEXPECTED_STATE;
6352
6353 entity = openEntity->entity;
6354
6355 // This will return early
6356 if (entity->hasMore) {
6357 textStart = ((const char *)entity->textPtr) + entity->processed;
6358 textEnd = (const char *)(entity->textPtr + entity->textLen);
6359 /* Set a safe default value in case 'next' does not get set */
6360 next = textStart;
6361
6362 if (entity->is_param) {
6363 int tok
6364 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6365 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6366 tok, next, &next, XML_FALSE, XML_FALSE,
6367 XML_ACCOUNT_ENTITY_EXPANSION);
6368 } else {
6369 result = doContent(parser, openEntity->startTagLevel,
6370 parser->m_internalEncoding, textStart, textEnd, &next,
6371 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6372 }
6373
6374 if (result != XML_ERROR_NONE)
6375 return result;
6376 // Check if entity is complete, if not, mark down how much of it is
6377 // processed
6378 if (textEnd != next
6379 && (parser->m_parsingStatus.parsing == XML_SUSPENDED
6380 || (parser->m_parsingStatus.parsing == XML_PARSING
6381 && parser->m_reenter))) {
6382 entity->processed = (int)(next - (const char *)entity->textPtr);
6383 return result;
6384 }
6385
6386 // Entity is complete. We cannot close it here since we need to first
6387 // process its possible inner entities (which are added to the
6388 // m_openInternalEntities during doProlog or doContent calls above)
6389 entity->hasMore = XML_FALSE;
6390 if (! entity->is_param
6391 && (openEntity->startTagLevel != parser->m_tagLevel)) {
6392 return XML_ERROR_ASYNC_ENTITY;
6393 }
6394 triggerReenter(parser);
6395 return result;
6396 } // End of entity processing, "if" block will return here
6397
6398 // Remove fully processed openEntity from open entity list.
6399 #if XML_GE == 1
6400 entityTrackingOnClose(parser, entity, __LINE__);
6401 #endif
6402 // openEntity is m_openInternalEntities' head, as we set it at the start of
6403 // this function and we skipped doProlog and doContent calls with hasMore set
6404 // to false. This means we can directly remove the head of
6405 // m_openInternalEntities
6406 assert(parser->m_openInternalEntities == openEntity);
6407 entity->open = XML_FALSE;
6408 parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6409
6410 /* put openEntity back in list of free instances */
6411 openEntity->next = parser->m_freeInternalEntities;
6412 parser->m_freeInternalEntities = openEntity;
6413
6414 if (parser->m_openInternalEntities == NULL) {
6415 parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6416 }
6417 triggerReenter(parser);
6418 return XML_ERROR_NONE;
6419 }
6420
6421 static enum XML_Error PTRCALL
6422 errorProcessor(XML_Parser parser, const char *s, const char *end,
6423 const char **nextPtr) {
6424 UNUSED_P(s);
6425 UNUSED_P(end);
6426 UNUSED_P(nextPtr);
6427 return parser->m_errorCode;
6428 }
6429
6430 static enum XML_Error
6431 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6432 const char *ptr, const char *end, STRING_POOL *pool,
6433 enum XML_Account account) {
6434 const char *next = ptr;
6435 enum XML_Error result = XML_ERROR_NONE;
6436
6437 while (1) {
6438 if (! parser->m_openAttributeEntities) {
6439 result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6440 account, &next);
6441 } else {
6442 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6443 if (! openEntity)
6444 return XML_ERROR_UNEXPECTED_STATE;
6445
6446 ENTITY *const entity = openEntity->entity;
6447 const char *const textStart
6448 = ((const char *)entity->textPtr) + entity->processed;
6449 const char *const textEnd
6450 = (const char *)(entity->textPtr + entity->textLen);
6451 /* Set a safe default value in case 'next' does not get set */
6452 const char *nextInEntity = textStart;
6453 if (entity->hasMore) {
6454 result = appendAttributeValue(
6455 parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6456 pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6457 if (result != XML_ERROR_NONE)
6458 break;
6459 // Check if entity is complete, if not, mark down how much of it is
6460 // processed. A XML_SUSPENDED check here is not required as
6461 // appendAttributeValue will never suspend the parser.
6462 if (textEnd != nextInEntity) {
6463 entity->processed
6464 = (int)(nextInEntity - (const char *)entity->textPtr);
6465 continue;
6466 }
6467
6468 // Entity is complete. We cannot close it here since we need to first
6469 // process its possible inner entities (which are added to the
6470 // m_openAttributeEntities during appendAttributeValue)
6471 entity->hasMore = XML_FALSE;
6472 continue;
6473 } // End of entity processing, "if" block skips the rest
6474
6475 // Remove fully processed openEntity from open entity list.
6476 #if XML_GE == 1
6477 entityTrackingOnClose(parser, entity, __LINE__);
6478 #endif
6479 // openEntity is m_openAttributeEntities' head, since we set it at the
6480 // start of this function and because we skipped appendAttributeValue call
6481 // with hasMore set to false. This means we can directly remove the head
6482 // of m_openAttributeEntities
6483 assert(parser->m_openAttributeEntities == openEntity);
6484 entity->open = XML_FALSE;
6485 parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6486
6487 /* put openEntity back in list of free instances */
6488 openEntity->next = parser->m_freeAttributeEntities;
6489 parser->m_freeAttributeEntities = openEntity;
6490 }
6491
6492 // Break if an error occurred or there is nothing left to process
6493 if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6494 break;
6495 }
6496 }
6497
6498 if (result)
6499 return result;
6500 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6501 poolChop(pool);
6502 if (! poolAppendChar(pool, XML_T('\0')))
6503 return XML_ERROR_NO_MEMORY;
6504 return XML_ERROR_NONE;
6505 }
6506
6507 static enum XML_Error
6508 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6509 const char *ptr, const char *end, STRING_POOL *pool,
6510 enum XML_Account account, const char **nextPtr) {
6511 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6512 #ifndef XML_DTD
6513 UNUSED_P(account);
6514 #endif
6515
6516 for (;;) {
6517 const char *next
6518 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6519 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6520 #if XML_GE == 1
6521 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6522 accountingOnAbort(parser);
6523 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6524 }
6525 #endif
6526 switch (tok) {
6527 case XML_TOK_NONE:
6528 if (nextPtr) {
6529 *nextPtr = next;
6530 }
6531 return XML_ERROR_NONE;
6532 case XML_TOK_INVALID:
6533 if (enc == parser->m_encoding)
6534 parser->m_eventPtr = next;
6535 return XML_ERROR_INVALID_TOKEN;
6536 case XML_TOK_PARTIAL:
6537 if (enc == parser->m_encoding)
6538 parser->m_eventPtr = ptr;
6539 return XML_ERROR_INVALID_TOKEN;
6540 case XML_TOK_CHAR_REF: {
6541 XML_Char buf[XML_ENCODE_MAX];
6542 int i;
6543 int n = XmlCharRefNumber(enc, ptr);
6544 if (n < 0) {
6545 if (enc == parser->m_encoding)
6546 parser->m_eventPtr = ptr;
6547 return XML_ERROR_BAD_CHAR_REF;
6548 }
6549 if (! isCdata && n == 0x20 /* space */
6550 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6551 break;
6552 n = XmlEncode(n, (ICHAR *)buf);
6553 /* The XmlEncode() functions can never return 0 here. That
6554 * error return happens if the code point passed in is either
6555 * negative or greater than or equal to 0x110000. The
6556 * XmlCharRefNumber() functions will all return a number
6557 * strictly less than 0x110000 or a negative value if an error
6558 * occurred. The negative value is intercepted above, so
6559 * XmlEncode() is never passed a value it might return an
6560 * error for.
6561 */
6562 for (i = 0; i < n; i++) {
6563 if (! poolAppendChar(pool, buf[i]))
6564 return XML_ERROR_NO_MEMORY;
6565 }
6566 } break;
6567 case XML_TOK_DATA_CHARS:
6568 if (! poolAppend(pool, enc, ptr, next))
6569 return XML_ERROR_NO_MEMORY;
6570 break;
6571 case XML_TOK_TRAILING_CR:
6572 next = ptr + enc->minBytesPerChar;
6573 /* fall through */
6574 case XML_TOK_ATTRIBUTE_VALUE_S:
6575 case XML_TOK_DATA_NEWLINE:
6576 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6577 break;
6578 if (! poolAppendChar(pool, 0x20))
6579 return XML_ERROR_NO_MEMORY;
6580 break;
6581 case XML_TOK_ENTITY_REF: {
6582 const XML_Char *name;
6583 ENTITY *entity;
6584 bool checkEntityDecl;
6585 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6586 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6587 if (ch) {
6588 #if XML_GE == 1
6589 /* NOTE: We are replacing 4-6 characters original input for 1 character
6590 * so there is no amplification and hence recording without
6591 * protection. */
6592 accountingDiffTolerated(parser, tok, (char *)&ch,
6593 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6594 XML_ACCOUNT_ENTITY_EXPANSION);
6595 #endif /* XML_GE == 1 */
6596 if (! poolAppendChar(pool, ch))
6597 return XML_ERROR_NO_MEMORY;
6598 break;
6599 }
6600 name = poolStoreString(&parser->m_temp2Pool, enc,
6601 ptr + enc->minBytesPerChar,
6602 next - enc->minBytesPerChar);
6603 if (! name)
6604 return XML_ERROR_NO_MEMORY;
6605 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6606 poolDiscard(&parser->m_temp2Pool);
6607 /* First, determine if a check for an existing declaration is needed;
6608 if yes, check that the entity exists, and that it is internal.
6609 */
6610 if (pool == &dtd->pool) /* are we called from prolog? */
6611 checkEntityDecl =
6612 #ifdef XML_DTD
6613 parser->m_prologState.documentEntity &&
6614 #endif /* XML_DTD */
6615 (dtd->standalone ? ! parser->m_openInternalEntities
6616 : ! dtd->hasParamEntityRefs);
6617 else /* if (pool == &parser->m_tempPool): we are called from content */
6618 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6619 if (checkEntityDecl) {
6620 if (! entity)
6621 return XML_ERROR_UNDEFINED_ENTITY;
6622 else if (! entity->is_internal)
6623 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6624 } else if (! entity) {
6625 /* Cannot report skipped entity here - see comments on
6626 parser->m_skippedEntityHandler.
6627 if (parser->m_skippedEntityHandler)
6628 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6629 */
6630 /* Cannot call the default handler because this would be
6631 out of sync with the call to the startElementHandler.
6632 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6633 reportDefault(parser, enc, ptr, next);
6634 */
6635 break;
6636 }
6637 if (entity->open) {
6638 if (enc == parser->m_encoding) {
6639 /* It does not appear that this line can be executed.
6640 *
6641 * The "if (entity->open)" check catches recursive entity
6642 * definitions. In order to be called with an open
6643 * entity, it must have gone through this code before and
6644 * been through the recursive call to
6645 * appendAttributeValue() some lines below. That call
6646 * sets the local encoding ("enc") to the parser's
6647 * internal encoding (internal_utf8 or internal_utf16),
6648 * which can never be the same as the principle encoding.
6649 * It doesn't appear there is another code path that gets
6650 * here with entity->open being TRUE.
6651 *
6652 * Since it is not certain that this logic is watertight,
6653 * we keep the line and merely exclude it from coverage
6654 * tests.
6655 */
6656 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6657 }
6658 return XML_ERROR_RECURSIVE_ENTITY_REF;
6659 }
6660 if (entity->notation) {
6661 if (enc == parser->m_encoding)
6662 parser->m_eventPtr = ptr;
6663 return XML_ERROR_BINARY_ENTITY_REF;
6664 }
6665 if (! entity->textPtr) {
6666 if (enc == parser->m_encoding)
6667 parser->m_eventPtr = ptr;
6668 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6669 } else {
6670 enum XML_Error result;
6671 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6672 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6673 *nextPtr = next;
6674 }
6675 return result;
6676 }
6677 } break;
6678 default:
6679 /* The only token returned by XmlAttributeValueTok() that does
6680 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6681 * Getting that would require an entity name to contain an
6682 * incomplete XML character (e.g. \xE2\x82); however previous
6683 * tokenisers will have already recognised and rejected such
6684 * names before XmlAttributeValueTok() gets a look-in. This
6685 * default case should be retained as a safety net, but the code
6686 * excluded from coverage tests.
6687 *
6688 * LCOV_EXCL_START
6689 */
6690 if (enc == parser->m_encoding)
6691 parser->m_eventPtr = ptr;
6692 return XML_ERROR_UNEXPECTED_STATE;
6693 /* LCOV_EXCL_STOP */
6694 }
6695 ptr = next;
6696 }
6697 /* not reached */
6698 }
6699
6700 #if XML_GE == 1
6701 static enum XML_Error
6702 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6703 const char *entityTextPtr, const char *entityTextEnd,
6704 enum XML_Account account, const char **nextPtr) {
6705 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6706 STRING_POOL *pool = &(dtd->entityValuePool);
6707 enum XML_Error result = XML_ERROR_NONE;
6708 # ifdef XML_DTD
6709 int oldInEntityValue = parser->m_prologState.inEntityValue;
6710 parser->m_prologState.inEntityValue = 1;
6711 # else
6712 UNUSED_P(account);
6713 # endif /* XML_DTD */
6714 /* never return Null for the value argument in EntityDeclHandler,
6715 since this would indicate an external entity; therefore we
6716 have to make sure that entityValuePool.start is not null */
6717 if (! pool->blocks) {
6718 if (! poolGrow(pool))
6719 return XML_ERROR_NO_MEMORY;
6720 }
6721
6722 const char *next = entityTextPtr;
6723
6724 /* Nothing to tokenize. */
6725 if (entityTextPtr >= entityTextEnd) {
6726 result = XML_ERROR_NONE;
6727 goto endEntityValue;
6728 }
6729
6730 for (;;) {
6731 next
6732 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6733 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6734
6735 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6736 account)) {
6737 accountingOnAbort(parser);
6738 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6739 goto endEntityValue;
6740 }
6741
6742 switch (tok) {
6743 case XML_TOK_PARAM_ENTITY_REF:
6744 # ifdef XML_DTD
6745 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6746 const XML_Char *name;
6747 ENTITY *entity;
6748 name = poolStoreString(&parser->m_tempPool, enc,
6749 entityTextPtr + enc->minBytesPerChar,
6750 next - enc->minBytesPerChar);
6751 if (! name) {
6752 result = XML_ERROR_NO_MEMORY;
6753 goto endEntityValue;
6754 }
6755 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6756 poolDiscard(&parser->m_tempPool);
6757 if (! entity) {
6758 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6759 /* cannot report skipped entity here - see comments on
6760 parser->m_skippedEntityHandler
6761 if (parser->m_skippedEntityHandler)
6762 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6763 */
6764 dtd->keepProcessing = dtd->standalone;
6765 goto endEntityValue;
6766 }
6767 if (entity->open || (entity == parser->m_declEntity)) {
6768 if (enc == parser->m_encoding)
6769 parser->m_eventPtr = entityTextPtr;
6770 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6771 goto endEntityValue;
6772 }
6773 if (entity->systemId) {
6774 if (parser->m_externalEntityRefHandler) {
6775 dtd->paramEntityRead = XML_FALSE;
6776 entity->open = XML_TRUE;
6777 entityTrackingOnOpen(parser, entity, __LINE__);
6778 if (! parser->m_externalEntityRefHandler(
6779 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6780 entity->systemId, entity->publicId)) {
6781 entityTrackingOnClose(parser, entity, __LINE__);
6782 entity->open = XML_FALSE;
6783 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6784 goto endEntityValue;
6785 }
6786 entityTrackingOnClose(parser, entity, __LINE__);
6787 entity->open = XML_FALSE;
6788 if (! dtd->paramEntityRead)
6789 dtd->keepProcessing = dtd->standalone;
6790 } else
6791 dtd->keepProcessing = dtd->standalone;
6792 } else {
6793 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6794 goto endEntityValue;
6795 }
6796 break;
6797 }
6798 # endif /* XML_DTD */
6799 /* In the internal subset, PE references are not legal
6800 within markup declarations, e.g entity values in this case. */
6801 parser->m_eventPtr = entityTextPtr;
6802 result = XML_ERROR_PARAM_ENTITY_REF;
6803 goto endEntityValue;
6804 case XML_TOK_NONE:
6805 result = XML_ERROR_NONE;
6806 goto endEntityValue;
6807 case XML_TOK_ENTITY_REF:
6808 case XML_TOK_DATA_CHARS:
6809 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6810 result = XML_ERROR_NO_MEMORY;
6811 goto endEntityValue;
6812 }
6813 break;
6814 case XML_TOK_TRAILING_CR:
6815 next = entityTextPtr + enc->minBytesPerChar;
6816 /* fall through */
6817 case XML_TOK_DATA_NEWLINE:
6818 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6819 result = XML_ERROR_NO_MEMORY;
6820 goto endEntityValue;
6821 }
6822 *(pool->ptr)++ = 0xA;
6823 break;
6824 case XML_TOK_CHAR_REF: {
6825 XML_Char buf[XML_ENCODE_MAX];
6826 int i;
6827 int n = XmlCharRefNumber(enc, entityTextPtr);
6828 if (n < 0) {
6829 if (enc == parser->m_encoding)
6830 parser->m_eventPtr = entityTextPtr;
6831 result = XML_ERROR_BAD_CHAR_REF;
6832 goto endEntityValue;
6833 }
6834 n = XmlEncode(n, (ICHAR *)buf);
6835 /* The XmlEncode() functions can never return 0 here. That
6836 * error return happens if the code point passed in is either
6837 * negative or greater than or equal to 0x110000. The
6838 * XmlCharRefNumber() functions will all return a number
6839 * strictly less than 0x110000 or a negative value if an error
6840 * occurred. The negative value is intercepted above, so
6841 * XmlEncode() is never passed a value it might return an
6842 * error for.
6843 */
6844 for (i = 0; i < n; i++) {
6845 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6846 result = XML_ERROR_NO_MEMORY;
6847 goto endEntityValue;
6848 }
6849 *(pool->ptr)++ = buf[i];
6850 }
6851 } break;
6852 case XML_TOK_PARTIAL:
6853 if (enc == parser->m_encoding)
6854 parser->m_eventPtr = entityTextPtr;
6855 result = XML_ERROR_INVALID_TOKEN;
6856 goto endEntityValue;
6857 case XML_TOK_INVALID:
6858 if (enc == parser->m_encoding)
6859 parser->m_eventPtr = next;
6860 result = XML_ERROR_INVALID_TOKEN;
6861 goto endEntityValue;
6862 default:
6863 /* This default case should be unnecessary -- all the tokens
6864 * that XmlEntityValueTok() can return have their own explicit
6865 * cases -- but should be retained for safety. We do however
6866 * exclude it from the coverage statistics.
6867 *
6868 * LCOV_EXCL_START
6869 */
6870 if (enc == parser->m_encoding)
6871 parser->m_eventPtr = entityTextPtr;
6872 result = XML_ERROR_UNEXPECTED_STATE;
6873 goto endEntityValue;
6874 /* LCOV_EXCL_STOP */
6875 }
6876 entityTextPtr = next;
6877 }
6878 endEntityValue:
6879 # ifdef XML_DTD
6880 parser->m_prologState.inEntityValue = oldInEntityValue;
6881 # endif /* XML_DTD */
6882 // If 'nextPtr' is given, it should be updated during the processing
6883 if (nextPtr != NULL) {
6884 *nextPtr = next;
6885 }
6886 return result;
6887 }
6888
6889 static enum XML_Error
6890 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6891 const char *entityTextPtr, const char *entityTextEnd,
6892 enum XML_Account account) {
6893 const char *next = entityTextPtr;
6894 enum XML_Error result = XML_ERROR_NONE;
6895 while (1) {
6896 if (! parser->m_openValueEntities) {
6897 result
6898 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6899 } else {
6900 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6901 if (! openEntity)
6902 return XML_ERROR_UNEXPECTED_STATE;
6903
6904 ENTITY *const entity = openEntity->entity;
6905 const char *const textStart
6906 = ((const char *)entity->textPtr) + entity->processed;
6907 const char *const textEnd
6908 = (const char *)(entity->textPtr + entity->textLen);
6909 /* Set a safe default value in case 'next' does not get set */
6910 const char *nextInEntity = textStart;
6911 if (entity->hasMore) {
6912 result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6913 textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6914 &nextInEntity);
6915 if (result != XML_ERROR_NONE)
6916 break;
6917 // Check if entity is complete, if not, mark down how much of it is
6918 // processed. A XML_SUSPENDED check here is not required as
6919 // appendAttributeValue will never suspend the parser.
6920 if (textEnd != nextInEntity) {
6921 entity->processed
6922 = (int)(nextInEntity - (const char *)entity->textPtr);
6923 continue;
6924 }
6925
6926 // Entity is complete. We cannot close it here since we need to first
6927 // process its possible inner entities (which are added to the
6928 // m_openValueEntities during storeEntityValue)
6929 entity->hasMore = XML_FALSE;
6930 continue;
6931 } // End of entity processing, "if" block skips the rest
6932
6933 // Remove fully processed openEntity from open entity list.
6934 # if XML_GE == 1
6935 entityTrackingOnClose(parser, entity, __LINE__);
6936 # endif
6937 // openEntity is m_openValueEntities' head, since we set it at the
6938 // start of this function and because we skipped storeEntityValue call
6939 // with hasMore set to false. This means we can directly remove the head
6940 // of m_openValueEntities
6941 assert(parser->m_openValueEntities == openEntity);
6942 entity->open = XML_FALSE;
6943 parser->m_openValueEntities = parser->m_openValueEntities->next;
6944
6945 /* put openEntity back in list of free instances */
6946 openEntity->next = parser->m_freeValueEntities;
6947 parser->m_freeValueEntities = openEntity;
6948 }
6949
6950 // Break if an error occurred or there is nothing left to process
6951 if (result
6952 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
6953 break;
6954 }
6955 }
6956
6957 return result;
6958 }
6959
6960 #else /* XML_GE == 0 */
6961
6962 static enum XML_Error
6963 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6964 // This will store "&entity123;" in entity->textPtr
6965 // to end up as "&entity123;" in the handler.
6966 const char *const entity_start = "&";
6967 const char *const entity_end = ";";
6968
6969 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6970 if (! poolAppendString(pool, entity_start)
6971 || ! poolAppendString(pool, entity->name)
6972 || ! poolAppendString(pool, entity_end)) {
6973 poolDiscard(pool);
6974 return XML_ERROR_NO_MEMORY;
6975 }
6976
6977 entity->textPtr = poolStart(pool);
6978 entity->textLen = (int)(poolLength(pool));
6979 poolFinish(pool);
6980
6981 return XML_ERROR_NONE;
6982 }
6983
6984 #endif /* XML_GE == 0 */
6985
6986 static void FASTCALL
6987 normalizeLines(XML_Char *s) {
6988 XML_Char *p;
6989 for (;; s++) {
6990 if (*s == XML_T('\0'))
6991 return;
6992 if (*s == 0xD)
6993 break;
6994 }
6995 p = s;
6996 do {
6997 if (*s == 0xD) {
6998 *p++ = 0xA;
6999 if (*++s == 0xA)
7000 s++;
7001 } else
7002 *p++ = *s++;
7003 } while (*s);
7004 *p = XML_T('\0');
7005 }
7006
7007 static int
7008 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
7009 const char *start, const char *end) {
7010 const XML_Char *target;
7011 XML_Char *data;
7012 const char *tem;
7013 if (! parser->m_processingInstructionHandler) {
7014 if (parser->m_defaultHandler)
7015 reportDefault(parser, enc, start, end);
7016 return 1;
7017 }
7018 start += enc->minBytesPerChar * 2;
7019 tem = start + XmlNameLength(enc, start);
7020 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
7021 if (! target)
7022 return 0;
7023 poolFinish(&parser->m_tempPool);
7024 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
7025 end - enc->minBytesPerChar * 2);
7026 if (! data)
7027 return 0;
7028 normalizeLines(data);
7029 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
7030 poolClear(&parser->m_tempPool);
7031 return 1;
7032 }
7033
7034 static int
7035 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
7036 const char *end) {
7037 XML_Char *data;
7038 if (! parser->m_commentHandler) {
7039 if (parser->m_defaultHandler)
7040 reportDefault(parser, enc, start, end);
7041 return 1;
7042 }
7043 data = poolStoreString(&parser->m_tempPool, enc,
7044 start + enc->minBytesPerChar * 4,
7045 end - enc->minBytesPerChar * 3);
7046 if (! data)
7047 return 0;
7048 normalizeLines(data);
7049 parser->m_commentHandler(parser->m_handlerArg, data);
7050 poolClear(&parser->m_tempPool);
7051 return 1;
7052 }
7053
7054 static void
7055 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
7056 const char *end) {
7057 if (MUST_CONVERT(enc, s)) {
7058 enum XML_Convert_Result convert_res;
7059 const char **eventPP;
7060 const char **eventEndPP;
7061 if (enc == parser->m_encoding) {
7062 eventPP = &parser->m_eventPtr;
7063 eventEndPP = &parser->m_eventEndPtr;
7064 } else {
7065 /* To get here, two things must be true; the parser must be
7066 * using a character encoding that is not the same as the
7067 * encoding passed in, and the encoding passed in must need
7068 * conversion to the internal format (UTF-8 unless XML_UNICODE
7069 * is defined). The only occasions on which the encoding passed
7070 * in is not the same as the parser's encoding are when it is
7071 * the internal encoding (e.g. a previously defined parameter
7072 * entity, already converted to internal format). This by
7073 * definition doesn't need conversion, so the whole branch never
7074 * gets executed.
7075 *
7076 * For safety's sake we don't delete these lines and merely
7077 * exclude them from coverage statistics.
7078 *
7079 * LCOV_EXCL_START
7080 */
7081 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
7082 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
7083 /* LCOV_EXCL_STOP */
7084 }
7085 do {
7086 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
7087 convert_res
7088 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
7089 *eventEndPP = s;
7090 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
7091 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
7092 *eventPP = s;
7093 } while ((convert_res != XML_CONVERT_COMPLETED)
7094 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
7095 } else
7096 parser->m_defaultHandler(
7097 parser->m_handlerArg, (const XML_Char *)s,
7098 (int)((const XML_Char *)end - (const XML_Char *)s));
7099 }
7100
7101 static int
7102 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
7103 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
7104 DEFAULT_ATTRIBUTE *att;
7105 if (value || isId) {
7106 /* The handling of default attributes gets messed up if we have
7107 a default which duplicates a non-default. */
7108 NAMED *const nameFound
7109 = (NAMED *)lookup(parser, &(type->defaultAttsNames), attId->name, 0);
7110 if (nameFound)
7111 return 1;
7112 if (isId && ! type->idAtt && ! attId->xmlns)
7113 type->idAtt = attId;
7114 }
7115 if (type->nDefaultAtts == type->allocDefaultAtts) {
7116 if (type->allocDefaultAtts == 0) {
7117 type->allocDefaultAtts = 8;
7118 type->defaultAtts
7119 = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7120 if (! type->defaultAtts) {
7121 type->allocDefaultAtts = 0;
7122 return 0;
7123 }
7124 } else {
7125 DEFAULT_ATTRIBUTE *temp;
7126
7127 /* Detect and prevent integer overflow */
7128 if (type->allocDefaultAtts > INT_MAX / 2) {
7129 return 0;
7130 }
7131
7132 int count = type->allocDefaultAtts * 2;
7133
7134 /* Detect and prevent integer overflow.
7135 * The preprocessor guard addresses the "always false" warning
7136 * from -Wtype-limits on platforms where
7137 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7138 #if UINT_MAX >= SIZE_MAX
7139 if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
7140 return 0;
7141 }
7142 #endif
7143
7144 temp = REALLOC(parser, type->defaultAtts,
7145 (count * sizeof(DEFAULT_ATTRIBUTE)));
7146 if (temp == NULL)
7147 return 0;
7148 type->allocDefaultAtts = count;
7149 type->defaultAtts = temp;
7150 }
7151 }
7152 att = type->defaultAtts + type->nDefaultAtts;
7153 att->id = attId;
7154 att->value = value;
7155 att->isCdata = isCdata;
7156 if (! isCdata)
7157 attId->maybeTokenized = XML_TRUE;
7158
7159 NAMED *const nameAddedOrFound = (NAMED *)lookup(
7160 parser, &(type->defaultAttsNames), attId->name, sizeof(NAMED));
7161 if (! nameAddedOrFound)
7162 return 0;
7163
7164 type->nDefaultAtts += 1;
7165 return 1;
7166 }
7167
7168 static int
7169 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
7170 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7171 const XML_Char *name;
7172 for (name = elementType->name; *name; name++) {
7173 if (*name == XML_T(ASCII_COLON)) {
7174 PREFIX *prefix;
7175 const XML_Char *s;
7176 for (s = elementType->name; s != name; s++) {
7177 if (! poolAppendChar(&dtd->pool, *s))
7178 return 0;
7179 }
7180 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7181 return 0;
7182 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
7183 sizeof(PREFIX));
7184 if (! prefix)
7185 return 0;
7186 if (prefix->name == poolStart(&dtd->pool))
7187 poolFinish(&dtd->pool);
7188 else
7189 poolDiscard(&dtd->pool);
7190 elementType->prefix = prefix;
7191 break;
7192 }
7193 }
7194 return 1;
7195 }
7196
7197 static ATTRIBUTE_ID *
7198 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
7199 const char *end) {
7200 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7201 ATTRIBUTE_ID *id;
7202 const XML_Char *name;
7203 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7204 return NULL;
7205 name = poolStoreString(&dtd->pool, enc, start, end);
7206 if (! name)
7207 return NULL;
7208 /* skip quotation mark - its storage will be reused (like in name[-1]) */
7209 ++name;
7210 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
7211 sizeof(ATTRIBUTE_ID));
7212 if (! id)
7213 return NULL;
7214 if (id->name != name)
7215 poolDiscard(&dtd->pool);
7216 else {
7217 poolFinish(&dtd->pool);
7218 if (! parser->m_ns)
7219 ;
7220 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
7221 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
7222 && name[4] == XML_T(ASCII_s)
7223 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
7224 if (name[5] == XML_T('\0'))
7225 id->prefix = &dtd->defaultPrefix;
7226 else
7227 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
7228 sizeof(PREFIX));
7229 id->xmlns = XML_TRUE;
7230 } else {
7231 int i;
7232 for (i = 0; name[i]; i++) {
7233 /* attributes without prefix are *not* in the default namespace */
7234 if (name[i] == XML_T(ASCII_COLON)) {
7235 int j;
7236 for (j = 0; j < i; j++) {
7237 if (! poolAppendChar(&dtd->pool, name[j]))
7238 return NULL;
7239 }
7240 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7241 return NULL;
7242 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
7243 poolStart(&dtd->pool), sizeof(PREFIX));
7244 if (! id->prefix)
7245 return NULL;
7246 if (id->prefix->name == poolStart(&dtd->pool))
7247 poolFinish(&dtd->pool);
7248 else
7249 poolDiscard(&dtd->pool);
7250 break;
7251 }
7252 }
7253 }
7254 }
7255 return id;
7256 }
7257
7258 #define CONTEXT_SEP XML_T(ASCII_FF)
7259
7260 static const XML_Char *
7261 getContext(XML_Parser parser) {
7262 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7263 HASH_TABLE_ITER iter;
7264 XML_Bool needSep = XML_FALSE;
7265
7266 if (dtd->defaultPrefix.binding) {
7267 int i;
7268 int len;
7269 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7270 return NULL;
7271 len = dtd->defaultPrefix.binding->uriLen;
7272 if (parser->m_namespaceSeparator)
7273 len--;
7274 for (i = 0; i < len; i++) {
7275 if (! poolAppendChar(&parser->m_tempPool,
7276 dtd->defaultPrefix.binding->uri[i])) {
7277 /* Because of memory caching, I don't believe this line can be
7278 * executed.
7279 *
7280 * This is part of a loop copying the default prefix binding
7281 * URI into the parser's temporary string pool. Previously,
7282 * that URI was copied into the same string pool, with a
7283 * terminating NUL character, as part of setContext(). When
7284 * the pool was cleared, that leaves a block definitely big
7285 * enough to hold the URI on the free block list of the pool.
7286 * The URI copy in getContext() therefore cannot run out of
7287 * memory.
7288 *
7289 * If the pool is used between the setContext() and
7290 * getContext() calls, the worst it can do is leave a bigger
7291 * block on the front of the free list. Given that this is
7292 * all somewhat inobvious and program logic can be changed, we
7293 * don't delete the line but we do exclude it from the test
7294 * coverage statistics.
7295 */
7296 return NULL; /* LCOV_EXCL_LINE */
7297 }
7298 }
7299 needSep = XML_TRUE;
7300 }
7301
7302 hashTableIterInit(&iter, &(dtd->prefixes));
7303 for (;;) {
7304 int i;
7305 int len;
7306 const XML_Char *s;
7307 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
7308 if (! prefix)
7309 break;
7310 if (! prefix->binding) {
7311 /* This test appears to be (justifiable) paranoia. There does
7312 * not seem to be a way of injecting a prefix without a binding
7313 * that doesn't get errored long before this function is called.
7314 * The test should remain for safety's sake, so we instead
7315 * exclude the following line from the coverage statistics.
7316 */
7317 continue; /* LCOV_EXCL_LINE */
7318 }
7319 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7320 return NULL;
7321 for (s = prefix->name; *s; s++)
7322 if (! poolAppendChar(&parser->m_tempPool, *s))
7323 return NULL;
7324 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7325 return NULL;
7326 len = prefix->binding->uriLen;
7327 if (parser->m_namespaceSeparator)
7328 len--;
7329 for (i = 0; i < len; i++)
7330 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7331 return NULL;
7332 needSep = XML_TRUE;
7333 }
7334
7335 hashTableIterInit(&iter, &(dtd->generalEntities));
7336 for (;;) {
7337 const XML_Char *s;
7338 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7339 if (! e)
7340 break;
7341 if (! e->open)
7342 continue;
7343 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7344 return NULL;
7345 for (s = e->name; *s; s++)
7346 if (! poolAppendChar(&parser->m_tempPool, *s))
7347 return 0;
7348 needSep = XML_TRUE;
7349 }
7350
7351 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7352 return NULL;
7353 return parser->m_tempPool.start;
7354 }
7355
/* Apply a context string (in the item format that getContext() produces) to
 * the parser.
 *
 * Items are separated by CONTEXT_SEP.  An item without '=' is looked up as a
 * general entity name and, if such an entity exists, its `open` flag is set;
 * unknown names are ignored.  An item of the form [prefix]=uri is bound via
 * addBinding() into parser->m_inheritedBindings; an empty prefix selects
 * dtd->defaultPrefix, otherwise the prefix is found or created in
 * dtd->prefixes.
 *
 * Returns XML_FALSE if context is NULL, if a pool append or table lookup
 * fails, or if addBinding() does not return XML_ERROR_NONE; XML_TRUE
 * otherwise.
 */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* `s` scans ahead of `context`; `context` is only moved once a complete
     item has been consumed, and the loop ends when it reaches the NUL. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an item that contained no '=': the characters collected in
         m_tempPool form an entity name. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* '=' reached: whatever was collected so far is the prefix name. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        /* Copy the name into the DTD pool without finishing it, so that it
           can still be discarded if the table does not adopt it. */
        const XML_Char *const prefixName = poolCopyStringNoFinish(
            &dtd->pool, poolStart(&parser->m_tempPool));
        if (! prefixName) {
          return XML_FALSE;
        }

        prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName,
                                  sizeof(PREFIX));

        /* Keep the copy only when the returned entry's name is this very
           pointer; otherwise (different name pointer, or lookup failed)
           release the pool space again. */
        const bool prefixNameUsed = prefix && prefix->name == prefixName;
        if (prefixNameUsed)
          poolFinish(&dtd->pool);
        else
          poolDiscard(&dtd->pool);

        if (! prefix)
          return XML_FALSE;

        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI: everything after '=' up to the next separator or
         the end of the string. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      /* Step over the separator, if any, and resynchronise both cursors. */
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: accumulate it as part of the current item. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
7427
7428 static void FASTCALL
7429 normalizePublicId(XML_Char *publicId) {
7430 XML_Char *p = publicId;
7431 XML_Char *s;
7432 for (s = publicId; *s; s++) {
7433 switch (*s) {
7434 case 0x20:
7435 case 0xD:
7436 case 0xA:
7437 if (p != publicId && p[-1] != 0x20)
7438 *p++ = 0x20;
7439 break;
7440 default:
7441 *p++ = *s;
7442 }
7443 }
7444 if (p != publicId && p[-1] == 0x20)
7445 --p;
7446 *p = XML_T('\0');
7447 }
7448
7449 static DTD *
7450 dtdCreate(XML_Parser parser) {
7451 DTD *p = MALLOC(parser, sizeof(DTD));
7452 if (p == NULL)
7453 return p;
7454 poolInit(&(p->pool), parser);
7455 poolInit(&(p->entityValuePool), parser);
7456 hashTableInit(&(p->generalEntities), parser);
7457 hashTableInit(&(p->elementTypes), parser);
7458 hashTableInit(&(p->attributeIds), parser);
7459 hashTableInit(&(p->prefixes), parser);
7460 #ifdef XML_DTD
7461 p->paramEntityRead = XML_FALSE;
7462 hashTableInit(&(p->paramEntities), parser);
7463 #endif /* XML_DTD */
7464 p->defaultPrefix.name = NULL;
7465 p->defaultPrefix.binding = NULL;
7466
7467 p->in_eldecl = XML_FALSE;
7468 p->scaffIndex = NULL;
7469 p->scaffold = NULL;
7470 p->scaffLevel = 0;
7471 p->scaffSize = 0;
7472 p->scaffCount = 0;
7473 p->contentStringLen = 0;
7474
7475 p->keepProcessing = XML_TRUE;
7476 p->hasParamEntityRefs = XML_FALSE;
7477 p->standalone = XML_FALSE;
7478 return p;
7479 }
7480
7481 static void
7482 dtdReset(DTD *p, XML_Parser parser) {
7483 HASH_TABLE_ITER iter;
7484 hashTableIterInit(&iter, &(p->elementTypes));
7485 for (;;) {
7486 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7487 if (! e)
7488 break;
7489 hashTableDestroy(&(e->defaultAttsNames));
7490 if (e->allocDefaultAtts != 0)
7491 FREE(parser, e->defaultAtts);
7492 }
7493 hashTableClear(&(p->generalEntities));
7494 #ifdef XML_DTD
7495 p->paramEntityRead = XML_FALSE;
7496 hashTableClear(&(p->paramEntities));
7497 #endif /* XML_DTD */
7498 hashTableClear(&(p->elementTypes));
7499 hashTableClear(&(p->attributeIds));
7500 hashTableClear(&(p->prefixes));
7501 poolClear(&(p->pool));
7502 poolClear(&(p->entityValuePool));
7503 p->defaultPrefix.name = NULL;
7504 p->defaultPrefix.binding = NULL;
7505
7506 p->in_eldecl = XML_FALSE;
7507
7508 FREE(parser, p->scaffIndex);
7509 p->scaffIndex = NULL;
7510 FREE(parser, p->scaffold);
7511 p->scaffold = NULL;
7512
7513 p->scaffLevel = 0;
7514 p->scaffSize = 0;
7515 p->scaffCount = 0;
7516 p->contentStringLen = 0;
7517
7518 p->keepProcessing = XML_TRUE;
7519 p->hasParamEntityRefs = XML_FALSE;
7520 p->standalone = XML_FALSE;
7521 }
7522
7523 static void
7524 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) {
7525 HASH_TABLE_ITER iter;
7526 hashTableIterInit(&iter, &(p->elementTypes));
7527 for (;;) {
7528 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7529 if (! e)
7530 break;
7531 hashTableDestroy(&(e->defaultAttsNames));
7532 if (e->allocDefaultAtts != 0)
7533 FREE(parser, e->defaultAtts);
7534 }
7535 hashTableDestroy(&(p->generalEntities));
7536 #ifdef XML_DTD
7537 hashTableDestroy(&(p->paramEntities));
7538 #endif /* XML_DTD */
7539 hashTableDestroy(&(p->elementTypes));
7540 hashTableDestroy(&(p->attributeIds));
7541 hashTableDestroy(&(p->prefixes));
7542 poolDestroy(&(p->pool));
7543 poolDestroy(&(p->entityValuePool));
7544 if (isDocEntity) {
7545 FREE(parser, p->scaffIndex);
7546 FREE(parser, p->scaffold);
7547 }
7548 FREE(parser, p);
7549 }
7550
/* Do a deep copy of the DTD.  Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Names and values are copied into newDtd->pool and the prefix, attribute id,
   element type and entity tables are rebuilt entry by entry.  Pointers
   between entries are re-resolved by name in the new tables.

   oldParser is the parser handed to lookup() for the DTD-level tables;
   parser is used for the per-element allocations (default attribute array
   and the defaultAttsNames table).

   The scaffolding members are copied as plain pointers/values, not
   duplicated.
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        XML_Parser parser) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Skip the scratch byte so that the key is the name itself. */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* The default prefix is a DTD member, not a table entry, so it is
         mapped by address; every other prefix is found by name. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;

    /* lookup() zero-fills new entries, so a NULL parser here means the
       nested table has not been initialised yet. */
    if (! newE->defaultAttsNames.parser)
      hashTableInit(&(newE->defaultAttsNames), parser);

    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
        return 0;
      }
#endif
      newE->defaultAtts
          = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    /* The new array is allocated with exactly nDefaultAtts elements. */
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      const XML_Char *const attributeName = oldE->defaultAtts[i].id->name;
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), attributeName, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;

      /* Record the attribute name in the element's defaultAttsNames table
         (adds it, or finds it if already present). */
      NAMED *const nameAddedOrFound = (NAMED *)lookup(
          parser, &(newE->defaultAttsNames), attributeName, sizeof(NAMED));
      if (! nameAddedOrFound) {
        return 0;
      }
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */
7699
7700 static int
7701 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7702 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7703 HASH_TABLE_ITER iter;
7704 const XML_Char *cachedOldBase = NULL;
7705 const XML_Char *cachedNewBase = NULL;
7706
7707 hashTableIterInit(&iter, oldTable);
7708
7709 for (;;) {
7710 ENTITY *newE;
7711 const XML_Char *name;
7712 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7713 if (! oldE)
7714 break;
7715 name = poolCopyString(newPool, oldE->name);
7716 if (! name)
7717 return 0;
7718 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7719 if (! newE)
7720 return 0;
7721 if (oldE->systemId) {
7722 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7723 if (! tem)
7724 return 0;
7725 newE->systemId = tem;
7726 if (oldE->base) {
7727 if (oldE->base == cachedOldBase)
7728 newE->base = cachedNewBase;
7729 else {
7730 cachedOldBase = oldE->base;
7731 tem = poolCopyString(newPool, cachedOldBase);
7732 if (! tem)
7733 return 0;
7734 cachedNewBase = newE->base = tem;
7735 }
7736 }
7737 if (oldE->publicId) {
7738 tem = poolCopyString(newPool, oldE->publicId);
7739 if (! tem)
7740 return 0;
7741 newE->publicId = tem;
7742 }
7743 } else {
7744 const XML_Char *tem
7745 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7746 if (! tem)
7747 return 0;
7748 newE->textPtr = tem;
7749 newE->textLen = oldE->textLen;
7750 }
7751 if (oldE->notation) {
7752 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7753 if (! tem)
7754 return 0;
7755 newE->notation = tem;
7756 }
7757 newE->is_param = oldE->is_param;
7758 newE->is_internal = oldE->is_internal;
7759 }
7760 return 1;
7761 }
7762
/* log2 of the slot count a hash table gets when lookup() first allocates it
   (initial size is 1 << INIT_POWER). */
#define INIT_POWER 6
7764
7765 static XML_Bool FASTCALL
7766 keyeq(KEY s1, KEY s2) {
7767 for (; *s1 == *s2; s1++, s2++)
7768 if (*s1 == 0)
7769 return XML_TRUE;
7770 return XML_FALSE;
7771 }
7772
7773 static size_t
7774 keylen(KEY s) {
7775 size_t len = 0;
7776 for (; *s; s++, len++)
7777 ;
7778 return len;
7779 }
7780
/* Fill *key with the 128-bit hash salt stored on the root parser of
 * `parser`, so that the same salt is used whichever parser in the chain
 * does the hashing. */
static void
copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  /* A root parser has no parent by definition. */
  assert(! rootParser->m_parentParser);

  *key = rootParser->m_hash_secret_salt_128;
}
7788
7789 static unsigned long FASTCALL
7790 hash(XML_Parser parser, KEY s) {
7791 struct siphash state;
7792 struct sipkey key;
7793 (void)sip24_valid;
7794 copy_salt_to_sipkey(parser, &key);
7795 sip24_init(&state, &key);
7796 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7797 return (unsigned long)sip24_final(&state);
7798 }
7799
/* Find the entry keyed by `name` in `table`, optionally creating it.
 *
 * parser      passed to hash() to compute the key's hash.
 * table       the hash table; its own table->parser is used for allocation.
 * name        zero-terminated key.  A newly created entry stores this
 *             pointer as-is (the string is not copied).
 * createSize  0 for a pure lookup; otherwise the size in bytes of the entry
 *             to allocate (zero-filled) when the key is absent.
 *
 * Returns the existing or newly created entry, or NULL when the key is
 * absent and createSize is 0, when an allocation fails, or when the table
 * cannot be enlarged any further.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing to find; allocate the slot array on first
       insertion. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = MALLOC(table->parser, tsize);
    if (! table->v) {
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until the key or an empty slot is found.  The step is computed
       lazily on the first collision; the index moves downwards and wraps
       around at 0. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > SIZE_MAX / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = MALLOC(table->parser, tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the doubled array. */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      FREE(table->parser, table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* Find the free slot for the new key again in the resized table. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is empty: create the entry there. */
  table->v[i] = MALLOC(table->parser, createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7888
7889 static void FASTCALL
7890 hashTableClear(HASH_TABLE *table) {
7891 size_t i;
7892 for (i = 0; i < table->size; i++) {
7893 FREE(table->parser, table->v[i]);
7894 table->v[i] = NULL;
7895 }
7896 table->used = 0;
7897 }
7898
7899 static void FASTCALL
7900 hashTableDestroy(HASH_TABLE *table) {
7901 size_t i;
7902 for (i = 0; i < table->size; i++)
7903 FREE(table->parser, table->v[i]);
7904 FREE(table->parser, table->v);
7905 }
7906
7907 static void FASTCALL
7908 hashTableInit(HASH_TABLE *p, XML_Parser parser) {
7909 p->power = 0;
7910 p->size = 0;
7911 p->used = 0;
7912 p->v = NULL;
7913 p->parser = parser;
7914 }
7915
7916 static void FASTCALL
7917 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7918 iter->p = table->v;
7919 iter->end = iter->p ? iter->p + table->size : NULL;
7920 }
7921
7922 static NAMED *FASTCALL
7923 hashTableIterNext(HASH_TABLE_ITER *iter) {
7924 while (iter->p != iter->end) {
7925 NAMED *tem = *(iter->p)++;
7926 if (tem)
7927 return tem;
7928 }
7929 return NULL;
7930 }
7931
7932 static void FASTCALL
7933 poolInit(STRING_POOL *pool, XML_Parser parser) {
7934 pool->blocks = NULL;
7935 pool->freeBlocks = NULL;
7936 pool->start = NULL;
7937 pool->ptr = NULL;
7938 pool->end = NULL;
7939 pool->parser = parser;
7940 }
7941
7942 static void FASTCALL
7943 poolClear(STRING_POOL *pool) {
7944 if (! pool->freeBlocks)
7945 pool->freeBlocks = pool->blocks;
7946 else {
7947 BLOCK *p = pool->blocks;
7948 while (p) {
7949 BLOCK *tem = p->next;
7950 p->next = pool->freeBlocks;
7951 pool->freeBlocks = p;
7952 p = tem;
7953 }
7954 }
7955 pool->blocks = NULL;
7956 pool->start = NULL;
7957 pool->ptr = NULL;
7958 pool->end = NULL;
7959 }
7960
7961 static void FASTCALL
7962 poolDestroy(STRING_POOL *pool) {
7963 BLOCK *p = pool->blocks;
7964 while (p) {
7965 BLOCK *tem = p->next;
7966 FREE(pool->parser, p);
7967 p = tem;
7968 }
7969 p = pool->freeBlocks;
7970 while (p) {
7971 BLOCK *tem = p->next;
7972 FREE(pool->parser, p);
7973 p = tem;
7974 }
7975 }
7976
7977 static XML_Char *
7978 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7979 const char *end) {
7980 if (! pool->ptr && ! poolGrow(pool))
7981 return NULL;
7982 for (;;) {
7983 const enum XML_Convert_Result convert_res = XmlConvert(
7984 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7985 if ((convert_res == XML_CONVERT_COMPLETED)
7986 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7987 break;
7988 if (! poolGrow(pool))
7989 return NULL;
7990 }
7991 return pool->start;
7992 }
7993
7994 static const XML_Char *FASTCALL
7995 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7996 do {
7997 if (! poolAppendChar(pool, *s))
7998 return NULL;
7999 } while (*s++);
8000 s = pool->start;
8001 poolFinish(pool);
8002 return s;
8003 }
8004
8005 // A version of `poolCopyString` that does not call `poolFinish`
8006 // and reverts any partial advancement upon failure.
8007 static const XML_Char *FASTCALL
8008 poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) {
8009 const XML_Char *const original = s;
8010 do {
8011 if (! poolAppendChar(pool, *s)) {
8012 // Revert any previously successful advancement
8013 const ptrdiff_t advancedBy = s - original;
8014 if (advancedBy > 0)
8015 pool->ptr -= advancedBy;
8016 return NULL;
8017 }
8018 } while (*s++);
8019 return pool->start;
8020 }
8021
8022 static const XML_Char *
8023 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
8024 if (! pool->ptr && ! poolGrow(pool)) {
8025 /* The following line is unreachable given the current usage of
8026 * poolCopyStringN(). Currently it is called from exactly one
8027 * place to copy the text of a simple general entity. By that
8028 * point, the name of the entity is already stored in the pool, so
8029 * pool->ptr cannot be NULL.
8030 *
8031 * If poolCopyStringN() is used elsewhere as it well might be,
8032 * this line may well become executable again. Regardless, this
8033 * sort of check shouldn't be removed lightly, so we just exclude
8034 * it from the coverage statistics.
8035 */
8036 return NULL; /* LCOV_EXCL_LINE */
8037 }
8038 for (; n > 0; --n, s++) {
8039 if (! poolAppendChar(pool, *s))
8040 return NULL;
8041 }
8042 s = pool->start;
8043 poolFinish(pool);
8044 return s;
8045 }
8046
8047 static const XML_Char *FASTCALL
8048 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
8049 while (*s) {
8050 if (! poolAppendChar(pool, *s))
8051 return NULL;
8052 s++;
8053 }
8054 return pool->start;
8055 }
8056
8057 static XML_Char *
8058 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8059 const char *end) {
8060 if (! poolAppend(pool, enc, ptr, end))
8061 return NULL;
8062 if (pool->ptr == pool->end && ! poolGrow(pool))
8063 return NULL;
8064 *(pool->ptr)++ = 0;
8065 return pool->start;
8066 }
8067
8068 static size_t
8069 poolBytesToAllocateFor(int blockSize) {
8070 /* Unprotected math would be:
8071 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
8072 **
8073 ** Detect overflow, avoiding _signed_ overflow undefined behavior
8074 ** For a + b * c we check b * c in isolation first, so that addition of a
8075 ** on top has no chance of making us accept a small non-negative number
8076 */
8077 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
8078
8079 if (blockSize <= 0)
8080 return 0;
8081
8082 if (blockSize > (int)(INT_MAX / stretch))
8083 return 0;
8084
8085 {
8086 const int stretchedBlockSize = blockSize * (int)stretch;
8087 const int bytesToAllocate
8088 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
8089 if (bytesToAllocate < 0)
8090 return 0;
8091
8092 return (size_t)bytesToAllocate;
8093 }
8094 }
8095
/* Make more room for the pool's current (unfinished) string.
 *
 * Three strategies are tried in order:
 *   1. Reuse the first block of the free list, either as the very first
 *      block (no current string storage yet) or, if it is larger than the
 *      current capacity, as the new home of the current string.
 *   2. If the current string starts at the beginning of the newest block,
 *      enlarge that block in place with REALLOC to twice its capacity.
 *   3. Otherwise allocate a new block (at least INIT_BLOCK_SIZE characters,
 *      else twice the current capacity) and copy the current string into it.
 *
 * On success pool->start/ptr/end describe the new storage and XML_TRUE is
 * returned; on size overflow or allocation failure XML_FALSE is returned.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == NULL) {
      /* Nothing in progress: take the first free block as current block. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is bigger than the current capacity: move it
         to the head of the in-use list and relocate the string into it. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* ptr must be rebased before start is overwritten. */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The current string is the only content of the newest block, so that
       block can be resized directly. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX/2 bytes have already been allocated. This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* The current string shares its block with finished strings (or there
       is no block yet): start a fresh block for it. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = MALLOC(pool->parser, bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry over the part of the string written so far, if any. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
8198
/* Reserve the next node of the content model scaffold (dtd->scaffold).
 *
 * The scaffold index array is allocated on first use and the scaffold array
 * is created or doubled when full.  If dtd->scaffLevel is non-zero, the new
 * node is linked in as the last child of the node recorded in
 * dtd->scaffIndex[dtd->scaffLevel - 1].  The new node's firstchild,
 * lastchild, childcnt and nextsib are set to 0.
 *
 * Returns the index of the new node, or -1 on allocation failure or when a
 * size computation would overflow.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
      return -1;
    }
#endif
    dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  // Will casting to int be safe further down?
  if (dtd->scaffCount > INT_MAX) {
    return -1;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* The scaffold array is full (or does not exist yet): grow it. */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = REALLOC(parser, dtd->scaffold,
                     dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize *= 2;
    } else {
      temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = (int)dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    /* A lastchild of 0 means "no child yet"; otherwise chain the new node
       after the previous last child. */
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}
8272
/* Build the XML_Content tree for the element declaration whose scaffold is
 * currently held in parser->m_dtd.
 *
 * The result is a single allocation obtained from parser->m_mem.malloc_fcn:
 * dtd->scaffCount XML_Content nodes followed by the name strings
 * (dtd->contentStringLen characters in total).  Returns NULL if the size
 * computation would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must not overflow either. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  // NOTE: We are avoiding MALLOC(..) here so that
  //       applications that are not using XML_FreeContentModel but plain
  //       free(..) or .free_fcn() to free the content model's memory are safe.
  ret = parser->m_mem.malloc_fcn(allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation (of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.)
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one
   *   does the work, the other creates "jobs" for its colleague to do, and
   *   leads the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to
   *   do when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been
   *   processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The name strings are stored directly behind the node array. */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
         area; it has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      /* From here on numchildren holds the real child count again. */
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
8402
8403 static ELEMENT_TYPE *
8404 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8405 const char *end) {
8406 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8407 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8408 ELEMENT_TYPE *ret;
8409
8410 if (! name)
8411 return NULL;
8412 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8413 sizeof(ELEMENT_TYPE));
8414 if (! ret)
8415 return NULL;
8416 if (! ret->defaultAttsNames.parser)
8417 hashTableInit(&(ret->defaultAttsNames), getRootParserOf(parser, NULL));
8418 if (ret->name != name)
8419 poolDiscard(&dtd->pool);
8420 else {
8421 poolFinish(&dtd->pool);
8422 if (! setElementTypePrefix(parser, ret))
8423 return NULL;
8424 }
8425 return ret;
8426 }
8427
8428 static XML_Char *
8429 copyString(const XML_Char *s, XML_Parser parser) {
8430 size_t charsRequired = 0;
8431 XML_Char *result;
8432
8433 /* First determine how long the string is */
8434 while (s[charsRequired] != 0) {
8435 charsRequired++;
8436 }
8437 /* Include the terminator */
8438 charsRequired++;
8439
8440 /* Now allocate space for the copy */
8441 result = MALLOC(parser, charsRequired * sizeof(XML_Char));
8442 if (result == NULL)
8443 return NULL;
8444 /* Copy the original into place */
8445 memcpy(result, s, charsRequired * sizeof(XML_Char));
8446 return result;
8447 }
8448
8449 #if XML_GE == 1
8450
8451 static float
8452 accountingGetCurrentAmplification(XML_Parser rootParser) {
8453 // 1.........1.........12 => 22
8454 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8455 const XmlBigCount countBytesOutput
8456 = rootParser->m_accounting.countBytesDirect
8457 + rootParser->m_accounting.countBytesIndirect;
8458 const float amplificationFactor
8459 = rootParser->m_accounting.countBytesDirect
8460 ? ((float)countBytesOutput
8461 / (float)(rootParser->m_accounting.countBytesDirect))
8462 : ((float)(lenOfShortestInclude
8463 + rootParser->m_accounting.countBytesIndirect)
8464 / (float)lenOfShortestInclude);
8465 assert(! rootParser->m_parentParser);
8466 return amplificationFactor;
8467 }
8468
8469 static void
8470 accountingReportStats(XML_Parser originParser, const char *epilog) {
8471 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8472 assert(! rootParser->m_parentParser);
8473
8474 if (rootParser->m_accounting.debugLevel == 0u) {
8475 return;
8476 }
8477
8478 const float amplificationFactor
8479 = accountingGetCurrentAmplification(rootParser);
8480 fprintf(stderr,
8481 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8482 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8483 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8484 rootParser->m_accounting.countBytesIndirect,
8485 (double)amplificationFactor, epilog);
8486 }
8487
8488 static void
8489 accountingOnAbort(XML_Parser originParser) {
8490 accountingReportStats(originParser, " ABORTING\n");
8491 }
8492
8493 static void
8494 accountingReportDiff(XML_Parser rootParser,
8495 unsigned int levelsAwayFromRootParser, const char *before,
8496 const char *after, ptrdiff_t bytesMore, int source_line,
8497 enum XML_Account account) {
8498 assert(! rootParser->m_parentParser);
8499
8500 fprintf(stderr,
8501 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8502 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8503 levelsAwayFromRootParser, source_line, 10, "");
8504
8505 const char ellipis[] = "[..]";
8506 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8507 const unsigned int contextLength = 10;
8508
8509 /* Note: Performance is of no concern here */
8510 const char *walker = before;
8511 if ((rootParser->m_accounting.debugLevel >= 3u)
8512 || (after - before)
8513 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8514 for (; walker < after; walker++) {
8515 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8516 }
8517 } else {
8518 for (; walker < before + contextLength; walker++) {
8519 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8520 }
8521 fprintf(stderr, ellipis);
8522 walker = after - contextLength;
8523 for (; walker < after; walker++) {
8524 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8525 }
8526 }
8527 fprintf(stderr, "\"\n");
8528 }
8529
8530 static XML_Bool
8531 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8532 const char *after, int source_line,
8533 enum XML_Account account) {
8534 /* Note: We need to check the token type *first* to be sure that
8535 * we can even access variable <after>, safely.
8536 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8537 switch (tok) {
8538 case XML_TOK_INVALID:
8539 case XML_TOK_PARTIAL:
8540 case XML_TOK_PARTIAL_CHAR:
8541 case XML_TOK_NONE:
8542 return XML_TRUE;
8543 }
8544
8545 if (account == XML_ACCOUNT_NONE)
8546 return XML_TRUE; /* because these bytes have been accounted for, already */
8547
8548 unsigned int levelsAwayFromRootParser;
8549 const XML_Parser rootParser
8550 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8551 assert(! rootParser->m_parentParser);
8552
8553 const int isDirect
8554 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8555 const ptrdiff_t bytesMore = after - before;
8556
8557 XmlBigCount *const additionTarget
8558 = isDirect ? &rootParser->m_accounting.countBytesDirect
8559 : &rootParser->m_accounting.countBytesIndirect;
8560
8561 /* Detect and avoid integer overflow */
8562 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8563 return XML_FALSE;
8564 *additionTarget += bytesMore;
8565
8566 const XmlBigCount countBytesOutput
8567 = rootParser->m_accounting.countBytesDirect
8568 + rootParser->m_accounting.countBytesIndirect;
8569 const float amplificationFactor
8570 = accountingGetCurrentAmplification(rootParser);
8571 const XML_Bool tolerated
8572 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8573 || (amplificationFactor
8574 <= rootParser->m_accounting.maximumAmplificationFactor);
8575
8576 if (rootParser->m_accounting.debugLevel >= 2u) {
8577 accountingReportStats(rootParser, "");
8578 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8579 bytesMore, source_line, account);
8580 }
8581
8582 return tolerated;
8583 }
8584
8585 unsigned long long
8586 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8587 if (! parser)
8588 return 0;
8589 return parser->m_accounting.countBytesDirect;
8590 }
8591
8592 unsigned long long
8593 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8594 if (! parser)
8595 return 0;
8596 return parser->m_accounting.countBytesIndirect;
8597 }
8598
8599 static void
8600 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8601 const char *action, int sourceLine) {
8602 assert(! rootParser->m_parentParser);
8603 if (rootParser->m_entity_stats.debugLevel == 0u)
8604 return;
8605
8606 # if defined(XML_UNICODE)
8607 const char *const entityName = "[..]";
8608 # else
8609 const char *const entityName = entity->name;
8610 # endif
8611
8612 fprintf(
8613 stderr,
8614 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8615 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8616 rootParser->m_entity_stats.currentDepth,
8617 rootParser->m_entity_stats.maximumDepthSeen,
8618 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8619 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8620 sourceLine);
8621 }
8622
8623 static void
8624 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8625 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8626 assert(! rootParser->m_parentParser);
8627
8628 rootParser->m_entity_stats.countEverOpened++;
8629 rootParser->m_entity_stats.currentDepth++;
8630 if (rootParser->m_entity_stats.currentDepth
8631 > rootParser->m_entity_stats.maximumDepthSeen) {
8632 rootParser->m_entity_stats.maximumDepthSeen++;
8633 }
8634
8635 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8636 }
8637
8638 static void
8639 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8640 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8641 assert(! rootParser->m_parentParser);
8642
8643 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8644 rootParser->m_entity_stats.currentDepth--;
8645 }
8646
8647 #endif /* XML_GE == 1 */
8648
8649 static XML_Parser
8650 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8651 XML_Parser rootParser = parser;
8652 unsigned int stepsTakenUpwards = 0;
8653 while (rootParser->m_parentParser) {
8654 rootParser = rootParser->m_parentParser;
8655 stepsTakenUpwards++;
8656 }
8657 assert(! rootParser->m_parentParser);
8658 if (outLevelDiff != NULL) {
8659 *outLevelDiff = stepsTakenUpwards;
8660 }
8661 return rootParser;
8662 }
8663
8664 #if XML_GE == 1
8665
/* Map a byte value to a printable, zero-terminated string with static
 * storage duration:
 *
 *  - 0 maps to "\0"; tab, line feed and carriage return map to "\t", "\n"
 *    and "\r" (each as a backslash followed by a letter/digit);
 *  - double quote and backslash are prefixed with a backslash;
 *  - all other bytes from 32 to 126 map to themselves;
 *  - everything else maps to "\x" followed by the upper-case hexadecimal
 *    value without leading zeros (e.g. "\xB", "\x1F", "\xFF").
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[256] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int code = c;

  // LCOV_EXCL_START
  if (code >= 256u) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP

  return printable[code];
}
9189
9190 #endif /* XML_GE == 1 */
9191
/* Read a debug level from the environment variable `variableName`.
 *
 * The value is parsed with strtoul in base 10.  `defaultDebugLevel` is
 * returned if the variable is not set, if strtoul reports an error through
 * errno, if no characters were consumed, or if characters remain after the
 * number.  Whenever the variable is set, errno is 0 on return: it is cleared
 * before parsing and again after a failed parse.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *parseEnd = NULL;
  errno = 0;
  const unsigned long parsedLevel = strtoul(text, &parseEnd, 10);

  const int conversionFailed = (errno != 0);
  const int nothingConsumed = (parseEnd == text);
  const int trailingCharacters = (parseEnd[0] != '\0');

  if (conversionFailed || nothingConsumed || trailingCharacters) {
    errno = 0;
    return defaultDebugLevel;
  }

  return parsedLevel;
}
9210