1 /* 93c1caa66e2b0310459482516af05505b57c5cb7b96df777105308fc585c85d1 (2.7.5+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com>
45 Copyright (c) 2025 Atrem Borovik <polzovatellllk@gmail.com>
46 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com>
47 Copyright (c) 2026 Rosen Penev <rosenp@gmail.com>
48 Licensed under the MIT license:
49
50 Permission is hereby granted, free of charge, to any person obtaining
51 a copy of this software and associated documentation files (the
52 "Software"), to deal in the Software without restriction, including
53 without limitation the rights to use, copy, modify, merge, publish,
54 distribute, sublicense, and/or sell copies of the Software, and to permit
55 persons to whom the Software is furnished to do so, subject to the
56 following conditions:
57
58 The above copyright notice and this permission notice shall be included
59 in all copies or substantial portions of the Software.
60
61 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
62 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
63 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
64 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
65 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
66 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
67 USE OR OTHER DEALINGS IN THE SOFTWARE.
68 */
69
/* Marks this translation unit as part of the library build itself.
   NOTE(review): presumably consumed by the public headers -- confirm. */
#define XML_BUILDING_EXPAT 1

#include "expat_config.h"

/* Build-time sanity check for XML_GE.  The term (1 - XML_GE - 1 == 2)
   becomes true when XML_GE is defined but empty (it then reads
   "1 - - 1 == 2"), so an empty definition is rejected here together with
   any value outside of 0..1. */
#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
#  error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
#endif

/* DTD support cannot be combined with disabled general entities. */
#if defined(XML_DTD) && XML_GE == 0
#  error Either undefine XML_DTD or define XML_GE to 1.
#endif

/* Same empty-definition trick as for XML_GE above; the "+ 0" keeps the
   range test well-formed even if the macro expands to nothing. */
#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2)           \
    || (XML_CONTEXT_BYTES + 0 < 0)
#  error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
#endif

/* Must be set before the first system header is included below. */
#if defined(HAVE_SYSCALL_GETRANDOM)
#  if ! defined(_GNU_SOURCE)
#    define _GNU_SOURCE 1 /* syscall prototype */
#  endif
#endif

#ifdef _WIN32
/* force stdlib to define rand_s() */
#  if ! defined(_CRT_RAND_S)
#    define _CRT_RAND_S
#  endif
#endif
99
100 #include <stdbool.h>
101 #include <stddef.h>
102 #include <string.h> /* memset(), memcpy() */
103 #include <assert.h>
104 #include <limits.h> /* INT_MAX, UINT_MAX */
105 #include <stdio.h> /* fprintf */
106 #include <stdlib.h> /* getenv, rand_s */
107 #include <stdint.h> /* SIZE_MAX, uintptr_t */
108 #include <math.h> /* isnan */
109
110 #ifdef _WIN32
111 # define getpid GetCurrentProcessId
112 #else
113 # include <sys/time.h> /* gettimeofday() */
114 # include <sys/types.h> /* getpid() */
115 # include <unistd.h> /* getpid() */
116 # include <fcntl.h> /* O_RDONLY */
117 # include <errno.h>
118 #endif
119
120 #ifdef _WIN32
121 # include "winconfig.h"
122 #endif
123
124 #include "ascii.h"
125 #include "expat.h"
126 #include "siphash.h"
127
128 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
129 # if defined(HAVE_GETRANDOM)
130 # include <sys/random.h> /* getrandom */
131 # else
132 # include <unistd.h> /* syscall */
133 # include <sys/syscall.h> /* SYS_getrandom */
134 # endif
135 # if ! defined(GRND_NONBLOCK)
136 # define GRND_NONBLOCK 0x0001
#  endif /* ! defined(GRND_NONBLOCK) */
138 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
139
140 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
141 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
142 #endif
143
144 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
145 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
146 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
147 && ! defined(XML_POOR_ENTROPY)
148 # error You do not have support for any sources of high quality entropy \
149 enabled. For end user security, that is probably not what you want. \
150 \
151 Your options include: \
152 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
153 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
154 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
155 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
156 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
157 * Windows >=Vista (rand_s): _WIN32. \
158 \
    If you insist on not using any of these, bypass this error by defining \
160 XML_POOR_ENTROPY; you have been warned. \
161 \
162 If you have reasons to patch this detection code away or need changes \
163 to the build system, please open a bug. Thank you!
164 #endif
165
/* Bind the generic Xml* names used in this file to their UTF-16 or UTF-8
   counterparts, selected by XML_UNICODE.  ICHAR is the matching internal
   character unit (unsigned short vs. char). */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* True unless the encoding is flagged isUtf16 AND the address s has its
   lowest bit clear (i.e. is 2-byte aligned). */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* True unless the encoding is flagged isUtf8; s is not inspected here. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
183
/* Without XML_NS, every *NS name used in this file is mapped onto its
   non-namespace counterpart, so callers need no #ifdefs of their own. */
#ifndef XML_NS

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
193
/* XML_T(x) wraps a character constant and XML_L(x) a literal for the
   configured character type:
     - XML_UNICODE + XML_UNICODE_WCHAR_T: cast to wchar_t / add L prefix
     - XML_UNICODE only:                  cast to unsigned short / as is
     - otherwise:                         both expand to x unchanged
   NOTE(review): x is not parenthesized in the expansions, so only simple
   literals/constants should be passed. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
210
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Note that sz is expanded twice. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
   are non-NULL, and 0 otherwise.  Both arguments are expanded twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* Smaller of a and b; the selected argument is evaluated twice, so avoid
   arguments with side effects. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
218
219 #include "internal.h"
220 #include "xmltok.h"
221 #include "xmlrole.h"
222
/* Hash table keys are strings of XML_Char. */
typedef const XML_Char *KEY;

/* Minimal view of a hash table entry: just its key.
   NOTE(review): ENTITY, PREFIX, ATTRIBUTE_ID and ELEMENT_TYPE in this file
   all start with a name member as well -- presumably so they can be
   accessed through NAMED; confirm against lookup(). */
typedef struct {
  KEY name;
} NAMED;

/* Hash table of NAMED-compatible entries.  According to the probing
   comment in this file, size is a power of 2 and the index mask is
   size - 1. */
typedef struct {
  NAMED **v;           /* slot array */
  unsigned char power; /* NOTE(review): presumably log2(size) -- confirm */
  size_t size;         /* number of slots */
  size_t used;         /* NOTE(review): presumably occupied slots -- confirm */
  XML_Parser parser;   /* parser this table is associated with */
} HASH_TABLE;
236
/* Prototypes of static hashing helpers (definitions are not part of this
   section of the file). */
static size_t keylen(KEY s);

static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);

/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   NOTE(review): SECOND_HASH shifts by (power - 1), so it relies on
   power >= 1; PROBE_STEP additionally truncates the step to unsigned char.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
253
/* Cursor over the slots of a HASH_TABLE; set up by hashTableIterInit() and
   advanced by hashTableIterNext() (both declared further down).
   NOTE(review): p/end presumably delimit the not-yet-visited part of the
   slot array -- confirm in the iterator implementation. */
typedef struct {
  NAMED **p;
  NAMED **end;
} HASH_TABLE_ITER;
258
/* Initial sizes and sentinel values.
   NOTE(review): the users of these constants are outside this section;
   the names suggest initial capacities of the respective buffers. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
267
/* A namespace binding record.  BINDING and PREFIX reference each other:
   a BINDING points at its PREFIX, and a PREFIX points at one BINDING.
   NOTE(review): the linked-list roles below are inferred from the member
   names -- confirm against addBinding()/freeBindings(). */
typedef struct binding {
  struct prefix *prefix;
  struct binding *nextTagBinding;    /* next binding in a per-tag list */
  struct binding *prevPrefixBinding; /* previous binding of the same prefix */
  const struct attribute_id *attId;
  XML_Char *uri;
  int uriLen;
  int uriAlloc; /* presumably allocated capacity of uri -- confirm */
} BINDING;

/* A namespace prefix and the binding currently attached to it. */
typedef struct prefix {
  const XML_Char *name;
  BINDING *binding;
} PREFIX;
282
/* An element name together with pointers to its components and the
   corresponding lengths; embedded in TAG as the name "in the API encoding".
   NOTE(review): units of the *Len members (XML_Chars vs. bytes) are not
   visible here -- confirm where they are filled in. */
typedef struct {
  const XML_Char *str;
  const XML_Char *localPart;
  const XML_Char *prefix;
  int strLen;
  int uriLen;
  int prefixLen;
} TAG_NAME;
291
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() when the element is open, the memory for the 'raw'
   version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list.

   The union gives two typed views (bytes vs. XML_Chars) of the same
   buffer start; bufEnd marks the end of that buffer.
*/
typedef struct tag {
  struct tag *parent;  /* parent of this element */
  const char *rawName; /* tagName in the original encoding */
  int rawNameLength;
  TAG_NAME name; /* tagName in the API encoding */
  union {
    char *raw;     /* for byte-level access (rawName storage) */
    XML_Char *str; /* for character-level access (converted name) */
  } buf;           /* buffer for name components */
  char *bufEnd;    /* end of the buffer */
  BINDING *bindings;
} TAG;
317
/* Record for one declared entity.  Starts with the name member, like the
   other hashed record types in this file. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;
  int textLen;   /* length in XML_Chars */
  int processed; /* # of processed bytes - when suspended */
  const XML_Char *systemId;
  const XML_Char *base;
  const XML_Char *publicId;
  const XML_Char *notation;
  XML_Bool open;
  XML_Bool hasMore; /* true if entity has not been completely processed */
  /* An entity can be open while being already completely processed (hasMore ==
    XML_FALSE). The reason is the delayed closing of entities until their inner
    entities are processed and closed */
  XML_Bool is_param;
  XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
335
/* One node of the temporary ("scaffold") representation of an element
   content model; the DTD struct keeps an array of these.
   NOTE(review): firstchild/lastchild/nextsib are ints, presumably indices
   into that array rather than pointers -- confirm in nextScaffoldPart()
   and build_model(). */
typedef struct {
  enum XML_Content_Type type;
  enum XML_Content_Quant quant;
  const XML_Char *name;
  int firstchild;
  int lastchild;
  int childcnt;
  int nextsib;
} CONTENT_SCAFFOLD;

/* NOTE(review): presumably the initial capacity of the scaffold array. */
#define INIT_SCAFFOLD_ELEMENTS 32
347
/* One chunk of string pool storage; the characters follow the header in
   the flexible array member s. */
typedef struct block {
  struct block *next;
  int size; /* NOTE(review): presumably capacity of s in XML_Chars */
  XML_Char s[];
} BLOCK;

/* Growable string storage built from BLOCKs.  The pool* macros in this
   file operate on start/ptr/end: poolLength() is ptr - start,
   poolFinish() moves start up to ptr, poolDiscard() resets ptr to start,
   and poolAppendChar() grows the pool when ptr reaches end. */
typedef struct {
  BLOCK *blocks;
  BLOCK *freeBlocks;
  const XML_Char *end; /* limit of the writable area */
  XML_Char *ptr;       /* next write position */
  XML_Char *start;     /* start of the string under construction */
  XML_Parser parser;
} STRING_POOL;
362
/* Identity of an attribute name.
   The XML_Char before the name is used to determine whether
   an attribute has been specified. */
typedef struct attribute_id {
  XML_Char *name;
  PREFIX *prefix;
  XML_Bool maybeTokenized;
  XML_Bool xmlns;
} ATTRIBUTE_ID;

/* A declared default for one attribute: which attribute, whether it is of
   CDATA type, and the default value string. */
typedef struct {
  const ATTRIBUTE_ID *id;
  XML_Bool isCdata;
  const XML_Char *value;
} DEFAULT_ATTRIBUTE;
377
/* Entry of the parser's m_nsAtts table.
   NOTE(review): version is presumably compared against m_nsAttsVersion to
   tell current entries from stale ones -- confirm in storeAtts(). */
typedef struct {
  unsigned long version;
  unsigned long hash;
  const XML_Char *uriName;
} NS_ATT;
383
/* Per-element-type information: name, prefix, the ID attribute (if any)
   and an array of attribute defaults with its used/allocated counts. */
typedef struct {
  const XML_Char *name;
  PREFIX *prefix;
  const ATTRIBUTE_ID *idAtt;
  int nDefaultAtts;     /* entries in use in defaultAtts */
  int allocDefaultAtts; /* entries allocated in defaultAtts */
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
392
/* Everything collected from the document type declaration: the hash tables
   of declared names, the string pools backing them, a few state flags and
   the scaffolding used while an element content model is being built. */
typedef struct {
  HASH_TABLE generalEntities;
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  HASH_TABLE prefixes;
  STRING_POOL pool;
  STRING_POOL entityValuePool;
  /* false once a parameter entity reference has been skipped */
  XML_Bool keepProcessing;
  /* true once an internal or external PE reference has been encountered;
     this includes the reference to an external subset */
  XML_Bool hasParamEntityRefs;
  XML_Bool standalone;
#ifdef XML_DTD
  /* indicates if external PE has been read */
  XML_Bool paramEntityRead;
  HASH_TABLE paramEntities;
#endif /* XML_DTD */
  PREFIX defaultPrefix;
  /* === scaffolding for building content model === */
  XML_Bool in_eldecl;
  CONTENT_SCAFFOLD *scaffold;
  unsigned contentStringLen;
  unsigned scaffSize;
  unsigned scaffCount;
  int scaffLevel;
  int *scaffIndex;
} DTD;
421
/* Context in which an internal entity is being expanded.
   NOTE(review): the three values line up with the parser's three pairs of
   open/free lists (m_openInternalEntities, m_openAttributeEntities,
   m_openValueEntities) -- confirm in processEntity(). */
enum EntityType {
  ENTITY_INTERNAL,
  ENTITY_ATTRIBUTE,
  ENTITY_VALUE,
};

/* Node of a singly linked list (via next) describing one internal entity
   that is currently open. */
typedef struct open_internal_entity {
  const char *internalEventPtr;
  const char *internalEventEndPtr;
  struct open_internal_entity *next;
  ENTITY *entity;
  int startTagLevel;
  XML_Bool betweenDecl; /* WFC: PE Between Declarations */
  enum EntityType type;
} OPEN_INTERNAL_ENTITY;
437
/* Tells the accounting code which counter, if any, a span of processed
   bytes has to be charged to. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
444
#if XML_GE == 1
/* Counter type wide enough for byte totals. */
typedef unsigned long long XmlBigCount;

/* Byte counters and limits for input amplification accounting. */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* Heap usage bookkeeping used by expat_malloc() and friends.  An allocation
   is only checked against maximumAmplificationFactor once the new total
   reaches activationThresholdBytes (see expat_heap_increase_tolerable()). */
typedef struct MALLOC_TRACKER {
  XmlBigCount bytesAllocated;
  XmlBigCount peakBytesAllocated; // updated live only for debug level >=2
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  XmlBigCount activationThresholdBytes;
} MALLOC_TRACKER;

/* Counters describing entity usage; see the entityTracking*() prototypes
   further down for the functions working with them. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
#endif /* XML_GE == 1 */
470
/* Function type shared by all parsing stages: a stage is handed the input
   range [start, end) plus an out-parameter endPtr and returns an XML_Error
   code.  The parser keeps a pointer to the active stage in m_processor. */
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
                                         const char *end, const char **endPtr);

/* The stages themselves (static; defined outside this section). */
static Processor prologProcessor;
static Processor prologInitProcessor;
static Processor contentProcessor;
static Processor cdataSectionProcessor;
#ifdef XML_DTD
static Processor ignoreSectionProcessor;
static Processor externalParEntProcessor;
static Processor externalParEntInitProcessor;
static Processor entityValueProcessor;
static Processor entityValueInitProcessor;
#endif /* XML_DTD */
static Processor epilogProcessor;
static Processor errorProcessor;
static Processor externalEntityInitProcessor;
static Processor externalEntityInitProcessor2;
static Processor externalEntityInitProcessor3;
static Processor externalEntityContentProcessor;
static Processor internalEntityProcessor;
492
/* Prototypes: encoding setup and the core prolog/content/CDATA workers
   (static; defined outside this section). */
static enum XML_Error handleUnknownEncoding(XML_Parser parser,
                                            const XML_Char *encodingName);
static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
                                     const char *s, const char *next);
static enum XML_Error initializeEncoding(XML_Parser parser);
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
                               const char *s, const char *end, int tok,
                               const char *next, const char **nextPtr,
                               XML_Bool haveMore, XML_Bool allowClosingDoctype,
                               enum XML_Account account);
static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
                                    XML_Bool betweenDecl, enum EntityType type);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
                                const ENCODING *enc, const char *start,
                                const char *end, const char **endPtr,
                                XML_Bool haveMore, enum XML_Account account);
static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
                                     const char **startPtr, const char *end,
                                     const char **nextPtr, XML_Bool haveMore,
                                     enum XML_Account account);
#ifdef XML_DTD
static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
                                      const char **startPtr, const char *end,
                                      const char **nextPtr, XML_Bool haveMore);
#endif /* XML_DTD */
518
/* Prototypes: namespace bindings, attribute handling, entity values and
   event reporting (static; defined outside this section).  Which of the
   entity value helpers exists depends on XML_GE. */
static void freeBindings(XML_Parser parser, BINDING *bindings);
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
                                const char *attStr, TAG_NAME *tagNamePtr,
                                BINDING **bindingsPtr,
                                enum XML_Account account);
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
                                 const ATTRIBUTE_ID *attId, const XML_Char *uri,
                                 BINDING **bindingsPtr);
static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
                           XML_Bool isCdata, XML_Bool isId,
                           const XML_Char *value, XML_Parser parser);
static enum XML_Error storeAttributeValue(XML_Parser parser,
                                          const ENCODING *enc, XML_Bool isCdata,
                                          const char *ptr, const char *end,
                                          STRING_POOL *pool,
                                          enum XML_Account account);
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                     const char *ptr, const char *end, STRING_POOL *pool,
                     enum XML_Account account, const char **nextPtr);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
                                    const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
                                       const char *start, const char *end,
                                       enum XML_Account account,
                                       const char **nextPtr);
static enum XML_Error callStoreEntityValue(XML_Parser parser,
                                           const ENCODING *enc,
                                           const char *start, const char *end,
                                           enum XML_Account account);
#else
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
#endif
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
                                       const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
                         const char *start, const char *end);
static void reportDefault(XML_Parser parser, const ENCODING *enc,
                          const char *start, const char *end);
560
/* Prototypes: namespace context (static; defined outside this section). */
static const XML_Char *getContext(XML_Parser parser);
static XML_Bool setContext(XML_Parser parser, const XML_Char *context);

static void FASTCALL normalizePublicId(XML_Char *s);

/* Prototypes: DTD lifecycle and hash table primitives. */
static DTD *dtdCreate(XML_Parser parser);
/* do not call if m_parentParser != NULL */
static void dtdReset(DTD *p, XML_Parser parser);
static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser);
static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
                   XML_Parser parser);
static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
                           STRING_POOL *newPool, const HASH_TABLE *oldTable);
static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
                     size_t createSize);
static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser);
static void FASTCALL hashTableClear(HASH_TABLE *table);
static void FASTCALL hashTableDestroy(HASH_TABLE *table);
static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
                                       const HASH_TABLE *table);
static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);

/* Prototypes: STRING_POOL primitives (poolGrow() is also used by the
   poolAppendChar() macro in this file). */
static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser);
static void FASTCALL poolClear(STRING_POOL *pool);
static void FASTCALL poolDestroy(STRING_POOL *pool);
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
                            const char *ptr, const char *end);
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
                                 const char *ptr, const char *end);
static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
                                               const XML_Char *s);
static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool,
                                                       const XML_Char *s);
static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
                                       int n);
static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
                                                 const XML_Char *s);

/* Prototypes: content model construction. */
static int FASTCALL nextScaffoldPart(XML_Parser parser);
static XML_Content *build_model(XML_Parser parser);
static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
                                    const char *ptr, const char *end);

static XML_Char *copyString(const XML_Char *s, XML_Parser parser);

/* Prototypes: hash salt generation and parser construction/start-up. */
static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser);

static XML_Parser parserCreate(const XML_Char *encodingName,
                               const XML_Memory_Handling_Suite *memsuite,
                               const XML_Char *nameSep, DTD *dtd,
                               XML_Parser parentParser);

static void parserInit(XML_Parser parser, const XML_Char *encodingName);
616
/* Prototypes: amplification accounting and entity tracking; only built
   when general entities are enabled (static; defined outside this
   section). */
#if XML_GE == 1
static float accountingGetCurrentAmplification(XML_Parser rootParser);
static void accountingReportStats(XML_Parser originParser, const char *epilog);
static void accountingOnAbort(XML_Parser originParser);
static void accountingReportDiff(XML_Parser rootParser,
                                 unsigned int levelsAwayFromRootParser,
                                 const char *before, const char *after,
                                 ptrdiff_t bytesMore, int source_line,
                                 enum XML_Account account);
static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
                                        const char *before, const char *after,
                                        int source_line,
                                        enum XML_Account account);

static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
                                      const char *action, int sourceLine);
static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
                                 int sourceLine);
static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
                                  int sourceLine);
#endif /* XML_GE == 1 */

/* Used by expat_malloc() (with outLevelDiff == NULL) to find the parser
   whose m_parentParser is NULL. */
static XML_Parser getRootParserOf(XML_Parser parser,
                                  unsigned int *outLevelDiff);

static unsigned long getDebugLevel(const char *variableName,
                                   unsigned long defaultDebugLevel);
644
/* Accessor macros for STRING_POOL.  Each takes a pointer to a pool; the
   string under construction occupies [start, ptr) and end is the limit of
   the writable area.

     poolStart(pool)         start of the string under construction
     poolLength(pool)        number of XML_Chars written so far
     poolChop(pool)          drop the last character written
     poolLastChar(pool)      last character written (lvalue)
     poolDiscard(pool)       throw away the string under construction
     poolFinish(pool)        commit the string; the next one starts at ptr
     poolAppendChar(pool, c) append c, growing the pool via poolGrow() when
                             it is full; yields 1 on success, 0 on failure

   Every use of a macro parameter is parenthesized so that arguments such
   as "&parser->m_tempPool" or "*poolPtr" expand correctly.  The pool
   argument is expanded more than once, so it must not have side effects.
*/
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
655
/* Default for reparse deferral.  The variable is const in regular builds
   and only writable when built with XML_TESTING.
   NOTE(review): presumably copied into m_reparseDeferralEnabled when a
   parser is initialised -- confirm in parserInit(). */
#if ! defined(XML_TESTING)
const
#endif
    XML_Bool g_reparseDeferralEnabledDefault
    = XML_TRUE; // write ONLY in runtests.c
#if defined(XML_TESTING)
unsigned int g_bytesScanned = 0; // used for testing only
#endif
664
/* Complete state of one parser instance.  Member order matters at least
   for m_userData (see below), so members must not be rearranged. */
struct XML_ParserStruct {
  /* The first member must be m_userData so that the XML_GetUserData
     macro works. */
  void *m_userData;
  void *m_handlerArg;

  // How the four parse buffer pointers below relate in time and space:
  //
  //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
  //   |           |              |               |
  //   <--parsed-->|              |               |
  //               <---parsing--->|               |
  //                              <--unoccupied-->|
  //   <---------total-malloced/realloced-------->|

  char *m_buffer; // malloc/realloc base pointer of parse buffer
  /* allocator callbacks; malloc_fcn is what expat_malloc() ends up calling */
  const XML_Memory_Handling_Suite m_mem;
  const char *m_bufferPtr; // first character to be parsed
  char *m_bufferEnd;       // past last character to be parsed
  const char *m_bufferLim; // allocated end of m_buffer

  XML_Index m_parseEndByteIndex;
  const char *m_parseEndPtr;
  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
  XML_Bool m_reparseDeferralEnabled;
  int m_lastBufferRequestSize;
  XML_Char *m_dataBuf;
  XML_Char *m_dataBufEnd;

  /* --- handler function pointers --- */
  XML_StartElementHandler m_startElementHandler;
  XML_EndElementHandler m_endElementHandler;
  XML_CharacterDataHandler m_characterDataHandler;
  XML_ProcessingInstructionHandler m_processingInstructionHandler;
  XML_CommentHandler m_commentHandler;
  XML_StartCdataSectionHandler m_startCdataSectionHandler;
  XML_EndCdataSectionHandler m_endCdataSectionHandler;
  XML_DefaultHandler m_defaultHandler;
  XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
  XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
  XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
  XML_NotationDeclHandler m_notationDeclHandler;
  XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
  XML_NotStandaloneHandler m_notStandaloneHandler;
  XML_ExternalEntityRefHandler m_externalEntityRefHandler;
  XML_Parser m_externalEntityRefHandlerArg;
  XML_SkippedEntityHandler m_skippedEntityHandler;
  XML_UnknownEncodingHandler m_unknownEncodingHandler;
  XML_ElementDeclHandler m_elementDeclHandler;
  XML_AttlistDeclHandler m_attlistDeclHandler;
  XML_EntityDeclHandler m_entityDeclHandler;
  XML_XmlDeclHandler m_xmlDeclHandler;

  /* --- encoding state --- */
  const ENCODING *m_encoding;
  INIT_ENCODING m_initEncoding;
  const ENCODING *m_internalEncoding;
  const XML_Char *m_protocolEncodingName;
  XML_Bool m_ns;
  XML_Bool m_ns_triplets;
  void *m_unknownEncodingMem;
  void *m_unknownEncodingData;
  void *m_unknownEncodingHandlerData;
  void(XMLCALL *m_unknownEncodingRelease)(void *);

  /* --- parsing state --- */
  PROLOG_STATE m_prologState;
  Processor *m_processor; /* currently active parsing stage */
  enum XML_Error m_errorCode;
  const char *m_eventPtr;
  const char *m_eventEndPtr;
  const char *m_positionPtr;
  /* one open list and one free list per enum EntityType value */
  OPEN_INTERNAL_ENTITY *m_openInternalEntities;
  OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
  OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_openValueEntities;
  OPEN_INTERNAL_ENTITY *m_freeValueEntities;
  XML_Bool m_defaultExpandInternalEntities;
  int m_tagLevel;

  /* --- declaration currently being processed --- */
  ENTITY *m_declEntity;
  const XML_Char *m_doctypeName;
  const XML_Char *m_doctypeSysid;
  const XML_Char *m_doctypePubid;
  const XML_Char *m_declAttributeType;
  const XML_Char *m_declNotationName;
  const XML_Char *m_declNotationPublicId;
  ELEMENT_TYPE *m_declElementType;
  ATTRIBUTE_ID *m_declAttributeId;
  XML_Bool m_declAttributeIsCdata;
  XML_Bool m_declAttributeIsId;
  DTD *m_dtd;
  const XML_Char *m_curBase;

  /* --- open elements, bindings and attributes --- */
  TAG *m_tagStack;
  TAG *m_freeTagList;
  BINDING *m_inheritedBindings;
  BINDING *m_freeBindingList;
  int m_attsSize;
  int m_nSpecifiedAtts;
  int m_idAttIndex;
  ATTRIBUTE *m_atts;
  NS_ATT *m_nsAtts;
  unsigned long m_nsAttsVersion;
  unsigned char m_nsAttsPower;
#ifdef XML_ATTR_INFO
  XML_AttrInfo *m_attInfo;
#endif
  POSITION m_position;
  STRING_POOL m_tempPool;
  STRING_POOL m_temp2Pool;
  char *m_groupConnector;
  unsigned int m_groupSize;
  XML_Char m_namespaceSeparator;
  /* NULL for a root parser (expat_malloc() asserts this for the parser
     returned by getRootParserOf()) */
  XML_Parser m_parentParser;
  XML_ParsingStatus m_parsingStatus;
#ifdef XML_DTD
  XML_Bool m_isParamEntity;
  XML_Bool m_useForeignDTD;
  enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
  unsigned long m_hash_secret_salt;
#if XML_GE == 1
  /* accounting state; the allocation functions read and update these on
     the root parser only */
  ACCOUNTING m_accounting;
  MALLOC_TRACKER m_alloc_tracker;
  ENTITY_STATS m_entity_stats;
#endif
  XML_Bool m_reenter;
};
788
/* Allocation front-ends used throughout this file.

   With general entities enabled (XML_GE == 1) every request goes through
   the tracking allocator (expat_malloc() and friends), which also receives
   the source line of the call site.  Otherwise the parser's own
   m_mem.*_fcn callbacks are called directly.

   "parser" is parenthesized in both variants so that any pointer-valued
   expression (for example "&localParser" or "*parserPtr") expands
   correctly, regardless of which variant is compiled in.
*/
#if XML_GE == 1
#  define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__))
#  define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__))
#  define FREE(parser, p) (expat_free((parser), (p), __LINE__))
#else
#  define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#  define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#  define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
#endif
798
799 #if XML_GE == 1
800 static void
expat_heap_stat(XML_Parser rootParser,char operator,XmlBigCount absDiff,XmlBigCount newTotal,XmlBigCount peakTotal,int sourceLine)801 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff,
802 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) {
803 // NOTE: This can be +infinity or -nan
804 const float amplification
805 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
806 fprintf(
807 stderr,
808 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL(
809 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n",
810 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator,
811 absDiff, newTotal, peakTotal, (double)amplification, sourceLine);
812 }
813
814 static bool
expat_heap_increase_tolerable(XML_Parser rootParser,XmlBigCount increase,int sourceLine)815 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase,
816 int sourceLine) {
817 assert(rootParser != NULL);
818 assert(increase > 0);
819
820 XmlBigCount newTotal = 0;
821 bool tolerable = true;
822
823 // Detect integer overflow
824 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) {
825 tolerable = false;
826 } else {
827 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase;
828
829 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) {
830 assert(newTotal > 0);
831 // NOTE: This can be +infinity when dividing by zero but not -nan
832 const float amplification
833 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
834 if (amplification
835 > rootParser->m_alloc_tracker.maximumAmplificationFactor) {
836 tolerable = false;
837 }
838 }
839 }
840
841 if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) {
842 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine);
843 }
844
845 return tolerable;
846 }
847
848 # if defined(XML_TESTING)
849 void *
850 # else
851 static void *
852 # endif
expat_malloc(XML_Parser parser,size_t size,int sourceLine)853 expat_malloc(XML_Parser parser, size_t size, int sourceLine) {
854 // Detect integer overflow
855 if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) {
856 return NULL;
857 }
858
859 const XML_Parser rootParser = getRootParserOf(parser, NULL);
860 assert(rootParser->m_parentParser == NULL);
861
862 const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size;
863
864 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
865 < bytesToAllocate) {
866 return NULL; // i.e. signal integer overflow as out-of-memory
867 }
868
869 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate,
870 sourceLine)) {
871 return NULL; // i.e. signal violation as out-of-memory
872 }
873
874 // Actually allocate
875 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate);
876
877 if (mallocedPtr == NULL) {
878 return NULL;
879 }
880
881 // Update in-block recorded size
882 *(size_t *)mallocedPtr = size;
883
884 // Update accounting
885 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate;
886
887 // Report as needed
888 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
889 if (rootParser->m_alloc_tracker.bytesAllocated
890 > rootParser->m_alloc_tracker.peakBytesAllocated) {
891 rootParser->m_alloc_tracker.peakBytesAllocated
892 = rootParser->m_alloc_tracker.bytesAllocated;
893 }
894 expat_heap_stat(rootParser, '+', bytesToAllocate,
895 rootParser->m_alloc_tracker.bytesAllocated,
896 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
897 }
898
899 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
900 }
901
902 # if defined(XML_TESTING)
903 void
904 # else
905 static void
906 # endif
expat_free(XML_Parser parser,void * ptr,int sourceLine)907 expat_free(XML_Parser parser, void *ptr, int sourceLine) {
908 assert(parser != NULL);
909
910 if (ptr == NULL) {
911 return;
912 }
913
914 const XML_Parser rootParser = getRootParserOf(parser, NULL);
915 assert(rootParser->m_parentParser == NULL);
916
917 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and
918 // the original pointer returned by malloc/realloc
919 void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
920 const size_t bytesAllocated
921 = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr;
922
923 // Update accounting
924 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated);
925 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated;
926
927 // Report as needed
928 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
929 expat_heap_stat(rootParser, '-', bytesAllocated,
930 rootParser->m_alloc_tracker.bytesAllocated,
931 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
932 }
933
934 // NOTE: This may be freeing rootParser, so freeing has to come last
935 parser->m_mem.free_fcn(mallocedPtr);
936 }
937
938 # if defined(XML_TESTING)
939 void *
940 # else
941 static void *
942 # endif
expat_realloc(XML_Parser parser,void * ptr,size_t size,int sourceLine)943 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) {
944 assert(parser != NULL);
945
946 if (ptr == NULL) {
947 return expat_malloc(parser, size, sourceLine);
948 }
949
950 if (size == 0) {
951 expat_free(parser, ptr, sourceLine);
952 return NULL;
953 }
954
955 const XML_Parser rootParser = getRootParserOf(parser, NULL);
956 assert(rootParser->m_parentParser == NULL);
957
958 // Extract original size (to the eyes of the caller) and the original
959 // pointer returned by malloc/realloc
960 void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t);
961 const size_t prevSize = *(size_t *)mallocedPtr;
962
963 // Classify upcoming change
964 const bool isIncrease = (size > prevSize);
965 const size_t absDiff
966 = (size > prevSize) ? (size - prevSize) : (prevSize - size);
967
968 // Ask for permission from accounting
969 if (isIncrease) {
970 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) {
971 return NULL; // i.e. signal violation as out-of-memory
972 }
973 }
974
975 // NOTE: Integer overflow detection has already been done for us
976 // by expat_heap_increase_tolerable(..) above
977 assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size);
978
979 // Actually allocate
980 mallocedPtr = parser->m_mem.realloc_fcn(
981 mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size);
982
983 if (mallocedPtr == NULL) {
984 return NULL;
985 }
986
987 // Update accounting
988 if (isIncrease) {
989 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
990 >= absDiff);
991 rootParser->m_alloc_tracker.bytesAllocated += absDiff;
992 } else { // i.e. decrease
993 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff);
994 rootParser->m_alloc_tracker.bytesAllocated -= absDiff;
995 }
996
997 // Report as needed
998 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
999 if (rootParser->m_alloc_tracker.bytesAllocated
1000 > rootParser->m_alloc_tracker.peakBytesAllocated) {
1001 rootParser->m_alloc_tracker.peakBytesAllocated
1002 = rootParser->m_alloc_tracker.bytesAllocated;
1003 }
1004 expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff,
1005 rootParser->m_alloc_tracker.bytesAllocated,
1006 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
1007 }
1008
1009 // Update in-block recorded size
1010 *(size_t *)mallocedPtr = size;
1011
1012 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING;
1013 }
1014 #endif // XML_GE == 1
1015
1016 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)1017 XML_ParserCreate(const XML_Char *encodingName) {
1018 return XML_ParserCreate_MM(encodingName, NULL, NULL);
1019 }
1020
1021 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)1022 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
1023 XML_Char tmp[2] = {nsSep, 0};
1024 return XML_ParserCreate_MM(encodingName, NULL, tmp);
1025 }
1026
// "xml=http://www.w3.org/XML/1998/namespace"
//
// Zero-terminated context string, spelled out one character at a time with
// ASCII_* constants.  startParsing() hands it to setContext() for parsers
// that have m_ns set.
static const XML_Char implicitContext[]
    = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
       ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
       ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
       ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
       ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
       ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
       ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
       ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
       '\0'};
1038
1039 /* To avoid warnings about unused functions: */
1040 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1041
1042 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1043
1044 /* Obtain entropy on Linux 3.17+ */
1045 static int
writeRandomBytes_getrandom_nonblock(void * target,size_t count)1046 writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
1047 int success = 0; /* full count bytes written? */
1048 size_t bytesWrittenTotal = 0;
1049 const unsigned int getrandomFlags = GRND_NONBLOCK;
1050
1051 do {
1052 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
1053 const size_t bytesToWrite = count - bytesWrittenTotal;
1054
1055 assert(bytesToWrite <= INT_MAX);
1056
1057 const int bytesWrittenMore =
1058 # if defined(HAVE_GETRANDOM)
1059 (int)getrandom(currentTarget, bytesToWrite, getrandomFlags);
1060 # else
1061 (int)syscall(SYS_getrandom, currentTarget, bytesToWrite,
1062 getrandomFlags);
1063 # endif
1064
1065 if (bytesWrittenMore > 0) {
1066 bytesWrittenTotal += bytesWrittenMore;
1067 if (bytesWrittenTotal >= count)
1068 success = 1;
1069 }
1070 } while (! success && (errno == EINTR));
1071
1072 return success;
1073 }
1074
1075 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
1076
1077 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1078
/* Extract entropy from /dev/urandom
 *
 * Fills "count" bytes at "target" with data read from /dev/urandom.
 *
 * Returns 1 when at least one read delivered data and all "count" bytes
 * were written, 0 otherwise (including when the device cannot be opened).
 * A read that is interrupted (EINTR) is retried and a short read is
 * continued; any other failure, or a read that delivers no bytes, ends
 * the attempt.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    /* errno is only meaningful after a call that reported failure, so it
       must not be consulted (it may be stale) for any other result */
    if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue;
    }

    break; /* hard error or nothing delivered */
  }

  close(fd);
  return success;
}
1106
1107 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1108
1109 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1110
1111 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
1112
/* Fill "count" bytes at "target" from arc4random(): one call per group of
   sizeof(uint32_t) output bytes, least significant byte of each value
   first. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  uint32_t random32 = 0;

  for (size_t written = 0; written < count; written++) {
    const size_t byteInWord = written % sizeof(random32);

    /* Previous value is used up: draw a fresh one */
    if (byteInWord == 0) {
      random32 = arc4random();
    }

    out[written] = (uint8_t)(random32 >> (byteInWord * 8));
  }
}
1128
1129 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
1130
1131 #ifdef _WIN32
1132
1133 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
1134 as it didn't declare it in its header prior to version 5.3.0 of its
1135 runtime package (mingwrt, containing stdlib.h). The upstream fix
1136 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
1137 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
1138 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
1139 __declspec(dllimport) int rand_s(unsigned int *);
1140 # endif
1141
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills "count" bytes at "target": one rand_s() call per group of
 * sizeof(unsigned int) output bytes, least significant byte of each
 * value first.  Returns 1 on success and 0 as soon as rand_s() reports
 * an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  unsigned int random32 = 0;

  for (size_t written = 0; written < count; written++) {
    const size_t byteInWord = written % sizeof(random32);

    /* Previous value is used up: draw a fresh one */
    if ((byteInWord == 0) && (rand_s(&random32) != 0)) {
      return 0; /* failure */
    }

    out[written] = (uint8_t)(random32 >> (byteInWord * 8));
  }

  return 1; /* success */
}
1165
1166 #endif /* _WIN32 */
1167
1168 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1169
/* Derive a low-quality entropy value from the current time: the XOR of
   both FILETIME halves on Windows, the microseconds part of
   gettimeofday() elsewhere. */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME fileTime;

  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#  else
  struct timeval now;
  const int gettimeofday_res = gettimeofday(&now, NULL);

  /* The result is only verified where assert is active; the cast marks
     the variable as used for builds with NDEBUG */
  assert(gettimeofday_res == 0);
  (void)gettimeofday_res;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
1192
1193 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1194
/* Pass "entropy" through unchanged; when getDebugLevel() reports a level
   of at least 1 for EXPAT_ENTROPY_DEBUG, also print the value together
   with "label" to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2; /* two per byte */
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
    fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
            hexDigits, entropy, byteCount);
  }

  return entropy;
}
1203
/* Produce a value for use as the hash secret salt.
 *
 * Which provider is used is decided at compile time:
 * - arc4random_buf or arc4random when available (these return directly),
 * - otherwise rand_s (Windows) or getrandom, then /dev/urandom when
 *   XML_DEV_URANDOM is defined, each only if it reports success,
 * - and as a last resort a mix of time (and, except on WASI, process ID)
 *   multiplied by a Mersenne prime.
 *
 * The "parser" argument is not used.  Every result is passed through
 * ENTROPY_DEBUG, labeled with the name of the provider that produced it.
 */
static unsigned long
generate_hash_secret_salt(XML_Parser parser) {
  unsigned long entropy;
  (void)parser;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Any partial output of a failed provider above is overwritten here */
  entropy = gather_time_entropy();
#  if ! defined(__wasi__)
  /* Process ID is 0 bits entropy if attacker has local access */
  entropy ^= getpid();
#  endif

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
  if (sizeof(unsigned long) == 4) {
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
                         entropy * (unsigned long)2305843009213693951ULL);
  }
#endif
}
1249
1250 static unsigned long
get_hash_secret_salt(XML_Parser parser)1251 get_hash_secret_salt(XML_Parser parser) {
1252 const XML_Parser rootParser = getRootParserOf(parser, NULL);
1253 assert(! rootParser->m_parentParser);
1254
1255 return rootParser->m_hash_secret_salt;
1256 }
1257
/* Run parser->m_processor on the input range [start, end).
 *
 * Before doing so, and only while reparse deferral is enabled and the
 * current buffer is not the final one, a heuristic may decide to skip the
 * call altogether; *endPtr is then set to "start" and XML_ERROR_NONE is
 * returned.
 *
 * Otherwise the processor is called, repeatedly for as long as it requests
 * re-entry through parser->m_reenter, each time continuing at the position
 * it stored in *endPtr.
 *
 * On XML_ERROR_NONE, parser->m_partialTokenBytesBefore is updated: to the
 * number of available bytes if nothing was consumed, to zero otherwise.
 *
 * Returns the error code of the (last) processor call.
 */
static enum XML_Error
callProcessor(XML_Parser parser, const char *start, const char *end,
              const char **endPtr) {
  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);

  if (parser->m_reparseDeferralEnabled
      && ! parser->m_parsingStatus.finalBuffer) {
    // Heuristic: don't try to parse a partial token again until the amount of
    // available data has increased significantly.
    const size_t had_before = parser->m_partialTokenBytesBefore;
    // ...but *do* try anyway if we're close to causing a reallocation.
    size_t available_buffer
        = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
#if XML_CONTEXT_BYTES > 0
    // Up to XML_CONTEXT_BYTES in front of m_bufferPtr do not count as
    // available; EXPAT_MIN keeps the subtraction from going below zero
    available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
#endif
    available_buffer
        += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
    const bool enough
        = (have_now >= 2 * had_before)
          || ((size_t)parser->m_lastBufferRequestSize > available_buffer);

    if (! enough) {
      *endPtr = start; // callers may expect this to be set
      return XML_ERROR_NONE;
    }
  }
#if defined(XML_TESTING)
  g_bytesScanned += (unsigned)have_now;
#endif
  // Run in a loop to eliminate dangerous recursion depths
  enum XML_Error ret;
  *endPtr = start;
  while (1) {
    // Use endPtr as the new start in each iteration, since it will
    // be set to the next start point by m_processor.
    ret = parser->m_processor(parser, *endPtr, end, endPtr);

    // Make parsing status (and in particular XML_SUSPENDED) take
    // precedence over re-enter flag when they disagree
    if (parser->m_parsingStatus.parsing != XML_PARSING) {
      parser->m_reenter = XML_FALSE;
    }

    if (! parser->m_reenter) {
      break;
    }

    // Re-entry was requested: clear the flag for the next round, but do
    // not go around again if the processor reported an error
    parser->m_reenter = XML_FALSE;
    if (ret != XML_ERROR_NONE)
      return ret;
  }

  if (ret == XML_ERROR_NONE) {
    // if we consumed nothing, remember what we had on this parse attempt.
    if (*endPtr == start) {
      parser->m_partialTokenBytesBefore = have_now;
    } else {
      parser->m_partialTokenBytesBefore = 0;
    }
  }
  return ret;
}
1322
1323 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1324 startParsing(XML_Parser parser) {
1325 /* hash functions must be initialized before setContext() is called */
1326 if (parser->m_hash_secret_salt == 0)
1327 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1328 if (parser->m_ns) {
1329 /* implicit context only set for root parser, since child
1330 parsers (i.e. external entity parsers) will inherit it
1331 */
1332 return setContext(parser, implicitContext);
1333 }
1334 return XML_TRUE;
1335 }
1336
1337 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1338 XML_ParserCreate_MM(const XML_Char *encodingName,
1339 const XML_Memory_Handling_Suite *memsuite,
1340 const XML_Char *nameSep) {
1341 return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
1342 }
1343
/* Allocate and initialize a new parser object.
 *
 * encodingName - optional protocol encoding name; handed to parserInit()
 *                which stores a copy of it
 * memsuite     - optional allocator callbacks; malloc/realloc/free are
 *                used when NULL
 * nameSep      - when non-NULL, namespace processing is switched on and
 *                *nameSep becomes the namespace separator
 * dtd          - DTD to use for the new parser; when NULL a new one is
 *                created through dtdCreate()
 * parentParser - parent of the new parser, or NULL
 *
 * With XML_GE == 1 the parser struct itself is allocated with the same
 * hidden size prefix that the tracked allocator uses, and its size is
 * recorded with the root parser's allocation tracker.
 *
 * Returns the new parser, or NULL when an allocation fails, when the
 * allocation tracker rejects the parser struct for a child parser, or
 * when the encoding name could not be stored.
 */
static XML_Parser
parserCreate(const XML_Char *encodingName,
             const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
             DTD *dtd, XML_Parser parentParser) {
  XML_Parser parser = NULL;

#if XML_GE == 1
  // Size of the parser struct plus hidden size prefix
  const size_t increase
      = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);

  // Only a child parser has a tracker (at its root) to consult up front
  if (parentParser != NULL) {
    const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
    if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
      return NULL;
    }
  }
#else
  UNUSED_P(parentParser);
#endif

  // NOTE: The opening braces of the two "if (... != NULL) {" variants below
  //       share the closing brace that follows each #endif
  if (memsuite) {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser
        = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
                               + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      // Record the size in front and let "parser" point past the prefix
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      // The cast gives write access to m_mem for this initial setup
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = memsuite->malloc_fcn;
      mtemp->realloc_fcn = memsuite->realloc_fcn;
      mtemp->free_fcn = memsuite->free_fcn;
    }
  } else {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
                                       + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      // Record the size in front and let "parser" point past the prefix
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = malloc(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      // The cast gives write access to m_mem for this initial setup
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = malloc;
      mtemp->realloc_fcn = realloc;
      mtemp->free_fcn = free;
    }
  } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0

  if (! parser)
    return parser;

#if XML_GE == 1
  // Initialize .m_alloc_tracker
  memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
  if (parentParser == NULL) {
    parser->m_alloc_tracker.debugLevel
        = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
    parser->m_alloc_tracker.maximumAmplificationFactor
        = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
    parser->m_alloc_tracker.activationThresholdBytes
        = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;

    // NOTE: This initialization needs to come this early because these fields
    //       are read by allocation tracking code
    parser->m_parentParser = NULL;
    parser->m_accounting.countBytesDirect = 0;
  } else {
    parser->m_parentParser = parentParser;
  }

  // Record XML_ParserStruct allocation we did a few lines up before
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(rootParser->m_parentParser == NULL);
  assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
  rootParser->m_alloc_tracker.bytesAllocated += increase;

  // Report on allocation
  if (rootParser->m_alloc_tracker.debugLevel >= 2) {
    if (rootParser->m_alloc_tracker.bytesAllocated
        > rootParser->m_alloc_tracker.peakBytesAllocated) {
      rootParser->m_alloc_tracker.peakBytesAllocated
          = rootParser->m_alloc_tracker.bytesAllocated;
    }

    expat_heap_stat(rootParser, '+', increase,
                    rootParser->m_alloc_tracker.bytesAllocated,
                    rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
  }
#else
  parser->m_parentParser = NULL;
#endif // XML_GE == 1

  parser->m_buffer = NULL;
  parser->m_bufferLim = NULL;

  // From here on each failing allocation releases everything that was
  // allocated before it, with the parser struct itself freed last
  parser->m_attsSize = INIT_ATTS_SIZE;
  parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
  if (parser->m_atts == NULL) {
    FREE(parser, parser);
    return NULL;
  }
#ifdef XML_ATTR_INFO
  parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
  if (parser->m_attInfo == NULL) {
    FREE(parser, parser->m_atts);
    FREE(parser, parser);
    return NULL;
  }
#endif
  parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
  if (parser->m_dataBuf == NULL) {
    FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
    FREE(parser, parser->m_attInfo);
#endif
    FREE(parser, parser);
    return NULL;
  }
  parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;

  // Use the DTD passed in by the caller, or create a new one
  if (dtd)
    parser->m_dtd = dtd;
  else {
    parser->m_dtd = dtdCreate(parser);
    if (parser->m_dtd == NULL) {
      FREE(parser, parser->m_dataBuf);
      FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
      FREE(parser, parser->m_attInfo);
#endif
      FREE(parser, parser);
      return NULL;
    }
  }

  parser->m_freeBindingList = NULL;
  parser->m_freeTagList = NULL;
  parser->m_freeInternalEntities = NULL;
  parser->m_freeAttributeEntities = NULL;
  parser->m_freeValueEntities = NULL;

  parser->m_groupSize = 0;
  parser->m_groupConnector = NULL;

  parser->m_unknownEncodingHandler = NULL;
  parser->m_unknownEncodingHandlerData = NULL;

  // Defaults for a parser without namespace processing; adjusted further
  // down when nameSep was given
  parser->m_namespaceSeparator = ASCII_EXCL;
  parser->m_ns = XML_FALSE;
  parser->m_ns_triplets = XML_FALSE;

  parser->m_nsAtts = NULL;
  parser->m_nsAttsVersion = 0;
  parser->m_nsAttsPower = 0;

  parser->m_protocolEncodingName = NULL;

  poolInit(&parser->m_tempPool, parser);
  poolInit(&parser->m_temp2Pool, parser);
  parserInit(parser, encodingName);

  // parserInit could not store a copy of the requested encoding name
  if (encodingName && ! parser->m_protocolEncodingName) {
    if (dtd) {
      // We need to stop the upcoming call to XML_ParserFree from happily
      // destroying parser->m_dtd because the DTD is shared with the parent
      // parser and the only guard that keeps XML_ParserFree from destroying
      // parser->m_dtd is parser->m_isParamEntity but it will be set to
      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
      parser->m_dtd = NULL;
    }
    XML_ParserFree(parser);
    return NULL;
  }

  if (nameSep) {
    parser->m_ns = XML_TRUE;
    parser->m_internalEncoding = XmlGetInternalEncodingNS();
    parser->m_namespaceSeparator = *nameSep;
  } else {
    parser->m_internalEncoding = XmlGetInternalEncoding();
  }

  return parser;
}
1539
/* Put the per-document state of a parser into its initial condition.
 *
 * Called from both parserCreate() and XML_ParserReset().  Fields that are
 * not assigned here (e.g. m_buffer, m_dtd or the free lists) keep their
 * current value.
 *
 * When encodingName is non-NULL, the result of copyString() is stored in
 * m_protocolEncodingName; nothing is reported from here if that result is
 * NULL, so callers that care need to inspect the field afterwards.
 */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, parser);
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);

  // User data and handlers: nothing installed
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  // The argument for the external entity ref handler starts out as the
  // parser itself
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;

  // Input buffer: keep the buffer itself but mark it as empty
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;
  parser->m_partialTokenBytesBefore = 0;
  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
  parser->m_lastBufferRequestSize = 0;

  // State of declarations in progress
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;

  // Position, error and event tracking
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;

  // Open entities, tag stack and related state
  parser->m_openInternalEntities = NULL;
  parser->m_openAttributeEntities = NULL;
  parser->m_openValueEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;
  parser->m_parsingStatus.parsing = XML_INITIALIZED;
  // Reentry can only be triggered inside m_processor calls
  parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  // Zero makes startParsing() generate a new salt
  parser->m_hash_secret_salt = 0;

#if XML_GE == 1
  // Accounting: clear all counters, then apply debug level and defaults
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}
1629
1630 /* moves list of bindings to m_freeBindingList */
1631 static void FASTCALL
1632 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1633 while (bindings) {
1634 BINDING *b = bindings;
1635 bindings = bindings->nextTagBinding;
1636 b->nextTagBinding = parser->m_freeBindingList;
1637 parser->m_freeBindingList = b;
1638 }
1639 }
1640
1641 XML_Bool XMLCALL
1642 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1643 TAG *tStk;
1644 OPEN_INTERNAL_ENTITY *openEntityList;
1645
1646 if (parser == NULL)
1647 return XML_FALSE;
1648
1649 if (parser->m_parentParser)
1650 return XML_FALSE;
1651 /* move m_tagStack to m_freeTagList */
1652 tStk = parser->m_tagStack;
1653 while (tStk) {
1654 TAG *tag = tStk;
1655 tStk = tStk->parent;
1656 tag->parent = parser->m_freeTagList;
1657 moveToFreeBindingList(parser, tag->bindings);
1658 tag->bindings = NULL;
1659 parser->m_freeTagList = tag;
1660 }
1661 /* move m_openInternalEntities to m_freeInternalEntities */
1662 openEntityList = parser->m_openInternalEntities;
1663 while (openEntityList) {
1664 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1665 openEntityList = openEntity->next;
1666 openEntity->next = parser->m_freeInternalEntities;
1667 parser->m_freeInternalEntities = openEntity;
1668 }
1669 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1670 * for attributes) */
1671 openEntityList = parser->m_openAttributeEntities;
1672 while (openEntityList) {
1673 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1674 openEntityList = openEntity->next;
1675 openEntity->next = parser->m_freeAttributeEntities;
1676 parser->m_freeAttributeEntities = openEntity;
1677 }
1678 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1679 * for value entities) */
1680 openEntityList = parser->m_openValueEntities;
1681 while (openEntityList) {
1682 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1683 openEntityList = openEntity->next;
1684 openEntity->next = parser->m_freeValueEntities;
1685 parser->m_freeValueEntities = openEntity;
1686 }
1687 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1688 FREE(parser, parser->m_unknownEncodingMem);
1689 if (parser->m_unknownEncodingRelease)
1690 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1691 poolClear(&parser->m_tempPool);
1692 poolClear(&parser->m_temp2Pool);
1693 FREE(parser, (void *)parser->m_protocolEncodingName);
1694 parser->m_protocolEncodingName = NULL;
1695 parserInit(parser, encodingName);
1696 dtdReset(parser->m_dtd, parser);
1697 return XML_TRUE;
1698 }
1699
1700 static XML_Bool
1701 parserBusy(XML_Parser parser) {
1702 switch (parser->m_parsingStatus.parsing) {
1703 case XML_PARSING:
1704 case XML_SUSPENDED:
1705 return XML_TRUE;
1706 case XML_INITIALIZED:
1707 case XML_FINISHED:
1708 default:
1709 return XML_FALSE;
1710 }
1711 }
1712
1713 enum XML_Status XMLCALL
1714 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1715 if (parser == NULL)
1716 return XML_STATUS_ERROR;
1717 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1718 XXX There's no way for the caller to determine which of the
1719 XXX possible error cases caused the XML_STATUS_ERROR return.
1720 */
1721 if (parserBusy(parser))
1722 return XML_STATUS_ERROR;
1723
1724 /* Get rid of any previous encoding name */
1725 FREE(parser, (void *)parser->m_protocolEncodingName);
1726
1727 if (encodingName == NULL)
1728 /* No new encoding name */
1729 parser->m_protocolEncodingName = NULL;
1730 else {
1731 /* Copy the new encoding name into allocated memory */
1732 parser->m_protocolEncodingName = copyString(encodingName, parser);
1733 if (! parser->m_protocolEncodingName)
1734 return XML_STATUS_ERROR;
1735 }
1736 return XML_STATUS_OK;
1737 }
1738
1739 XML_Parser XMLCALL
1740 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1741 const XML_Char *encodingName) {
1742 XML_Parser parser = oldParser;
1743 DTD *newDtd = NULL;
1744 DTD *oldDtd;
1745 XML_StartElementHandler oldStartElementHandler;
1746 XML_EndElementHandler oldEndElementHandler;
1747 XML_CharacterDataHandler oldCharacterDataHandler;
1748 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1749 XML_CommentHandler oldCommentHandler;
1750 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1751 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1752 XML_DefaultHandler oldDefaultHandler;
1753 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1754 XML_NotationDeclHandler oldNotationDeclHandler;
1755 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1756 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1757 XML_NotStandaloneHandler oldNotStandaloneHandler;
1758 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1759 XML_SkippedEntityHandler oldSkippedEntityHandler;
1760 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1761 void *oldUnknownEncodingHandlerData;
1762 XML_ElementDeclHandler oldElementDeclHandler;
1763 XML_AttlistDeclHandler oldAttlistDeclHandler;
1764 XML_EntityDeclHandler oldEntityDeclHandler;
1765 XML_XmlDeclHandler oldXmlDeclHandler;
1766 ELEMENT_TYPE *oldDeclElementType;
1767
1768 void *oldUserData;
1769 void *oldHandlerArg;
1770 XML_Bool oldDefaultExpandInternalEntities;
1771 XML_Parser oldExternalEntityRefHandlerArg;
1772 #ifdef XML_DTD
1773 enum XML_ParamEntityParsing oldParamEntityParsing;
1774 int oldInEntityValue;
1775 #endif
1776 XML_Bool oldns_triplets;
1777 /* Note that the new parser shares the same hash secret as the old
1778 parser, so that dtdCopy and copyEntityTable can lookup values
1779 from hash tables associated with either parser without us having
1780 to worry which hash secrets each table has.
1781 */
1782 unsigned long oldhash_secret_salt;
1783 XML_Bool oldReparseDeferralEnabled;
1784
1785 /* Validate the oldParser parameter before we pull everything out of it */
1786 if (oldParser == NULL)
1787 return NULL;
1788
1789 /* Stash the original parser contents on the stack */
1790 oldDtd = parser->m_dtd;
1791 oldStartElementHandler = parser->m_startElementHandler;
1792 oldEndElementHandler = parser->m_endElementHandler;
1793 oldCharacterDataHandler = parser->m_characterDataHandler;
1794 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1795 oldCommentHandler = parser->m_commentHandler;
1796 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1797 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1798 oldDefaultHandler = parser->m_defaultHandler;
1799 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1800 oldNotationDeclHandler = parser->m_notationDeclHandler;
1801 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1802 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1803 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1804 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1805 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1806 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1807 oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData;
1808 oldElementDeclHandler = parser->m_elementDeclHandler;
1809 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1810 oldEntityDeclHandler = parser->m_entityDeclHandler;
1811 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1812 oldDeclElementType = parser->m_declElementType;
1813
1814 oldUserData = parser->m_userData;
1815 oldHandlerArg = parser->m_handlerArg;
1816 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1817 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1818 #ifdef XML_DTD
1819 oldParamEntityParsing = parser->m_paramEntityParsing;
1820 oldInEntityValue = parser->m_prologState.inEntityValue;
1821 #endif
1822 oldns_triplets = parser->m_ns_triplets;
1823 /* Note that the new parser shares the same hash secret as the old
1824 parser, so that dtdCopy and copyEntityTable can lookup values
1825 from hash tables associated with either parser without us having
1826 to worry which hash secrets each table has.
1827 */
1828 oldhash_secret_salt = parser->m_hash_secret_salt;
1829 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1830
1831 #ifdef XML_DTD
1832 if (! context)
1833 newDtd = oldDtd;
1834 #endif /* XML_DTD */
1835
1836 /* Note that the magical uses of the pre-processor to make field
1837 access look more like C++ require that `parser' be overwritten
1838 here. This makes this function more painful to follow than it
1839 would be otherwise.
1840 */
1841 if (parser->m_ns) {
1842 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1843 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
1844 } else {
1845 parser
1846 = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
1847 }
1848
1849 if (! parser)
1850 return NULL;
1851
1852 parser->m_startElementHandler = oldStartElementHandler;
1853 parser->m_endElementHandler = oldEndElementHandler;
1854 parser->m_characterDataHandler = oldCharacterDataHandler;
1855 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1856 parser->m_commentHandler = oldCommentHandler;
1857 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1858 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1859 parser->m_defaultHandler = oldDefaultHandler;
1860 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1861 parser->m_notationDeclHandler = oldNotationDeclHandler;
1862 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1863 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1864 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1865 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1866 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1867 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1868 parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData;
1869 parser->m_elementDeclHandler = oldElementDeclHandler;
1870 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1871 parser->m_entityDeclHandler = oldEntityDeclHandler;
1872 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1873 parser->m_declElementType = oldDeclElementType;
1874 parser->m_userData = oldUserData;
1875 if (oldUserData == oldHandlerArg)
1876 parser->m_handlerArg = parser->m_userData;
1877 else
1878 parser->m_handlerArg = parser;
1879 if (oldExternalEntityRefHandlerArg != oldParser)
1880 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1881 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1882 parser->m_ns_triplets = oldns_triplets;
1883 parser->m_hash_secret_salt = oldhash_secret_salt;
1884 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1885 parser->m_parentParser = oldParser;
1886 #ifdef XML_DTD
1887 parser->m_paramEntityParsing = oldParamEntityParsing;
1888 parser->m_prologState.inEntityValue = oldInEntityValue;
1889 if (context) {
1890 #endif /* XML_DTD */
1891 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
1892 || ! setContext(parser, context)) {
1893 XML_ParserFree(parser);
1894 return NULL;
1895 }
1896 parser->m_processor = externalEntityInitProcessor;
1897 #ifdef XML_DTD
1898 } else {
1899 /* The DTD instance referenced by parser->m_dtd is shared between the
1900 document's root parser and external PE parsers, therefore one does not
1901 need to call setContext. In addition, one also *must* not call
1902 setContext, because this would overwrite existing prefix->binding
1903 pointers in parser->m_dtd with ones that get destroyed with the external
1904 PE parser. This would leave those prefixes with dangling pointers.
1905 */
1906 parser->m_isParamEntity = XML_TRUE;
1907 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1908 parser->m_processor = externalParEntInitProcessor;
1909 }
1910 #endif /* XML_DTD */
1911 return parser;
1912 }
1913
1914 static void FASTCALL
1915 destroyBindings(BINDING *bindings, XML_Parser parser) {
1916 for (;;) {
1917 BINDING *b = bindings;
1918 if (! b)
1919 break;
1920 bindings = b->nextTagBinding;
1921 FREE(parser, b->uri);
1922 FREE(parser, b);
1923 }
1924 }
1925
1926 void XMLCALL
1927 XML_ParserFree(XML_Parser parser) {
1928 TAG *tagList;
1929 OPEN_INTERNAL_ENTITY *entityList;
1930 if (parser == NULL)
1931 return;
1932 /* free m_tagStack and m_freeTagList */
1933 tagList = parser->m_tagStack;
1934 for (;;) {
1935 TAG *p;
1936 if (tagList == NULL) {
1937 if (parser->m_freeTagList == NULL)
1938 break;
1939 tagList = parser->m_freeTagList;
1940 parser->m_freeTagList = NULL;
1941 }
1942 p = tagList;
1943 tagList = tagList->parent;
1944 FREE(parser, p->buf.raw);
1945 destroyBindings(p->bindings, parser);
1946 FREE(parser, p);
1947 }
1948 /* free m_openInternalEntities and m_freeInternalEntities */
1949 entityList = parser->m_openInternalEntities;
1950 for (;;) {
1951 OPEN_INTERNAL_ENTITY *openEntity;
1952 if (entityList == NULL) {
1953 if (parser->m_freeInternalEntities == NULL)
1954 break;
1955 entityList = parser->m_freeInternalEntities;
1956 parser->m_freeInternalEntities = NULL;
1957 }
1958 openEntity = entityList;
1959 entityList = entityList->next;
1960 FREE(parser, openEntity);
1961 }
1962 /* free m_openAttributeEntities and m_freeAttributeEntities */
1963 entityList = parser->m_openAttributeEntities;
1964 for (;;) {
1965 OPEN_INTERNAL_ENTITY *openEntity;
1966 if (entityList == NULL) {
1967 if (parser->m_freeAttributeEntities == NULL)
1968 break;
1969 entityList = parser->m_freeAttributeEntities;
1970 parser->m_freeAttributeEntities = NULL;
1971 }
1972 openEntity = entityList;
1973 entityList = entityList->next;
1974 FREE(parser, openEntity);
1975 }
1976 /* free m_openValueEntities and m_freeValueEntities */
1977 entityList = parser->m_openValueEntities;
1978 for (;;) {
1979 OPEN_INTERNAL_ENTITY *openEntity;
1980 if (entityList == NULL) {
1981 if (parser->m_freeValueEntities == NULL)
1982 break;
1983 entityList = parser->m_freeValueEntities;
1984 parser->m_freeValueEntities = NULL;
1985 }
1986 openEntity = entityList;
1987 entityList = entityList->next;
1988 FREE(parser, openEntity);
1989 }
1990 destroyBindings(parser->m_freeBindingList, parser);
1991 destroyBindings(parser->m_inheritedBindings, parser);
1992 poolDestroy(&parser->m_tempPool);
1993 poolDestroy(&parser->m_temp2Pool);
1994 FREE(parser, (void *)parser->m_protocolEncodingName);
1995 #ifdef XML_DTD
1996 /* external parameter entity parsers share the DTD structure
1997 parser->m_dtd with the root parser, so we must not destroy it
1998 */
1999 if (! parser->m_isParamEntity && parser->m_dtd)
2000 #else
2001 if (parser->m_dtd)
2002 #endif /* XML_DTD */
2003 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser);
2004 FREE(parser, parser->m_atts);
2005 #ifdef XML_ATTR_INFO
2006 FREE(parser, parser->m_attInfo);
2007 #endif
2008 FREE(parser, parser->m_groupConnector);
2009 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2010 // is not being allocated with MALLOC(..) but with plain
2011 // .malloc_fcn(..).
2012 parser->m_mem.free_fcn(parser->m_buffer);
2013 FREE(parser, parser->m_dataBuf);
2014 FREE(parser, parser->m_nsAtts);
2015 FREE(parser, parser->m_unknownEncodingMem);
2016 if (parser->m_unknownEncodingRelease)
2017 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
2018 FREE(parser, parser);
2019 }
2020
2021 void XMLCALL
2022 XML_UseParserAsHandlerArg(XML_Parser parser) {
2023 if (parser != NULL)
2024 parser->m_handlerArg = parser;
2025 }
2026
2027 enum XML_Error XMLCALL
2028 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
2029 if (parser == NULL)
2030 return XML_ERROR_INVALID_ARGUMENT;
2031 #ifdef XML_DTD
2032 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2033 if (parserBusy(parser))
2034 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
2035 parser->m_useForeignDTD = useDTD;
2036 return XML_ERROR_NONE;
2037 #else
2038 UNUSED_P(useDTD);
2039 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
2040 #endif
2041 }
2042
2043 void XMLCALL
2044 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
2045 if (parser == NULL)
2046 return;
2047 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2048 if (parserBusy(parser))
2049 return;
2050 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
2051 }
2052
2053 void XMLCALL
2054 XML_SetUserData(XML_Parser parser, void *p) {
2055 if (parser == NULL)
2056 return;
2057 if (parser->m_handlerArg == parser->m_userData)
2058 parser->m_handlerArg = parser->m_userData = p;
2059 else
2060 parser->m_userData = p;
2061 }
2062
2063 enum XML_Status XMLCALL
2064 XML_SetBase(XML_Parser parser, const XML_Char *p) {
2065 if (parser == NULL)
2066 return XML_STATUS_ERROR;
2067 if (p) {
2068 p = poolCopyString(&parser->m_dtd->pool, p);
2069 if (! p)
2070 return XML_STATUS_ERROR;
2071 parser->m_curBase = p;
2072 } else
2073 parser->m_curBase = NULL;
2074 return XML_STATUS_OK;
2075 }
2076
2077 const XML_Char *XMLCALL
2078 XML_GetBase(XML_Parser parser) {
2079 if (parser == NULL)
2080 return NULL;
2081 return parser->m_curBase;
2082 }
2083
2084 int XMLCALL
2085 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
2086 if (parser == NULL)
2087 return -1;
2088 return parser->m_nSpecifiedAtts;
2089 }
2090
2091 int XMLCALL
2092 XML_GetIdAttributeIndex(XML_Parser parser) {
2093 if (parser == NULL)
2094 return -1;
2095 return parser->m_idAttIndex;
2096 }
2097
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo array, or NULL when `parser` is NULL.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
2106
2107 void XMLCALL
2108 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
2109 XML_EndElementHandler end) {
2110 if (parser == NULL)
2111 return;
2112 parser->m_startElementHandler = start;
2113 parser->m_endElementHandler = end;
2114 }
2115
2116 void XMLCALL
2117 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
2118 if (parser != NULL)
2119 parser->m_startElementHandler = start;
2120 }
2121
2122 void XMLCALL
2123 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
2124 if (parser != NULL)
2125 parser->m_endElementHandler = end;
2126 }
2127
2128 void XMLCALL
2129 XML_SetCharacterDataHandler(XML_Parser parser,
2130 XML_CharacterDataHandler handler) {
2131 if (parser != NULL)
2132 parser->m_characterDataHandler = handler;
2133 }
2134
2135 void XMLCALL
2136 XML_SetProcessingInstructionHandler(XML_Parser parser,
2137 XML_ProcessingInstructionHandler handler) {
2138 if (parser != NULL)
2139 parser->m_processingInstructionHandler = handler;
2140 }
2141
2142 void XMLCALL
2143 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
2144 if (parser != NULL)
2145 parser->m_commentHandler = handler;
2146 }
2147
2148 void XMLCALL
2149 XML_SetCdataSectionHandler(XML_Parser parser,
2150 XML_StartCdataSectionHandler start,
2151 XML_EndCdataSectionHandler end) {
2152 if (parser == NULL)
2153 return;
2154 parser->m_startCdataSectionHandler = start;
2155 parser->m_endCdataSectionHandler = end;
2156 }
2157
2158 void XMLCALL
2159 XML_SetStartCdataSectionHandler(XML_Parser parser,
2160 XML_StartCdataSectionHandler start) {
2161 if (parser != NULL)
2162 parser->m_startCdataSectionHandler = start;
2163 }
2164
2165 void XMLCALL
2166 XML_SetEndCdataSectionHandler(XML_Parser parser,
2167 XML_EndCdataSectionHandler end) {
2168 if (parser != NULL)
2169 parser->m_endCdataSectionHandler = end;
2170 }
2171
2172 void XMLCALL
2173 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
2174 if (parser == NULL)
2175 return;
2176 parser->m_defaultHandler = handler;
2177 parser->m_defaultExpandInternalEntities = XML_FALSE;
2178 }
2179
2180 void XMLCALL
2181 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
2182 if (parser == NULL)
2183 return;
2184 parser->m_defaultHandler = handler;
2185 parser->m_defaultExpandInternalEntities = XML_TRUE;
2186 }
2187
2188 void XMLCALL
2189 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
2190 XML_EndDoctypeDeclHandler end) {
2191 if (parser == NULL)
2192 return;
2193 parser->m_startDoctypeDeclHandler = start;
2194 parser->m_endDoctypeDeclHandler = end;
2195 }
2196
2197 void XMLCALL
2198 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
2199 XML_StartDoctypeDeclHandler start) {
2200 if (parser != NULL)
2201 parser->m_startDoctypeDeclHandler = start;
2202 }
2203
2204 void XMLCALL
2205 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
2206 if (parser != NULL)
2207 parser->m_endDoctypeDeclHandler = end;
2208 }
2209
2210 void XMLCALL
2211 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
2212 XML_UnparsedEntityDeclHandler handler) {
2213 if (parser != NULL)
2214 parser->m_unparsedEntityDeclHandler = handler;
2215 }
2216
2217 void XMLCALL
2218 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
2219 if (parser != NULL)
2220 parser->m_notationDeclHandler = handler;
2221 }
2222
2223 void XMLCALL
2224 XML_SetNamespaceDeclHandler(XML_Parser parser,
2225 XML_StartNamespaceDeclHandler start,
2226 XML_EndNamespaceDeclHandler end) {
2227 if (parser == NULL)
2228 return;
2229 parser->m_startNamespaceDeclHandler = start;
2230 parser->m_endNamespaceDeclHandler = end;
2231 }
2232
2233 void XMLCALL
2234 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
2235 XML_StartNamespaceDeclHandler start) {
2236 if (parser != NULL)
2237 parser->m_startNamespaceDeclHandler = start;
2238 }
2239
2240 void XMLCALL
2241 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
2242 XML_EndNamespaceDeclHandler end) {
2243 if (parser != NULL)
2244 parser->m_endNamespaceDeclHandler = end;
2245 }
2246
2247 void XMLCALL
2248 XML_SetNotStandaloneHandler(XML_Parser parser,
2249 XML_NotStandaloneHandler handler) {
2250 if (parser != NULL)
2251 parser->m_notStandaloneHandler = handler;
2252 }
2253
2254 void XMLCALL
2255 XML_SetExternalEntityRefHandler(XML_Parser parser,
2256 XML_ExternalEntityRefHandler handler) {
2257 if (parser != NULL)
2258 parser->m_externalEntityRefHandler = handler;
2259 }
2260
2261 void XMLCALL
2262 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
2263 if (parser == NULL)
2264 return;
2265 if (arg)
2266 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
2267 else
2268 parser->m_externalEntityRefHandlerArg = parser;
2269 }
2270
2271 void XMLCALL
2272 XML_SetSkippedEntityHandler(XML_Parser parser,
2273 XML_SkippedEntityHandler handler) {
2274 if (parser != NULL)
2275 parser->m_skippedEntityHandler = handler;
2276 }
2277
2278 void XMLCALL
2279 XML_SetUnknownEncodingHandler(XML_Parser parser,
2280 XML_UnknownEncodingHandler handler, void *data) {
2281 if (parser == NULL)
2282 return;
2283 parser->m_unknownEncodingHandler = handler;
2284 parser->m_unknownEncodingHandlerData = data;
2285 }
2286
2287 void XMLCALL
2288 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
2289 if (parser != NULL)
2290 parser->m_elementDeclHandler = eldecl;
2291 }
2292
2293 void XMLCALL
2294 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
2295 if (parser != NULL)
2296 parser->m_attlistDeclHandler = attdecl;
2297 }
2298
2299 void XMLCALL
2300 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
2301 if (parser != NULL)
2302 parser->m_entityDeclHandler = handler;
2303 }
2304
2305 void XMLCALL
2306 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
2307 if (parser != NULL)
2308 parser->m_xmlDeclHandler = handler;
2309 }
2310
2311 int XMLCALL
2312 XML_SetParamEntityParsing(XML_Parser parser,
2313 enum XML_ParamEntityParsing peParsing) {
2314 if (parser == NULL)
2315 return 0;
2316 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2317 if (parserBusy(parser))
2318 return 0;
2319 #ifdef XML_DTD
2320 parser->m_paramEntityParsing = peParsing;
2321 return 1;
2322 #else
2323 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2324 #endif
2325 }
2326
2327 int XMLCALL
2328 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2329 if (parser == NULL)
2330 return 0;
2331
2332 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2333 assert(! rootParser->m_parentParser);
2334
2335 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2336 if (parserBusy(rootParser))
2337 return 0;
2338 rootParser->m_hash_secret_salt = hash_salt;
2339 return 1;
2340 }
2341
2342 enum XML_Status XMLCALL
2343 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2344 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2345 if (parser != NULL)
2346 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2347 return XML_STATUS_ERROR;
2348 }
2349 switch (parser->m_parsingStatus.parsing) {
2350 case XML_SUSPENDED:
2351 parser->m_errorCode = XML_ERROR_SUSPENDED;
2352 return XML_STATUS_ERROR;
2353 case XML_FINISHED:
2354 parser->m_errorCode = XML_ERROR_FINISHED;
2355 return XML_STATUS_ERROR;
2356 case XML_INITIALIZED:
2357 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2358 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2359 return XML_STATUS_ERROR;
2360 }
2361 /* fall through */
2362 default:
2363 parser->m_parsingStatus.parsing = XML_PARSING;
2364 }
2365
2366 #if XML_CONTEXT_BYTES == 0
2367 if (parser->m_bufferPtr == parser->m_bufferEnd) {
2368 const char *end;
2369 int nLeftOver;
2370 enum XML_Status result;
2371 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2372 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2373 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2374 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2375 parser->m_processor = errorProcessor;
2376 return XML_STATUS_ERROR;
2377 }
2378 // though this isn't a buffer request, we assume that `len` is the app's
2379 // preferred buffer fill size, and therefore save it here.
2380 parser->m_lastBufferRequestSize = len;
2381 parser->m_parseEndByteIndex += len;
2382 parser->m_positionPtr = s;
2383 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2384
2385 parser->m_errorCode
2386 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2387
2388 if (parser->m_errorCode != XML_ERROR_NONE) {
2389 parser->m_eventEndPtr = parser->m_eventPtr;
2390 parser->m_processor = errorProcessor;
2391 return XML_STATUS_ERROR;
2392 } else {
2393 switch (parser->m_parsingStatus.parsing) {
2394 case XML_SUSPENDED:
2395 result = XML_STATUS_SUSPENDED;
2396 break;
2397 case XML_INITIALIZED:
2398 case XML_PARSING:
2399 if (isFinal) {
2400 parser->m_parsingStatus.parsing = XML_FINISHED;
2401 return XML_STATUS_OK;
2402 }
2403 /* fall through */
2404 default:
2405 result = XML_STATUS_OK;
2406 }
2407 }
2408
2409 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2410 &parser->m_position);
2411 nLeftOver = s + len - end;
2412 if (nLeftOver) {
2413 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2414 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2415 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2416 parser->m_parsingStatus.parsing = XML_PARSING;
2417 void *const temp = XML_GetBuffer(parser, nLeftOver);
2418 parser->m_parsingStatus.parsing = originalStatus;
2419 // GetBuffer may have overwritten this, but we want to remember what the
2420 // app requested, not how many bytes were left over after parsing.
2421 parser->m_lastBufferRequestSize = len;
2422 if (temp == NULL) {
2423 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2424 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2425 parser->m_processor = errorProcessor;
2426 return XML_STATUS_ERROR;
2427 }
2428 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2429 // don't have any data to preserve, and can copy straight into the start
2430 // of the buffer rather than the GetBuffer return pointer (which may be
2431 // pointing further into the allocated buffer).
2432 memcpy(parser->m_buffer, end, nLeftOver);
2433 }
2434 parser->m_bufferPtr = parser->m_buffer;
2435 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2436 parser->m_positionPtr = parser->m_bufferPtr;
2437 parser->m_parseEndPtr = parser->m_bufferEnd;
2438 parser->m_eventPtr = parser->m_bufferPtr;
2439 parser->m_eventEndPtr = parser->m_bufferPtr;
2440 return result;
2441 }
2442 #endif /* XML_CONTEXT_BYTES == 0 */
2443 void *buff = XML_GetBuffer(parser, len);
2444 if (buff == NULL)
2445 return XML_STATUS_ERROR;
2446 if (len > 0) {
2447 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2448 memcpy(buff, s, len);
2449 }
2450 return XML_ParseBuffer(parser, len, isFinal);
2451 }
2452
2453 enum XML_Status XMLCALL
2454 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2455 const char *start;
2456 enum XML_Status result = XML_STATUS_OK;
2457
2458 if (parser == NULL)
2459 return XML_STATUS_ERROR;
2460
2461 if (len < 0) {
2462 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2463 return XML_STATUS_ERROR;
2464 }
2465
2466 switch (parser->m_parsingStatus.parsing) {
2467 case XML_SUSPENDED:
2468 parser->m_errorCode = XML_ERROR_SUSPENDED;
2469 return XML_STATUS_ERROR;
2470 case XML_FINISHED:
2471 parser->m_errorCode = XML_ERROR_FINISHED;
2472 return XML_STATUS_ERROR;
2473 case XML_INITIALIZED:
2474 /* Has someone called XML_GetBuffer successfully before? */
2475 if (! parser->m_bufferPtr) {
2476 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2477 return XML_STATUS_ERROR;
2478 }
2479
2480 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2481 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2482 return XML_STATUS_ERROR;
2483 }
2484 /* fall through */
2485 default:
2486 parser->m_parsingStatus.parsing = XML_PARSING;
2487 }
2488
2489 start = parser->m_bufferPtr;
2490 parser->m_positionPtr = start;
2491 parser->m_bufferEnd += len;
2492 parser->m_parseEndPtr = parser->m_bufferEnd;
2493 parser->m_parseEndByteIndex += len;
2494 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2495
2496 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2497 &parser->m_bufferPtr);
2498
2499 if (parser->m_errorCode != XML_ERROR_NONE) {
2500 parser->m_eventEndPtr = parser->m_eventPtr;
2501 parser->m_processor = errorProcessor;
2502 return XML_STATUS_ERROR;
2503 } else {
2504 switch (parser->m_parsingStatus.parsing) {
2505 case XML_SUSPENDED:
2506 result = XML_STATUS_SUSPENDED;
2507 break;
2508 case XML_INITIALIZED:
2509 case XML_PARSING:
2510 if (isFinal) {
2511 parser->m_parsingStatus.parsing = XML_FINISHED;
2512 return result;
2513 }
2514 default:; /* should not happen */
2515 }
2516 }
2517
2518 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2519 parser->m_bufferPtr, &parser->m_position);
2520 parser->m_positionPtr = parser->m_bufferPtr;
2521 return result;
2522 }
2523
2524 void *XMLCALL
2525 XML_GetBuffer(XML_Parser parser, int len) {
2526 if (parser == NULL)
2527 return NULL;
2528 if (len < 0) {
2529 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2530 return NULL;
2531 }
2532 switch (parser->m_parsingStatus.parsing) {
2533 case XML_SUSPENDED:
2534 parser->m_errorCode = XML_ERROR_SUSPENDED;
2535 return NULL;
2536 case XML_FINISHED:
2537 parser->m_errorCode = XML_ERROR_FINISHED;
2538 return NULL;
2539 default:;
2540 }
2541
2542 // whether or not the request succeeds, `len` seems to be the app's preferred
2543 // buffer fill size; remember it.
2544 parser->m_lastBufferRequestSize = len;
2545 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2546 || parser->m_buffer == NULL) {
2547 #if XML_CONTEXT_BYTES > 0
2548 int keep;
2549 #endif /* XML_CONTEXT_BYTES > 0 */
2550 /* Do not invoke signed arithmetic overflow: */
2551 int neededSize = (int)((unsigned)len
2552 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2553 parser->m_bufferEnd, parser->m_bufferPtr));
2554 if (neededSize < 0) {
2555 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2556 return NULL;
2557 }
2558 #if XML_CONTEXT_BYTES > 0
2559 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2560 if (keep > XML_CONTEXT_BYTES)
2561 keep = XML_CONTEXT_BYTES;
2562 /* Detect and prevent integer overflow */
2563 if (keep > INT_MAX - neededSize) {
2564 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2565 return NULL;
2566 }
2567 neededSize += keep;
2568 #endif /* XML_CONTEXT_BYTES > 0 */
2569 if (parser->m_buffer && parser->m_bufferPtr
2570 && neededSize
2571 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2572 #if XML_CONTEXT_BYTES > 0
2573 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2574 int offset
2575 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2576 - keep;
2577 /* The buffer pointers cannot be NULL here; we have at least some bytes
2578 * in the buffer */
2579 memmove(parser->m_buffer, &parser->m_buffer[offset],
2580 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2581 parser->m_bufferEnd -= offset;
2582 parser->m_bufferPtr -= offset;
2583 }
2584 #else
2585 memmove(parser->m_buffer, parser->m_bufferPtr,
2586 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2587 parser->m_bufferEnd
2588 = parser->m_buffer
2589 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2590 parser->m_bufferPtr = parser->m_buffer;
2591 #endif /* XML_CONTEXT_BYTES > 0 */
2592 } else {
2593 char *newBuf;
2594 int bufferSize
2595 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2596 if (bufferSize == 0)
2597 bufferSize = INIT_BUFFER_SIZE;
2598 do {
2599 /* Do not invoke signed arithmetic overflow: */
2600 bufferSize = (int)(2U * (unsigned)bufferSize);
2601 } while (bufferSize < neededSize && bufferSize > 0);
2602 if (bufferSize <= 0) {
2603 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2604 return NULL;
2605 }
2606 // NOTE: We are avoiding MALLOC(..) here to leave limiting
2607 // the input size to the application using Expat.
2608 newBuf = parser->m_mem.malloc_fcn(bufferSize);
2609 if (newBuf == NULL) {
2610 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2611 return NULL;
2612 }
2613 parser->m_bufferLim = newBuf + bufferSize;
2614 #if XML_CONTEXT_BYTES > 0
2615 if (parser->m_bufferPtr) {
2616 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2617 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2618 + keep);
2619 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2620 // is not being allocated with MALLOC(..) but with plain
2621 // .malloc_fcn(..).
2622 parser->m_mem.free_fcn(parser->m_buffer);
2623 parser->m_buffer = newBuf;
2624 parser->m_bufferEnd
2625 = parser->m_buffer
2626 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2627 + keep;
2628 parser->m_bufferPtr = parser->m_buffer + keep;
2629 } else {
2630 /* This must be a brand new buffer with no data in it yet */
2631 parser->m_bufferEnd = newBuf;
2632 parser->m_bufferPtr = parser->m_buffer = newBuf;
2633 }
2634 #else
2635 if (parser->m_bufferPtr) {
2636 memcpy(newBuf, parser->m_bufferPtr,
2637 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2638 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2639 // is not being allocated with MALLOC(..) but with plain
2640 // .malloc_fcn(..).
2641 parser->m_mem.free_fcn(parser->m_buffer);
2642 parser->m_bufferEnd
2643 = newBuf
2644 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2645 } else {
2646 /* This must be a brand new buffer with no data in it yet */
2647 parser->m_bufferEnd = newBuf;
2648 }
2649 parser->m_bufferPtr = parser->m_buffer = newBuf;
2650 #endif /* XML_CONTEXT_BYTES > 0 */
2651 }
2652 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2653 parser->m_positionPtr = NULL;
2654 }
2655 return parser->m_bufferEnd;
2656 }
2657
/* Raise the parser's m_reenter flag by setting it to XML_TRUE. */
static void
triggerReenter(XML_Parser parser) {
  parser->m_reenter = XML_TRUE;
}
2662
2663 enum XML_Status XMLCALL
2664 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2665 if (parser == NULL)
2666 return XML_STATUS_ERROR;
2667 switch (parser->m_parsingStatus.parsing) {
2668 case XML_INITIALIZED:
2669 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2670 return XML_STATUS_ERROR;
2671 case XML_SUSPENDED:
2672 if (resumable) {
2673 parser->m_errorCode = XML_ERROR_SUSPENDED;
2674 return XML_STATUS_ERROR;
2675 }
2676 parser->m_parsingStatus.parsing = XML_FINISHED;
2677 break;
2678 case XML_FINISHED:
2679 parser->m_errorCode = XML_ERROR_FINISHED;
2680 return XML_STATUS_ERROR;
2681 case XML_PARSING:
2682 if (resumable) {
2683 #ifdef XML_DTD
2684 if (parser->m_isParamEntity) {
2685 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2686 return XML_STATUS_ERROR;
2687 }
2688 #endif
2689 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2690 } else
2691 parser->m_parsingStatus.parsing = XML_FINISHED;
2692 break;
2693 default:
2694 assert(0);
2695 }
2696 return XML_STATUS_OK;
2697 }
2698
2699 enum XML_Status XMLCALL
2700 XML_ResumeParser(XML_Parser parser) {
2701 enum XML_Status result = XML_STATUS_OK;
2702
2703 if (parser == NULL)
2704 return XML_STATUS_ERROR;
2705 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2706 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2707 return XML_STATUS_ERROR;
2708 }
2709 parser->m_parsingStatus.parsing = XML_PARSING;
2710
2711 parser->m_errorCode = callProcessor(
2712 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2713
2714 if (parser->m_errorCode != XML_ERROR_NONE) {
2715 parser->m_eventEndPtr = parser->m_eventPtr;
2716 parser->m_processor = errorProcessor;
2717 return XML_STATUS_ERROR;
2718 } else {
2719 switch (parser->m_parsingStatus.parsing) {
2720 case XML_SUSPENDED:
2721 result = XML_STATUS_SUSPENDED;
2722 break;
2723 case XML_INITIALIZED:
2724 case XML_PARSING:
2725 if (parser->m_parsingStatus.finalBuffer) {
2726 parser->m_parsingStatus.parsing = XML_FINISHED;
2727 return result;
2728 }
2729 default:;
2730 }
2731 }
2732
2733 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2734 parser->m_bufferPtr, &parser->m_position);
2735 parser->m_positionPtr = parser->m_bufferPtr;
2736 return result;
2737 }
2738
/* Copy the parser's current parsing status (parser->m_parsingStatus)
   into *status.  Does nothing when parser is NULL; status itself must
   not be NULL (checked by assert only).
*/
void XMLCALL
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
  if (parser == NULL)
    return;
  assert(status != NULL);
  *status = parser->m_parsingStatus;
}
2746
2747 enum XML_Error XMLCALL
2748 XML_GetErrorCode(XML_Parser parser) {
2749 if (parser == NULL)
2750 return XML_ERROR_INVALID_ARGUMENT;
2751 return parser->m_errorCode;
2752 }
2753
2754 XML_Index XMLCALL
2755 XML_GetCurrentByteIndex(XML_Parser parser) {
2756 if (parser == NULL)
2757 return -1;
2758 if (parser->m_eventPtr)
2759 return (XML_Index)(parser->m_parseEndByteIndex
2760 - (parser->m_parseEndPtr - parser->m_eventPtr));
2761 return -1;
2762 }
2763
2764 int XMLCALL
2765 XML_GetCurrentByteCount(XML_Parser parser) {
2766 if (parser == NULL)
2767 return 0;
2768 if (parser->m_eventEndPtr && parser->m_eventPtr)
2769 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2770 return 0;
2771 }
2772
2773 const char *XMLCALL
2774 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2775 #if XML_CONTEXT_BYTES > 0
2776 if (parser == NULL)
2777 return NULL;
2778 if (parser->m_eventPtr && parser->m_buffer) {
2779 if (offset != NULL)
2780 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2781 if (size != NULL)
2782 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2783 return parser->m_buffer;
2784 }
2785 #else
2786 (void)parser;
2787 (void)offset;
2788 (void)size;
2789 #endif /* XML_CONTEXT_BYTES > 0 */
2790 return (const char *)0;
2791 }
2792
2793 XML_Size XMLCALL
2794 XML_GetCurrentLineNumber(XML_Parser parser) {
2795 if (parser == NULL)
2796 return 0;
2797 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2798 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2799 parser->m_eventPtr, &parser->m_position);
2800 parser->m_positionPtr = parser->m_eventPtr;
2801 }
2802 return parser->m_position.lineNumber + 1;
2803 }
2804
2805 XML_Size XMLCALL
2806 XML_GetCurrentColumnNumber(XML_Parser parser) {
2807 if (parser == NULL)
2808 return 0;
2809 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2810 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2811 parser->m_eventPtr, &parser->m_position);
2812 parser->m_positionPtr = parser->m_eventPtr;
2813 }
2814 return parser->m_position.columnNumber;
2815 }
2816
2817 void XMLCALL
2818 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2819 if (parser == NULL)
2820 return;
2821
2822 // NOTE: We are avoiding FREE(..) here because the content model
2823 // has been created using plain .malloc_fcn(..) rather than MALLOC(..).
2824 parser->m_mem.free_fcn(model);
2825 }
2826
2827 void *XMLCALL
2828 XML_MemMalloc(XML_Parser parser, size_t size) {
2829 if (parser == NULL)
2830 return NULL;
2831
2832 // NOTE: We are avoiding MALLOC(..) here to not include
2833 // user allocations with allocation tracking and limiting.
2834 return parser->m_mem.malloc_fcn(size);
2835 }
2836
2837 void *XMLCALL
2838 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2839 if (parser == NULL)
2840 return NULL;
2841
2842 // NOTE: We are avoiding REALLOC(..) here to not include
2843 // user allocations with allocation tracking and limiting.
2844 return parser->m_mem.realloc_fcn(ptr, size);
2845 }
2846
2847 void XMLCALL
2848 XML_MemFree(XML_Parser parser, void *ptr) {
2849 if (parser == NULL)
2850 return;
2851
2852 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and
2853 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..)
2854 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally.
2855 parser->m_mem.free_fcn(ptr);
2856 }
2857
2858 void XMLCALL
2859 XML_DefaultCurrent(XML_Parser parser) {
2860 if (parser == NULL)
2861 return;
2862 if (parser->m_defaultHandler) {
2863 if (parser->m_openInternalEntities)
2864 reportDefault(parser, parser->m_internalEncoding,
2865 parser->m_openInternalEntities->internalEventPtr,
2866 parser->m_openInternalEntities->internalEventEndPtr);
2867 else
2868 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2869 parser->m_eventEndPtr);
2870 }
2871 }
2872
2873 const XML_LChar *XMLCALL
2874 XML_ErrorString(enum XML_Error code) {
2875 switch (code) {
2876 case XML_ERROR_NONE:
2877 return NULL;
2878 case XML_ERROR_NO_MEMORY:
2879 return XML_L("out of memory");
2880 case XML_ERROR_SYNTAX:
2881 return XML_L("syntax error");
2882 case XML_ERROR_NO_ELEMENTS:
2883 return XML_L("no element found");
2884 case XML_ERROR_INVALID_TOKEN:
2885 return XML_L("not well-formed (invalid token)");
2886 case XML_ERROR_UNCLOSED_TOKEN:
2887 return XML_L("unclosed token");
2888 case XML_ERROR_PARTIAL_CHAR:
2889 return XML_L("partial character");
2890 case XML_ERROR_TAG_MISMATCH:
2891 return XML_L("mismatched tag");
2892 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2893 return XML_L("duplicate attribute");
2894 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2895 return XML_L("junk after document element");
2896 case XML_ERROR_PARAM_ENTITY_REF:
2897 return XML_L("illegal parameter entity reference");
2898 case XML_ERROR_UNDEFINED_ENTITY:
2899 return XML_L("undefined entity");
2900 case XML_ERROR_RECURSIVE_ENTITY_REF:
2901 return XML_L("recursive entity reference");
2902 case XML_ERROR_ASYNC_ENTITY:
2903 return XML_L("asynchronous entity");
2904 case XML_ERROR_BAD_CHAR_REF:
2905 return XML_L("reference to invalid character number");
2906 case XML_ERROR_BINARY_ENTITY_REF:
2907 return XML_L("reference to binary entity");
2908 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2909 return XML_L("reference to external entity in attribute");
2910 case XML_ERROR_MISPLACED_XML_PI:
2911 return XML_L("XML or text declaration not at start of entity");
2912 case XML_ERROR_UNKNOWN_ENCODING:
2913 return XML_L("unknown encoding");
2914 case XML_ERROR_INCORRECT_ENCODING:
2915 return XML_L("encoding specified in XML declaration is incorrect");
2916 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2917 return XML_L("unclosed CDATA section");
2918 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2919 return XML_L("error in processing external entity reference");
2920 case XML_ERROR_NOT_STANDALONE:
2921 return XML_L("document is not standalone");
2922 case XML_ERROR_UNEXPECTED_STATE:
2923 return XML_L("unexpected parser state - please send a bug report");
2924 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2925 return XML_L("entity declared in parameter entity");
2926 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2927 return XML_L("requested feature requires XML_DTD support in Expat");
2928 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2929 return XML_L("cannot change setting once parsing has begun");
2930 /* Added in 1.95.7. */
2931 case XML_ERROR_UNBOUND_PREFIX:
2932 return XML_L("unbound prefix");
2933 /* Added in 1.95.8. */
2934 case XML_ERROR_UNDECLARING_PREFIX:
2935 return XML_L("must not undeclare prefix");
2936 case XML_ERROR_INCOMPLETE_PE:
2937 return XML_L("incomplete markup in parameter entity");
2938 case XML_ERROR_XML_DECL:
2939 return XML_L("XML declaration not well-formed");
2940 case XML_ERROR_TEXT_DECL:
2941 return XML_L("text declaration not well-formed");
2942 case XML_ERROR_PUBLICID:
2943 return XML_L("illegal character(s) in public id");
2944 case XML_ERROR_SUSPENDED:
2945 return XML_L("parser suspended");
2946 case XML_ERROR_NOT_SUSPENDED:
2947 return XML_L("parser not suspended");
2948 case XML_ERROR_ABORTED:
2949 return XML_L("parsing aborted");
2950 case XML_ERROR_FINISHED:
2951 return XML_L("parsing finished");
2952 case XML_ERROR_SUSPEND_PE:
2953 return XML_L("cannot suspend in external parameter entity");
2954 /* Added in 2.0.0. */
2955 case XML_ERROR_RESERVED_PREFIX_XML:
2956 return XML_L(
2957 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2958 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2959 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2960 case XML_ERROR_RESERVED_NAMESPACE_URI:
2961 return XML_L(
2962 "prefix must not be bound to one of the reserved namespace names");
2963 /* Added in 2.2.5. */
2964 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2965 return XML_L("invalid argument");
2966 /* Added in 2.3.0. */
2967 case XML_ERROR_NO_BUFFER:
2968 return XML_L(
2969 "a successful prior call to function XML_GetBuffer is required");
2970 /* Added in 2.4.0. */
2971 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2972 return XML_L(
2973 "limit on input amplification factor (from DTD and entities) breached");
2974 /* Added in 2.6.4. */
2975 case XML_ERROR_NOT_STARTED:
2976 return XML_L("parser not started");
2977 }
2978 return NULL;
2979 }
2980
/* Return the library version as a string literal of the form
   "expat_<major>.<minor>.<micro>", assembled at compile time from
   XML_MAJOR_VERSION, XML_MINOR_VERSION and XML_MICRO_VERSION. */
const XML_LChar *XMLCALL
XML_ExpatVersion(void) {
  /* V1 is used to string-ize the version number. However, it would
     string-ize the actual version macro *names* unless we get them
     substituted before being passed to V1. The preprocessor expands
     macro arguments, then rescans for more expansions. Thus, we use V2
     to expand the version macros, then the preprocessor will expand the
     resulting V1() macro with the correct numerals. */
  /* ### I'm assuming cpp is portable in this respect... */

#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
#define V2(a, b, c) XML_L("expat_") V1(a, b, c)

  return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);

  /* The helper macros are only meant to exist inside this function. */
#undef V1
#undef V2
}
2999
3000 XML_Expat_Version XMLCALL
3001 XML_ExpatVersionInfo(void) {
3002 XML_Expat_Version version;
3003
3004 version.major = XML_MAJOR_VERSION;
3005 version.minor = XML_MINOR_VERSION;
3006 version.micro = XML_MICRO_VERSION;
3007
3008 return version;
3009 }
3010
/* Return a pointer to a static, read-only table describing how this
   library was compiled.  Each entry holds a feature id, a display name
   and a numeric value.  The two sizeof entries are always present; the
   others only appear when the corresponding compile-time macro condition
   holds.  The table is terminated by an entry with id XML_FEATURE_END and
   a NULL name.
*/
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
  static const XML_Feature features[] = {
      {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
       sizeof(XML_Char)},
      {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
       sizeof(XML_LChar)},
#ifdef XML_UNICODE
      {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
      {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
      {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
#if XML_CONTEXT_BYTES > 0
      /* The value is the configured number of context bytes. */
      {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
       XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
      {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
      {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
      {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
      {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
#if XML_GE == 1
      /* Added in Expat 2.4.0 for XML_DTD defined and
       * added in Expat 2.6.0 for XML_GE == 1. */
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_BLAP_MAX_AMP"),
       (long int)
           EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_BLAP_ACT_THRES"),
       EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
      /* Added in Expat 2.6.0. */
      {XML_FEATURE_GE, XML_L("XML_GE"), 0},
      /* Added in Expat 2.7.2. */
      {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_AT_MAX_AMP"),
       (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_AT_ACT_THRES"),
       (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
#endif
      /* Terminating sentinel entry. */
      {XML_FEATURE_END, NULL, 0}};

  return features;
}
3067
3068 #if XML_GE == 1
3069 XML_Bool XMLCALL
3070 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
3071 XML_Parser parser, float maximumAmplificationFactor) {
3072 if ((parser == NULL) || (parser->m_parentParser != NULL)
3073 || isnan(maximumAmplificationFactor)
3074 || (maximumAmplificationFactor < 1.0f)) {
3075 return XML_FALSE;
3076 }
3077 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
3078 return XML_TRUE;
3079 }
3080
3081 XML_Bool XMLCALL
3082 XML_SetBillionLaughsAttackProtectionActivationThreshold(
3083 XML_Parser parser, unsigned long long activationThresholdBytes) {
3084 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3085 return XML_FALSE;
3086 }
3087 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
3088 return XML_TRUE;
3089 }
3090
3091 XML_Bool XMLCALL
3092 XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
3093 float maximumAmplificationFactor) {
3094 if ((parser == NULL) || (parser->m_parentParser != NULL)
3095 || isnan(maximumAmplificationFactor)
3096 || (maximumAmplificationFactor < 1.0f)) {
3097 return XML_FALSE;
3098 }
3099 parser->m_alloc_tracker.maximumAmplificationFactor
3100 = maximumAmplificationFactor;
3101 return XML_TRUE;
3102 }
3103
3104 XML_Bool XMLCALL
3105 XML_SetAllocTrackerActivationThreshold(
3106 XML_Parser parser, unsigned long long activationThresholdBytes) {
3107 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3108 return XML_FALSE;
3109 }
3110 parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
3111 return XML_TRUE;
3112 }
3113 #endif /* XML_GE == 1 */
3114
3115 XML_Bool XMLCALL
3116 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
3117 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
3118 parser->m_reparseDeferralEnabled = enabled;
3119 return XML_TRUE;
3120 }
3121 return XML_FALSE;
3122 }
3123
3124 /* Initially tag->rawName always points into the parse buffer;
3125 for those TAG instances opened while the current parse buffer was
3126 processed, and not yet closed, we need to store tag->rawName in a more
3127 permanent location, since the parse buffer is about to be discarded.
3128 */
3129 static XML_Bool
3130 storeRawNames(XML_Parser parser) {
3131 TAG *tag = parser->m_tagStack;
3132 while (tag) {
3133 size_t bufSize;
3134 size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
3135 size_t rawNameLen;
3136 char *rawNameBuf = tag->buf.raw + nameLen;
3137 /* Stop if already stored. Since m_tagStack is a stack, we can stop
3138 at the first entry that has already been copied; everything
3139 below it in the stack is already been accounted for in a
3140 previous call to this function.
3141 */
3142 if (tag->rawName == rawNameBuf)
3143 break;
3144 /* For reuse purposes we need to ensure that the
3145 size of tag->buf is a multiple of sizeof(XML_Char).
3146 */
3147 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
3148 /* Detect and prevent integer overflow. */
3149 if (rawNameLen > (size_t)INT_MAX - nameLen)
3150 return XML_FALSE;
3151 bufSize = nameLen + rawNameLen;
3152 if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) {
3153 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3154 if (temp == NULL)
3155 return XML_FALSE;
3156 /* if tag->name.str points to tag->buf.str (only when namespace
3157 processing is off) then we have to update it
3158 */
3159 if (tag->name.str == tag->buf.str)
3160 tag->name.str = (XML_Char *)temp;
3161 /* if tag->name.localPart is set (when namespace processing is on)
3162 then update it as well, since it will always point into tag->buf
3163 */
3164 if (tag->name.localPart)
3165 tag->name.localPart
3166 = (XML_Char *)temp + (tag->name.localPart - tag->buf.str);
3167 tag->buf.raw = temp;
3168 tag->bufEnd = temp + bufSize;
3169 rawNameBuf = temp + nameLen;
3170 }
3171 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
3172 tag->rawName = rawNameBuf;
3173 tag = tag->parent;
3174 }
3175 return XML_TRUE;
3176 }
3177
3178 static enum XML_Error PTRCALL
3179 contentProcessor(XML_Parser parser, const char *start, const char *end,
3180 const char **endPtr) {
3181 enum XML_Error result = doContent(
3182 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
3183 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3184 XML_ACCOUNT_DIRECT);
3185 if (result == XML_ERROR_NONE) {
3186 if (! storeRawNames(parser))
3187 return XML_ERROR_NO_MEMORY;
3188 }
3189 return result;
3190 }
3191
3192 static enum XML_Error PTRCALL
3193 externalEntityInitProcessor(XML_Parser parser, const char *start,
3194 const char *end, const char **endPtr) {
3195 enum XML_Error result = initializeEncoding(parser);
3196 if (result != XML_ERROR_NONE)
3197 return result;
3198 parser->m_processor = externalEntityInitProcessor2;
3199 return externalEntityInitProcessor2(parser, start, end, endPtr);
3200 }
3201
3202 static enum XML_Error PTRCALL
3203 externalEntityInitProcessor2(XML_Parser parser, const char *start,
3204 const char *end, const char **endPtr) {
3205 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3206 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
3207 switch (tok) {
3208 case XML_TOK_BOM:
3209 #if XML_GE == 1
3210 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
3211 XML_ACCOUNT_DIRECT)) {
3212 accountingOnAbort(parser);
3213 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3214 }
3215 #endif /* XML_GE == 1 */
3216
3217 /* If we are at the end of the buffer, this would cause the next stage,
3218 i.e. externalEntityInitProcessor3, to pass control directly to
3219 doContent (by detecting XML_TOK_NONE) without processing any xml text
3220 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
3221 */
3222 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
3223 *endPtr = next;
3224 return XML_ERROR_NONE;
3225 }
3226 start = next;
3227 break;
3228 case XML_TOK_PARTIAL:
3229 if (! parser->m_parsingStatus.finalBuffer) {
3230 *endPtr = start;
3231 return XML_ERROR_NONE;
3232 }
3233 parser->m_eventPtr = start;
3234 return XML_ERROR_UNCLOSED_TOKEN;
3235 case XML_TOK_PARTIAL_CHAR:
3236 if (! parser->m_parsingStatus.finalBuffer) {
3237 *endPtr = start;
3238 return XML_ERROR_NONE;
3239 }
3240 parser->m_eventPtr = start;
3241 return XML_ERROR_PARTIAL_CHAR;
3242 }
3243 parser->m_processor = externalEntityInitProcessor3;
3244 return externalEntityInitProcessor3(parser, start, end, endPtr);
3245 }
3246
3247 static enum XML_Error PTRCALL
3248 externalEntityInitProcessor3(XML_Parser parser, const char *start,
3249 const char *end, const char **endPtr) {
3250 int tok;
3251 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3252 parser->m_eventPtr = start;
3253 tok = XmlContentTok(parser->m_encoding, start, end, &next);
3254 /* Note: These bytes are accounted later in:
3255 - processXmlDecl
3256 - externalEntityContentProcessor
3257 */
3258 parser->m_eventEndPtr = next;
3259
3260 switch (tok) {
3261 case XML_TOK_XML_DECL: {
3262 enum XML_Error result;
3263 result = processXmlDecl(parser, 1, start, next);
3264 if (result != XML_ERROR_NONE)
3265 return result;
3266 switch (parser->m_parsingStatus.parsing) {
3267 case XML_SUSPENDED:
3268 *endPtr = next;
3269 return XML_ERROR_NONE;
3270 case XML_FINISHED:
3271 return XML_ERROR_ABORTED;
3272 case XML_PARSING:
3273 if (parser->m_reenter) {
3274 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
3275 }
3276 /* Fall through */
3277 default:
3278 start = next;
3279 }
3280 } break;
3281 case XML_TOK_PARTIAL:
3282 if (! parser->m_parsingStatus.finalBuffer) {
3283 *endPtr = start;
3284 return XML_ERROR_NONE;
3285 }
3286 return XML_ERROR_UNCLOSED_TOKEN;
3287 case XML_TOK_PARTIAL_CHAR:
3288 if (! parser->m_parsingStatus.finalBuffer) {
3289 *endPtr = start;
3290 return XML_ERROR_NONE;
3291 }
3292 return XML_ERROR_PARTIAL_CHAR;
3293 }
3294 parser->m_processor = externalEntityContentProcessor;
3295 parser->m_tagLevel = 1;
3296 return externalEntityContentProcessor(parser, start, end, endPtr);
3297 }
3298
3299 static enum XML_Error PTRCALL
3300 externalEntityContentProcessor(XML_Parser parser, const char *start,
3301 const char *end, const char **endPtr) {
3302 enum XML_Error result
3303 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
3304 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3305 XML_ACCOUNT_ENTITY_EXPANSION);
3306 if (result == XML_ERROR_NONE) {
3307 if (! storeRawNames(parser))
3308 return XML_ERROR_NO_MEMORY;
3309 }
3310 return result;
3311 }
3312
3313 static enum XML_Error
3314 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
3315 const char *s, const char *end, const char **nextPtr,
3316 XML_Bool haveMore, enum XML_Account account) {
3317 /* save one level of indirection */
3318 DTD *const dtd = parser->m_dtd;
3319
3320 const char **eventPP;
3321 const char **eventEndPP;
3322 if (enc == parser->m_encoding) {
3323 eventPP = &parser->m_eventPtr;
3324 eventEndPP = &parser->m_eventEndPtr;
3325 } else {
3326 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3327 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3328 }
3329 *eventPP = s;
3330
3331 for (;;) {
3332 const char *next = s; /* XmlContentTok doesn't always set the last arg */
3333 int tok = XmlContentTok(enc, s, end, &next);
3334 #if XML_GE == 1
3335 const char *accountAfter
3336 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
3337 ? (haveMore ? s /* i.e. 0 bytes */ : end)
3338 : next;
3339 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
3340 account)) {
3341 accountingOnAbort(parser);
3342 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3343 }
3344 #endif
3345 *eventEndPP = next;
3346 switch (tok) {
3347 case XML_TOK_TRAILING_CR:
3348 if (haveMore) {
3349 *nextPtr = s;
3350 return XML_ERROR_NONE;
3351 }
3352 *eventEndPP = end;
3353 if (parser->m_characterDataHandler) {
3354 XML_Char c = 0xA;
3355 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3356 } else if (parser->m_defaultHandler)
3357 reportDefault(parser, enc, s, end);
3358 /* We are at the end of the final buffer, should we check for
3359 XML_SUSPENDED, XML_FINISHED?
3360 */
3361 if (startTagLevel == 0)
3362 return XML_ERROR_NO_ELEMENTS;
3363 if (parser->m_tagLevel != startTagLevel)
3364 return XML_ERROR_ASYNC_ENTITY;
3365 *nextPtr = end;
3366 return XML_ERROR_NONE;
3367 case XML_TOK_NONE:
3368 if (haveMore) {
3369 *nextPtr = s;
3370 return XML_ERROR_NONE;
3371 }
3372 if (startTagLevel > 0) {
3373 if (parser->m_tagLevel != startTagLevel)
3374 return XML_ERROR_ASYNC_ENTITY;
3375 *nextPtr = s;
3376 return XML_ERROR_NONE;
3377 }
3378 return XML_ERROR_NO_ELEMENTS;
3379 case XML_TOK_INVALID:
3380 *eventPP = next;
3381 return XML_ERROR_INVALID_TOKEN;
3382 case XML_TOK_PARTIAL:
3383 if (haveMore) {
3384 *nextPtr = s;
3385 return XML_ERROR_NONE;
3386 }
3387 return XML_ERROR_UNCLOSED_TOKEN;
3388 case XML_TOK_PARTIAL_CHAR:
3389 if (haveMore) {
3390 *nextPtr = s;
3391 return XML_ERROR_NONE;
3392 }
3393 return XML_ERROR_PARTIAL_CHAR;
3394 case XML_TOK_ENTITY_REF: {
3395 const XML_Char *name;
3396 ENTITY *entity;
3397 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3398 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3399 if (ch) {
3400 #if XML_GE == 1
3401 /* NOTE: We are replacing 4-6 characters original input for 1 character
3402 * so there is no amplification and hence recording without
3403 * protection. */
3404 accountingDiffTolerated(parser, tok, (char *)&ch,
3405 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3406 XML_ACCOUNT_ENTITY_EXPANSION);
3407 #endif /* XML_GE == 1 */
3408 if (parser->m_characterDataHandler)
3409 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3410 else if (parser->m_defaultHandler)
3411 reportDefault(parser, enc, s, next);
3412 break;
3413 }
3414 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3415 next - enc->minBytesPerChar);
3416 if (! name)
3417 return XML_ERROR_NO_MEMORY;
3418 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3419 poolDiscard(&dtd->pool);
3420 /* First, determine if a check for an existing declaration is needed;
3421 if yes, check that the entity exists, and that it is internal,
3422 otherwise call the skipped entity or default handler.
3423 */
3424 if (! dtd->hasParamEntityRefs || dtd->standalone) {
3425 if (! entity)
3426 return XML_ERROR_UNDEFINED_ENTITY;
3427 else if (! entity->is_internal)
3428 return XML_ERROR_ENTITY_DECLARED_IN_PE;
3429 } else if (! entity) {
3430 if (parser->m_skippedEntityHandler)
3431 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3432 else if (parser->m_defaultHandler)
3433 reportDefault(parser, enc, s, next);
3434 break;
3435 }
3436 if (entity->open)
3437 return XML_ERROR_RECURSIVE_ENTITY_REF;
3438 if (entity->notation)
3439 return XML_ERROR_BINARY_ENTITY_REF;
3440 if (entity->textPtr) {
3441 enum XML_Error result;
3442 if (! parser->m_defaultExpandInternalEntities) {
3443 if (parser->m_skippedEntityHandler)
3444 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3445 0);
3446 else if (parser->m_defaultHandler)
3447 reportDefault(parser, enc, s, next);
3448 break;
3449 }
3450 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3451 if (result != XML_ERROR_NONE)
3452 return result;
3453 } else if (parser->m_externalEntityRefHandler) {
3454 const XML_Char *context;
3455 entity->open = XML_TRUE;
3456 context = getContext(parser);
3457 entity->open = XML_FALSE;
3458 if (! context)
3459 return XML_ERROR_NO_MEMORY;
3460 if (! parser->m_externalEntityRefHandler(
3461 parser->m_externalEntityRefHandlerArg, context, entity->base,
3462 entity->systemId, entity->publicId))
3463 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3464 poolDiscard(&parser->m_tempPool);
3465 } else if (parser->m_defaultHandler)
3466 reportDefault(parser, enc, s, next);
3467 break;
3468 }
3469 case XML_TOK_START_TAG_NO_ATTS:
3470 /* fall through */
3471 case XML_TOK_START_TAG_WITH_ATTS: {
3472 TAG *tag;
3473 enum XML_Error result;
3474 XML_Char *toPtr;
3475 if (parser->m_freeTagList) {
3476 tag = parser->m_freeTagList;
3477 parser->m_freeTagList = parser->m_freeTagList->parent;
3478 } else {
3479 tag = MALLOC(parser, sizeof(TAG));
3480 if (! tag)
3481 return XML_ERROR_NO_MEMORY;
3482 tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE);
3483 if (! tag->buf.raw) {
3484 FREE(parser, tag);
3485 return XML_ERROR_NO_MEMORY;
3486 }
3487 tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE;
3488 }
3489 tag->bindings = NULL;
3490 tag->parent = parser->m_tagStack;
3491 parser->m_tagStack = tag;
3492 tag->name.localPart = NULL;
3493 tag->name.prefix = NULL;
3494 tag->rawName = s + enc->minBytesPerChar;
3495 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3496 ++parser->m_tagLevel;
3497 {
3498 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3499 const char *fromPtr = tag->rawName;
3500 toPtr = tag->buf.str;
3501 for (;;) {
3502 int convLen;
3503 const enum XML_Convert_Result convert_res
3504 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3505 (ICHAR *)tag->bufEnd - 1);
3506 convLen = (int)(toPtr - tag->buf.str);
3507 if ((fromPtr >= rawNameEnd)
3508 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3509 tag->name.strLen = convLen;
3510 break;
3511 }
3512 if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw))
3513 return XML_ERROR_NO_MEMORY;
3514 const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2;
3515 {
3516 char *temp = REALLOC(parser, tag->buf.raw, bufSize);
3517 if (temp == NULL)
3518 return XML_ERROR_NO_MEMORY;
3519 tag->buf.raw = temp;
3520 tag->bufEnd = temp + bufSize;
3521 toPtr = (XML_Char *)temp + convLen;
3522 }
3523 }
3524 }
3525 tag->name.str = tag->buf.str;
3526 *toPtr = XML_T('\0');
3527 result
3528 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3529 if (result)
3530 return result;
3531 if (parser->m_startElementHandler)
3532 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3533 (const XML_Char **)parser->m_atts);
3534 else if (parser->m_defaultHandler)
3535 reportDefault(parser, enc, s, next);
3536 poolClear(&parser->m_tempPool);
3537 break;
3538 }
3539 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3540 /* fall through */
3541 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3542 const char *rawName = s + enc->minBytesPerChar;
3543 enum XML_Error result;
3544 BINDING *bindings = NULL;
3545 XML_Bool noElmHandlers = XML_TRUE;
3546 TAG_NAME name;
3547 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3548 rawName + XmlNameLength(enc, rawName));
3549 if (! name.str)
3550 return XML_ERROR_NO_MEMORY;
3551 poolFinish(&parser->m_tempPool);
3552 result = storeAtts(parser, enc, s, &name, &bindings,
3553 XML_ACCOUNT_NONE /* token spans whole start tag */);
3554 if (result != XML_ERROR_NONE) {
3555 freeBindings(parser, bindings);
3556 return result;
3557 }
3558 poolFinish(&parser->m_tempPool);
3559 if (parser->m_startElementHandler) {
3560 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3561 (const XML_Char **)parser->m_atts);
3562 noElmHandlers = XML_FALSE;
3563 }
3564 if (parser->m_endElementHandler) {
3565 if (parser->m_startElementHandler)
3566 *eventPP = *eventEndPP;
3567 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3568 noElmHandlers = XML_FALSE;
3569 }
3570 if (noElmHandlers && parser->m_defaultHandler)
3571 reportDefault(parser, enc, s, next);
3572 poolClear(&parser->m_tempPool);
3573 freeBindings(parser, bindings);
3574 }
3575 if ((parser->m_tagLevel == 0)
3576 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3577 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3578 || (parser->m_parsingStatus.parsing == XML_PARSING
3579 && parser->m_reenter))
3580 parser->m_processor = epilogProcessor;
3581 else
3582 return epilogProcessor(parser, next, end, nextPtr);
3583 }
3584 break;
3585 case XML_TOK_END_TAG:
3586 if (parser->m_tagLevel == startTagLevel)
3587 return XML_ERROR_ASYNC_ENTITY;
3588 else {
3589 int len;
3590 const char *rawName;
3591 TAG *tag = parser->m_tagStack;
3592 rawName = s + enc->minBytesPerChar * 2;
3593 len = XmlNameLength(enc, rawName);
3594 if (len != tag->rawNameLength
3595 || memcmp(tag->rawName, rawName, len) != 0) {
3596 *eventPP = rawName;
3597 return XML_ERROR_TAG_MISMATCH;
3598 }
3599 parser->m_tagStack = tag->parent;
3600 tag->parent = parser->m_freeTagList;
3601 parser->m_freeTagList = tag;
3602 --parser->m_tagLevel;
3603 if (parser->m_endElementHandler) {
3604 const XML_Char *localPart;
3605 const XML_Char *prefix;
3606 XML_Char *uri;
3607 localPart = tag->name.localPart;
3608 if (parser->m_ns && localPart) {
3609 /* localPart and prefix may have been overwritten in
3610 tag->name.str, since this points to the binding->uri
3611 buffer which gets reused; so we have to add them again
3612 */
3613 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3614 /* don't need to check for space - already done in storeAtts() */
3615 while (*localPart)
3616 *uri++ = *localPart++;
3617 prefix = tag->name.prefix;
3618 if (parser->m_ns_triplets && prefix) {
3619 *uri++ = parser->m_namespaceSeparator;
3620 while (*prefix)
3621 *uri++ = *prefix++;
3622 }
3623 *uri = XML_T('\0');
3624 }
3625 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3626 } else if (parser->m_defaultHandler)
3627 reportDefault(parser, enc, s, next);
3628 while (tag->bindings) {
3629 BINDING *b = tag->bindings;
3630 if (parser->m_endNamespaceDeclHandler)
3631 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3632 b->prefix->name);
3633 tag->bindings = tag->bindings->nextTagBinding;
3634 b->nextTagBinding = parser->m_freeBindingList;
3635 parser->m_freeBindingList = b;
3636 b->prefix->binding = b->prevPrefixBinding;
3637 }
3638 if ((parser->m_tagLevel == 0)
3639 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3640 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3641 || (parser->m_parsingStatus.parsing == XML_PARSING
3642 && parser->m_reenter))
3643 parser->m_processor = epilogProcessor;
3644 else
3645 return epilogProcessor(parser, next, end, nextPtr);
3646 }
3647 }
3648 break;
3649 case XML_TOK_CHAR_REF: {
3650 int n = XmlCharRefNumber(enc, s);
3651 if (n < 0)
3652 return XML_ERROR_BAD_CHAR_REF;
3653 if (parser->m_characterDataHandler) {
3654 XML_Char buf[XML_ENCODE_MAX];
3655 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3656 XmlEncode(n, (ICHAR *)buf));
3657 } else if (parser->m_defaultHandler)
3658 reportDefault(parser, enc, s, next);
3659 } break;
3660 case XML_TOK_XML_DECL:
3661 return XML_ERROR_MISPLACED_XML_PI;
3662 case XML_TOK_DATA_NEWLINE:
3663 if (parser->m_characterDataHandler) {
3664 XML_Char c = 0xA;
3665 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3666 } else if (parser->m_defaultHandler)
3667 reportDefault(parser, enc, s, next);
3668 break;
3669 case XML_TOK_CDATA_SECT_OPEN: {
3670 enum XML_Error result;
3671 if (parser->m_startCdataSectionHandler)
3672 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3673 /* BEGIN disabled code */
3674 /* Suppose you doing a transformation on a document that involves
3675 changing only the character data. You set up a defaultHandler
3676 and a characterDataHandler. The defaultHandler simply copies
3677 characters through. The characterDataHandler does the
3678 transformation and writes the characters out escaping them as
3679 necessary. This case will fail to work if we leave out the
3680 following two lines (because & and < inside CDATA sections will
3681 be incorrectly escaped).
3682
3683 However, now we have a start/endCdataSectionHandler, so it seems
3684 easier to let the user deal with this.
3685 */
3686 else if ((0) && parser->m_characterDataHandler)
3687 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3688 0);
3689 /* END disabled code */
3690 else if (parser->m_defaultHandler)
3691 reportDefault(parser, enc, s, next);
3692 result
3693 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3694 if (result != XML_ERROR_NONE)
3695 return result;
3696 else if (! next) {
3697 parser->m_processor = cdataSectionProcessor;
3698 return result;
3699 }
3700 } break;
3701 case XML_TOK_TRAILING_RSQB:
3702 if (haveMore) {
3703 *nextPtr = s;
3704 return XML_ERROR_NONE;
3705 }
3706 if (parser->m_characterDataHandler) {
3707 if (MUST_CONVERT(enc, s)) {
3708 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3709 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3710 parser->m_characterDataHandler(
3711 parser->m_handlerArg, parser->m_dataBuf,
3712 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3713 } else
3714 parser->m_characterDataHandler(
3715 parser->m_handlerArg, (const XML_Char *)s,
3716 (int)((const XML_Char *)end - (const XML_Char *)s));
3717 } else if (parser->m_defaultHandler)
3718 reportDefault(parser, enc, s, end);
3719 /* We are at the end of the final buffer, should we check for
3720 XML_SUSPENDED, XML_FINISHED?
3721 */
3722 if (startTagLevel == 0) {
3723 *eventPP = end;
3724 return XML_ERROR_NO_ELEMENTS;
3725 }
3726 if (parser->m_tagLevel != startTagLevel) {
3727 *eventPP = end;
3728 return XML_ERROR_ASYNC_ENTITY;
3729 }
3730 *nextPtr = end;
3731 return XML_ERROR_NONE;
3732 case XML_TOK_DATA_CHARS: {
3733 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3734 if (charDataHandler) {
3735 if (MUST_CONVERT(enc, s)) {
3736 for (;;) {
3737 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3738 const enum XML_Convert_Result convert_res = XmlConvert(
3739 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3740 *eventEndPP = s;
3741 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3742 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3743 if ((convert_res == XML_CONVERT_COMPLETED)
3744 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3745 break;
3746 *eventPP = s;
3747 }
3748 } else
3749 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3750 (int)((const XML_Char *)next - (const XML_Char *)s));
3751 } else if (parser->m_defaultHandler)
3752 reportDefault(parser, enc, s, next);
3753 } break;
3754 case XML_TOK_PI:
3755 if (! reportProcessingInstruction(parser, enc, s, next))
3756 return XML_ERROR_NO_MEMORY;
3757 break;
3758 case XML_TOK_COMMENT:
3759 if (! reportComment(parser, enc, s, next))
3760 return XML_ERROR_NO_MEMORY;
3761 break;
3762 default:
3763 /* All of the tokens produced by XmlContentTok() have their own
3764 * explicit cases, so this default is not strictly necessary.
3765 * However it is a useful safety net, so we retain the code and
3766 * simply exclude it from the coverage tests.
3767 *
3768 * LCOV_EXCL_START
3769 */
3770 if (parser->m_defaultHandler)
3771 reportDefault(parser, enc, s, next);
3772 break;
3773 /* LCOV_EXCL_STOP */
3774 }
3775 switch (parser->m_parsingStatus.parsing) {
3776 case XML_SUSPENDED:
3777 *eventPP = next;
3778 *nextPtr = next;
3779 return XML_ERROR_NONE;
3780 case XML_FINISHED:
3781 *eventPP = next;
3782 return XML_ERROR_ABORTED;
3783 case XML_PARSING:
3784 if (parser->m_reenter) {
3785 *nextPtr = next;
3786 return XML_ERROR_NONE;
3787 }
3788 /* Fall through */
3789 default:;
3790 *eventPP = s = next;
3791 }
3792 }
3793 /* not reached */
3794 }
3795
3796 /* This function does not call free() on the allocated memory, merely
3797 * moving it to the parser's m_freeBindingList where it can be freed or
3798 * reused as appropriate.
3799 */
3800 static void
3801 freeBindings(XML_Parser parser, BINDING *bindings) {
3802 while (bindings) {
3803 BINDING *b = bindings;
3804
3805 /* m_startNamespaceDeclHandler will have been called for this
3806 * binding in addBindings(), so call the end handler now.
3807 */
3808 if (parser->m_endNamespaceDeclHandler)
3809 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3810
3811 bindings = bindings->nextTagBinding;
3812 b->nextTagBinding = parser->m_freeBindingList;
3813 parser->m_freeBindingList = b;
3814 b->prefix->binding = b->prevPrefixBinding;
3815 }
3816 }
3817
/* Process the attributes of the tag that starts at attStr and build the
   attribute list that is later handed to the start element handler.

   Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts / m_attInfo / m_nsAtts arrays and
                  m_tempPool are used (and grown where necessary)
   - enc:         encoding passed on to the tokenizer and pool helpers
   - attStr:      start of the tag, as passed to XmlGetAttributes()
   - tagNamePtr:  in: ->str holds the element name as written;
                  out (namespace processing only): ->str, ->localPart,
                  ->prefix, ->prefixLen and ->uriLen describe the
                  expanded element name
   - bindingsPtr: head of the list onto which addBinding() pushes the
                  namespace bindings declared by this tag
   - account:     forwarded unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success.  Otherwise XML_ERROR_NO_MEMORY,
   XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_UNBOUND_PREFIX, or the error
   reported by storeAttributeValue() / addBinding().

   Bookkeeping: the XML_Char stored immediately before an attribute name
   (name[-1]) serves as a per-tag flag: 0 = not seen, 1 = present,
   2 = present with a prefix that still needs expanding.  The flags are
   reset to 0 before a successful return.
   NOTE(review): the error returns below leave flags set; presumably the
   parser is not used further after such an error -- confirm.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0;
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    /* first occurrence of this element: register it, keyed by a copy of
       the name that lives in the DTD pool */
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* grow m_atts (and m_attInfo) when specified plus defaulted attributes
     do not fit into the current array */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = REALLOC(parser, parser->m_atts,
                   parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      /* m_atts is untouched, so put back the size that matches it */
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = REALLOC(parser, parser->m_attInfo,
                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* the first tokenizer pass could only fill oldAttsSize entries;
       run it again now that all n attributes fit */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* The application's name/value list is written over the very memory
     that holds the tokenizer's ATTRIBUTE records.
     NOTE(review): this relies on the pairs written so far never
     overtaking the ATTRIBUTE record still being read -- confirm against
     the ATTRIBUTE layout. */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* translate the pointers into offsets, measured back from the end of
       the data currently being parsed */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
     */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1; /* flag: this attribute has been seen */
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* step back over the name slot so that the declaration is not
           part of the application's attribute list */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    /* names sit at the even indices of the name/value list */
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* only attributes that were not specified and have a default value */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0; /* terminate the list */

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    unsigned int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* a slot counts as occupied only if it carries the version chosen
       here, so slots written for earlier tags read as empty */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* build the expanded name in the temp pool: namespace URI first */
        for (j = 0; j < (unsigned int)b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* skip the prefix; s ends up just past the colon */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* occupied by a different name: probe the next slot, moving
               downwards by `step` and wrapping around at index 0 */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* overwrite the terminator appended above with the separator */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* stop once the last prefixed name is done; the loop below
           clears the flags of whatever follows */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations are not in appAtts (see --attIndex above), so
     their flags are cleared through the bindings list instead */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE; /* no prefix, no default namespace: keep name */
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    while (binding->prefix->name[prefixLen++])
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* the expanded name is assembled inside the binding's own URI buffer,
     directly behind the URI; enlarge that buffer if it is too small */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* open tags whose name points at the old buffer must follow it to
       the new one before the old one is freed */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
4295
4296 static XML_Bool
4297 is_rfc3986_uri_char(XML_Char candidate) {
4298 // For the RFC 3986 ANBF grammar see
4299 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4300
4301 switch (candidate) {
4302 // From rule "ALPHA" (uppercase half)
4303 case 'A':
4304 case 'B':
4305 case 'C':
4306 case 'D':
4307 case 'E':
4308 case 'F':
4309 case 'G':
4310 case 'H':
4311 case 'I':
4312 case 'J':
4313 case 'K':
4314 case 'L':
4315 case 'M':
4316 case 'N':
4317 case 'O':
4318 case 'P':
4319 case 'Q':
4320 case 'R':
4321 case 'S':
4322 case 'T':
4323 case 'U':
4324 case 'V':
4325 case 'W':
4326 case 'X':
4327 case 'Y':
4328 case 'Z':
4329
4330 // From rule "ALPHA" (lowercase half)
4331 case 'a':
4332 case 'b':
4333 case 'c':
4334 case 'd':
4335 case 'e':
4336 case 'f':
4337 case 'g':
4338 case 'h':
4339 case 'i':
4340 case 'j':
4341 case 'k':
4342 case 'l':
4343 case 'm':
4344 case 'n':
4345 case 'o':
4346 case 'p':
4347 case 'q':
4348 case 'r':
4349 case 's':
4350 case 't':
4351 case 'u':
4352 case 'v':
4353 case 'w':
4354 case 'x':
4355 case 'y':
4356 case 'z':
4357
4358 // From rule "DIGIT"
4359 case '0':
4360 case '1':
4361 case '2':
4362 case '3':
4363 case '4':
4364 case '5':
4365 case '6':
4366 case '7':
4367 case '8':
4368 case '9':
4369
4370 // From rule "pct-encoded"
4371 case '%':
4372
4373 // From rule "unreserved"
4374 case '-':
4375 case '.':
4376 case '_':
4377 case '~':
4378
4379 // From rule "gen-delims"
4380 case ':':
4381 case '/':
4382 case '?':
4383 case '#':
4384 case '[':
4385 case ']':
4386 case '@':
4387
4388 // From rule "sub-delims"
4389 case '!':
4390 case '$':
4391 case '&':
4392 case '\'':
4393 case '(':
4394 case ')':
4395 case '*':
4396 case '+':
4397 case ',':
4398 case ';':
4399 case '=':
4400 return XML_TRUE;
4401
4402 default:
4403 return XML_FALSE;
4404 }
4405 }
4406
4407 /* addBinding() overwrites the value of prefix->binding without checking.
4408 Therefore one must keep track of the old value outside of addBinding().
4409 */
4410 static enum XML_Error
4411 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4412 const XML_Char *uri, BINDING **bindingsPtr) {
4413 // "http://www.w3.org/XML/1998/namespace"
4414 static const XML_Char xmlNamespace[]
4415 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4416 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4417 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4418 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4419 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4420 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4421 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4422 ASCII_e, '\0'};
4423 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4424 // "http://www.w3.org/2000/xmlns/"
4425 static const XML_Char xmlnsNamespace[]
4426 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4427 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4428 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4429 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4430 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4431 static const int xmlnsLen
4432 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4433
4434 XML_Bool mustBeXML = XML_FALSE;
4435 XML_Bool isXML = XML_TRUE;
4436 XML_Bool isXMLNS = XML_TRUE;
4437
4438 BINDING *b;
4439 int len;
4440
4441 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4442 if (*uri == XML_T('\0') && prefix->name)
4443 return XML_ERROR_UNDECLARING_PREFIX;
4444
4445 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4446 && prefix->name[1] == XML_T(ASCII_m)
4447 && prefix->name[2] == XML_T(ASCII_l)) {
4448 /* Not allowed to bind xmlns */
4449 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4450 && prefix->name[5] == XML_T('\0'))
4451 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4452
4453 if (prefix->name[3] == XML_T('\0'))
4454 mustBeXML = XML_TRUE;
4455 }
4456
4457 for (len = 0; uri[len]; len++) {
4458 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4459 isXML = XML_FALSE;
4460
4461 if (! mustBeXML && isXMLNS
4462 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4463 isXMLNS = XML_FALSE;
4464
4465 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4466 // today (and is not REQUIRED to do so with regard to the XML 1.0
4467 // namespaces specification) we have to at least make sure, that
4468 // the application on top of Expat (that is likely splitting expanded
4469 // element names ("qualified names") of form
4470 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4471 // in its element handler code) cannot be confused by an attacker
4472 // putting additional namespace separator characters into namespace
4473 // declarations. That would be ambiguous and not to be expected.
4474 //
4475 // While the HTML API docs of function XML_ParserCreateNS have been
4476 // advising against use of a namespace separator character that can
4477 // appear in a URI for >20 years now, some widespread applications
4478 // are using URI characters (':' (colon) in particular) for a
4479 // namespace separator, in practice. To keep these applications
4480 // functional, we only reject namespaces URIs containing the
4481 // application-chosen namespace separator if the chosen separator
4482 // is a non-URI character with regard to RFC 3986.
4483 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4484 && ! is_rfc3986_uri_char(uri[len])) {
4485 return XML_ERROR_SYNTAX;
4486 }
4487 }
4488 isXML = isXML && len == xmlLen;
4489 isXMLNS = isXMLNS && len == xmlnsLen;
4490
4491 if (mustBeXML != isXML)
4492 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4493 : XML_ERROR_RESERVED_NAMESPACE_URI;
4494
4495 if (isXMLNS)
4496 return XML_ERROR_RESERVED_NAMESPACE_URI;
4497
4498 if (parser->m_namespaceSeparator)
4499 len++;
4500 if (parser->m_freeBindingList) {
4501 b = parser->m_freeBindingList;
4502 if (len > b->uriAlloc) {
4503 /* Detect and prevent integer overflow */
4504 if (len > INT_MAX - EXPAND_SPARE) {
4505 return XML_ERROR_NO_MEMORY;
4506 }
4507
4508 /* Detect and prevent integer overflow.
4509 * The preprocessor guard addresses the "always false" warning
4510 * from -Wtype-limits on platforms where
4511 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4512 #if UINT_MAX >= SIZE_MAX
4513 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4514 return XML_ERROR_NO_MEMORY;
4515 }
4516 #endif
4517
4518 XML_Char *temp
4519 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4520 if (temp == NULL)
4521 return XML_ERROR_NO_MEMORY;
4522 b->uri = temp;
4523 b->uriAlloc = len + EXPAND_SPARE;
4524 }
4525 parser->m_freeBindingList = b->nextTagBinding;
4526 } else {
4527 b = MALLOC(parser, sizeof(BINDING));
4528 if (! b)
4529 return XML_ERROR_NO_MEMORY;
4530
4531 /* Detect and prevent integer overflow */
4532 if (len > INT_MAX - EXPAND_SPARE) {
4533 return XML_ERROR_NO_MEMORY;
4534 }
4535 /* Detect and prevent integer overflow.
4536 * The preprocessor guard addresses the "always false" warning
4537 * from -Wtype-limits on platforms where
4538 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4539 #if UINT_MAX >= SIZE_MAX
4540 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
4541 return XML_ERROR_NO_MEMORY;
4542 }
4543 #endif
4544
4545 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4546 if (! b->uri) {
4547 FREE(parser, b);
4548 return XML_ERROR_NO_MEMORY;
4549 }
4550 b->uriAlloc = len + EXPAND_SPARE;
4551 }
4552 b->uriLen = len;
4553 memcpy(b->uri, uri, len * sizeof(XML_Char));
4554 if (parser->m_namespaceSeparator)
4555 b->uri[len - 1] = parser->m_namespaceSeparator;
4556 b->prefix = prefix;
4557 b->attId = attId;
4558 b->prevPrefixBinding = prefix->binding;
4559 /* NULL binding when default namespace undeclared */
4560 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4561 prefix->binding = NULL;
4562 else
4563 prefix->binding = b;
4564 b->nextTagBinding = *bindingsPtr;
4565 *bindingsPtr = b;
4566 /* if attId == NULL then we are not starting a namespace scope */
4567 if (attId && parser->m_startNamespaceDeclHandler)
4568 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4569 prefix->binding ? uri : 0);
4570 return XML_ERROR_NONE;
4571 }
4572
4573 /* The idea here is to avoid using stack for each CDATA section when
4574 the whole file is parsed with one call.
4575 */
4576 static enum XML_Error PTRCALL
4577 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4578 const char **endPtr) {
4579 enum XML_Error result = doCdataSection(
4580 parser, parser->m_encoding, &start, end, endPtr,
4581 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4582 if (result != XML_ERROR_NONE)
4583 return result;
4584 if (start) {
4585 if (parser->m_parentParser) { /* we are parsing an external entity */
4586 parser->m_processor = externalEntityContentProcessor;
4587 return externalEntityContentProcessor(parser, start, end, endPtr);
4588 } else {
4589 parser->m_processor = contentProcessor;
4590 return contentProcessor(parser, start, end, endPtr);
4591 }
4592 }
4593 return result;
4594 }
4595
4596 /* startPtr gets set to non-null if the section is closed, and to null if
4597 the section is not yet closed.
4598 */
4599 static enum XML_Error
4600 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4601 const char *end, const char **nextPtr, XML_Bool haveMore,
4602 enum XML_Account account) {
4603 const char *s = *startPtr;
4604 const char **eventPP;
4605 const char **eventEndPP;
4606 if (enc == parser->m_encoding) {
4607 eventPP = &parser->m_eventPtr;
4608 *eventPP = s;
4609 eventEndPP = &parser->m_eventEndPtr;
4610 } else {
4611 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4612 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4613 }
4614 *eventPP = s;
4615 *startPtr = NULL;
4616
4617 for (;;) {
4618 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4619 int tok = XmlCdataSectionTok(enc, s, end, &next);
4620 #if XML_GE == 1
4621 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4622 accountingOnAbort(parser);
4623 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4624 }
4625 #else
4626 UNUSED_P(account);
4627 #endif
4628 *eventEndPP = next;
4629 switch (tok) {
4630 case XML_TOK_CDATA_SECT_CLOSE:
4631 if (parser->m_endCdataSectionHandler)
4632 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4633 /* BEGIN disabled code */
4634 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4635 else if ((0) && parser->m_characterDataHandler)
4636 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4637 0);
4638 /* END disabled code */
4639 else if (parser->m_defaultHandler)
4640 reportDefault(parser, enc, s, next);
4641 *startPtr = next;
4642 *nextPtr = next;
4643 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4644 return XML_ERROR_ABORTED;
4645 else
4646 return XML_ERROR_NONE;
4647 case XML_TOK_DATA_NEWLINE:
4648 if (parser->m_characterDataHandler) {
4649 XML_Char c = 0xA;
4650 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4651 } else if (parser->m_defaultHandler)
4652 reportDefault(parser, enc, s, next);
4653 break;
4654 case XML_TOK_DATA_CHARS: {
4655 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4656 if (charDataHandler) {
4657 if (MUST_CONVERT(enc, s)) {
4658 for (;;) {
4659 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4660 const enum XML_Convert_Result convert_res = XmlConvert(
4661 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4662 *eventEndPP = next;
4663 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4664 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4665 if ((convert_res == XML_CONVERT_COMPLETED)
4666 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4667 break;
4668 *eventPP = s;
4669 }
4670 } else
4671 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4672 (int)((const XML_Char *)next - (const XML_Char *)s));
4673 } else if (parser->m_defaultHandler)
4674 reportDefault(parser, enc, s, next);
4675 } break;
4676 case XML_TOK_INVALID:
4677 *eventPP = next;
4678 return XML_ERROR_INVALID_TOKEN;
4679 case XML_TOK_PARTIAL_CHAR:
4680 if (haveMore) {
4681 *nextPtr = s;
4682 return XML_ERROR_NONE;
4683 }
4684 return XML_ERROR_PARTIAL_CHAR;
4685 case XML_TOK_PARTIAL:
4686 case XML_TOK_NONE:
4687 if (haveMore) {
4688 *nextPtr = s;
4689 return XML_ERROR_NONE;
4690 }
4691 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4692 default:
4693 /* Every token returned by XmlCdataSectionTok() has its own
4694 * explicit case, so this default case will never be executed.
4695 * We retain it as a safety net and exclude it from the coverage
4696 * statistics.
4697 *
4698 * LCOV_EXCL_START
4699 */
4700 *eventPP = next;
4701 return XML_ERROR_UNEXPECTED_STATE;
4702 /* LCOV_EXCL_STOP */
4703 }
4704
4705 switch (parser->m_parsingStatus.parsing) {
4706 case XML_SUSPENDED:
4707 *eventPP = next;
4708 *nextPtr = next;
4709 return XML_ERROR_NONE;
4710 case XML_FINISHED:
4711 *eventPP = next;
4712 return XML_ERROR_ABORTED;
4713 case XML_PARSING:
4714 if (parser->m_reenter) {
4715 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
4716 }
4717 /* Fall through */
4718 default:;
4719 *eventPP = s = next;
4720 }
4721 }
4722 /* not reached */
4723 }
4724
4725 #ifdef XML_DTD
4726
4727 /* The idea here is to avoid using stack for each IGNORE section when
4728 the whole file is parsed with one call.
4729 */
4730 static enum XML_Error PTRCALL
4731 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4732 const char **endPtr) {
4733 enum XML_Error result
4734 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4735 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4736 if (result != XML_ERROR_NONE)
4737 return result;
4738 if (start) {
4739 parser->m_processor = prologProcessor;
4740 return prologProcessor(parser, start, end, endPtr);
4741 }
4742 return result;
4743 }
4744
4745 /* startPtr gets set to non-null is the section is closed, and to null
4746 if the section is not yet closed.
4747 */
4748 static enum XML_Error
4749 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4750 const char *end, const char **nextPtr, XML_Bool haveMore) {
4751 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4752 int tok;
4753 const char *s = *startPtr;
4754 const char **eventPP;
4755 const char **eventEndPP;
4756 if (enc == parser->m_encoding) {
4757 eventPP = &parser->m_eventPtr;
4758 *eventPP = s;
4759 eventEndPP = &parser->m_eventEndPtr;
4760 } else {
4761 /* It's not entirely clear, but it seems the following two lines
4762 * of code cannot be executed. The only occasions on which 'enc'
4763 * is not 'encoding' are when this function is called
4764 * from the internal entity processing, and IGNORE sections are an
4765 * error in internal entities.
4766 *
4767 * Since it really isn't clear that this is true, we keep the code
4768 * and just remove it from our coverage tests.
4769 *
4770 * LCOV_EXCL_START
4771 */
4772 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4773 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4774 /* LCOV_EXCL_STOP */
4775 }
4776 *eventPP = s;
4777 *startPtr = NULL;
4778 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4779 # if XML_GE == 1
4780 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4781 XML_ACCOUNT_DIRECT)) {
4782 accountingOnAbort(parser);
4783 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4784 }
4785 # endif
4786 *eventEndPP = next;
4787 switch (tok) {
4788 case XML_TOK_IGNORE_SECT:
4789 if (parser->m_defaultHandler)
4790 reportDefault(parser, enc, s, next);
4791 *startPtr = next;
4792 *nextPtr = next;
4793 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4794 return XML_ERROR_ABORTED;
4795 else
4796 return XML_ERROR_NONE;
4797 case XML_TOK_INVALID:
4798 *eventPP = next;
4799 return XML_ERROR_INVALID_TOKEN;
4800 case XML_TOK_PARTIAL_CHAR:
4801 if (haveMore) {
4802 *nextPtr = s;
4803 return XML_ERROR_NONE;
4804 }
4805 return XML_ERROR_PARTIAL_CHAR;
4806 case XML_TOK_PARTIAL:
4807 case XML_TOK_NONE:
4808 if (haveMore) {
4809 *nextPtr = s;
4810 return XML_ERROR_NONE;
4811 }
4812 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4813 default:
4814 /* All of the tokens that XmlIgnoreSectionTok() returns have
4815 * explicit cases to handle them, so this default case is never
4816 * executed. We keep it as a safety net anyway, and remove it
4817 * from our test coverage statistics.
4818 *
4819 * LCOV_EXCL_START
4820 */
4821 *eventPP = next;
4822 return XML_ERROR_UNEXPECTED_STATE;
4823 /* LCOV_EXCL_STOP */
4824 }
4825 /* not reached */
4826 }
4827
4828 #endif /* XML_DTD */
4829
4830 static enum XML_Error
4831 initializeEncoding(XML_Parser parser) {
4832 const char *s;
4833 #ifdef XML_UNICODE
4834 char encodingBuf[128];
4835 /* See comments about `protocolEncodingName` in parserInit() */
4836 if (! parser->m_protocolEncodingName)
4837 s = NULL;
4838 else {
4839 int i;
4840 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4841 if (i == sizeof(encodingBuf) - 1
4842 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4843 encodingBuf[0] = '\0';
4844 break;
4845 }
4846 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4847 }
4848 encodingBuf[i] = '\0';
4849 s = encodingBuf;
4850 }
4851 #else
4852 s = parser->m_protocolEncodingName;
4853 #endif
4854 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4855 &parser->m_initEncoding, &parser->m_encoding, s))
4856 return XML_ERROR_NONE;
4857 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4858 }
4859
4860 static enum XML_Error
4861 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4862 const char *next) {
4863 const char *encodingName = NULL;
4864 const XML_Char *storedEncName = NULL;
4865 const ENCODING *newEncoding = NULL;
4866 const char *version = NULL;
4867 const char *versionend = NULL;
4868 const XML_Char *storedversion = NULL;
4869 int standalone = -1;
4870
4871 #if XML_GE == 1
4872 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4873 XML_ACCOUNT_DIRECT)) {
4874 accountingOnAbort(parser);
4875 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4876 }
4877 #endif
4878
4879 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4880 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4881 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4882 if (isGeneralTextEntity)
4883 return XML_ERROR_TEXT_DECL;
4884 else
4885 return XML_ERROR_XML_DECL;
4886 }
4887 if (! isGeneralTextEntity && standalone == 1) {
4888 parser->m_dtd->standalone = XML_TRUE;
4889 #ifdef XML_DTD
4890 if (parser->m_paramEntityParsing
4891 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4892 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4893 #endif /* XML_DTD */
4894 }
4895 if (parser->m_xmlDeclHandler) {
4896 if (encodingName != NULL) {
4897 storedEncName = poolStoreString(
4898 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4899 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4900 if (! storedEncName)
4901 return XML_ERROR_NO_MEMORY;
4902 poolFinish(&parser->m_temp2Pool);
4903 }
4904 if (version) {
4905 storedversion
4906 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4907 versionend - parser->m_encoding->minBytesPerChar);
4908 if (! storedversion)
4909 return XML_ERROR_NO_MEMORY;
4910 }
4911 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4912 standalone);
4913 } else if (parser->m_defaultHandler)
4914 reportDefault(parser, parser->m_encoding, s, next);
4915 if (parser->m_protocolEncodingName == NULL) {
4916 if (newEncoding) {
4917 /* Check that the specified encoding does not conflict with what
4918 * the parser has already deduced. Do we have the same number
4919 * of bytes in the smallest representation of a character? If
4920 * this is UTF-16, is it the same endianness?
4921 */
4922 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4923 || (newEncoding->minBytesPerChar == 2
4924 && newEncoding != parser->m_encoding)) {
4925 parser->m_eventPtr = encodingName;
4926 return XML_ERROR_INCORRECT_ENCODING;
4927 }
4928 parser->m_encoding = newEncoding;
4929 } else if (encodingName) {
4930 enum XML_Error result;
4931 if (! storedEncName) {
4932 storedEncName = poolStoreString(
4933 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4934 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4935 if (! storedEncName)
4936 return XML_ERROR_NO_MEMORY;
4937 }
4938 result = handleUnknownEncoding(parser, storedEncName);
4939 poolClear(&parser->m_temp2Pool);
4940 if (result == XML_ERROR_UNKNOWN_ENCODING)
4941 parser->m_eventPtr = encodingName;
4942 return result;
4943 }
4944 }
4945
4946 if (storedEncName || storedversion)
4947 poolClear(&parser->m_temp2Pool);
4948
4949 return XML_ERROR_NONE;
4950 }
4951
4952 static enum XML_Error
4953 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4954 if (parser->m_unknownEncodingHandler) {
4955 XML_Encoding info;
4956 int i;
4957 for (i = 0; i < 256; i++)
4958 info.map[i] = -1;
4959 info.convert = NULL;
4960 info.data = NULL;
4961 info.release = NULL;
4962 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4963 encodingName, &info)) {
4964 ENCODING *enc;
4965 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4966 if (! parser->m_unknownEncodingMem) {
4967 if (info.release)
4968 info.release(info.data);
4969 return XML_ERROR_NO_MEMORY;
4970 }
4971 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4972 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4973 if (enc) {
4974 parser->m_unknownEncodingData = info.data;
4975 parser->m_unknownEncodingRelease = info.release;
4976 parser->m_encoding = enc;
4977 return XML_ERROR_NONE;
4978 }
4979 }
4980 if (info.release != NULL)
4981 info.release(info.data);
4982 }
4983 return XML_ERROR_UNKNOWN_ENCODING;
4984 }
4985
4986 static enum XML_Error PTRCALL
4987 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4988 const char **nextPtr) {
4989 enum XML_Error result = initializeEncoding(parser);
4990 if (result != XML_ERROR_NONE)
4991 return result;
4992 parser->m_processor = prologProcessor;
4993 return prologProcessor(parser, s, end, nextPtr);
4994 }
4995
4996 #ifdef XML_DTD
4997
4998 static enum XML_Error PTRCALL
4999 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
5000 const char **nextPtr) {
5001 enum XML_Error result = initializeEncoding(parser);
5002 if (result != XML_ERROR_NONE)
5003 return result;
5004
5005 /* we know now that XML_Parse(Buffer) has been called,
5006 so we consider the external parameter entity read */
5007 parser->m_dtd->paramEntityRead = XML_TRUE;
5008
5009 if (parser->m_prologState.inEntityValue) {
5010 parser->m_processor = entityValueInitProcessor;
5011 return entityValueInitProcessor(parser, s, end, nextPtr);
5012 } else {
5013 parser->m_processor = externalParEntProcessor;
5014 return externalParEntProcessor(parser, s, end, nextPtr);
5015 }
5016 }
5017
5018 static enum XML_Error PTRCALL
5019 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
5020 const char **nextPtr) {
5021 int tok;
5022 const char *start = s;
5023 const char *next = start;
5024 parser->m_eventPtr = start;
5025
5026 for (;;) {
5027 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
5028 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
5029 - storeEntityValue
5030 - processXmlDecl
5031 */
5032 parser->m_eventEndPtr = next;
5033 if (tok <= 0) {
5034 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5035 *nextPtr = s;
5036 return XML_ERROR_NONE;
5037 }
5038 switch (tok) {
5039 case XML_TOK_INVALID:
5040 return XML_ERROR_INVALID_TOKEN;
5041 case XML_TOK_PARTIAL:
5042 return XML_ERROR_UNCLOSED_TOKEN;
5043 case XML_TOK_PARTIAL_CHAR:
5044 return XML_ERROR_PARTIAL_CHAR;
5045 case XML_TOK_NONE: /* start == end */
5046 default:
5047 break;
5048 }
5049 /* found end of entity value - can store it now */
5050 return storeEntityValue(parser, parser->m_encoding, s, end,
5051 XML_ACCOUNT_DIRECT, NULL);
5052 } else if (tok == XML_TOK_XML_DECL) {
5053 enum XML_Error result;
5054 result = processXmlDecl(parser, 0, start, next);
5055 if (result != XML_ERROR_NONE)
5056 return result;
5057 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
5058 * that to happen, a parameter entity parsing handler must have attempted
5059 * to suspend the parser, which fails and raises an error. The parser can
5060 * be aborted, but can't be suspended.
5061 */
5062 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5063 return XML_ERROR_ABORTED;
5064 *nextPtr = next;
5065 /* stop scanning for text declaration - we found one */
5066 parser->m_processor = entityValueProcessor;
5067 return entityValueProcessor(parser, next, end, nextPtr);
5068 }
5069 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
5070 must move s and nextPtr forward to consume the BOM.
5071
5072 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
5073 would leave the BOM in the buffer and return. On the next call to this
5074 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
5075 is not valid to have multiple BOMs.
5076 */
5077 else if (tok == XML_TOK_BOM) {
5078 # if XML_GE == 1
5079 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5080 XML_ACCOUNT_DIRECT)) {
5081 accountingOnAbort(parser);
5082 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5083 }
5084 # endif
5085
5086 *nextPtr = next;
5087 s = next;
5088 }
5089 /* If we get this token, we have the start of what might be a
5090 normal tag, but not a declaration (i.e. it doesn't begin with
5091 "<!" or "<?"). In a DTD context, that isn't legal.
5092 */
5093 else if (tok == XML_TOK_INSTANCE_START) {
5094 *nextPtr = next;
5095 return XML_ERROR_SYNTAX;
5096 }
5097 start = next;
5098 parser->m_eventPtr = start;
5099 }
5100 }
5101
5102 static enum XML_Error PTRCALL
5103 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
5104 const char **nextPtr) {
5105 const char *next = s;
5106 int tok;
5107
5108 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5109 if (tok <= 0) {
5110 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5111 *nextPtr = s;
5112 return XML_ERROR_NONE;
5113 }
5114 switch (tok) {
5115 case XML_TOK_INVALID:
5116 return XML_ERROR_INVALID_TOKEN;
5117 case XML_TOK_PARTIAL:
5118 return XML_ERROR_UNCLOSED_TOKEN;
5119 case XML_TOK_PARTIAL_CHAR:
5120 return XML_ERROR_PARTIAL_CHAR;
5121 case XML_TOK_NONE: /* start == end */
5122 default:
5123 break;
5124 }
5125 }
5126 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
5127 However, when parsing an external subset, doProlog will not accept a BOM
5128 as valid, and report a syntax error, so we have to skip the BOM, and
5129 account for the BOM bytes.
5130 */
5131 else if (tok == XML_TOK_BOM) {
5132 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5133 XML_ACCOUNT_DIRECT)) {
5134 accountingOnAbort(parser);
5135 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5136 }
5137
5138 s = next;
5139 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5140 }
5141
5142 parser->m_processor = prologProcessor;
5143 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5144 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5145 XML_ACCOUNT_DIRECT);
5146 }
5147
5148 static enum XML_Error PTRCALL
5149 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
5150 const char **nextPtr) {
5151 const char *start = s;
5152 const char *next = s;
5153 const ENCODING *enc = parser->m_encoding;
5154 int tok;
5155
5156 for (;;) {
5157 tok = XmlPrologTok(enc, start, end, &next);
5158 /* Note: These bytes are accounted later in:
5159 - storeEntityValue
5160 */
5161 if (tok <= 0) {
5162 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5163 *nextPtr = s;
5164 return XML_ERROR_NONE;
5165 }
5166 switch (tok) {
5167 case XML_TOK_INVALID:
5168 return XML_ERROR_INVALID_TOKEN;
5169 case XML_TOK_PARTIAL:
5170 return XML_ERROR_UNCLOSED_TOKEN;
5171 case XML_TOK_PARTIAL_CHAR:
5172 return XML_ERROR_PARTIAL_CHAR;
5173 case XML_TOK_NONE: /* start == end */
5174 default:
5175 break;
5176 }
5177 /* found end of entity value - can store it now */
5178 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
5179 }
5180 /* If we get this token, we have the start of what might be a
5181 normal tag, but not a declaration (i.e. it doesn't begin with
5182 "<!" or "<?"). In a DTD context, that isn't legal.
5183 */
5184 else if (tok == XML_TOK_INSTANCE_START) {
5185 *nextPtr = next;
5186 return XML_ERROR_SYNTAX;
5187 }
5188
5189 start = next;
5190 }
5191 }
5192
5193 #endif /* XML_DTD */
5194
5195 static enum XML_Error PTRCALL
5196 prologProcessor(XML_Parser parser, const char *s, const char *end,
5197 const char **nextPtr) {
5198 const char *next = s;
5199 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5200 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5201 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5202 XML_ACCOUNT_DIRECT);
5203 }
5204
5205 static enum XML_Error
5206 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
5207 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
5208 XML_Bool allowClosingDoctype, enum XML_Account account) {
5209 #ifdef XML_DTD
5210 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
5211 #endif /* XML_DTD */
5212 static const XML_Char atypeCDATA[]
5213 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
5214 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
5215 static const XML_Char atypeIDREF[]
5216 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
5217 static const XML_Char atypeIDREFS[]
5218 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
5219 static const XML_Char atypeENTITY[]
5220 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
5221 static const XML_Char atypeENTITIES[]
5222 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
5223 ASCII_I, ASCII_E, ASCII_S, '\0'};
5224 static const XML_Char atypeNMTOKEN[]
5225 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
5226 static const XML_Char atypeNMTOKENS[]
5227 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
5228 ASCII_E, ASCII_N, ASCII_S, '\0'};
5229 static const XML_Char notationPrefix[]
5230 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
5231 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
5232 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
5233 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
5234
5235 #ifndef XML_DTD
5236 UNUSED_P(account);
5237 #endif
5238
5239 /* save one level of indirection */
5240 DTD *const dtd = parser->m_dtd;
5241
5242 const char **eventPP;
5243 const char **eventEndPP;
5244 enum XML_Content_Quant quant;
5245
5246 if (enc == parser->m_encoding) {
5247 eventPP = &parser->m_eventPtr;
5248 eventEndPP = &parser->m_eventEndPtr;
5249 } else {
5250 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5251 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5252 }
5253
5254 for (;;) {
5255 int role;
5256 XML_Bool handleDefault = XML_TRUE;
5257 *eventPP = s;
5258 *eventEndPP = next;
5259 if (tok <= 0) {
5260 if (haveMore && tok != XML_TOK_INVALID) {
5261 *nextPtr = s;
5262 return XML_ERROR_NONE;
5263 }
5264 switch (tok) {
5265 case XML_TOK_INVALID:
5266 *eventPP = next;
5267 return XML_ERROR_INVALID_TOKEN;
5268 case XML_TOK_PARTIAL:
5269 return XML_ERROR_UNCLOSED_TOKEN;
5270 case XML_TOK_PARTIAL_CHAR:
5271 return XML_ERROR_PARTIAL_CHAR;
5272 case -XML_TOK_PROLOG_S:
5273 tok = -tok;
5274 break;
5275 case XML_TOK_NONE:
5276 #ifdef XML_DTD
5277 /* for internal PE NOT referenced between declarations */
5278 if (enc != parser->m_encoding
5279 && ! parser->m_openInternalEntities->betweenDecl) {
5280 *nextPtr = s;
5281 return XML_ERROR_NONE;
5282 }
5283 /* WFC: PE Between Declarations - must check that PE contains
5284 complete markup, not only for external PEs, but also for
5285 internal PEs if the reference occurs between declarations.
5286 */
5287 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5288 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
5289 == XML_ROLE_ERROR)
5290 return XML_ERROR_INCOMPLETE_PE;
5291 *nextPtr = s;
5292 return XML_ERROR_NONE;
5293 }
5294 #endif /* XML_DTD */
5295 return XML_ERROR_NO_ELEMENTS;
5296 default:
5297 tok = -tok;
5298 next = end;
5299 break;
5300 }
5301 }
5302 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
5303 #if XML_GE == 1
5304 switch (role) {
5305 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
5306 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
5307 # ifdef XML_DTD
5308 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
5309 # endif
5310 break;
5311 default:
5312 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
5313 accountingOnAbort(parser);
5314 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5315 }
5316 }
5317 #endif
5318 switch (role) {
5319 case XML_ROLE_XML_DECL: {
5320 enum XML_Error result = processXmlDecl(parser, 0, s, next);
5321 if (result != XML_ERROR_NONE)
5322 return result;
5323 enc = parser->m_encoding;
5324 handleDefault = XML_FALSE;
5325 } break;
5326 case XML_ROLE_DOCTYPE_NAME:
5327 if (parser->m_startDoctypeDeclHandler) {
5328 parser->m_doctypeName
5329 = poolStoreString(&parser->m_tempPool, enc, s, next);
5330 if (! parser->m_doctypeName)
5331 return XML_ERROR_NO_MEMORY;
5332 poolFinish(&parser->m_tempPool);
5333 parser->m_doctypePubid = NULL;
5334 handleDefault = XML_FALSE;
5335 }
5336 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
5337 break;
5338 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
5339 if (parser->m_startDoctypeDeclHandler) {
5340 parser->m_startDoctypeDeclHandler(
5341 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5342 parser->m_doctypePubid, 1);
5343 parser->m_doctypeName = NULL;
5344 poolClear(&parser->m_tempPool);
5345 handleDefault = XML_FALSE;
5346 }
5347 break;
5348 #ifdef XML_DTD
5349 case XML_ROLE_TEXT_DECL: {
5350 enum XML_Error result = processXmlDecl(parser, 1, s, next);
5351 if (result != XML_ERROR_NONE)
5352 return result;
5353 enc = parser->m_encoding;
5354 handleDefault = XML_FALSE;
5355 } break;
5356 #endif /* XML_DTD */
5357 case XML_ROLE_DOCTYPE_PUBLIC_ID:
5358 #ifdef XML_DTD
5359 parser->m_useForeignDTD = XML_FALSE;
5360 parser->m_declEntity = (ENTITY *)lookup(
5361 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5362 if (! parser->m_declEntity)
5363 return XML_ERROR_NO_MEMORY;
5364 #endif /* XML_DTD */
5365 dtd->hasParamEntityRefs = XML_TRUE;
5366 if (parser->m_startDoctypeDeclHandler) {
5367 XML_Char *pubId;
5368 if (! XmlIsPublicId(enc, s, next, eventPP))
5369 return XML_ERROR_PUBLICID;
5370 pubId = poolStoreString(&parser->m_tempPool, enc,
5371 s + enc->minBytesPerChar,
5372 next - enc->minBytesPerChar);
5373 if (! pubId)
5374 return XML_ERROR_NO_MEMORY;
5375 normalizePublicId(pubId);
5376 poolFinish(&parser->m_tempPool);
5377 parser->m_doctypePubid = pubId;
5378 handleDefault = XML_FALSE;
5379 goto alreadyChecked;
5380 }
5381 /* fall through */
5382 case XML_ROLE_ENTITY_PUBLIC_ID:
5383 if (! XmlIsPublicId(enc, s, next, eventPP))
5384 return XML_ERROR_PUBLICID;
5385 alreadyChecked:
5386 if (dtd->keepProcessing && parser->m_declEntity) {
5387 XML_Char *tem
5388 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5389 next - enc->minBytesPerChar);
5390 if (! tem)
5391 return XML_ERROR_NO_MEMORY;
5392 normalizePublicId(tem);
5393 parser->m_declEntity->publicId = tem;
5394 poolFinish(&dtd->pool);
5395 /* Don't suppress the default handler if we fell through from
5396 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5397 */
5398 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5399 handleDefault = XML_FALSE;
5400 }
5401 break;
5402 case XML_ROLE_DOCTYPE_CLOSE:
5403 if (allowClosingDoctype != XML_TRUE) {
5404 /* Must not close doctype from within expanded parameter entities */
5405 return XML_ERROR_INVALID_TOKEN;
5406 }
5407
5408 if (parser->m_doctypeName) {
5409 parser->m_startDoctypeDeclHandler(
5410 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5411 parser->m_doctypePubid, 0);
5412 poolClear(&parser->m_tempPool);
5413 handleDefault = XML_FALSE;
5414 }
5415 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5416 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5417 was not set, indicating an external subset
5418 */
5419 #ifdef XML_DTD
5420 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5421 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5422 dtd->hasParamEntityRefs = XML_TRUE;
5423 if (parser->m_paramEntityParsing
5424 && parser->m_externalEntityRefHandler) {
5425 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5426 externalSubsetName, sizeof(ENTITY));
5427 if (! entity) {
5428 /* The external subset name "#" will have already been
5429 * inserted into the hash table at the start of the
5430 * external entity parsing, so no allocation will happen
5431 * and lookup() cannot fail.
5432 */
5433 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5434 }
5435 if (parser->m_useForeignDTD)
5436 entity->base = parser->m_curBase;
5437 dtd->paramEntityRead = XML_FALSE;
5438 if (! parser->m_externalEntityRefHandler(
5439 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5440 entity->systemId, entity->publicId))
5441 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5442 if (dtd->paramEntityRead) {
5443 if (! dtd->standalone && parser->m_notStandaloneHandler
5444 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5445 return XML_ERROR_NOT_STANDALONE;
5446 }
5447 /* if we didn't read the foreign DTD then this means that there
5448 is no external subset and we must reset dtd->hasParamEntityRefs
5449 */
5450 else if (! parser->m_doctypeSysid)
5451 dtd->hasParamEntityRefs = hadParamEntityRefs;
5452 /* end of DTD - no need to update dtd->keepProcessing */
5453 }
5454 parser->m_useForeignDTD = XML_FALSE;
5455 }
5456 #endif /* XML_DTD */
5457 if (parser->m_endDoctypeDeclHandler) {
5458 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5459 handleDefault = XML_FALSE;
5460 }
5461 break;
5462 case XML_ROLE_INSTANCE_START:
5463 #ifdef XML_DTD
5464 /* if there is no DOCTYPE declaration then now is the
5465 last chance to read the foreign DTD
5466 */
5467 if (parser->m_useForeignDTD) {
5468 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5469 dtd->hasParamEntityRefs = XML_TRUE;
5470 if (parser->m_paramEntityParsing
5471 && parser->m_externalEntityRefHandler) {
5472 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5473 externalSubsetName, sizeof(ENTITY));
5474 if (! entity)
5475 return XML_ERROR_NO_MEMORY;
5476 entity->base = parser->m_curBase;
5477 dtd->paramEntityRead = XML_FALSE;
5478 if (! parser->m_externalEntityRefHandler(
5479 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5480 entity->systemId, entity->publicId))
5481 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5482 if (dtd->paramEntityRead) {
5483 if (! dtd->standalone && parser->m_notStandaloneHandler
5484 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5485 return XML_ERROR_NOT_STANDALONE;
5486 }
5487 /* if we didn't read the foreign DTD then this means that there
5488 is no external subset and we must reset dtd->hasParamEntityRefs
5489 */
5490 else
5491 dtd->hasParamEntityRefs = hadParamEntityRefs;
5492 /* end of DTD - no need to update dtd->keepProcessing */
5493 }
5494 }
5495 #endif /* XML_DTD */
5496 parser->m_processor = contentProcessor;
5497 return contentProcessor(parser, s, end, nextPtr);
5498 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5499 parser->m_declElementType = getElementType(parser, enc, s, next);
5500 if (! parser->m_declElementType)
5501 return XML_ERROR_NO_MEMORY;
5502 goto checkAttListDeclHandler;
5503 case XML_ROLE_ATTRIBUTE_NAME:
5504 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5505 if (! parser->m_declAttributeId)
5506 return XML_ERROR_NO_MEMORY;
5507 parser->m_declAttributeIsCdata = XML_FALSE;
5508 parser->m_declAttributeType = NULL;
5509 parser->m_declAttributeIsId = XML_FALSE;
5510 goto checkAttListDeclHandler;
5511 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5512 parser->m_declAttributeIsCdata = XML_TRUE;
5513 parser->m_declAttributeType = atypeCDATA;
5514 goto checkAttListDeclHandler;
5515 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5516 parser->m_declAttributeIsId = XML_TRUE;
5517 parser->m_declAttributeType = atypeID;
5518 goto checkAttListDeclHandler;
5519 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5520 parser->m_declAttributeType = atypeIDREF;
5521 goto checkAttListDeclHandler;
5522 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5523 parser->m_declAttributeType = atypeIDREFS;
5524 goto checkAttListDeclHandler;
5525 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5526 parser->m_declAttributeType = atypeENTITY;
5527 goto checkAttListDeclHandler;
5528 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5529 parser->m_declAttributeType = atypeENTITIES;
5530 goto checkAttListDeclHandler;
5531 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5532 parser->m_declAttributeType = atypeNMTOKEN;
5533 goto checkAttListDeclHandler;
5534 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5535 parser->m_declAttributeType = atypeNMTOKENS;
5536 checkAttListDeclHandler:
5537 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5538 handleDefault = XML_FALSE;
5539 break;
5540 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5541 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5542 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5543 const XML_Char *prefix;
5544 if (parser->m_declAttributeType) {
5545 prefix = enumValueSep;
5546 } else {
5547 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5548 : enumValueStart);
5549 }
5550 if (! poolAppendString(&parser->m_tempPool, prefix))
5551 return XML_ERROR_NO_MEMORY;
5552 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5553 return XML_ERROR_NO_MEMORY;
5554 parser->m_declAttributeType = parser->m_tempPool.start;
5555 handleDefault = XML_FALSE;
5556 }
5557 break;
5558 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5559 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5560 if (dtd->keepProcessing) {
5561 if (! defineAttribute(parser->m_declElementType,
5562 parser->m_declAttributeId,
5563 parser->m_declAttributeIsCdata,
5564 parser->m_declAttributeIsId, 0, parser))
5565 return XML_ERROR_NO_MEMORY;
5566 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5567 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5568 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5569 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5570 /* Enumerated or Notation type */
5571 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5572 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5573 return XML_ERROR_NO_MEMORY;
5574 parser->m_declAttributeType = parser->m_tempPool.start;
5575 poolFinish(&parser->m_tempPool);
5576 }
5577 *eventEndPP = s;
5578 parser->m_attlistDeclHandler(
5579 parser->m_handlerArg, parser->m_declElementType->name,
5580 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5581 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5582 handleDefault = XML_FALSE;
5583 }
5584 }
5585 poolClear(&parser->m_tempPool);
5586 break;
5587 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5588 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5589 if (dtd->keepProcessing) {
5590 const XML_Char *attVal;
5591 enum XML_Error result = storeAttributeValue(
5592 parser, enc, parser->m_declAttributeIsCdata,
5593 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5594 XML_ACCOUNT_NONE);
5595 if (result)
5596 return result;
5597 attVal = poolStart(&dtd->pool);
5598 poolFinish(&dtd->pool);
5599 /* ID attributes aren't allowed to have a default */
5600 if (! defineAttribute(
5601 parser->m_declElementType, parser->m_declAttributeId,
5602 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5603 return XML_ERROR_NO_MEMORY;
5604 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5605 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5606 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5607 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5608 /* Enumerated or Notation type */
5609 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5610 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5611 return XML_ERROR_NO_MEMORY;
5612 parser->m_declAttributeType = parser->m_tempPool.start;
5613 poolFinish(&parser->m_tempPool);
5614 }
5615 *eventEndPP = s;
5616 parser->m_attlistDeclHandler(
5617 parser->m_handlerArg, parser->m_declElementType->name,
5618 parser->m_declAttributeId->name, parser->m_declAttributeType,
5619 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5620 poolClear(&parser->m_tempPool);
5621 handleDefault = XML_FALSE;
5622 }
5623 }
5624 break;
5625 case XML_ROLE_ENTITY_VALUE:
5626 if (dtd->keepProcessing) {
5627 #if XML_GE == 1
5628 // This will store the given replacement text in
5629 // parser->m_declEntity->textPtr.
5630 enum XML_Error result = callStoreEntityValue(
5631 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5632 XML_ACCOUNT_NONE);
5633 if (parser->m_declEntity) {
5634 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5635 parser->m_declEntity->textLen
5636 = (int)(poolLength(&dtd->entityValuePool));
5637 poolFinish(&dtd->entityValuePool);
5638 if (parser->m_entityDeclHandler) {
5639 *eventEndPP = s;
5640 parser->m_entityDeclHandler(
5641 parser->m_handlerArg, parser->m_declEntity->name,
5642 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5643 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5644 handleDefault = XML_FALSE;
5645 }
5646 } else
5647 poolDiscard(&dtd->entityValuePool);
5648 if (result != XML_ERROR_NONE)
5649 return result;
5650 #else
5651 // This will store "&entity123;" in parser->m_declEntity->textPtr
5652 // to end up as "&entity123;" in the handler.
5653 if (parser->m_declEntity != NULL) {
5654 const enum XML_Error result
5655 = storeSelfEntityValue(parser, parser->m_declEntity);
5656 if (result != XML_ERROR_NONE)
5657 return result;
5658
5659 if (parser->m_entityDeclHandler) {
5660 *eventEndPP = s;
5661 parser->m_entityDeclHandler(
5662 parser->m_handlerArg, parser->m_declEntity->name,
5663 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5664 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5665 handleDefault = XML_FALSE;
5666 }
5667 }
5668 #endif
5669 }
5670 break;
5671 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5672 #ifdef XML_DTD
5673 parser->m_useForeignDTD = XML_FALSE;
5674 #endif /* XML_DTD */
5675 dtd->hasParamEntityRefs = XML_TRUE;
5676 if (parser->m_startDoctypeDeclHandler) {
5677 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5678 s + enc->minBytesPerChar,
5679 next - enc->minBytesPerChar);
5680 if (parser->m_doctypeSysid == NULL)
5681 return XML_ERROR_NO_MEMORY;
5682 poolFinish(&parser->m_tempPool);
5683 handleDefault = XML_FALSE;
5684 }
5685 #ifdef XML_DTD
5686 else
5687 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5688 for the case where no parser->m_startDoctypeDeclHandler is set */
5689 parser->m_doctypeSysid = externalSubsetName;
5690 #endif /* XML_DTD */
5691 if (! dtd->standalone
5692 #ifdef XML_DTD
5693 && ! parser->m_paramEntityParsing
5694 #endif /* XML_DTD */
5695 && parser->m_notStandaloneHandler
5696 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5697 return XML_ERROR_NOT_STANDALONE;
5698 #ifndef XML_DTD
5699 break;
5700 #else /* XML_DTD */
5701 if (! parser->m_declEntity) {
5702 parser->m_declEntity = (ENTITY *)lookup(
5703 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5704 if (! parser->m_declEntity)
5705 return XML_ERROR_NO_MEMORY;
5706 parser->m_declEntity->publicId = NULL;
5707 }
5708 #endif /* XML_DTD */
5709 /* fall through */
5710 case XML_ROLE_ENTITY_SYSTEM_ID:
5711 if (dtd->keepProcessing && parser->m_declEntity) {
5712 parser->m_declEntity->systemId
5713 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5714 next - enc->minBytesPerChar);
5715 if (! parser->m_declEntity->systemId)
5716 return XML_ERROR_NO_MEMORY;
5717 parser->m_declEntity->base = parser->m_curBase;
5718 poolFinish(&dtd->pool);
5719 /* Don't suppress the default handler if we fell through from
5720 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5721 */
5722 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5723 handleDefault = XML_FALSE;
5724 }
5725 break;
5726 case XML_ROLE_ENTITY_COMPLETE:
5727 #if XML_GE == 0
5728 // This will store "&entity123;" in entity->textPtr
5729 // to end up as "&entity123;" in the handler.
5730 if (parser->m_declEntity != NULL) {
5731 const enum XML_Error result
5732 = storeSelfEntityValue(parser, parser->m_declEntity);
5733 if (result != XML_ERROR_NONE)
5734 return result;
5735 }
5736 #endif
5737 if (dtd->keepProcessing && parser->m_declEntity
5738 && parser->m_entityDeclHandler) {
5739 *eventEndPP = s;
5740 parser->m_entityDeclHandler(
5741 parser->m_handlerArg, parser->m_declEntity->name,
5742 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5743 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5744 handleDefault = XML_FALSE;
5745 }
5746 break;
5747 case XML_ROLE_ENTITY_NOTATION_NAME:
5748 if (dtd->keepProcessing && parser->m_declEntity) {
5749 parser->m_declEntity->notation
5750 = poolStoreString(&dtd->pool, enc, s, next);
5751 if (! parser->m_declEntity->notation)
5752 return XML_ERROR_NO_MEMORY;
5753 poolFinish(&dtd->pool);
5754 if (parser->m_unparsedEntityDeclHandler) {
5755 *eventEndPP = s;
5756 parser->m_unparsedEntityDeclHandler(
5757 parser->m_handlerArg, parser->m_declEntity->name,
5758 parser->m_declEntity->base, parser->m_declEntity->systemId,
5759 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5760 handleDefault = XML_FALSE;
5761 } else if (parser->m_entityDeclHandler) {
5762 *eventEndPP = s;
5763 parser->m_entityDeclHandler(
5764 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5765 parser->m_declEntity->base, parser->m_declEntity->systemId,
5766 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5767 handleDefault = XML_FALSE;
5768 }
5769 }
5770 break;
5771 case XML_ROLE_GENERAL_ENTITY_NAME: {
5772 if (XmlPredefinedEntityName(enc, s, next)) {
5773 parser->m_declEntity = NULL;
5774 break;
5775 }
5776 if (dtd->keepProcessing) {
5777 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5778 if (! name)
5779 return XML_ERROR_NO_MEMORY;
5780 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5781 name, sizeof(ENTITY));
5782 if (! parser->m_declEntity)
5783 return XML_ERROR_NO_MEMORY;
5784 if (parser->m_declEntity->name != name) {
5785 poolDiscard(&dtd->pool);
5786 parser->m_declEntity = NULL;
5787 } else {
5788 poolFinish(&dtd->pool);
5789 parser->m_declEntity->publicId = NULL;
5790 parser->m_declEntity->is_param = XML_FALSE;
5791 /* if we have a parent parser or are reading an internal parameter
5792 entity, then the entity declaration is not considered "internal"
5793 */
5794 parser->m_declEntity->is_internal
5795 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5796 if (parser->m_entityDeclHandler)
5797 handleDefault = XML_FALSE;
5798 }
5799 } else {
5800 poolDiscard(&dtd->pool);
5801 parser->m_declEntity = NULL;
5802 }
5803 } break;
5804 case XML_ROLE_PARAM_ENTITY_NAME:
5805 #ifdef XML_DTD
5806 if (dtd->keepProcessing) {
5807 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5808 if (! name)
5809 return XML_ERROR_NO_MEMORY;
5810 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5811 name, sizeof(ENTITY));
5812 if (! parser->m_declEntity)
5813 return XML_ERROR_NO_MEMORY;
5814 if (parser->m_declEntity->name != name) {
5815 poolDiscard(&dtd->pool);
5816 parser->m_declEntity = NULL;
5817 } else {
5818 poolFinish(&dtd->pool);
5819 parser->m_declEntity->publicId = NULL;
5820 parser->m_declEntity->is_param = XML_TRUE;
5821 /* if we have a parent parser or are reading an internal parameter
5822 entity, then the entity declaration is not considered "internal"
5823 */
5824 parser->m_declEntity->is_internal
5825 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5826 if (parser->m_entityDeclHandler)
5827 handleDefault = XML_FALSE;
5828 }
5829 } else {
5830 poolDiscard(&dtd->pool);
5831 parser->m_declEntity = NULL;
5832 }
5833 #else /* not XML_DTD */
5834 parser->m_declEntity = NULL;
5835 #endif /* XML_DTD */
5836 break;
5837 case XML_ROLE_NOTATION_NAME:
5838 parser->m_declNotationPublicId = NULL;
5839 parser->m_declNotationName = NULL;
5840 if (parser->m_notationDeclHandler) {
5841 parser->m_declNotationName
5842 = poolStoreString(&parser->m_tempPool, enc, s, next);
5843 if (! parser->m_declNotationName)
5844 return XML_ERROR_NO_MEMORY;
5845 poolFinish(&parser->m_tempPool);
5846 handleDefault = XML_FALSE;
5847 }
5848 break;
5849 case XML_ROLE_NOTATION_PUBLIC_ID:
5850 if (! XmlIsPublicId(enc, s, next, eventPP))
5851 return XML_ERROR_PUBLICID;
5852 if (parser
5853 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5854 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5855 s + enc->minBytesPerChar,
5856 next - enc->minBytesPerChar);
5857 if (! tem)
5858 return XML_ERROR_NO_MEMORY;
5859 normalizePublicId(tem);
5860 parser->m_declNotationPublicId = tem;
5861 poolFinish(&parser->m_tempPool);
5862 handleDefault = XML_FALSE;
5863 }
5864 break;
5865 case XML_ROLE_NOTATION_SYSTEM_ID:
5866 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5867 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5868 s + enc->minBytesPerChar,
5869 next - enc->minBytesPerChar);
5870 if (! systemId)
5871 return XML_ERROR_NO_MEMORY;
5872 *eventEndPP = s;
5873 parser->m_notationDeclHandler(
5874 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5875 systemId, parser->m_declNotationPublicId);
5876 handleDefault = XML_FALSE;
5877 }
5878 poolClear(&parser->m_tempPool);
5879 break;
5880 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5881 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5882 *eventEndPP = s;
5883 parser->m_notationDeclHandler(
5884 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5885 0, parser->m_declNotationPublicId);
5886 handleDefault = XML_FALSE;
5887 }
5888 poolClear(&parser->m_tempPool);
5889 break;
5890 case XML_ROLE_ERROR:
5891 switch (tok) {
5892 case XML_TOK_PARAM_ENTITY_REF:
5893 /* PE references in internal subset are
5894 not allowed within declarations. */
5895 return XML_ERROR_PARAM_ENTITY_REF;
5896 case XML_TOK_XML_DECL:
5897 return XML_ERROR_MISPLACED_XML_PI;
5898 default:
5899 return XML_ERROR_SYNTAX;
5900 }
5901 #ifdef XML_DTD
5902 case XML_ROLE_IGNORE_SECT: {
5903 enum XML_Error result;
5904 if (parser->m_defaultHandler)
5905 reportDefault(parser, enc, s, next);
5906 handleDefault = XML_FALSE;
5907 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5908 if (result != XML_ERROR_NONE)
5909 return result;
5910 else if (! next) {
5911 parser->m_processor = ignoreSectionProcessor;
5912 return result;
5913 }
5914 } break;
5915 #endif /* XML_DTD */
5916 case XML_ROLE_GROUP_OPEN:
5917 if (parser->m_prologState.level >= parser->m_groupSize) {
5918 if (parser->m_groupSize) {
5919 {
5920 /* Detect and prevent integer overflow */
5921 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5922 return XML_ERROR_NO_MEMORY;
5923 }
5924
5925 char *const new_connector = REALLOC(
5926 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5927 if (new_connector == NULL) {
5928 parser->m_groupSize /= 2;
5929 return XML_ERROR_NO_MEMORY;
5930 }
5931 parser->m_groupConnector = new_connector;
5932 }
5933
5934 if (dtd->scaffIndex) {
5935 /* Detect and prevent integer overflow.
5936 * The preprocessor guard addresses the "always false" warning
5937 * from -Wtype-limits on platforms where
5938 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5939 #if UINT_MAX >= SIZE_MAX
5940 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
5941 parser->m_groupSize /= 2;
5942 return XML_ERROR_NO_MEMORY;
5943 }
5944 #endif
5945
5946 int *const new_scaff_index = REALLOC(
5947 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5948 if (new_scaff_index == NULL) {
5949 parser->m_groupSize /= 2;
5950 return XML_ERROR_NO_MEMORY;
5951 }
5952 dtd->scaffIndex = new_scaff_index;
5953 }
5954 } else {
5955 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32);
5956 if (! parser->m_groupConnector) {
5957 parser->m_groupSize = 0;
5958 return XML_ERROR_NO_MEMORY;
5959 }
5960 }
5961 }
5962 parser->m_groupConnector[parser->m_prologState.level] = 0;
5963 if (dtd->in_eldecl) {
5964 int myindex = nextScaffoldPart(parser);
5965 if (myindex < 0)
5966 return XML_ERROR_NO_MEMORY;
5967 assert(dtd->scaffIndex != NULL);
5968 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5969 dtd->scaffLevel++;
5970 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5971 if (parser->m_elementDeclHandler)
5972 handleDefault = XML_FALSE;
5973 }
5974 break;
5975 case XML_ROLE_GROUP_SEQUENCE:
5976 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5977 return XML_ERROR_SYNTAX;
5978 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5979 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5980 handleDefault = XML_FALSE;
5981 break;
5982 case XML_ROLE_GROUP_CHOICE:
5983 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5984 return XML_ERROR_SYNTAX;
5985 if (dtd->in_eldecl
5986 && ! parser->m_groupConnector[parser->m_prologState.level]
5987 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5988 != XML_CTYPE_MIXED)) {
5989 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5990 = XML_CTYPE_CHOICE;
5991 if (parser->m_elementDeclHandler)
5992 handleDefault = XML_FALSE;
5993 }
5994 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5995 break;
5996 case XML_ROLE_PARAM_ENTITY_REF:
5997 #ifdef XML_DTD
5998 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5999 dtd->hasParamEntityRefs = XML_TRUE;
6000 if (! parser->m_paramEntityParsing)
6001 dtd->keepProcessing = dtd->standalone;
6002 else {
6003 const XML_Char *name;
6004 ENTITY *entity;
6005 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
6006 next - enc->minBytesPerChar);
6007 if (! name)
6008 return XML_ERROR_NO_MEMORY;
6009 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6010 poolDiscard(&dtd->pool);
6011 /* first, determine if a check for an existing declaration is needed;
6012 if yes, check that the entity exists, and that it is internal,
6013 otherwise call the skipped entity handler
6014 */
6015 if (parser->m_prologState.documentEntity
6016 && (dtd->standalone ? ! parser->m_openInternalEntities
6017 : ! dtd->hasParamEntityRefs)) {
6018 if (! entity)
6019 return XML_ERROR_UNDEFINED_ENTITY;
6020 else if (! entity->is_internal) {
6021 /* It's hard to exhaustively search the code to be sure,
6022 * but there doesn't seem to be a way of executing the
6023 * following line. There are two cases:
6024 *
6025 * If 'standalone' is false, the DTD must have no
6026 * parameter entities or we wouldn't have passed the outer
6027 * 'if' statement. That means the only entity in the hash
6028 * table is the external subset name "#" which cannot be
6029 * given as a parameter entity name in XML syntax, so the
6030 * lookup must have returned NULL and we don't even reach
6031 * the test for an internal entity.
6032 *
6033 * If 'standalone' is true, it does not seem to be
6034 * possible to create entities taking this code path that
6035 * are not internal entities, so fail the test above.
6036 *
6037 * Because this analysis is very uncertain, the code is
6038 * being left in place and merely removed from the
6039 * coverage test statistics.
6040 */
6041 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
6042 }
6043 } else if (! entity) {
6044 dtd->keepProcessing = dtd->standalone;
6045 /* cannot report skipped entities in declarations */
6046 if ((role == XML_ROLE_PARAM_ENTITY_REF)
6047 && parser->m_skippedEntityHandler) {
6048 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
6049 handleDefault = XML_FALSE;
6050 }
6051 break;
6052 }
6053 if (entity->open)
6054 return XML_ERROR_RECURSIVE_ENTITY_REF;
6055 if (entity->textPtr) {
6056 enum XML_Error result;
6057 XML_Bool betweenDecl
6058 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
6059 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
6060 if (result != XML_ERROR_NONE)
6061 return result;
6062 handleDefault = XML_FALSE;
6063 break;
6064 }
6065 if (parser->m_externalEntityRefHandler) {
6066 dtd->paramEntityRead = XML_FALSE;
6067 entity->open = XML_TRUE;
6068 entityTrackingOnOpen(parser, entity, __LINE__);
6069 if (! parser->m_externalEntityRefHandler(
6070 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6071 entity->systemId, entity->publicId)) {
6072 entityTrackingOnClose(parser, entity, __LINE__);
6073 entity->open = XML_FALSE;
6074 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6075 }
6076 entityTrackingOnClose(parser, entity, __LINE__);
6077 entity->open = XML_FALSE;
6078 handleDefault = XML_FALSE;
6079 if (! dtd->paramEntityRead) {
6080 dtd->keepProcessing = dtd->standalone;
6081 break;
6082 }
6083 } else {
6084 dtd->keepProcessing = dtd->standalone;
6085 break;
6086 }
6087 }
6088 #endif /* XML_DTD */
6089 if (! dtd->standalone && parser->m_notStandaloneHandler
6090 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
6091 return XML_ERROR_NOT_STANDALONE;
6092 break;
6093
6094 /* Element declaration stuff */
6095
6096 case XML_ROLE_ELEMENT_NAME:
6097 if (parser->m_elementDeclHandler) {
6098 parser->m_declElementType = getElementType(parser, enc, s, next);
6099 if (! parser->m_declElementType)
6100 return XML_ERROR_NO_MEMORY;
6101 dtd->scaffLevel = 0;
6102 dtd->scaffCount = 0;
6103 dtd->in_eldecl = XML_TRUE;
6104 handleDefault = XML_FALSE;
6105 }
6106 break;
6107
6108 case XML_ROLE_CONTENT_ANY:
6109 case XML_ROLE_CONTENT_EMPTY:
6110 if (dtd->in_eldecl) {
6111 if (parser->m_elementDeclHandler) {
// NOTE: We are avoiding MALLOC(..) here so that
// applications that are not using XML_FreeContentModel but
// plain free(..) or .free_fcn() to free the content model's
// memory are safe.
6116 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content));
6117 if (! content)
6118 return XML_ERROR_NO_MEMORY;
6119 content->quant = XML_CQUANT_NONE;
6120 content->name = NULL;
6121 content->numchildren = 0;
6122 content->children = NULL;
6123 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
6124 : XML_CTYPE_EMPTY);
6125 *eventEndPP = s;
6126 parser->m_elementDeclHandler(
6127 parser->m_handlerArg, parser->m_declElementType->name, content);
6128 handleDefault = XML_FALSE;
6129 }
6130 dtd->in_eldecl = XML_FALSE;
6131 }
6132 break;
6133
6134 case XML_ROLE_CONTENT_PCDATA:
6135 if (dtd->in_eldecl) {
6136 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
6137 = XML_CTYPE_MIXED;
6138 if (parser->m_elementDeclHandler)
6139 handleDefault = XML_FALSE;
6140 }
6141 break;
6142
6143 case XML_ROLE_CONTENT_ELEMENT:
6144 quant = XML_CQUANT_NONE;
6145 goto elementContent;
6146 case XML_ROLE_CONTENT_ELEMENT_OPT:
6147 quant = XML_CQUANT_OPT;
6148 goto elementContent;
6149 case XML_ROLE_CONTENT_ELEMENT_REP:
6150 quant = XML_CQUANT_REP;
6151 goto elementContent;
6152 case XML_ROLE_CONTENT_ELEMENT_PLUS:
6153 quant = XML_CQUANT_PLUS;
6154 elementContent:
6155 if (dtd->in_eldecl) {
6156 ELEMENT_TYPE *el;
6157 const XML_Char *name;
6158 size_t nameLen;
6159 const char *nxt
6160 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
6161 int myindex = nextScaffoldPart(parser);
6162 if (myindex < 0)
6163 return XML_ERROR_NO_MEMORY;
6164 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
6165 dtd->scaffold[myindex].quant = quant;
6166 el = getElementType(parser, enc, s, nxt);
6167 if (! el)
6168 return XML_ERROR_NO_MEMORY;
6169 name = el->name;
6170 dtd->scaffold[myindex].name = name;
6171 nameLen = 0;
6172 while (name[nameLen++])
6173 ;
6174
6175 /* Detect and prevent integer overflow */
6176 if (nameLen > UINT_MAX - dtd->contentStringLen) {
6177 return XML_ERROR_NO_MEMORY;
6178 }
6179
6180 dtd->contentStringLen += (unsigned)nameLen;
6181 if (parser->m_elementDeclHandler)
6182 handleDefault = XML_FALSE;
6183 }
6184 break;
6185
6186 case XML_ROLE_GROUP_CLOSE:
6187 quant = XML_CQUANT_NONE;
6188 goto closeGroup;
6189 case XML_ROLE_GROUP_CLOSE_OPT:
6190 quant = XML_CQUANT_OPT;
6191 goto closeGroup;
6192 case XML_ROLE_GROUP_CLOSE_REP:
6193 quant = XML_CQUANT_REP;
6194 goto closeGroup;
6195 case XML_ROLE_GROUP_CLOSE_PLUS:
6196 quant = XML_CQUANT_PLUS;
6197 closeGroup:
6198 if (dtd->in_eldecl) {
6199 if (parser->m_elementDeclHandler)
6200 handleDefault = XML_FALSE;
6201 dtd->scaffLevel--;
6202 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
6203 if (dtd->scaffLevel == 0) {
6204 if (! handleDefault) {
6205 XML_Content *model = build_model(parser);
6206 if (! model)
6207 return XML_ERROR_NO_MEMORY;
6208 *eventEndPP = s;
6209 parser->m_elementDeclHandler(
6210 parser->m_handlerArg, parser->m_declElementType->name, model);
6211 }
6212 dtd->in_eldecl = XML_FALSE;
6213 dtd->contentStringLen = 0;
6214 }
6215 }
6216 break;
6217 /* End element declaration stuff */
6218
6219 case XML_ROLE_PI:
6220 if (! reportProcessingInstruction(parser, enc, s, next))
6221 return XML_ERROR_NO_MEMORY;
6222 handleDefault = XML_FALSE;
6223 break;
6224 case XML_ROLE_COMMENT:
6225 if (! reportComment(parser, enc, s, next))
6226 return XML_ERROR_NO_MEMORY;
6227 handleDefault = XML_FALSE;
6228 break;
6229 case XML_ROLE_NONE:
6230 switch (tok) {
6231 case XML_TOK_BOM:
6232 handleDefault = XML_FALSE;
6233 break;
6234 }
6235 break;
6236 case XML_ROLE_DOCTYPE_NONE:
6237 if (parser->m_startDoctypeDeclHandler)
6238 handleDefault = XML_FALSE;
6239 break;
6240 case XML_ROLE_ENTITY_NONE:
6241 if (dtd->keepProcessing && parser->m_entityDeclHandler)
6242 handleDefault = XML_FALSE;
6243 break;
6244 case XML_ROLE_NOTATION_NONE:
6245 if (parser->m_notationDeclHandler)
6246 handleDefault = XML_FALSE;
6247 break;
6248 case XML_ROLE_ATTLIST_NONE:
6249 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
6250 handleDefault = XML_FALSE;
6251 break;
6252 case XML_ROLE_ELEMENT_NONE:
6253 if (parser->m_elementDeclHandler)
6254 handleDefault = XML_FALSE;
6255 break;
6256 } /* end of big switch */
6257
6258 if (handleDefault && parser->m_defaultHandler)
6259 reportDefault(parser, enc, s, next);
6260
6261 switch (parser->m_parsingStatus.parsing) {
6262 case XML_SUSPENDED:
6263 *nextPtr = next;
6264 return XML_ERROR_NONE;
6265 case XML_FINISHED:
6266 return XML_ERROR_ABORTED;
6267 case XML_PARSING:
6268 if (parser->m_reenter) {
6269 *nextPtr = next;
6270 return XML_ERROR_NONE;
6271 }
6272 /* Fall through */
6273 default:
6274 s = next;
6275 tok = XmlPrologTok(enc, s, end, &next);
6276 }
6277 }
6278 /* not reached */
6279 }
6280
6281 static enum XML_Error PTRCALL
6282 epilogProcessor(XML_Parser parser, const char *s, const char *end,
6283 const char **nextPtr) {
6284 parser->m_processor = epilogProcessor;
6285 parser->m_eventPtr = s;
6286 for (;;) {
6287 const char *next = NULL;
6288 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
6289 #if XML_GE == 1
6290 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
6291 XML_ACCOUNT_DIRECT)) {
6292 accountingOnAbort(parser);
6293 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6294 }
6295 #endif
6296 parser->m_eventEndPtr = next;
6297 switch (tok) {
6298 /* report partial linebreak - it might be the last token */
6299 case -XML_TOK_PROLOG_S:
6300 if (parser->m_defaultHandler) {
6301 reportDefault(parser, parser->m_encoding, s, next);
6302 if (parser->m_parsingStatus.parsing == XML_FINISHED)
6303 return XML_ERROR_ABORTED;
6304 }
6305 *nextPtr = next;
6306 return XML_ERROR_NONE;
6307 case XML_TOK_NONE:
6308 *nextPtr = s;
6309 return XML_ERROR_NONE;
6310 case XML_TOK_PROLOG_S:
6311 if (parser->m_defaultHandler)
6312 reportDefault(parser, parser->m_encoding, s, next);
6313 break;
6314 case XML_TOK_PI:
6315 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
6316 return XML_ERROR_NO_MEMORY;
6317 break;
6318 case XML_TOK_COMMENT:
6319 if (! reportComment(parser, parser->m_encoding, s, next))
6320 return XML_ERROR_NO_MEMORY;
6321 break;
6322 case XML_TOK_INVALID:
6323 parser->m_eventPtr = next;
6324 return XML_ERROR_INVALID_TOKEN;
6325 case XML_TOK_PARTIAL:
6326 if (! parser->m_parsingStatus.finalBuffer) {
6327 *nextPtr = s;
6328 return XML_ERROR_NONE;
6329 }
6330 return XML_ERROR_UNCLOSED_TOKEN;
6331 case XML_TOK_PARTIAL_CHAR:
6332 if (! parser->m_parsingStatus.finalBuffer) {
6333 *nextPtr = s;
6334 return XML_ERROR_NONE;
6335 }
6336 return XML_ERROR_PARTIAL_CHAR;
6337 default:
6338 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
6339 }
6340 switch (parser->m_parsingStatus.parsing) {
6341 case XML_SUSPENDED:
6342 parser->m_eventPtr = next;
6343 *nextPtr = next;
6344 return XML_ERROR_NONE;
6345 case XML_FINISHED:
6346 parser->m_eventPtr = next;
6347 return XML_ERROR_ABORTED;
6348 case XML_PARSING:
6349 if (parser->m_reenter) {
6350 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
6351 }
6352 /* Fall through */
6353 default:;
6354 parser->m_eventPtr = s = next;
6355 }
6356 }
6357 }
6358
6359 static enum XML_Error
6360 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
6361 enum EntityType type) {
6362 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
6363 switch (type) {
6364 case ENTITY_INTERNAL:
6365 parser->m_processor = internalEntityProcessor;
6366 openEntityList = &parser->m_openInternalEntities;
6367 freeEntityList = &parser->m_freeInternalEntities;
6368 break;
6369 case ENTITY_ATTRIBUTE:
6370 openEntityList = &parser->m_openAttributeEntities;
6371 freeEntityList = &parser->m_freeAttributeEntities;
6372 break;
6373 case ENTITY_VALUE:
6374 openEntityList = &parser->m_openValueEntities;
6375 freeEntityList = &parser->m_freeValueEntities;
6376 break;
6377 /* default case serves merely as a safety net in case of a
6378 * wrong entityType. Therefore we exclude the following lines
6379 * from the test coverage.
6380 *
6381 * LCOV_EXCL_START
6382 */
6383 default:
6384 // Should not reach here
6385 assert(0);
6386 /* LCOV_EXCL_STOP */
6387 }
6388
6389 if (*freeEntityList) {
6390 openEntity = *freeEntityList;
6391 *freeEntityList = openEntity->next;
6392 } else {
6393 openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6394 if (! openEntity)
6395 return XML_ERROR_NO_MEMORY;
6396 }
6397 entity->open = XML_TRUE;
6398 entity->hasMore = XML_TRUE;
6399 #if XML_GE == 1
6400 entityTrackingOnOpen(parser, entity, __LINE__);
6401 #endif
6402 entity->processed = 0;
6403 openEntity->next = *openEntityList;
6404 *openEntityList = openEntity;
6405 openEntity->entity = entity;
6406 openEntity->type = type;
6407 openEntity->startTagLevel = parser->m_tagLevel;
6408 openEntity->betweenDecl = betweenDecl;
6409 openEntity->internalEventPtr = NULL;
6410 openEntity->internalEventEndPtr = NULL;
6411
6412 // Only internal entities make use of the reenter flag
6413 // therefore no need to set it for other entity types
6414 if (type == ENTITY_INTERNAL) {
6415 triggerReenter(parser);
6416 }
6417 return XML_ERROR_NONE;
6418 }
6419
6420 static enum XML_Error PTRCALL
6421 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6422 const char **nextPtr) {
6423 UNUSED_P(s);
6424 UNUSED_P(end);
6425 UNUSED_P(nextPtr);
6426 ENTITY *entity;
6427 const char *textStart, *textEnd;
6428 const char *next;
6429 enum XML_Error result;
6430 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6431 if (! openEntity)
6432 return XML_ERROR_UNEXPECTED_STATE;
6433
6434 entity = openEntity->entity;
6435
6436 // This will return early
6437 if (entity->hasMore) {
6438 textStart = ((const char *)entity->textPtr) + entity->processed;
6439 textEnd = (const char *)(entity->textPtr + entity->textLen);
6440 /* Set a safe default value in case 'next' does not get set */
6441 next = textStart;
6442
6443 if (entity->is_param) {
6444 int tok
6445 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6446 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6447 tok, next, &next, XML_FALSE, XML_FALSE,
6448 XML_ACCOUNT_ENTITY_EXPANSION);
6449 } else {
6450 result = doContent(parser, openEntity->startTagLevel,
6451 parser->m_internalEncoding, textStart, textEnd, &next,
6452 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6453 }
6454
6455 if (result != XML_ERROR_NONE)
6456 return result;
6457 // Check if entity is complete, if not, mark down how much of it is
6458 // processed
6459 if (textEnd != next
6460 && (parser->m_parsingStatus.parsing == XML_SUSPENDED
6461 || (parser->m_parsingStatus.parsing == XML_PARSING
6462 && parser->m_reenter))) {
6463 entity->processed = (int)(next - (const char *)entity->textPtr);
6464 return result;
6465 }
6466
6467 // Entity is complete. We cannot close it here since we need to first
6468 // process its possible inner entities (which are added to the
6469 // m_openInternalEntities during doProlog or doContent calls above)
6470 entity->hasMore = XML_FALSE;
6471 if (! entity->is_param
6472 && (openEntity->startTagLevel != parser->m_tagLevel)) {
6473 return XML_ERROR_ASYNC_ENTITY;
6474 }
6475 triggerReenter(parser);
6476 return result;
6477 } // End of entity processing, "if" block will return here
6478
6479 // Remove fully processed openEntity from open entity list.
6480 #if XML_GE == 1
6481 entityTrackingOnClose(parser, entity, __LINE__);
6482 #endif
6483 // openEntity is m_openInternalEntities' head, as we set it at the start of
6484 // this function and we skipped doProlog and doContent calls with hasMore set
6485 // to false. This means we can directly remove the head of
6486 // m_openInternalEntities
6487 assert(parser->m_openInternalEntities == openEntity);
6488 entity->open = XML_FALSE;
6489 parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6490
6491 /* put openEntity back in list of free instances */
6492 openEntity->next = parser->m_freeInternalEntities;
6493 parser->m_freeInternalEntities = openEntity;
6494
6495 if (parser->m_openInternalEntities == NULL) {
6496 parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6497 }
6498 triggerReenter(parser);
6499 return XML_ERROR_NONE;
6500 }
6501
6502 static enum XML_Error PTRCALL
6503 errorProcessor(XML_Parser parser, const char *s, const char *end,
6504 const char **nextPtr) {
6505 UNUSED_P(s);
6506 UNUSED_P(end);
6507 UNUSED_P(nextPtr);
6508 return parser->m_errorCode;
6509 }
6510
6511 static enum XML_Error
6512 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6513 const char *ptr, const char *end, STRING_POOL *pool,
6514 enum XML_Account account) {
6515 const char *next = ptr;
6516 enum XML_Error result = XML_ERROR_NONE;
6517
6518 while (1) {
6519 if (! parser->m_openAttributeEntities) {
6520 result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6521 account, &next);
6522 } else {
6523 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6524 if (! openEntity)
6525 return XML_ERROR_UNEXPECTED_STATE;
6526
6527 ENTITY *const entity = openEntity->entity;
6528 const char *const textStart
6529 = ((const char *)entity->textPtr) + entity->processed;
6530 const char *const textEnd
6531 = (const char *)(entity->textPtr + entity->textLen);
6532 /* Set a safe default value in case 'next' does not get set */
6533 const char *nextInEntity = textStart;
6534 if (entity->hasMore) {
6535 result = appendAttributeValue(
6536 parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6537 pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6538 if (result != XML_ERROR_NONE)
6539 break;
6540 // Check if entity is complete, if not, mark down how much of it is
6541 // processed. A XML_SUSPENDED check here is not required as
6542 // appendAttributeValue will never suspend the parser.
6543 if (textEnd != nextInEntity) {
6544 entity->processed
6545 = (int)(nextInEntity - (const char *)entity->textPtr);
6546 continue;
6547 }
6548
6549 // Entity is complete. We cannot close it here since we need to first
6550 // process its possible inner entities (which are added to the
6551 // m_openAttributeEntities during appendAttributeValue)
6552 entity->hasMore = XML_FALSE;
6553 continue;
6554 } // End of entity processing, "if" block skips the rest
6555
6556 // Remove fully processed openEntity from open entity list.
6557 #if XML_GE == 1
6558 entityTrackingOnClose(parser, entity, __LINE__);
6559 #endif
6560 // openEntity is m_openAttributeEntities' head, since we set it at the
6561 // start of this function and because we skipped appendAttributeValue call
6562 // with hasMore set to false. This means we can directly remove the head
6563 // of m_openAttributeEntities
6564 assert(parser->m_openAttributeEntities == openEntity);
6565 entity->open = XML_FALSE;
6566 parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6567
6568 /* put openEntity back in list of free instances */
6569 openEntity->next = parser->m_freeAttributeEntities;
6570 parser->m_freeAttributeEntities = openEntity;
6571 }
6572
6573 // Break if an error occurred or there is nothing left to process
6574 if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6575 break;
6576 }
6577 }
6578
6579 if (result)
6580 return result;
6581 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6582 poolChop(pool);
6583 if (! poolAppendChar(pool, XML_T('\0')))
6584 return XML_ERROR_NO_MEMORY;
6585 return XML_ERROR_NONE;
6586 }
6587
6588 static enum XML_Error
6589 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6590 const char *ptr, const char *end, STRING_POOL *pool,
6591 enum XML_Account account, const char **nextPtr) {
6592 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6593 #ifndef XML_DTD
6594 UNUSED_P(account);
6595 #endif
6596
6597 for (;;) {
6598 const char *next
6599 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6600 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6601 #if XML_GE == 1
6602 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6603 accountingOnAbort(parser);
6604 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6605 }
6606 #endif
6607 switch (tok) {
6608 case XML_TOK_NONE:
6609 if (nextPtr) {
6610 *nextPtr = next;
6611 }
6612 return XML_ERROR_NONE;
6613 case XML_TOK_INVALID:
6614 if (enc == parser->m_encoding)
6615 parser->m_eventPtr = next;
6616 return XML_ERROR_INVALID_TOKEN;
6617 case XML_TOK_PARTIAL:
6618 if (enc == parser->m_encoding)
6619 parser->m_eventPtr = ptr;
6620 return XML_ERROR_INVALID_TOKEN;
6621 case XML_TOK_CHAR_REF: {
6622 XML_Char buf[XML_ENCODE_MAX];
6623 int i;
6624 int n = XmlCharRefNumber(enc, ptr);
6625 if (n < 0) {
6626 if (enc == parser->m_encoding)
6627 parser->m_eventPtr = ptr;
6628 return XML_ERROR_BAD_CHAR_REF;
6629 }
6630 if (! isCdata && n == 0x20 /* space */
6631 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6632 break;
6633 n = XmlEncode(n, (ICHAR *)buf);
6634 /* The XmlEncode() functions can never return 0 here. That
6635 * error return happens if the code point passed in is either
6636 * negative or greater than or equal to 0x110000. The
6637 * XmlCharRefNumber() functions will all return a number
6638 * strictly less than 0x110000 or a negative value if an error
6639 * occurred. The negative value is intercepted above, so
6640 * XmlEncode() is never passed a value it might return an
6641 * error for.
6642 */
6643 for (i = 0; i < n; i++) {
6644 if (! poolAppendChar(pool, buf[i]))
6645 return XML_ERROR_NO_MEMORY;
6646 }
6647 } break;
6648 case XML_TOK_DATA_CHARS:
6649 if (! poolAppend(pool, enc, ptr, next))
6650 return XML_ERROR_NO_MEMORY;
6651 break;
6652 case XML_TOK_TRAILING_CR:
6653 next = ptr + enc->minBytesPerChar;
6654 /* fall through */
6655 case XML_TOK_ATTRIBUTE_VALUE_S:
6656 case XML_TOK_DATA_NEWLINE:
6657 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6658 break;
6659 if (! poolAppendChar(pool, 0x20))
6660 return XML_ERROR_NO_MEMORY;
6661 break;
6662 case XML_TOK_ENTITY_REF: {
6663 const XML_Char *name;
6664 ENTITY *entity;
6665 bool checkEntityDecl;
6666 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6667 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6668 if (ch) {
6669 #if XML_GE == 1
6670 /* NOTE: We are replacing 4-6 characters original input for 1 character
6671 * so there is no amplification and hence recording without
6672 * protection. */
6673 accountingDiffTolerated(parser, tok, (char *)&ch,
6674 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6675 XML_ACCOUNT_ENTITY_EXPANSION);
6676 #endif /* XML_GE == 1 */
6677 if (! poolAppendChar(pool, ch))
6678 return XML_ERROR_NO_MEMORY;
6679 break;
6680 }
6681 name = poolStoreString(&parser->m_temp2Pool, enc,
6682 ptr + enc->minBytesPerChar,
6683 next - enc->minBytesPerChar);
6684 if (! name)
6685 return XML_ERROR_NO_MEMORY;
6686 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6687 poolDiscard(&parser->m_temp2Pool);
6688 /* First, determine if a check for an existing declaration is needed;
6689 if yes, check that the entity exists, and that it is internal.
6690 */
6691 if (pool == &dtd->pool) /* are we called from prolog? */
6692 checkEntityDecl =
6693 #ifdef XML_DTD
6694 parser->m_prologState.documentEntity &&
6695 #endif /* XML_DTD */
6696 (dtd->standalone ? ! parser->m_openInternalEntities
6697 : ! dtd->hasParamEntityRefs);
6698 else /* if (pool == &parser->m_tempPool): we are called from content */
6699 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6700 if (checkEntityDecl) {
6701 if (! entity)
6702 return XML_ERROR_UNDEFINED_ENTITY;
6703 else if (! entity->is_internal)
6704 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6705 } else if (! entity) {
6706 /* Cannot report skipped entity here - see comments on
6707 parser->m_skippedEntityHandler.
6708 if (parser->m_skippedEntityHandler)
6709 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6710 */
6711 /* Cannot call the default handler because this would be
6712 out of sync with the call to the startElementHandler.
6713 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6714 reportDefault(parser, enc, ptr, next);
6715 */
6716 break;
6717 }
6718 if (entity->open) {
6719 if (enc == parser->m_encoding) {
6720 /* It does not appear that this line can be executed.
6721 *
6722 * The "if (entity->open)" check catches recursive entity
6723 * definitions. In order to be called with an open
6724 * entity, it must have gone through this code before and
6725 * been through the recursive call to
6726 * appendAttributeValue() some lines below. That call
6727 * sets the local encoding ("enc") to the parser's
6728 * internal encoding (internal_utf8 or internal_utf16),
6729 * which can never be the same as the principle encoding.
6730 * It doesn't appear there is another code path that gets
6731 * here with entity->open being TRUE.
6732 *
6733 * Since it is not certain that this logic is watertight,
6734 * we keep the line and merely exclude it from coverage
6735 * tests.
6736 */
6737 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6738 }
6739 return XML_ERROR_RECURSIVE_ENTITY_REF;
6740 }
6741 if (entity->notation) {
6742 if (enc == parser->m_encoding)
6743 parser->m_eventPtr = ptr;
6744 return XML_ERROR_BINARY_ENTITY_REF;
6745 }
6746 if (! entity->textPtr) {
6747 if (enc == parser->m_encoding)
6748 parser->m_eventPtr = ptr;
6749 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6750 } else {
6751 enum XML_Error result;
6752 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6753 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6754 *nextPtr = next;
6755 }
6756 return result;
6757 }
6758 } break;
6759 default:
6760 /* The only token returned by XmlAttributeValueTok() that does
6761 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6762 * Getting that would require an entity name to contain an
6763 * incomplete XML character (e.g. \xE2\x82); however previous
6764 * tokenisers will have already recognised and rejected such
6765 * names before XmlAttributeValueTok() gets a look-in. This
6766 * default case should be retained as a safety net, but the code
6767 * excluded from coverage tests.
6768 *
6769 * LCOV_EXCL_START
6770 */
6771 if (enc == parser->m_encoding)
6772 parser->m_eventPtr = ptr;
6773 return XML_ERROR_UNEXPECTED_STATE;
6774 /* LCOV_EXCL_STOP */
6775 }
6776 ptr = next;
6777 }
6778 /* not reached */
6779 }
6780
6781 #if XML_GE == 1
6782 static enum XML_Error
6783 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6784 const char *entityTextPtr, const char *entityTextEnd,
6785 enum XML_Account account, const char **nextPtr) {
6786 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6787 STRING_POOL *pool = &(dtd->entityValuePool);
6788 enum XML_Error result = XML_ERROR_NONE;
6789 # ifdef XML_DTD
6790 int oldInEntityValue = parser->m_prologState.inEntityValue;
6791 parser->m_prologState.inEntityValue = 1;
6792 # else
6793 UNUSED_P(account);
6794 # endif /* XML_DTD */
6795 /* never return Null for the value argument in EntityDeclHandler,
6796 since this would indicate an external entity; therefore we
6797 have to make sure that entityValuePool.start is not null */
6798 if (! pool->blocks) {
6799 if (! poolGrow(pool))
6800 return XML_ERROR_NO_MEMORY;
6801 }
6802
6803 const char *next = entityTextPtr;
6804
6805 /* Nothing to tokenize. */
6806 if (entityTextPtr >= entityTextEnd) {
6807 result = XML_ERROR_NONE;
6808 goto endEntityValue;
6809 }
6810
6811 for (;;) {
6812 next
6813 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6814 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6815
6816 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6817 account)) {
6818 accountingOnAbort(parser);
6819 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6820 goto endEntityValue;
6821 }
6822
6823 switch (tok) {
6824 case XML_TOK_PARAM_ENTITY_REF:
6825 # ifdef XML_DTD
6826 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6827 const XML_Char *name;
6828 ENTITY *entity;
6829 name = poolStoreString(&parser->m_tempPool, enc,
6830 entityTextPtr + enc->minBytesPerChar,
6831 next - enc->minBytesPerChar);
6832 if (! name) {
6833 result = XML_ERROR_NO_MEMORY;
6834 goto endEntityValue;
6835 }
6836 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6837 poolDiscard(&parser->m_tempPool);
6838 if (! entity) {
6839 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6840 /* cannot report skipped entity here - see comments on
6841 parser->m_skippedEntityHandler
6842 if (parser->m_skippedEntityHandler)
6843 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6844 */
6845 dtd->keepProcessing = dtd->standalone;
6846 goto endEntityValue;
6847 }
6848 if (entity->open || (entity == parser->m_declEntity)) {
6849 if (enc == parser->m_encoding)
6850 parser->m_eventPtr = entityTextPtr;
6851 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6852 goto endEntityValue;
6853 }
6854 if (entity->systemId) {
6855 if (parser->m_externalEntityRefHandler) {
6856 dtd->paramEntityRead = XML_FALSE;
6857 entity->open = XML_TRUE;
6858 entityTrackingOnOpen(parser, entity, __LINE__);
6859 if (! parser->m_externalEntityRefHandler(
6860 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6861 entity->systemId, entity->publicId)) {
6862 entityTrackingOnClose(parser, entity, __LINE__);
6863 entity->open = XML_FALSE;
6864 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6865 goto endEntityValue;
6866 }
6867 entityTrackingOnClose(parser, entity, __LINE__);
6868 entity->open = XML_FALSE;
6869 if (! dtd->paramEntityRead)
6870 dtd->keepProcessing = dtd->standalone;
6871 } else
6872 dtd->keepProcessing = dtd->standalone;
6873 } else {
6874 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6875 goto endEntityValue;
6876 }
6877 break;
6878 }
6879 # endif /* XML_DTD */
6880 /* In the internal subset, PE references are not legal
6881 within markup declarations, e.g entity values in this case. */
6882 parser->m_eventPtr = entityTextPtr;
6883 result = XML_ERROR_PARAM_ENTITY_REF;
6884 goto endEntityValue;
6885 case XML_TOK_NONE:
6886 result = XML_ERROR_NONE;
6887 goto endEntityValue;
6888 case XML_TOK_ENTITY_REF:
6889 case XML_TOK_DATA_CHARS:
6890 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6891 result = XML_ERROR_NO_MEMORY;
6892 goto endEntityValue;
6893 }
6894 break;
6895 case XML_TOK_TRAILING_CR:
6896 next = entityTextPtr + enc->minBytesPerChar;
6897 /* fall through */
6898 case XML_TOK_DATA_NEWLINE:
6899 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6900 result = XML_ERROR_NO_MEMORY;
6901 goto endEntityValue;
6902 }
6903 *(pool->ptr)++ = 0xA;
6904 break;
6905 case XML_TOK_CHAR_REF: {
6906 XML_Char buf[XML_ENCODE_MAX];
6907 int i;
6908 int n = XmlCharRefNumber(enc, entityTextPtr);
6909 if (n < 0) {
6910 if (enc == parser->m_encoding)
6911 parser->m_eventPtr = entityTextPtr;
6912 result = XML_ERROR_BAD_CHAR_REF;
6913 goto endEntityValue;
6914 }
6915 n = XmlEncode(n, (ICHAR *)buf);
6916 /* The XmlEncode() functions can never return 0 here. That
6917 * error return happens if the code point passed in is either
6918 * negative or greater than or equal to 0x110000. The
6919 * XmlCharRefNumber() functions will all return a number
6920 * strictly less than 0x110000 or a negative value if an error
6921 * occurred. The negative value is intercepted above, so
6922 * XmlEncode() is never passed a value it might return an
6923 * error for.
6924 */
6925 for (i = 0; i < n; i++) {
6926 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6927 result = XML_ERROR_NO_MEMORY;
6928 goto endEntityValue;
6929 }
6930 *(pool->ptr)++ = buf[i];
6931 }
6932 } break;
6933 case XML_TOK_PARTIAL:
6934 if (enc == parser->m_encoding)
6935 parser->m_eventPtr = entityTextPtr;
6936 result = XML_ERROR_INVALID_TOKEN;
6937 goto endEntityValue;
6938 case XML_TOK_INVALID:
6939 if (enc == parser->m_encoding)
6940 parser->m_eventPtr = next;
6941 result = XML_ERROR_INVALID_TOKEN;
6942 goto endEntityValue;
6943 default:
6944 /* This default case should be unnecessary -- all the tokens
6945 * that XmlEntityValueTok() can return have their own explicit
6946 * cases -- but should be retained for safety. We do however
6947 * exclude it from the coverage statistics.
6948 *
6949 * LCOV_EXCL_START
6950 */
6951 if (enc == parser->m_encoding)
6952 parser->m_eventPtr = entityTextPtr;
6953 result = XML_ERROR_UNEXPECTED_STATE;
6954 goto endEntityValue;
6955 /* LCOV_EXCL_STOP */
6956 }
6957 entityTextPtr = next;
6958 }
6959 endEntityValue:
6960 # ifdef XML_DTD
6961 parser->m_prologState.inEntityValue = oldInEntityValue;
6962 # endif /* XML_DTD */
6963 // If 'nextPtr' is given, it should be updated during the processing
6964 if (nextPtr != NULL) {
6965 *nextPtr = next;
6966 }
6967 return result;
6968 }
6969
6970 static enum XML_Error
6971 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6972 const char *entityTextPtr, const char *entityTextEnd,
6973 enum XML_Account account) {
6974 const char *next = entityTextPtr;
6975 enum XML_Error result = XML_ERROR_NONE;
6976 while (1) {
6977 if (! parser->m_openValueEntities) {
6978 result
6979 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6980 } else {
6981 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6982 if (! openEntity)
6983 return XML_ERROR_UNEXPECTED_STATE;
6984
6985 ENTITY *const entity = openEntity->entity;
6986 const char *const textStart
6987 = ((const char *)entity->textPtr) + entity->processed;
6988 const char *const textEnd
6989 = (const char *)(entity->textPtr + entity->textLen);
6990 /* Set a safe default value in case 'next' does not get set */
6991 const char *nextInEntity = textStart;
6992 if (entity->hasMore) {
6993 result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6994 textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6995 &nextInEntity);
6996 if (result != XML_ERROR_NONE)
6997 break;
6998 // Check if entity is complete, if not, mark down how much of it is
6999 // processed. A XML_SUSPENDED check here is not required as
7000 // appendAttributeValue will never suspend the parser.
7001 if (textEnd != nextInEntity) {
7002 entity->processed
7003 = (int)(nextInEntity - (const char *)entity->textPtr);
7004 continue;
7005 }
7006
7007 // Entity is complete. We cannot close it here since we need to first
7008 // process its possible inner entities (which are added to the
7009 // m_openValueEntities during storeEntityValue)
7010 entity->hasMore = XML_FALSE;
7011 continue;
7012 } // End of entity processing, "if" block skips the rest
7013
7014 // Remove fully processed openEntity from open entity list.
7015 # if XML_GE == 1
7016 entityTrackingOnClose(parser, entity, __LINE__);
7017 # endif
7018 // openEntity is m_openValueEntities' head, since we set it at the
7019 // start of this function and because we skipped storeEntityValue call
7020 // with hasMore set to false. This means we can directly remove the head
7021 // of m_openValueEntities
7022 assert(parser->m_openValueEntities == openEntity);
7023 entity->open = XML_FALSE;
7024 parser->m_openValueEntities = parser->m_openValueEntities->next;
7025
7026 /* put openEntity back in list of free instances */
7027 openEntity->next = parser->m_freeValueEntities;
7028 parser->m_freeValueEntities = openEntity;
7029 }
7030
7031 // Break if an error occurred or there is nothing left to process
7032 if (result
7033 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
7034 break;
7035 }
7036 }
7037
7038 return result;
7039 }
7040
7041 #else /* XML_GE == 0 */
7042
7043 static enum XML_Error
7044 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
7045 // This will store "&entity123;" in entity->textPtr
7046 // to end up as "&entity123;" in the handler.
7047 const char *const entity_start = "&";
7048 const char *const entity_end = ";";
7049
7050 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
7051 if (! poolAppendString(pool, entity_start)
7052 || ! poolAppendString(pool, entity->name)
7053 || ! poolAppendString(pool, entity_end)) {
7054 poolDiscard(pool);
7055 return XML_ERROR_NO_MEMORY;
7056 }
7057
7058 entity->textPtr = poolStart(pool);
7059 entity->textLen = (int)(poolLength(pool));
7060 poolFinish(pool);
7061
7062 return XML_ERROR_NONE;
7063 }
7064
7065 #endif /* XML_GE == 0 */
7066
7067 static void FASTCALL
7068 normalizeLines(XML_Char *s) {
7069 XML_Char *p;
7070 for (;; s++) {
7071 if (*s == XML_T('\0'))
7072 return;
7073 if (*s == 0xD)
7074 break;
7075 }
7076 p = s;
7077 do {
7078 if (*s == 0xD) {
7079 *p++ = 0xA;
7080 if (*++s == 0xA)
7081 s++;
7082 } else
7083 *p++ = *s++;
7084 } while (*s);
7085 *p = XML_T('\0');
7086 }
7087
7088 static int
7089 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
7090 const char *start, const char *end) {
7091 const XML_Char *target;
7092 XML_Char *data;
7093 const char *tem;
7094 if (! parser->m_processingInstructionHandler) {
7095 if (parser->m_defaultHandler)
7096 reportDefault(parser, enc, start, end);
7097 return 1;
7098 }
7099 start += enc->minBytesPerChar * 2;
7100 tem = start + XmlNameLength(enc, start);
7101 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
7102 if (! target)
7103 return 0;
7104 poolFinish(&parser->m_tempPool);
7105 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
7106 end - enc->minBytesPerChar * 2);
7107 if (! data)
7108 return 0;
7109 normalizeLines(data);
7110 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
7111 poolClear(&parser->m_tempPool);
7112 return 1;
7113 }
7114
7115 static int
7116 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
7117 const char *end) {
7118 XML_Char *data;
7119 if (! parser->m_commentHandler) {
7120 if (parser->m_defaultHandler)
7121 reportDefault(parser, enc, start, end);
7122 return 1;
7123 }
7124 data = poolStoreString(&parser->m_tempPool, enc,
7125 start + enc->minBytesPerChar * 4,
7126 end - enc->minBytesPerChar * 3);
7127 if (! data)
7128 return 0;
7129 normalizeLines(data);
7130 parser->m_commentHandler(parser->m_handlerArg, data);
7131 poolClear(&parser->m_tempPool);
7132 return 1;
7133 }
7134
7135 static void
7136 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
7137 const char *end) {
7138 if (MUST_CONVERT(enc, s)) {
7139 enum XML_Convert_Result convert_res;
7140 const char **eventPP;
7141 const char **eventEndPP;
7142 if (enc == parser->m_encoding) {
7143 eventPP = &parser->m_eventPtr;
7144 eventEndPP = &parser->m_eventEndPtr;
7145 } else {
7146 /* To get here, two things must be true; the parser must be
7147 * using a character encoding that is not the same as the
7148 * encoding passed in, and the encoding passed in must need
7149 * conversion to the internal format (UTF-8 unless XML_UNICODE
7150 * is defined). The only occasions on which the encoding passed
7151 * in is not the same as the parser's encoding are when it is
7152 * the internal encoding (e.g. a previously defined parameter
7153 * entity, already converted to internal format). This by
7154 * definition doesn't need conversion, so the whole branch never
7155 * gets executed.
7156 *
7157 * For safety's sake we don't delete these lines and merely
7158 * exclude them from coverage statistics.
7159 *
7160 * LCOV_EXCL_START
7161 */
7162 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
7163 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
7164 /* LCOV_EXCL_STOP */
7165 }
7166 do {
7167 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
7168 convert_res
7169 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
7170 *eventEndPP = s;
7171 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
7172 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
7173 *eventPP = s;
7174 } while ((convert_res != XML_CONVERT_COMPLETED)
7175 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
7176 } else
7177 parser->m_defaultHandler(
7178 parser->m_handlerArg, (const XML_Char *)s,
7179 (int)((const XML_Char *)end - (const XML_Char *)s));
7180 }
7181
7182 static int
7183 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
7184 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
7185 DEFAULT_ATTRIBUTE *att;
7186 if (value || isId) {
7187 /* The handling of default attributes gets messed up if we have
7188 a default which duplicates a non-default. */
7189 int i;
7190 for (i = 0; i < type->nDefaultAtts; i++)
7191 if (attId == type->defaultAtts[i].id)
7192 return 1;
7193 if (isId && ! type->idAtt && ! attId->xmlns)
7194 type->idAtt = attId;
7195 }
7196 if (type->nDefaultAtts == type->allocDefaultAtts) {
7197 if (type->allocDefaultAtts == 0) {
7198 type->allocDefaultAtts = 8;
7199 type->defaultAtts
7200 = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7201 if (! type->defaultAtts) {
7202 type->allocDefaultAtts = 0;
7203 return 0;
7204 }
7205 } else {
7206 DEFAULT_ATTRIBUTE *temp;
7207
7208 /* Detect and prevent integer overflow */
7209 if (type->allocDefaultAtts > INT_MAX / 2) {
7210 return 0;
7211 }
7212
7213 int count = type->allocDefaultAtts * 2;
7214
7215 /* Detect and prevent integer overflow.
7216 * The preprocessor guard addresses the "always false" warning
7217 * from -Wtype-limits on platforms where
7218 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7219 #if UINT_MAX >= SIZE_MAX
7220 if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
7221 return 0;
7222 }
7223 #endif
7224
7225 temp = REALLOC(parser, type->defaultAtts,
7226 (count * sizeof(DEFAULT_ATTRIBUTE)));
7227 if (temp == NULL)
7228 return 0;
7229 type->allocDefaultAtts = count;
7230 type->defaultAtts = temp;
7231 }
7232 }
7233 att = type->defaultAtts + type->nDefaultAtts;
7234 att->id = attId;
7235 att->value = value;
7236 att->isCdata = isCdata;
7237 if (! isCdata)
7238 attId->maybeTokenized = XML_TRUE;
7239 type->nDefaultAtts += 1;
7240 return 1;
7241 }
7242
7243 static int
7244 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
7245 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7246 const XML_Char *name;
7247 for (name = elementType->name; *name; name++) {
7248 if (*name == XML_T(ASCII_COLON)) {
7249 PREFIX *prefix;
7250 const XML_Char *s;
7251 for (s = elementType->name; s != name; s++) {
7252 if (! poolAppendChar(&dtd->pool, *s))
7253 return 0;
7254 }
7255 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7256 return 0;
7257 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
7258 sizeof(PREFIX));
7259 if (! prefix)
7260 return 0;
7261 if (prefix->name == poolStart(&dtd->pool))
7262 poolFinish(&dtd->pool);
7263 else
7264 poolDiscard(&dtd->pool);
7265 elementType->prefix = prefix;
7266 break;
7267 }
7268 }
7269 return 1;
7270 }
7271
7272 static ATTRIBUTE_ID *
7273 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
7274 const char *end) {
7275 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7276 ATTRIBUTE_ID *id;
7277 const XML_Char *name;
7278 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7279 return NULL;
7280 name = poolStoreString(&dtd->pool, enc, start, end);
7281 if (! name)
7282 return NULL;
7283 /* skip quotation mark - its storage will be reused (like in name[-1]) */
7284 ++name;
7285 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
7286 sizeof(ATTRIBUTE_ID));
7287 if (! id)
7288 return NULL;
7289 if (id->name != name)
7290 poolDiscard(&dtd->pool);
7291 else {
7292 poolFinish(&dtd->pool);
7293 if (! parser->m_ns)
7294 ;
7295 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
7296 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
7297 && name[4] == XML_T(ASCII_s)
7298 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
7299 if (name[5] == XML_T('\0'))
7300 id->prefix = &dtd->defaultPrefix;
7301 else
7302 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
7303 sizeof(PREFIX));
7304 id->xmlns = XML_TRUE;
7305 } else {
7306 int i;
7307 for (i = 0; name[i]; i++) {
7308 /* attributes without prefix are *not* in the default namespace */
7309 if (name[i] == XML_T(ASCII_COLON)) {
7310 int j;
7311 for (j = 0; j < i; j++) {
7312 if (! poolAppendChar(&dtd->pool, name[j]))
7313 return NULL;
7314 }
7315 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7316 return NULL;
7317 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
7318 poolStart(&dtd->pool), sizeof(PREFIX));
7319 if (! id->prefix)
7320 return NULL;
7321 if (id->prefix->name == poolStart(&dtd->pool))
7322 poolFinish(&dtd->pool);
7323 else
7324 poolDiscard(&dtd->pool);
7325 break;
7326 }
7327 }
7328 }
7329 }
7330 return id;
7331 }
7332
/* Character that separates the individual items of a context string, as
   written by getContext() and consumed by setContext(). */
#define CONTEXT_SEP XML_T(ASCII_FF)
7334
7335 static const XML_Char *
7336 getContext(XML_Parser parser) {
7337 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7338 HASH_TABLE_ITER iter;
7339 XML_Bool needSep = XML_FALSE;
7340
7341 if (dtd->defaultPrefix.binding) {
7342 int i;
7343 int len;
7344 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7345 return NULL;
7346 len = dtd->defaultPrefix.binding->uriLen;
7347 if (parser->m_namespaceSeparator)
7348 len--;
7349 for (i = 0; i < len; i++) {
7350 if (! poolAppendChar(&parser->m_tempPool,
7351 dtd->defaultPrefix.binding->uri[i])) {
7352 /* Because of memory caching, I don't believe this line can be
7353 * executed.
7354 *
7355 * This is part of a loop copying the default prefix binding
7356 * URI into the parser's temporary string pool. Previously,
7357 * that URI was copied into the same string pool, with a
7358 * terminating NUL character, as part of setContext(). When
7359 * the pool was cleared, that leaves a block definitely big
7360 * enough to hold the URI on the free block list of the pool.
7361 * The URI copy in getContext() therefore cannot run out of
7362 * memory.
7363 *
7364 * If the pool is used between the setContext() and
7365 * getContext() calls, the worst it can do is leave a bigger
7366 * block on the front of the free list. Given that this is
7367 * all somewhat inobvious and program logic can be changed, we
7368 * don't delete the line but we do exclude it from the test
7369 * coverage statistics.
7370 */
7371 return NULL; /* LCOV_EXCL_LINE */
7372 }
7373 }
7374 needSep = XML_TRUE;
7375 }
7376
7377 hashTableIterInit(&iter, &(dtd->prefixes));
7378 for (;;) {
7379 int i;
7380 int len;
7381 const XML_Char *s;
7382 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
7383 if (! prefix)
7384 break;
7385 if (! prefix->binding) {
7386 /* This test appears to be (justifiable) paranoia. There does
7387 * not seem to be a way of injecting a prefix without a binding
7388 * that doesn't get errored long before this function is called.
7389 * The test should remain for safety's sake, so we instead
7390 * exclude the following line from the coverage statistics.
7391 */
7392 continue; /* LCOV_EXCL_LINE */
7393 }
7394 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7395 return NULL;
7396 for (s = prefix->name; *s; s++)
7397 if (! poolAppendChar(&parser->m_tempPool, *s))
7398 return NULL;
7399 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7400 return NULL;
7401 len = prefix->binding->uriLen;
7402 if (parser->m_namespaceSeparator)
7403 len--;
7404 for (i = 0; i < len; i++)
7405 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7406 return NULL;
7407 needSep = XML_TRUE;
7408 }
7409
7410 hashTableIterInit(&iter, &(dtd->generalEntities));
7411 for (;;) {
7412 const XML_Char *s;
7413 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7414 if (! e)
7415 break;
7416 if (! e->open)
7417 continue;
7418 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7419 return NULL;
7420 for (s = e->name; *s; s++)
7421 if (! poolAppendChar(&parser->m_tempPool, *s))
7422 return 0;
7423 needSep = XML_TRUE;
7424 }
7425
7426 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7427 return NULL;
7428 return parser->m_tempPool.start;
7429 }
7430
/* Apply a context string (items separated by CONTEXT_SEP) to the parser.
 *
 * Two kinds of items are recognised:
 *
 *  - "NAME": if dtd->generalEntities has an entry NAME, it is marked open.
 *  - "PREFIX=URI": PREFIX (the default prefix when empty) is bound to URI
 *    through addBinding() on parser->m_inheritedBindings.
 *
 * parser->m_tempPool is used as scratch space for the item being scanned.
 *
 * Returns XML_TRUE on success and XML_FALSE if context is NULL, if a pool
 * append or prefix lookup fails, or if addBinding() reports an error.
 */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* s is the scanning cursor; context trails behind and is only advanced to
   * the start of the next item once the current one has been processed. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an item without '=': the pool holds an entity name */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      /* createSize 0: only find existing entities, never create one */
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The pool holds the (possibly empty) prefix name; the URI follows */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        /* Copy the name into the DTD pool without finishing it, so that
         * the copy can still be discarded if it turns out to be unused. */
        const XML_Char *const prefixName = poolCopyStringNoFinish(
            &dtd->pool, poolStart(&parser->m_tempPool));
        if (! prefixName) {
          return XML_FALSE;
        }

        prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName,
                                  sizeof(PREFIX));

        /* Keep the copy only if a new table entry now refers to it */
        const bool prefixNameUsed = prefix && prefix->name == prefixName;
        if (prefixNameUsed)
          poolFinish(&dtd->pool);
        else
          poolDiscard(&dtd->pool);

        if (! prefix)
          return XML_FALSE;

        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI: everything up to the next separator or the end */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: accumulate it as part of the current name */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
7502
7503 static void FASTCALL
7504 normalizePublicId(XML_Char *publicId) {
7505 XML_Char *p = publicId;
7506 XML_Char *s;
7507 for (s = publicId; *s; s++) {
7508 switch (*s) {
7509 case 0x20:
7510 case 0xD:
7511 case 0xA:
7512 if (p != publicId && p[-1] != 0x20)
7513 *p++ = 0x20;
7514 break;
7515 default:
7516 *p++ = *s;
7517 }
7518 }
7519 if (p != publicId && p[-1] == 0x20)
7520 --p;
7521 *p = XML_T('\0');
7522 }
7523
7524 static DTD *
7525 dtdCreate(XML_Parser parser) {
7526 DTD *p = MALLOC(parser, sizeof(DTD));
7527 if (p == NULL)
7528 return p;
7529 poolInit(&(p->pool), parser);
7530 poolInit(&(p->entityValuePool), parser);
7531 hashTableInit(&(p->generalEntities), parser);
7532 hashTableInit(&(p->elementTypes), parser);
7533 hashTableInit(&(p->attributeIds), parser);
7534 hashTableInit(&(p->prefixes), parser);
7535 #ifdef XML_DTD
7536 p->paramEntityRead = XML_FALSE;
7537 hashTableInit(&(p->paramEntities), parser);
7538 #endif /* XML_DTD */
7539 p->defaultPrefix.name = NULL;
7540 p->defaultPrefix.binding = NULL;
7541
7542 p->in_eldecl = XML_FALSE;
7543 p->scaffIndex = NULL;
7544 p->scaffold = NULL;
7545 p->scaffLevel = 0;
7546 p->scaffSize = 0;
7547 p->scaffCount = 0;
7548 p->contentStringLen = 0;
7549
7550 p->keepProcessing = XML_TRUE;
7551 p->hasParamEntityRefs = XML_FALSE;
7552 p->standalone = XML_FALSE;
7553 return p;
7554 }
7555
7556 static void
7557 dtdReset(DTD *p, XML_Parser parser) {
7558 HASH_TABLE_ITER iter;
7559 hashTableIterInit(&iter, &(p->elementTypes));
7560 for (;;) {
7561 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7562 if (! e)
7563 break;
7564 if (e->allocDefaultAtts != 0)
7565 FREE(parser, e->defaultAtts);
7566 }
7567 hashTableClear(&(p->generalEntities));
7568 #ifdef XML_DTD
7569 p->paramEntityRead = XML_FALSE;
7570 hashTableClear(&(p->paramEntities));
7571 #endif /* XML_DTD */
7572 hashTableClear(&(p->elementTypes));
7573 hashTableClear(&(p->attributeIds));
7574 hashTableClear(&(p->prefixes));
7575 poolClear(&(p->pool));
7576 poolClear(&(p->entityValuePool));
7577 p->defaultPrefix.name = NULL;
7578 p->defaultPrefix.binding = NULL;
7579
7580 p->in_eldecl = XML_FALSE;
7581
7582 FREE(parser, p->scaffIndex);
7583 p->scaffIndex = NULL;
7584 FREE(parser, p->scaffold);
7585 p->scaffold = NULL;
7586
7587 p->scaffLevel = 0;
7588 p->scaffSize = 0;
7589 p->scaffCount = 0;
7590 p->contentStringLen = 0;
7591
7592 p->keepProcessing = XML_TRUE;
7593 p->hasParamEntityRefs = XML_FALSE;
7594 p->standalone = XML_FALSE;
7595 }
7596
7597 static void
7598 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) {
7599 HASH_TABLE_ITER iter;
7600 hashTableIterInit(&iter, &(p->elementTypes));
7601 for (;;) {
7602 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7603 if (! e)
7604 break;
7605 if (e->allocDefaultAtts != 0)
7606 FREE(parser, e->defaultAtts);
7607 }
7608 hashTableDestroy(&(p->generalEntities));
7609 #ifdef XML_DTD
7610 hashTableDestroy(&(p->paramEntities));
7611 #endif /* XML_DTD */
7612 hashTableDestroy(&(p->elementTypes));
7613 hashTableDestroy(&(p->attributeIds));
7614 hashTableDestroy(&(p->prefixes));
7615 poolDestroy(&(p->pool));
7616 poolDestroy(&(p->entityValuePool));
7617 if (isDocEntity) {
7618 FREE(parser, p->scaffIndex);
7619 FREE(parser, p->scaffold);
7620 }
7621 FREE(parser, p);
7622 }
7623
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Copied are, in this order: the prefix table, the attribute id table, the
   element type table (including default attributes) and the entity tables.
   All strings are duplicated into newDtd->pool.  The scaffolding fields
   are copied as plain values, i.e. the arrays end up shared between both
   DTDs.

   oldParser is handed to lookup() for the new tables, parser is used for
   allocating the default attribute arrays.
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        XML_Parser parser) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    /* Only the names are copied here; bindings are not transferred */
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Step over the scratch byte so that name points at the real name */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* The default prefix lives inside the DTD structure rather than in
         the prefix table, so it is mapped by address. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        /* Find-only lookup (createSize 0); expected to succeed because
           every old prefix name was copied above. */
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
        return 0;
      }
#endif
      newE->defaultAtts
          = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    /* The copy is allocated with exactly as many slots as are in use */
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      /* Re-point each default attribute at the id copied above */
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */
7761
7762 static int
7763 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7764 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7765 HASH_TABLE_ITER iter;
7766 const XML_Char *cachedOldBase = NULL;
7767 const XML_Char *cachedNewBase = NULL;
7768
7769 hashTableIterInit(&iter, oldTable);
7770
7771 for (;;) {
7772 ENTITY *newE;
7773 const XML_Char *name;
7774 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7775 if (! oldE)
7776 break;
7777 name = poolCopyString(newPool, oldE->name);
7778 if (! name)
7779 return 0;
7780 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7781 if (! newE)
7782 return 0;
7783 if (oldE->systemId) {
7784 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7785 if (! tem)
7786 return 0;
7787 newE->systemId = tem;
7788 if (oldE->base) {
7789 if (oldE->base == cachedOldBase)
7790 newE->base = cachedNewBase;
7791 else {
7792 cachedOldBase = oldE->base;
7793 tem = poolCopyString(newPool, cachedOldBase);
7794 if (! tem)
7795 return 0;
7796 cachedNewBase = newE->base = tem;
7797 }
7798 }
7799 if (oldE->publicId) {
7800 tem = poolCopyString(newPool, oldE->publicId);
7801 if (! tem)
7802 return 0;
7803 newE->publicId = tem;
7804 }
7805 } else {
7806 const XML_Char *tem
7807 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7808 if (! tem)
7809 return 0;
7810 newE->textPtr = tem;
7811 newE->textLen = oldE->textLen;
7812 }
7813 if (oldE->notation) {
7814 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7815 if (! tem)
7816 return 0;
7817 newE->notation = tem;
7818 }
7819 newE->is_param = oldE->is_param;
7820 newE->is_internal = oldE->is_internal;
7821 }
7822 return 1;
7823 }
7824
/* A hash table is created by lookup() with 2^INIT_POWER slots. */
#define INIT_POWER 6
7826
7827 static XML_Bool FASTCALL
7828 keyeq(KEY s1, KEY s2) {
7829 for (; *s1 == *s2; s1++, s2++)
7830 if (*s1 == 0)
7831 return XML_TRUE;
7832 return XML_FALSE;
7833 }
7834
7835 static size_t
7836 keylen(KEY s) {
7837 size_t len = 0;
7838 for (; *s; s++, len++)
7839 ;
7840 return len;
7841 }
7842
7843 static void
7844 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7845 key->k[0] = 0;
7846 key->k[1] = get_hash_secret_salt(parser);
7847 }
7848
7849 static unsigned long FASTCALL
7850 hash(XML_Parser parser, KEY s) {
7851 struct siphash state;
7852 struct sipkey key;
7853 (void)sip24_valid;
7854 copy_salt_to_sipkey(parser, &key);
7855 sip24_init(&state, &key);
7856 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7857 return (unsigned long)sip24_final(&state);
7858 }
7859
/* Find the entry called name in table, optionally creating it.
 *
 * table->v is an array of table->size (a power of two) slots.  A key is
 * hashed to a starting slot; on a collision, further slots of the same
 * array are probed with a step obtained from PROBE_STEP().
 *
 * If no entry exists and createSize is non-zero, a new entry of createSize
 * bytes is allocated, zero-filled and inserted.  The name pointer itself
 * is stored in the entry (the string is not copied), which lets callers
 * detect a fresh entry by comparing the returned entry's name with the
 * pointer they passed in.  Before inserting, the table is created on first
 * use, or doubled in size once table->used has reached half of its size.
 *
 * Returns the entry, or NULL if it does not exist and createSize is 0, if
 * the table cannot grow any further, or if an allocation failed.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing to find; create the slot array if asked to */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = MALLOC(table->parser, tsize);
    if (! table->v) {
      /* Keep the table in its consistent "empty" state */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until the key or an empty slot is found */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      /* The step is computed lazily, on the first collision only */
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      /* Step backwards, wrapping around at slot 0 */
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > SIZE_MAX / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = MALLOC(table->parser, tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the larger array */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      FREE(table->parser, table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* Find the free slot for the new key in the resized table */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* i now denotes an empty slot: create the new entry there */
  table->v[i] = MALLOC(table->parser, createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7948
7949 static void FASTCALL
7950 hashTableClear(HASH_TABLE *table) {
7951 size_t i;
7952 for (i = 0; i < table->size; i++) {
7953 FREE(table->parser, table->v[i]);
7954 table->v[i] = NULL;
7955 }
7956 table->used = 0;
7957 }
7958
7959 static void FASTCALL
7960 hashTableDestroy(HASH_TABLE *table) {
7961 size_t i;
7962 for (i = 0; i < table->size; i++)
7963 FREE(table->parser, table->v[i]);
7964 FREE(table->parser, table->v);
7965 }
7966
7967 static void FASTCALL
7968 hashTableInit(HASH_TABLE *p, XML_Parser parser) {
7969 p->power = 0;
7970 p->size = 0;
7971 p->used = 0;
7972 p->v = NULL;
7973 p->parser = parser;
7974 }
7975
7976 static void FASTCALL
7977 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7978 iter->p = table->v;
7979 iter->end = iter->p ? iter->p + table->size : NULL;
7980 }
7981
7982 static NAMED *FASTCALL
7983 hashTableIterNext(HASH_TABLE_ITER *iter) {
7984 while (iter->p != iter->end) {
7985 NAMED *tem = *(iter->p)++;
7986 if (tem)
7987 return tem;
7988 }
7989 return NULL;
7990 }
7991
7992 static void FASTCALL
7993 poolInit(STRING_POOL *pool, XML_Parser parser) {
7994 pool->blocks = NULL;
7995 pool->freeBlocks = NULL;
7996 pool->start = NULL;
7997 pool->ptr = NULL;
7998 pool->end = NULL;
7999 pool->parser = parser;
8000 }
8001
8002 static void FASTCALL
8003 poolClear(STRING_POOL *pool) {
8004 if (! pool->freeBlocks)
8005 pool->freeBlocks = pool->blocks;
8006 else {
8007 BLOCK *p = pool->blocks;
8008 while (p) {
8009 BLOCK *tem = p->next;
8010 p->next = pool->freeBlocks;
8011 pool->freeBlocks = p;
8012 p = tem;
8013 }
8014 }
8015 pool->blocks = NULL;
8016 pool->start = NULL;
8017 pool->ptr = NULL;
8018 pool->end = NULL;
8019 }
8020
8021 static void FASTCALL
8022 poolDestroy(STRING_POOL *pool) {
8023 BLOCK *p = pool->blocks;
8024 while (p) {
8025 BLOCK *tem = p->next;
8026 FREE(pool->parser, p);
8027 p = tem;
8028 }
8029 p = pool->freeBlocks;
8030 while (p) {
8031 BLOCK *tem = p->next;
8032 FREE(pool->parser, p);
8033 p = tem;
8034 }
8035 }
8036
8037 static XML_Char *
8038 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8039 const char *end) {
8040 if (! pool->ptr && ! poolGrow(pool))
8041 return NULL;
8042 for (;;) {
8043 const enum XML_Convert_Result convert_res = XmlConvert(
8044 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
8045 if ((convert_res == XML_CONVERT_COMPLETED)
8046 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
8047 break;
8048 if (! poolGrow(pool))
8049 return NULL;
8050 }
8051 return pool->start;
8052 }
8053
8054 static const XML_Char *FASTCALL
8055 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
8056 do {
8057 if (! poolAppendChar(pool, *s))
8058 return NULL;
8059 } while (*s++);
8060 s = pool->start;
8061 poolFinish(pool);
8062 return s;
8063 }
8064
8065 // A version of `poolCopyString` that does not call `poolFinish`
8066 // and reverts any partial advancement upon failure.
8067 static const XML_Char *FASTCALL
8068 poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) {
8069 const XML_Char *const original = s;
8070 do {
8071 if (! poolAppendChar(pool, *s)) {
8072 // Revert any previously successful advancement
8073 const ptrdiff_t advancedBy = s - original;
8074 if (advancedBy > 0)
8075 pool->ptr -= advancedBy;
8076 return NULL;
8077 }
8078 } while (*s++);
8079 return pool->start;
8080 }
8081
8082 static const XML_Char *
8083 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
8084 if (! pool->ptr && ! poolGrow(pool)) {
8085 /* The following line is unreachable given the current usage of
8086 * poolCopyStringN(). Currently it is called from exactly one
8087 * place to copy the text of a simple general entity. By that
8088 * point, the name of the entity is already stored in the pool, so
8089 * pool->ptr cannot be NULL.
8090 *
8091 * If poolCopyStringN() is used elsewhere as it well might be,
8092 * this line may well become executable again. Regardless, this
8093 * sort of check shouldn't be removed lightly, so we just exclude
8094 * it from the coverage statistics.
8095 */
8096 return NULL; /* LCOV_EXCL_LINE */
8097 }
8098 for (; n > 0; --n, s++) {
8099 if (! poolAppendChar(pool, *s))
8100 return NULL;
8101 }
8102 s = pool->start;
8103 poolFinish(pool);
8104 return s;
8105 }
8106
8107 static const XML_Char *FASTCALL
8108 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
8109 while (*s) {
8110 if (! poolAppendChar(pool, *s))
8111 return NULL;
8112 s++;
8113 }
8114 return pool->start;
8115 }
8116
8117 static XML_Char *
8118 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8119 const char *end) {
8120 if (! poolAppend(pool, enc, ptr, end))
8121 return NULL;
8122 if (pool->ptr == pool->end && ! poolGrow(pool))
8123 return NULL;
8124 *(pool->ptr)++ = 0;
8125 return pool->start;
8126 }
8127
8128 static size_t
8129 poolBytesToAllocateFor(int blockSize) {
8130 /* Unprotected math would be:
8131 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
8132 **
8133 ** Detect overflow, avoiding _signed_ overflow undefined behavior
8134 ** For a + b * c we check b * c in isolation first, so that addition of a
8135 ** on top has no chance of making us accept a small non-negative number
8136 */
8137 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
8138
8139 if (blockSize <= 0)
8140 return 0;
8141
8142 if (blockSize > (int)(INT_MAX / stretch))
8143 return 0;
8144
8145 {
8146 const int stretchedBlockSize = blockSize * (int)stretch;
8147 const int bytesToAllocate
8148 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
8149 if (bytesToAllocate < 0)
8150 return 0;
8151
8152 return (size_t)bytesToAllocate;
8153 }
8154 }
8155
/* Make room for more characters in the pool's current string.
 *
 * The following strategies are tried in order:
 *
 *  1. If the pool has free blocks and no current string storage
 *     (pool->start is NULL), the first free block becomes the current
 *     block.
 *  2. If the first free block is larger than the current capacity
 *     (pool->end - pool->start), it is moved to the list of used blocks
 *     and the current string is copied into it.
 *  3. If the current string starts at the very beginning of the newest
 *     used block, that block is reallocated to twice the current capacity.
 *  4. Otherwise a new block is allocated, with INIT_BLOCK_SIZE characters
 *     or twice the current capacity, whichever applies, and the current
 *     string is copied into it.
 *
 * In all cases pool->start, pool->ptr and pool->end are updated so that
 * the current string is preserved.
 *
 * Returns XML_TRUE on success, XML_FALSE if a size computation would
 * overflow or an allocation failed.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == NULL) {
      /* Strategy 1: no current string, so there is nothing to copy */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* Strategy 2: reuse the bigger free block and move the string there */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* pool->ptr must be rebased before pool->start is overwritten */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* Strategy 3: the current string is the only content of the newest
     * block counted from its beginning, so the block can grow in place */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX/2 bytes have already been allocated. This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    /* The result goes into a temporary so that the old block stays
     * reachable through pool->blocks if the reallocation fails */
    temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Strategy 4: allocate a fresh block and move the current string */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function). Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = MALLOC(pool->parser, bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Copy only the characters written so far, if there are any */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
8258
/* Reserve the next node of the content model scaffold (dtd->scaffold).
 *
 * dtd->scaffIndex is allocated on first use with parser->m_groupSize
 * entries.  The scaffold array is allocated with INIT_SCAFFOLD_ELEMENTS
 * nodes when missing and doubled in size whenever it is full.  If
 * dtd->scaffLevel is non-zero, the new node is linked in as the last child
 * of the node recorded in dtd->scaffIndex[dtd->scaffLevel - 1].  The child
 * and sibling fields of the new node are zeroed.
 *
 * Returns the index of the new node in dtd->scaffold, or -1 if an
 * allocation failed or a size limit would be exceeded.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > SIZE_MAX / sizeof(int)) {
      return -1;
    }
#endif
    dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  // Will casting to int be safe further down?
  if (dtd->scaffCount > INT_MAX) {
    return -1;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* The scaffold is full (or missing): grow it before taking a node */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = REALLOC(parser, dtd->scaffold,
                     dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize *= 2;
    } else {
      temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = (int)dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    /* Attach the new node to its parent as the last child */
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}
8332
8333 static XML_Content *
8334 build_model(XML_Parser parser) {
8335 /* Function build_model transforms the existing parser->m_dtd->scaffold
8336 * array of CONTENT_SCAFFOLD tree nodes into a new array of
8337 * XML_Content tree nodes followed by a gapless list of zero-terminated
8338 * strings. */
8339 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8340 XML_Content *ret;
8341 XML_Char *str; /* the current string writing location */
8342
8343 /* Detect and prevent integer overflow.
8344 * The preprocessor guard addresses the "always false" warning
8345 * from -Wtype-limits on platforms where
8346 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8347 #if UINT_MAX >= SIZE_MAX
8348 if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) {
8349 return NULL;
8350 }
8351 if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) {
8352 return NULL;
8353 }
8354 #endif
8355 if (dtd->scaffCount * sizeof(XML_Content)
8356 > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) {
8357 return NULL;
8358 }
8359
8360 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
8361 + (dtd->contentStringLen * sizeof(XML_Char)));
8362
8363 // NOTE: We are avoiding MALLOC(..) here to so that
8364 // applications that are not using XML_FreeContentModel but plain
8365 // free(..) or .free_fcn() to free the content model's memory are safe.
8366 ret = parser->m_mem.malloc_fcn(allocsize);
8367 if (! ret)
8368 return NULL;
8369
8370 /* What follows is an iterative implementation (of what was previously done
8371 * recursively in a dedicated function called "build_node". The old recursive
8372 * build_node could be forced into stack exhaustion from input as small as a
8373 * few megabyte, and so that was a security issue. Hence, a function call
8374 * stack is avoided now by resolving recursion.)
8375 *
8376 * The iterative approach works as follows:
8377 *
8378 * - We have two writing pointers, both walking up the result array; one does
8379 * the work, the other creates "jobs" for its colleague to do, and leads
8380 * the way:
8381 *
8382 * - The faster one, pointer jobDest, always leads and writes "what job
8383 * to do" by the other, once they reach that place in the
8384 * array: leader "jobDest" stores the source node array index (relative
8385 * to array dtd->scaffold) in field "numchildren".
8386 *
8387 * - The slower one, pointer dest, looks at the value stored in the
8388 * "numchildren" field (which actually holds a source node array index
8389 * at that time) and puts the real data from dtd->scaffold in.
8390 *
8391 * - Before the loop starts, jobDest writes source array index 0
8392 * (where the root node is located) so that dest will have something to do
8393 * when it starts operation.
8394 *
8395 * - Whenever nodes with children are encountered, jobDest appends
8396 * them as new jobs, in order. As a result, tree node siblings are
8397 * adjacent in the resulting array, for example:
8398 *
8399 * [0] root, has two children
8400 * [1] first child of 0, has three children
8401 * [3] first child of 1, does not have children
8402 * [4] second child of 1, does not have children
8403 * [5] third child of 1, does not have children
8404 * [2] second child of 0, does not have children
8405 *
8406 * Or (the same data) presented in flat array view:
8407 *
8408 * [0] root, has two children
8409 *
8410 * [1] first child of 0, has three children
8411 * [2] second child of 0, does not have children
8412 *
8413 * [3] first child of 1, does not have children
8414 * [4] second child of 1, does not have children
8415 * [5] third child of 1, does not have children
8416 *
8417 * - The algorithm repeats until all target array indices have been processed.
8418 */
8419 XML_Content *dest = ret; /* tree node writing location, moves upwards */
8420 XML_Content *const destLimit = &ret[dtd->scaffCount];
8421 XML_Content *jobDest = ret; /* next free writing location in target array */
8422 str = (XML_Char *)&ret[dtd->scaffCount];
8423
8424 /* Add the starting job, the root node (index 0) of the source tree */
8425 (jobDest++)->numchildren = 0;
8426
8427 for (; dest < destLimit; dest++) {
8428 /* Retrieve source tree array index from job storage */
8429 const int src_node = (int)dest->numchildren;
8430
8431 /* Convert item */
8432 dest->type = dtd->scaffold[src_node].type;
8433 dest->quant = dtd->scaffold[src_node].quant;
8434 if (dest->type == XML_CTYPE_NAME) {
8435 const XML_Char *src;
8436 dest->name = str;
8437 src = dtd->scaffold[src_node].name;
8438 for (;;) {
8439 *str++ = *src;
8440 if (! *src)
8441 break;
8442 src++;
8443 }
8444 dest->numchildren = 0;
8445 dest->children = NULL;
8446 } else {
8447 unsigned int i;
8448 int cn;
8449 dest->name = NULL;
8450 dest->numchildren = dtd->scaffold[src_node].childcnt;
8451 dest->children = jobDest;
8452
8453 /* Append scaffold indices of children to array */
8454 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
8455 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
8456 (jobDest++)->numchildren = (unsigned int)cn;
8457 }
8458 }
8459
8460 return ret;
8461 }
8462
8463 static ELEMENT_TYPE *
8464 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8465 const char *end) {
8466 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8467 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8468 ELEMENT_TYPE *ret;
8469
8470 if (! name)
8471 return NULL;
8472 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8473 sizeof(ELEMENT_TYPE));
8474 if (! ret)
8475 return NULL;
8476 if (ret->name != name)
8477 poolDiscard(&dtd->pool);
8478 else {
8479 poolFinish(&dtd->pool);
8480 if (! setElementTypePrefix(parser, ret))
8481 return NULL;
8482 }
8483 return ret;
8484 }
8485
8486 static XML_Char *
8487 copyString(const XML_Char *s, XML_Parser parser) {
8488 size_t charsRequired = 0;
8489 XML_Char *result;
8490
8491 /* First determine how long the string is */
8492 while (s[charsRequired] != 0) {
8493 charsRequired++;
8494 }
8495 /* Include the terminator */
8496 charsRequired++;
8497
8498 /* Now allocate space for the copy */
8499 result = MALLOC(parser, charsRequired * sizeof(XML_Char));
8500 if (result == NULL)
8501 return NULL;
8502 /* Copy the original into place */
8503 memcpy(result, s, charsRequired * sizeof(XML_Char));
8504 return result;
8505 }
8506
8507 #if XML_GE == 1
8508
8509 static float
8510 accountingGetCurrentAmplification(XML_Parser rootParser) {
8511 // 1.........1.........12 => 22
8512 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8513 const XmlBigCount countBytesOutput
8514 = rootParser->m_accounting.countBytesDirect
8515 + rootParser->m_accounting.countBytesIndirect;
8516 const float amplificationFactor
8517 = rootParser->m_accounting.countBytesDirect
8518 ? ((float)countBytesOutput
8519 / (float)(rootParser->m_accounting.countBytesDirect))
8520 : ((float)(lenOfShortestInclude
8521 + rootParser->m_accounting.countBytesIndirect)
8522 / (float)lenOfShortestInclude);
8523 assert(! rootParser->m_parentParser);
8524 return amplificationFactor;
8525 }
8526
8527 static void
8528 accountingReportStats(XML_Parser originParser, const char *epilog) {
8529 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8530 assert(! rootParser->m_parentParser);
8531
8532 if (rootParser->m_accounting.debugLevel == 0u) {
8533 return;
8534 }
8535
8536 const float amplificationFactor
8537 = accountingGetCurrentAmplification(rootParser);
8538 fprintf(stderr,
8539 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8540 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8541 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8542 rootParser->m_accounting.countBytesIndirect,
8543 (double)amplificationFactor, epilog);
8544 }
8545
8546 static void
8547 accountingOnAbort(XML_Parser originParser) {
8548 accountingReportStats(originParser, " ABORTING\n");
8549 }
8550
8551 static void
8552 accountingReportDiff(XML_Parser rootParser,
8553 unsigned int levelsAwayFromRootParser, const char *before,
8554 const char *after, ptrdiff_t bytesMore, int source_line,
8555 enum XML_Account account) {
8556 assert(! rootParser->m_parentParser);
8557
8558 fprintf(stderr,
8559 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8560 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8561 levelsAwayFromRootParser, source_line, 10, "");
8562
8563 const char ellipis[] = "[..]";
8564 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8565 const unsigned int contextLength = 10;
8566
8567 /* Note: Performance is of no concern here */
8568 const char *walker = before;
8569 if ((rootParser->m_accounting.debugLevel >= 3u)
8570 || (after - before)
8571 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8572 for (; walker < after; walker++) {
8573 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8574 }
8575 } else {
8576 for (; walker < before + contextLength; walker++) {
8577 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8578 }
8579 fprintf(stderr, ellipis);
8580 walker = after - contextLength;
8581 for (; walker < after; walker++) {
8582 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8583 }
8584 }
8585 fprintf(stderr, "\"\n");
8586 }
8587
8588 static XML_Bool
8589 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8590 const char *after, int source_line,
8591 enum XML_Account account) {
8592 /* Note: We need to check the token type *first* to be sure that
8593 * we can even access variable <after>, safely.
8594 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8595 switch (tok) {
8596 case XML_TOK_INVALID:
8597 case XML_TOK_PARTIAL:
8598 case XML_TOK_PARTIAL_CHAR:
8599 case XML_TOK_NONE:
8600 return XML_TRUE;
8601 }
8602
8603 if (account == XML_ACCOUNT_NONE)
8604 return XML_TRUE; /* because these bytes have been accounted for, already */
8605
8606 unsigned int levelsAwayFromRootParser;
8607 const XML_Parser rootParser
8608 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8609 assert(! rootParser->m_parentParser);
8610
8611 const int isDirect
8612 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8613 const ptrdiff_t bytesMore = after - before;
8614
8615 XmlBigCount *const additionTarget
8616 = isDirect ? &rootParser->m_accounting.countBytesDirect
8617 : &rootParser->m_accounting.countBytesIndirect;
8618
8619 /* Detect and avoid integer overflow */
8620 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8621 return XML_FALSE;
8622 *additionTarget += bytesMore;
8623
8624 const XmlBigCount countBytesOutput
8625 = rootParser->m_accounting.countBytesDirect
8626 + rootParser->m_accounting.countBytesIndirect;
8627 const float amplificationFactor
8628 = accountingGetCurrentAmplification(rootParser);
8629 const XML_Bool tolerated
8630 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8631 || (amplificationFactor
8632 <= rootParser->m_accounting.maximumAmplificationFactor);
8633
8634 if (rootParser->m_accounting.debugLevel >= 2u) {
8635 accountingReportStats(rootParser, "");
8636 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8637 bytesMore, source_line, account);
8638 }
8639
8640 return tolerated;
8641 }
8642
8643 unsigned long long
8644 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8645 if (! parser)
8646 return 0;
8647 return parser->m_accounting.countBytesDirect;
8648 }
8649
8650 unsigned long long
8651 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8652 if (! parser)
8653 return 0;
8654 return parser->m_accounting.countBytesIndirect;
8655 }
8656
8657 static void
8658 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8659 const char *action, int sourceLine) {
8660 assert(! rootParser->m_parentParser);
8661 if (rootParser->m_entity_stats.debugLevel == 0u)
8662 return;
8663
8664 # if defined(XML_UNICODE)
8665 const char *const entityName = "[..]";
8666 # else
8667 const char *const entityName = entity->name;
8668 # endif
8669
8670 fprintf(
8671 stderr,
8672 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8673 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8674 rootParser->m_entity_stats.currentDepth,
8675 rootParser->m_entity_stats.maximumDepthSeen,
8676 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8677 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8678 sourceLine);
8679 }
8680
8681 static void
8682 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8683 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8684 assert(! rootParser->m_parentParser);
8685
8686 rootParser->m_entity_stats.countEverOpened++;
8687 rootParser->m_entity_stats.currentDepth++;
8688 if (rootParser->m_entity_stats.currentDepth
8689 > rootParser->m_entity_stats.maximumDepthSeen) {
8690 rootParser->m_entity_stats.maximumDepthSeen++;
8691 }
8692
8693 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8694 }
8695
8696 static void
8697 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8698 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8699 assert(! rootParser->m_parentParser);
8700
8701 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8702 rootParser->m_entity_stats.currentDepth--;
8703 }
8704
8705 #endif /* XML_GE == 1 */
8706
8707 static XML_Parser
8708 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8709 XML_Parser rootParser = parser;
8710 unsigned int stepsTakenUpwards = 0;
8711 while (rootParser->m_parentParser) {
8712 rootParser = rootParser->m_parentParser;
8713 stepsTakenUpwards++;
8714 }
8715 assert(! rootParser->m_parentParser);
8716 if (outLevelDiff != NULL) {
8717 *outLevelDiff = stepsTakenUpwards;
8718 }
8719 return rootParser;
8720 }
8721
8722 #if XML_GE == 1
8723
/* Maps a byte value to a printable, zero-terminated representation with
 * static storage duration (the caller must not modify or free it):
 *
 * - 0 gives "\0", tab gives "\t", line feed "\n", carriage return "\r"
 * - the double quote and the backslash are prefixed with a backslash
 * - all other values from 32 to 126 are returned as the character itself
 * - everything else is returned as "\x" followed by the value in upper-case
 *   hexadecimal without leading zeros, e.g. "\x1", "\x7F" or "\xFF"
 *
 * (The backslashes above are literal characters of the result.) */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* clang-format off */
  static const char *const printable[256] = {
      /* 0x00 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /* 0x08 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /* 0x10 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /* 0x18 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /* 0x28 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /* 0x30 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /* 0x38 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /* 0x40 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /* 0x48 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /* 0x50 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /* 0x58 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /* 0x60 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 0x68 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 0x70 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 0x78 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 0x80 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 0x88 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 0x98 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 0xA8 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 0xB8 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 0xC8 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 0xD8 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 0xE8 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 0xF8 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  /* clang-format on */

  // LCOV_EXCL_START
#if UCHAR_MAX > 255
  /* Only reachable where unsigned char is wider than 8 bits */
  if (c > 255) {
    assert(0); /* never gets here */
    return "dead code";
  }
#endif
  // LCOV_EXCL_STOP

  return printable[c];
}
9247
9248 #endif /* XML_GE == 1 */
9249
/* Reads a debug level from the environment variable variableName.
 *
 * Returns defaultDebugLevel when the variable is not set, or when its value
 * is not accepted: strtoul (base 10) reported an error through errno,
 * consumed no characters, or left unparsed characters behind.  Otherwise
 * returns the parsed number.
 *
 * errno is left untouched when the variable is not set, and is zero on
 * return in all other cases. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);

  if (text != NULL) {
    char *parseEnd = NULL;

    errno = 0;
    const unsigned long parsed = strtoul(text, &parseEnd, 10);

    const int accepted
        = (errno == 0) && (parseEnd != text) && (parseEnd[0] == '\0');
    if (accepted) {
      return parsed;
    }

    /* Do not leak the parse error to the caller */
    errno = 0;
  }

  return defaultDebugLevel;
}
9268