1 /* 60e137abb91af642d6c3988f8f133d23329b32638659c74d47125fc0faf6ddd5 (2.7.2+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Licensed under the MIT license:
45
46 Permission is hereby granted, free of charge, to any person obtaining
47 a copy of this software and associated documentation files (the
48 "Software"), to deal in the Software without restriction, including
49 without limitation the rights to use, copy, modify, merge, publish,
50 distribute, sublicense, and/or sell copies of the Software, and to permit
51 persons to whom the Software is furnished to do so, subject to the
52 following conditions:
53
54 The above copyright notice and this permission notice shall be included
55 in all copies or substantial portions of the Software.
56
57 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
58 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
59 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
60 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
61 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
62 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
63 USE OR OTHER DEALINGS IN THE SOFTWARE.
64 */
65
66 #define XML_BUILDING_EXPAT 1
67
68 #include "expat_config.h"
69
70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
71 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
72 #endif
73
74 #if defined(XML_DTD) && XML_GE == 0
75 # error Either undefine XML_DTD or define XML_GE to 1.
76 #endif
77
78 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
79 || (XML_CONTEXT_BYTES + 0 < 0)
80 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
81 #endif
82
83 #if defined(HAVE_SYSCALL_GETRANDOM)
84 # if ! defined(_GNU_SOURCE)
85 # define _GNU_SOURCE 1 /* syscall prototype */
86 # endif
87 #endif
88
89 #ifdef _WIN32
90 /* force stdlib to define rand_s() */
91 # if ! defined(_CRT_RAND_S)
92 # define _CRT_RAND_S
93 # endif
94 #endif
95
96 #include <stdbool.h>
97 #include <stddef.h>
98 #include <string.h> /* memset(), memcpy() */
99 #include <assert.h>
100 #include <limits.h> /* INT_MAX, UINT_MAX */
101 #include <stdio.h> /* fprintf */
102 #include <stdlib.h> /* getenv, rand_s */
103 #include <stdint.h> /* uintptr_t */
104 #include <math.h> /* isnan */
105
106 #ifdef _WIN32
107 # define getpid GetCurrentProcessId
108 #else
109 # include <sys/time.h> /* gettimeofday() */
110 # include <sys/types.h> /* getpid() */
111 # include <unistd.h> /* getpid() */
112 # include <fcntl.h> /* O_RDONLY */
113 # include <errno.h>
114 #endif
115
116 #ifdef _WIN32
117 # include "winconfig.h"
118 #endif
119
120 #include "ascii.h"
121 #include "expat.h"
122 #include "siphash.h"
123
124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
125 # if defined(HAVE_GETRANDOM)
126 # include <sys/random.h> /* getrandom */
127 # else
128 # include <unistd.h> /* syscall */
129 # include <sys/syscall.h> /* SYS_getrandom */
130 # endif
131 # if ! defined(GRND_NONBLOCK)
132 # define GRND_NONBLOCK 0x0001
133 # endif /* defined(GRND_NONBLOCK) */
134 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
135
136 #if defined(HAVE_LIBBSD) \
137 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
138 # include <bsd/stdlib.h>
139 #endif
140
141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
142 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
143 #endif
144
145 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
146 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
147 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
148 && ! defined(XML_POOR_ENTROPY)
149 # error You do not have support for any sources of high quality entropy \
150 enabled. For end user security, that is probably not what you want. \
151 \
152 Your options include: \
153 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
154 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
155 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
156 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
157 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
158 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
159 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
160 * Windows >=Vista (rand_s): _WIN32. \
161 \
162 If insist on not using any of these, bypass this error by defining \
163 XML_POOR_ENTROPY; you have been warned. \
164 \
165 If you have reasons to patch this detection code away or need changes \
166 to the build system, please open a bug. Thank you!
167 #endif
168
169 #ifdef XML_UNICODE
170 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
171 # define XmlConvert XmlUtf16Convert
172 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
173 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
174 # define XmlEncode XmlUtf16Encode
175 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
176 typedef unsigned short ICHAR;
177 #else
178 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
179 # define XmlConvert XmlUtf8Convert
180 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
181 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
182 # define XmlEncode XmlUtf8Encode
183 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
184 typedef char ICHAR;
185 #endif
186
187 #ifndef XML_NS
188
189 # define XmlInitEncodingNS XmlInitEncoding
190 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
191 # undef XmlGetInternalEncodingNS
192 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
193 # define XmlParseXmlDeclNS XmlParseXmlDecl
194
195 #endif
196
197 #ifdef XML_UNICODE
198
199 # ifdef XML_UNICODE_WCHAR_T
200 # define XML_T(x) (const wchar_t) x
201 # define XML_L(x) L##x
202 # else
203 # define XML_T(x) (const unsigned short)x
204 # define XML_L(x) x
205 # endif
206
207 #else
208
209 # define XML_T(x) x
210 # define XML_L(x) x
211
212 #endif
213
214 /* Round up n to be a multiple of sz, where sz is a power of 2. */
215 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
216
217 /* Do safe (NULL-aware) pointer arithmetic */
218 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
219
220 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
221
222 #include "internal.h"
223 #include "xmltok.h"
224 #include "xmlrole.h"
225
226 typedef const XML_Char *KEY;
227
228 typedef struct {
229 KEY name;
230 } NAMED;
231
232 typedef struct {
233 NAMED **v;
234 unsigned char power;
235 size_t size;
236 size_t used;
237 XML_Parser parser;
238 } HASH_TABLE;
239
240 static size_t keylen(KEY s);
241
242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
243
244 /* For probing (after a collision) we need a step size relative prime
245 to the hash table size, which is a power of 2. We use double-hashing,
246 since we can calculate a second hash value cheaply by taking those bits
247 of the first hash value that were discarded (masked out) when the table
248 index was calculated: index = hash & mask, where mask = table->size - 1.
249 We limit the maximum step size to table->size / 4 (mask >> 2) and make
250 it odd, since odd numbers are always relative prime to a power of 2.
251 */
252 #define SECOND_HASH(hash, mask, power) \
253 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
254 #define PROBE_STEP(hash, mask, power) \
255 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
256
257 typedef struct {
258 NAMED **p;
259 NAMED **end;
260 } HASH_TABLE_ITER;
261
262 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
263 #define INIT_DATA_BUF_SIZE 1024
264 #define INIT_ATTS_SIZE 16
265 #define INIT_ATTS_VERSION 0xFFFFFFFF
266 #define INIT_BLOCK_SIZE 1024
267 #define INIT_BUFFER_SIZE 1024
268
269 #define EXPAND_SPARE 24
270
271 typedef struct binding {
272 struct prefix *prefix;
273 struct binding *nextTagBinding;
274 struct binding *prevPrefixBinding;
275 const struct attribute_id *attId;
276 XML_Char *uri;
277 int uriLen;
278 int uriAlloc;
279 } BINDING;
280
281 typedef struct prefix {
282 const XML_Char *name;
283 BINDING *binding;
284 } PREFIX;
285
286 typedef struct {
287 const XML_Char *str;
288 const XML_Char *localPart;
289 const XML_Char *prefix;
290 int strLen;
291 int uriLen;
292 int prefixLen;
293 } TAG_NAME;
294
295 /* TAG represents an open element.
296 The name of the element is stored in both the document and API
297 encodings. The memory buffer 'buf' is a separately-allocated
298 memory area which stores the name. During the XML_Parse()/
299 XML_ParseBuffer() when the element is open, the memory for the 'raw'
300 version of the name (in the document encoding) is shared with the
301 document buffer. If the element is open across calls to
302 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
303 contain the 'raw' name as well.
304
305 A parser reuses these structures, maintaining a list of allocated
306 TAG objects in a free list.
307 */
308 typedef struct tag {
309 struct tag *parent; /* parent of this element */
310 const char *rawName; /* tagName in the original encoding */
311 int rawNameLength;
312 TAG_NAME name; /* tagName in the API encoding */
313 char *buf; /* buffer for name components */
314 char *bufEnd; /* end of the buffer */
315 BINDING *bindings;
316 } TAG;
317
318 typedef struct {
319 const XML_Char *name;
320 const XML_Char *textPtr;
321 int textLen; /* length in XML_Chars */
322 int processed; /* # of processed bytes - when suspended */
323 const XML_Char *systemId;
324 const XML_Char *base;
325 const XML_Char *publicId;
326 const XML_Char *notation;
327 XML_Bool open;
328 XML_Bool hasMore; /* true if entity has not been completely processed */
329 /* An entity can be open while being already completely processed (hasMore ==
330 XML_FALSE). The reason is the delayed closing of entities until their inner
331 entities are processed and closed */
332 XML_Bool is_param;
333 XML_Bool is_internal; /* true if declared in internal subset outside PE */
334 } ENTITY;
335
336 typedef struct {
337 enum XML_Content_Type type;
338 enum XML_Content_Quant quant;
339 const XML_Char *name;
340 int firstchild;
341 int lastchild;
342 int childcnt;
343 int nextsib;
344 } CONTENT_SCAFFOLD;
345
346 #define INIT_SCAFFOLD_ELEMENTS 32
347
348 typedef struct block {
349 struct block *next;
350 int size;
351 XML_Char s[1];
352 } BLOCK;
353
354 typedef struct {
355 BLOCK *blocks;
356 BLOCK *freeBlocks;
357 const XML_Char *end;
358 XML_Char *ptr;
359 XML_Char *start;
360 XML_Parser parser;
361 } STRING_POOL;
362
363 /* The XML_Char before the name is used to determine whether
364 an attribute has been specified. */
365 typedef struct attribute_id {
366 XML_Char *name;
367 PREFIX *prefix;
368 XML_Bool maybeTokenized;
369 XML_Bool xmlns;
370 } ATTRIBUTE_ID;
371
372 typedef struct {
373 const ATTRIBUTE_ID *id;
374 XML_Bool isCdata;
375 const XML_Char *value;
376 } DEFAULT_ATTRIBUTE;
377
378 typedef struct {
379 unsigned long version;
380 unsigned long hash;
381 const XML_Char *uriName;
382 } NS_ATT;
383
384 typedef struct {
385 const XML_Char *name;
386 PREFIX *prefix;
387 const ATTRIBUTE_ID *idAtt;
388 int nDefaultAtts;
389 int allocDefaultAtts;
390 DEFAULT_ATTRIBUTE *defaultAtts;
391 } ELEMENT_TYPE;
392
393 typedef struct {
394 HASH_TABLE generalEntities;
395 HASH_TABLE elementTypes;
396 HASH_TABLE attributeIds;
397 HASH_TABLE prefixes;
398 STRING_POOL pool;
399 STRING_POOL entityValuePool;
400 /* false once a parameter entity reference has been skipped */
401 XML_Bool keepProcessing;
402 /* true once an internal or external PE reference has been encountered;
403 this includes the reference to an external subset */
404 XML_Bool hasParamEntityRefs;
405 XML_Bool standalone;
406 #ifdef XML_DTD
407 /* indicates if external PE has been read */
408 XML_Bool paramEntityRead;
409 HASH_TABLE paramEntities;
410 #endif /* XML_DTD */
411 PREFIX defaultPrefix;
412 /* === scaffolding for building content model === */
413 XML_Bool in_eldecl;
414 CONTENT_SCAFFOLD *scaffold;
415 unsigned contentStringLen;
416 unsigned scaffSize;
417 unsigned scaffCount;
418 int scaffLevel;
419 int *scaffIndex;
420 } DTD;
421
422 enum EntityType {
423 ENTITY_INTERNAL,
424 ENTITY_ATTRIBUTE,
425 ENTITY_VALUE,
426 };
427
428 typedef struct open_internal_entity {
429 const char *internalEventPtr;
430 const char *internalEventEndPtr;
431 struct open_internal_entity *next;
432 ENTITY *entity;
433 int startTagLevel;
434 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
435 enum EntityType type;
436 } OPEN_INTERNAL_ENTITY;
437
438 enum XML_Account {
439 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
440 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
441 expansion */
442 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
443 };
444
445 #if XML_GE == 1
446 typedef unsigned long long XmlBigCount;
447 typedef struct accounting {
448 XmlBigCount countBytesDirect;
449 XmlBigCount countBytesIndirect;
450 unsigned long debugLevel;
451 float maximumAmplificationFactor; // >=1.0
452 unsigned long long activationThresholdBytes;
453 } ACCOUNTING;
454
455 typedef struct MALLOC_TRACKER {
456 XmlBigCount bytesAllocated;
457 XmlBigCount peakBytesAllocated; // updated live only for debug level >=2
458 unsigned long debugLevel;
459 float maximumAmplificationFactor; // >=1.0
460 XmlBigCount activationThresholdBytes;
461 } MALLOC_TRACKER;
462
463 typedef struct entity_stats {
464 unsigned int countEverOpened;
465 unsigned int currentDepth;
466 unsigned int maximumDepthSeen;
467 unsigned long debugLevel;
468 } ENTITY_STATS;
469 #endif /* XML_GE == 1 */
470
471 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
472 const char *end, const char **endPtr);
473
474 static Processor prologProcessor;
475 static Processor prologInitProcessor;
476 static Processor contentProcessor;
477 static Processor cdataSectionProcessor;
478 #ifdef XML_DTD
479 static Processor ignoreSectionProcessor;
480 static Processor externalParEntProcessor;
481 static Processor externalParEntInitProcessor;
482 static Processor entityValueProcessor;
483 static Processor entityValueInitProcessor;
484 #endif /* XML_DTD */
485 static Processor epilogProcessor;
486 static Processor errorProcessor;
487 static Processor externalEntityInitProcessor;
488 static Processor externalEntityInitProcessor2;
489 static Processor externalEntityInitProcessor3;
490 static Processor externalEntityContentProcessor;
491 static Processor internalEntityProcessor;
492
493 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
494 const XML_Char *encodingName);
495 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
496 const char *s, const char *next);
497 static enum XML_Error initializeEncoding(XML_Parser parser);
498 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
499 const char *s, const char *end, int tok,
500 const char *next, const char **nextPtr,
501 XML_Bool haveMore, XML_Bool allowClosingDoctype,
502 enum XML_Account account);
503 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
504 XML_Bool betweenDecl, enum EntityType type);
505 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
506 const ENCODING *enc, const char *start,
507 const char *end, const char **endPtr,
508 XML_Bool haveMore, enum XML_Account account);
509 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
510 const char **startPtr, const char *end,
511 const char **nextPtr, XML_Bool haveMore,
512 enum XML_Account account);
513 #ifdef XML_DTD
514 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
515 const char **startPtr, const char *end,
516 const char **nextPtr, XML_Bool haveMore);
517 #endif /* XML_DTD */
518
519 static void freeBindings(XML_Parser parser, BINDING *bindings);
520 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
521 const char *attStr, TAG_NAME *tagNamePtr,
522 BINDING **bindingsPtr,
523 enum XML_Account account);
524 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
525 const ATTRIBUTE_ID *attId, const XML_Char *uri,
526 BINDING **bindingsPtr);
527 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
528 XML_Bool isCdata, XML_Bool isId,
529 const XML_Char *value, XML_Parser parser);
530 static enum XML_Error storeAttributeValue(XML_Parser parser,
531 const ENCODING *enc, XML_Bool isCdata,
532 const char *ptr, const char *end,
533 STRING_POOL *pool,
534 enum XML_Account account);
535 static enum XML_Error
536 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
537 const char *ptr, const char *end, STRING_POOL *pool,
538 enum XML_Account account, const char **nextPtr);
539 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
540 const char *start, const char *end);
541 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
542 #if XML_GE == 1
543 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
544 const char *start, const char *end,
545 enum XML_Account account,
546 const char **nextPtr);
547 static enum XML_Error callStoreEntityValue(XML_Parser parser,
548 const ENCODING *enc,
549 const char *start, const char *end,
550 enum XML_Account account);
551 #else
552 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
553 #endif
554 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
555 const char *start, const char *end);
556 static int reportComment(XML_Parser parser, const ENCODING *enc,
557 const char *start, const char *end);
558 static void reportDefault(XML_Parser parser, const ENCODING *enc,
559 const char *start, const char *end);
560
561 static const XML_Char *getContext(XML_Parser parser);
562 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
563
564 static void FASTCALL normalizePublicId(XML_Char *s);
565
566 static DTD *dtdCreate(XML_Parser parser);
567 /* do not call if m_parentParser != NULL */
568 static void dtdReset(DTD *p, XML_Parser parser);
569 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser);
570 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
571 XML_Parser parser);
572 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
573 STRING_POOL *newPool, const HASH_TABLE *oldTable);
574 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
575 size_t createSize);
576 static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser);
577 static void FASTCALL hashTableClear(HASH_TABLE *table);
578 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
579 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
580 const HASH_TABLE *table);
581 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
582
583 static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser);
584 static void FASTCALL poolClear(STRING_POOL *pool);
585 static void FASTCALL poolDestroy(STRING_POOL *pool);
586 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
587 const char *ptr, const char *end);
588 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
589 const char *ptr, const char *end);
590 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
591 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
592 const XML_Char *s);
593 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
594 int n);
595 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
596 const XML_Char *s);
597
598 static int FASTCALL nextScaffoldPart(XML_Parser parser);
599 static XML_Content *build_model(XML_Parser parser);
600 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
601 const char *ptr, const char *end);
602
603 static XML_Char *copyString(const XML_Char *s, XML_Parser parser);
604
605 static unsigned long generate_hash_secret_salt(XML_Parser parser);
606 static XML_Bool startParsing(XML_Parser parser);
607
608 static XML_Parser parserCreate(const XML_Char *encodingName,
609 const XML_Memory_Handling_Suite *memsuite,
610 const XML_Char *nameSep, DTD *dtd,
611 XML_Parser parentParser);
612
613 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
614
615 #if XML_GE == 1
616 static float accountingGetCurrentAmplification(XML_Parser rootParser);
617 static void accountingReportStats(XML_Parser originParser, const char *epilog);
618 static void accountingOnAbort(XML_Parser originParser);
619 static void accountingReportDiff(XML_Parser rootParser,
620 unsigned int levelsAwayFromRootParser,
621 const char *before, const char *after,
622 ptrdiff_t bytesMore, int source_line,
623 enum XML_Account account);
624 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
625 const char *before, const char *after,
626 int source_line,
627 enum XML_Account account);
628
629 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
630 const char *action, int sourceLine);
631 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
632 int sourceLine);
633 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
634 int sourceLine);
635 #endif /* XML_GE == 1 */
636
637 static XML_Parser getRootParserOf(XML_Parser parser,
638 unsigned int *outLevelDiff);
639
640 static unsigned long getDebugLevel(const char *variableName,
641 unsigned long defaultDebugLevel);
642
/* Convenience accessors for a STRING_POOL (taken by pointer).

   The string currently under construction occupies [start, ptr); `end`
   is the limit of the current storage.

   poolStart      - first character of the string under construction
   poolLength     - number of XML_Chars accumulated so far
   poolChop       - drop the last accumulated character
   poolLastChar   - lvalue for the last accumulated character
   poolDiscard    - abandon the string under construction (ptr = start)
   poolFinish     - commit the string under construction (start = ptr)
   poolAppendChar - append one character, calling poolGrow() first when
                    ptr has reached end; evaluates to 0 when poolGrow()
                    fails and to 1 otherwise

   Every use of a macro argument is parenthesized so that pointer
   expressions such as `pools + 1` or `&pool` expand correctly.  The
   arguments may still be evaluated more than once, so they must not
   have side effects.
*/
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
653
654 #if ! defined(XML_TESTING)
655 const
656 #endif
657 XML_Bool g_reparseDeferralEnabledDefault
658 = XML_TRUE; // write ONLY in runtests.c
659 #if defined(XML_TESTING)
660 unsigned int g_bytesScanned = 0; // used for testing only
661 #endif
662
663 struct XML_ParserStruct {
664 /* The first member must be m_userData so that the XML_GetUserData
665 macro works. */
666 void *m_userData;
667 void *m_handlerArg;
668
669 // How the four parse buffer pointers below relate in time and space:
670 //
671 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
672 // | | | |
673 // <--parsed-->| | |
674 // <---parsing--->| |
675 // <--unoccupied-->|
676 // <---------total-malloced/realloced-------->|
677
678 char *m_buffer; // malloc/realloc base pointer of parse buffer
679 const XML_Memory_Handling_Suite m_mem;
680 const char *m_bufferPtr; // first character to be parsed
681 char *m_bufferEnd; // past last character to be parsed
682 const char *m_bufferLim; // allocated end of m_buffer
683
684 XML_Index m_parseEndByteIndex;
685 const char *m_parseEndPtr;
686 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
687 XML_Bool m_reparseDeferralEnabled;
688 int m_lastBufferRequestSize;
689 XML_Char *m_dataBuf;
690 XML_Char *m_dataBufEnd;
691 XML_StartElementHandler m_startElementHandler;
692 XML_EndElementHandler m_endElementHandler;
693 XML_CharacterDataHandler m_characterDataHandler;
694 XML_ProcessingInstructionHandler m_processingInstructionHandler;
695 XML_CommentHandler m_commentHandler;
696 XML_StartCdataSectionHandler m_startCdataSectionHandler;
697 XML_EndCdataSectionHandler m_endCdataSectionHandler;
698 XML_DefaultHandler m_defaultHandler;
699 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
700 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
701 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
702 XML_NotationDeclHandler m_notationDeclHandler;
703 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
704 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
705 XML_NotStandaloneHandler m_notStandaloneHandler;
706 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
707 XML_Parser m_externalEntityRefHandlerArg;
708 XML_SkippedEntityHandler m_skippedEntityHandler;
709 XML_UnknownEncodingHandler m_unknownEncodingHandler;
710 XML_ElementDeclHandler m_elementDeclHandler;
711 XML_AttlistDeclHandler m_attlistDeclHandler;
712 XML_EntityDeclHandler m_entityDeclHandler;
713 XML_XmlDeclHandler m_xmlDeclHandler;
714 const ENCODING *m_encoding;
715 INIT_ENCODING m_initEncoding;
716 const ENCODING *m_internalEncoding;
717 const XML_Char *m_protocolEncodingName;
718 XML_Bool m_ns;
719 XML_Bool m_ns_triplets;
720 void *m_unknownEncodingMem;
721 void *m_unknownEncodingData;
722 void *m_unknownEncodingHandlerData;
723 void(XMLCALL *m_unknownEncodingRelease)(void *);
724 PROLOG_STATE m_prologState;
725 Processor *m_processor;
726 enum XML_Error m_errorCode;
727 const char *m_eventPtr;
728 const char *m_eventEndPtr;
729 const char *m_positionPtr;
730 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
731 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
732 OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
733 OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
734 OPEN_INTERNAL_ENTITY *m_openValueEntities;
735 OPEN_INTERNAL_ENTITY *m_freeValueEntities;
736 XML_Bool m_defaultExpandInternalEntities;
737 int m_tagLevel;
738 ENTITY *m_declEntity;
739 const XML_Char *m_doctypeName;
740 const XML_Char *m_doctypeSysid;
741 const XML_Char *m_doctypePubid;
742 const XML_Char *m_declAttributeType;
743 const XML_Char *m_declNotationName;
744 const XML_Char *m_declNotationPublicId;
745 ELEMENT_TYPE *m_declElementType;
746 ATTRIBUTE_ID *m_declAttributeId;
747 XML_Bool m_declAttributeIsCdata;
748 XML_Bool m_declAttributeIsId;
749 DTD *m_dtd;
750 const XML_Char *m_curBase;
751 TAG *m_tagStack;
752 TAG *m_freeTagList;
753 BINDING *m_inheritedBindings;
754 BINDING *m_freeBindingList;
755 int m_attsSize;
756 int m_nSpecifiedAtts;
757 int m_idAttIndex;
758 ATTRIBUTE *m_atts;
759 NS_ATT *m_nsAtts;
760 unsigned long m_nsAttsVersion;
761 unsigned char m_nsAttsPower;
762 #ifdef XML_ATTR_INFO
763 XML_AttrInfo *m_attInfo;
764 #endif
765 POSITION m_position;
766 STRING_POOL m_tempPool;
767 STRING_POOL m_temp2Pool;
768 char *m_groupConnector;
769 unsigned int m_groupSize;
770 XML_Char m_namespaceSeparator;
771 XML_Parser m_parentParser;
772 XML_ParsingStatus m_parsingStatus;
773 #ifdef XML_DTD
774 XML_Bool m_isParamEntity;
775 XML_Bool m_useForeignDTD;
776 enum XML_ParamEntityParsing m_paramEntityParsing;
777 #endif
778 unsigned long m_hash_secret_salt;
779 #if XML_GE == 1
780 ACCOUNTING m_accounting;
781 MALLOC_TRACKER m_alloc_tracker;
782 ENTITY_STATS m_entity_stats;
783 #endif
784 XML_Bool m_reenter;
785 };
786
/* Parser-scoped allocation macros.

   With general entity support (XML_GE == 1) allocations are routed
   through the expat_malloc/expat_realloc/expat_free wrappers, which
   receive the call site's __LINE__ for debug reporting.  Otherwise the
   parser's own memory handling suite (m_mem) is called directly.

   `parser` is parenthesized in both branches so that any pointer
   expression (e.g. `p + 1`, `&obj`) expands correctly regardless of
   configuration.
*/
#if XML_GE == 1
#  define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__))
#  define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__))
#  define FREE(parser, p) (expat_free((parser), (p), __LINE__))
#else
#  define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#  define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#  define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
#endif
796
797 #if XML_GE == 1
798 static void
expat_heap_stat(XML_Parser rootParser,char operator,XmlBigCount absDiff,XmlBigCount newTotal,XmlBigCount peakTotal,int sourceLine)799 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff,
800 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) {
801 // NOTE: This can be +infinity or -nan
802 const float amplification
803 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
804 fprintf(
805 stderr,
806 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL(
807 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n",
808 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator,
809 absDiff, newTotal, peakTotal, (double)amplification, sourceLine);
810 }
811
812 static bool
expat_heap_increase_tolerable(XML_Parser rootParser,XmlBigCount increase,int sourceLine)813 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase,
814 int sourceLine) {
815 assert(rootParser != NULL);
816 assert(increase > 0);
817
818 XmlBigCount newTotal = 0;
819 bool tolerable = true;
820
821 // Detect integer overflow
822 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) {
823 tolerable = false;
824 } else {
825 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase;
826
827 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) {
828 assert(newTotal > 0);
829 // NOTE: This can be +infinity when dividing by zero but not -nan
830 const float amplification
831 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect;
832 if (amplification
833 > rootParser->m_alloc_tracker.maximumAmplificationFactor) {
834 tolerable = false;
835 }
836 }
837 }
838
839 if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) {
840 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine);
841 }
842
843 return tolerable;
844 }
845
846 # if defined(XML_TESTING)
847 void *
848 # else
849 static void *
850 # endif
expat_malloc(XML_Parser parser,size_t size,int sourceLine)851 expat_malloc(XML_Parser parser, size_t size, int sourceLine) {
852 // Detect integer overflow
853 if (SIZE_MAX - size < sizeof(size_t)) {
854 return NULL;
855 }
856
857 const XML_Parser rootParser = getRootParserOf(parser, NULL);
858 assert(rootParser->m_parentParser == NULL);
859
860 const size_t bytesToAllocate = sizeof(size_t) + size;
861
862 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
863 < bytesToAllocate) {
864 return NULL; // i.e. signal integer overflow as out-of-memory
865 }
866
867 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate,
868 sourceLine)) {
869 return NULL; // i.e. signal violation as out-of-memory
870 }
871
872 // Actually allocate
873 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate);
874
875 if (mallocedPtr == NULL) {
876 return NULL;
877 }
878
879 // Update in-block recorded size
880 *(size_t *)mallocedPtr = size;
881
882 // Update accounting
883 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate;
884
885 // Report as needed
886 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
887 if (rootParser->m_alloc_tracker.bytesAllocated
888 > rootParser->m_alloc_tracker.peakBytesAllocated) {
889 rootParser->m_alloc_tracker.peakBytesAllocated
890 = rootParser->m_alloc_tracker.bytesAllocated;
891 }
892 expat_heap_stat(rootParser, '+', bytesToAllocate,
893 rootParser->m_alloc_tracker.bytesAllocated,
894 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
895 }
896
897 return (char *)mallocedPtr + sizeof(size_t);
898 }
899
900 # if defined(XML_TESTING)
901 void
902 # else
903 static void
904 # endif
expat_free(XML_Parser parser,void * ptr,int sourceLine)905 expat_free(XML_Parser parser, void *ptr, int sourceLine) {
906 assert(parser != NULL);
907
908 if (ptr == NULL) {
909 return;
910 }
911
912 const XML_Parser rootParser = getRootParserOf(parser, NULL);
913 assert(rootParser->m_parentParser == NULL);
914
915 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and
916 // the original pointer returned by malloc/realloc
917 void *const mallocedPtr = (char *)ptr - sizeof(size_t);
918 const size_t bytesAllocated = sizeof(size_t) + *(size_t *)mallocedPtr;
919
920 // Update accounting
921 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated);
922 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated;
923
924 // Report as needed
925 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
926 expat_heap_stat(rootParser, '-', bytesAllocated,
927 rootParser->m_alloc_tracker.bytesAllocated,
928 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
929 }
930
931 // NOTE: This may be freeing rootParser, so freeing has to come last
932 parser->m_mem.free_fcn(mallocedPtr);
933 }
934
935 # if defined(XML_TESTING)
936 void *
937 # else
938 static void *
939 # endif
expat_realloc(XML_Parser parser,void * ptr,size_t size,int sourceLine)940 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) {
941 assert(parser != NULL);
942
943 if (ptr == NULL) {
944 return expat_malloc(parser, size, sourceLine);
945 }
946
947 if (size == 0) {
948 expat_free(parser, ptr, sourceLine);
949 return NULL;
950 }
951
952 const XML_Parser rootParser = getRootParserOf(parser, NULL);
953 assert(rootParser->m_parentParser == NULL);
954
955 // Extract original size (to the eyes of the caller) and the original
956 // pointer returned by malloc/realloc
957 void *mallocedPtr = (char *)ptr - sizeof(size_t);
958 const size_t prevSize = *(size_t *)mallocedPtr;
959
960 // Classify upcoming change
961 const bool isIncrease = (size > prevSize);
962 const size_t absDiff
963 = (size > prevSize) ? (size - prevSize) : (prevSize - size);
964
965 // Ask for permission from accounting
966 if (isIncrease) {
967 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) {
968 return NULL; // i.e. signal violation as out-of-memory
969 }
970 }
971
972 // Actually allocate
973 mallocedPtr = parser->m_mem.realloc_fcn(mallocedPtr, sizeof(size_t) + size);
974
975 if (mallocedPtr == NULL) {
976 return NULL;
977 }
978
979 // Update accounting
980 if (isIncrease) {
981 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated
982 >= absDiff);
983 rootParser->m_alloc_tracker.bytesAllocated += absDiff;
984 } else { // i.e. decrease
985 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff);
986 rootParser->m_alloc_tracker.bytesAllocated -= absDiff;
987 }
988
989 // Report as needed
990 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
991 if (rootParser->m_alloc_tracker.bytesAllocated
992 > rootParser->m_alloc_tracker.peakBytesAllocated) {
993 rootParser->m_alloc_tracker.peakBytesAllocated
994 = rootParser->m_alloc_tracker.bytesAllocated;
995 }
996 expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff,
997 rootParser->m_alloc_tracker.bytesAllocated,
998 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine);
999 }
1000
1001 // Update in-block recorded size
1002 *(size_t *)mallocedPtr = size;
1003
1004 return (char *)mallocedPtr + sizeof(size_t);
1005 }
1006 #endif // XML_GE == 1
1007
1008 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)1009 XML_ParserCreate(const XML_Char *encodingName) {
1010 return XML_ParserCreate_MM(encodingName, NULL, NULL);
1011 }
1012
1013 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)1014 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
1015 XML_Char tmp[2] = {nsSep, 0};
1016 return XML_ParserCreate_MM(encodingName, NULL, tmp);
1017 }
1018
1019 // "xml=http://www.w3.org/XML/1998/namespace"
1020 static const XML_Char implicitContext[]
1021 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
1022 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
1023 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
1024 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
1025 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
1026 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
1027 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
1028 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
1029 '\0'};
1030
1031 /* To avoid warnings about unused functions: */
1032 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1033
1034 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1035
1036 /* Obtain entropy on Linux 3.17+ */
1037 static int
writeRandomBytes_getrandom_nonblock(void * target,size_t count)1038 writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
1039 int success = 0; /* full count bytes written? */
1040 size_t bytesWrittenTotal = 0;
1041 const unsigned int getrandomFlags = GRND_NONBLOCK;
1042
1043 do {
1044 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
1045 const size_t bytesToWrite = count - bytesWrittenTotal;
1046
1047 assert(bytesToWrite <= INT_MAX);
1048
1049 const int bytesWrittenMore =
1050 # if defined(HAVE_GETRANDOM)
1051 (int)getrandom(currentTarget, bytesToWrite, getrandomFlags);
1052 # else
1053 (int)syscall(SYS_getrandom, currentTarget, bytesToWrite,
1054 getrandomFlags);
1055 # endif
1056
1057 if (bytesWrittenMore > 0) {
1058 bytesWrittenTotal += bytesWrittenMore;
1059 if (bytesWrittenTotal >= count)
1060 success = 1;
1061 }
1062 } while (! success && (errno == EINTR));
1063
1064 return success;
1065 }
1066
1067 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
1068
1069 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1070
/* Extract entropy from /dev/urandom
 *
 * Fills `count` bytes at `target` with data read from /dev/urandom.
 * Returns 1 if all `count` bytes were read and 0 otherwise (including when
 * the device cannot be opened).
 *
 * The read is repeated when it was interrupted (it failed with EINTR) and
 * when it came back short.  errno is consulted only directly after a
 * failing read: after a successful or zero-length read it still holds
 * whatever an unrelated earlier call left behind, and a stale EINTR must not
 * keep the loop spinning.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  int retry = 0;   /* go for another round? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  do {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        success = 1;
      retry = ! success; /* short read: fetch the remainder */
    } else {
      /* Nothing read: only an interrupted call is worth repeating */
      retry = (bytesWrittenMore < 0) && (errno == EINTR);
    }
  } while (retry);

  close(fd);
  return success;
}
1098
1099 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1100
1101 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1102
1103 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
1104
/* Fills `count` bytes at `target` from successive arc4random() results.
   Each 32-bit result is spent lowest byte first; a trailing partial word
   only contributes as many bytes as are still missing. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t missing = count - filled;
    const size_t chunk = (missing < sizeof(word)) ? missing : sizeof(word);

    for (size_t byteIndex = 0; byteIndex < chunk; byteIndex++) {
      out[filled + byteIndex] = (uint8_t)(word >> (byteIndex * 8));
    }
    filled += chunk;
  }
}
1120
1121 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
1122
1123 #ifdef _WIN32
1124
1125 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
1126 as it didn't declare it in its header prior to version 5.3.0 of its
1127 runtime package (mingwrt, containing stdlib.h). The upstream fix
1128 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
1129 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
1130 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
1131 __declspec(dllimport) int rand_s(unsigned int *);
1132 # endif
1133
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills `count` bytes at `target`; every rand_s() result is spent lowest
 * byte first.  Returns 1 on success and 0 as soon as rand_s() reports an
 * error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    const size_t missing = count - filled;
    const size_t chunk = (missing < sizeof(word)) ? missing : sizeof(word);

    for (size_t byteIndex = 0; byteIndex < chunk; byteIndex++) {
      out[filled + byteIndex] = (uint8_t)(word >> (byteIndex * 8));
    }
    filled += chunk;
  }

  return 1; /* success */
}
1157
1158 #endif /* _WIN32 */
1159
1160 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
1161
/* Low-quality entropy derived from the current time: on Windows both halves
   of the system FILETIME XORed together, elsewhere the microseconds part of
   gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;

  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  assert(rc == 0);
  (void)rc; /* keeps NDEBUG builds free of an unused-variable warning */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
1184
1185 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
1186
/* Passes `entropy` through unchanged.  When the debug level obtained for
   "EXPAT_ENTROPY_DEBUG" is at least 1, the value and the name of its source
   (`label`) are also written to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  /* Two hex digits per byte, zero-padded to the full width of the type */
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
1195
1196 static unsigned long
generate_hash_secret_salt(XML_Parser parser)1197 generate_hash_secret_salt(XML_Parser parser) {
1198 unsigned long entropy;
1199 (void)parser;
1200
1201 /* "Failproof" high quality providers: */
1202 #if defined(HAVE_ARC4RANDOM_BUF)
1203 arc4random_buf(&entropy, sizeof(entropy));
1204 return ENTROPY_DEBUG("arc4random_buf", entropy);
1205 #elif defined(HAVE_ARC4RANDOM)
1206 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
1207 return ENTROPY_DEBUG("arc4random", entropy);
1208 #else
1209 /* Try high quality providers first .. */
1210 # ifdef _WIN32
1211 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
1212 return ENTROPY_DEBUG("rand_s", entropy);
1213 }
1214 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
1215 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
1216 return ENTROPY_DEBUG("getrandom", entropy);
1217 }
1218 # endif
1219 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
1220 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
1221 return ENTROPY_DEBUG("/dev/urandom", entropy);
1222 }
1223 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
1224 /* .. and self-made low quality for backup: */
1225
1226 /* Process ID is 0 bits entropy if attacker has local access */
1227 entropy = gather_time_entropy() ^ getpid();
1228
1229 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1230 if (sizeof(unsigned long) == 4) {
1231 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
1232 } else {
1233 return ENTROPY_DEBUG("fallback(8)",
1234 entropy * (unsigned long)2305843009213693951ULL);
1235 }
1236 #endif
1237 }
1238
1239 static unsigned long
get_hash_secret_salt(XML_Parser parser)1240 get_hash_secret_salt(XML_Parser parser) {
1241 const XML_Parser rootParser = getRootParserOf(parser, NULL);
1242 assert(! rootParser->m_parentParser);
1243
1244 return rootParser->m_hash_secret_salt;
1245 }
1246
1247 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1248 callProcessor(XML_Parser parser, const char *start, const char *end,
1249 const char **endPtr) {
1250 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1251
1252 if (parser->m_reparseDeferralEnabled
1253 && ! parser->m_parsingStatus.finalBuffer) {
1254 // Heuristic: don't try to parse a partial token again until the amount of
1255 // available data has increased significantly.
1256 const size_t had_before = parser->m_partialTokenBytesBefore;
1257 // ...but *do* try anyway if we're close to causing a reallocation.
1258 size_t available_buffer
1259 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1260 #if XML_CONTEXT_BYTES > 0
1261 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1262 #endif
1263 available_buffer
1264 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1265 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1266 const bool enough
1267 = (have_now >= 2 * had_before)
1268 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1269
1270 if (! enough) {
1271 *endPtr = start; // callers may expect this to be set
1272 return XML_ERROR_NONE;
1273 }
1274 }
1275 #if defined(XML_TESTING)
1276 g_bytesScanned += (unsigned)have_now;
1277 #endif
1278 // Run in a loop to eliminate dangerous recursion depths
1279 enum XML_Error ret;
1280 *endPtr = start;
1281 while (1) {
1282 // Use endPtr as the new start in each iteration, since it will
1283 // be set to the next start point by m_processor.
1284 ret = parser->m_processor(parser, *endPtr, end, endPtr);
1285
1286 // Make parsing status (and in particular XML_SUSPENDED) take
1287 // precedence over re-enter flag when they disagree
1288 if (parser->m_parsingStatus.parsing != XML_PARSING) {
1289 parser->m_reenter = XML_FALSE;
1290 }
1291
1292 if (! parser->m_reenter) {
1293 break;
1294 }
1295
1296 parser->m_reenter = XML_FALSE;
1297 if (ret != XML_ERROR_NONE)
1298 return ret;
1299 }
1300
1301 if (ret == XML_ERROR_NONE) {
1302 // if we consumed nothing, remember what we had on this parse attempt.
1303 if (*endPtr == start) {
1304 parser->m_partialTokenBytesBefore = have_now;
1305 } else {
1306 parser->m_partialTokenBytesBefore = 0;
1307 }
1308 }
1309 return ret;
1310 }
1311
1312 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1313 startParsing(XML_Parser parser) {
1314 /* hash functions must be initialized before setContext() is called */
1315 if (parser->m_hash_secret_salt == 0)
1316 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1317 if (parser->m_ns) {
1318 /* implicit context only set for root parser, since child
1319 parsers (i.e. external entity parsers) will inherit it
1320 */
1321 return setContext(parser, implicitContext);
1322 }
1323 return XML_TRUE;
1324 }
1325
1326 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1327 XML_ParserCreate_MM(const XML_Char *encodingName,
1328 const XML_Memory_Handling_Suite *memsuite,
1329 const XML_Char *nameSep) {
1330 return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
1331 }
1332
1333 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd,XML_Parser parentParser)1334 parserCreate(const XML_Char *encodingName,
1335 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1336 DTD *dtd, XML_Parser parentParser) {
1337 XML_Parser parser = NULL;
1338
1339 #if XML_GE == 1
1340 const size_t increase = sizeof(size_t) + sizeof(struct XML_ParserStruct);
1341
1342 if (parentParser != NULL) {
1343 const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
1344 if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
1345 return NULL;
1346 }
1347 }
1348 #else
1349 UNUSED_P(parentParser);
1350 #endif
1351
1352 if (memsuite) {
1353 XML_Memory_Handling_Suite *mtemp;
1354 #if XML_GE == 1
1355 void *const sizeAndParser = memsuite->malloc_fcn(
1356 sizeof(size_t) + sizeof(struct XML_ParserStruct));
1357 if (sizeAndParser != NULL) {
1358 *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1359 parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t));
1360 #else
1361 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1362 if (parser != NULL) {
1363 #endif
1364 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1365 mtemp->malloc_fcn = memsuite->malloc_fcn;
1366 mtemp->realloc_fcn = memsuite->realloc_fcn;
1367 mtemp->free_fcn = memsuite->free_fcn;
1368 }
1369 } else {
1370 XML_Memory_Handling_Suite *mtemp;
1371 #if XML_GE == 1
1372 void *const sizeAndParser
1373 = malloc(sizeof(size_t) + sizeof(struct XML_ParserStruct));
1374 if (sizeAndParser != NULL) {
1375 *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
1376 parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t));
1377 #else
1378 parser = malloc(sizeof(struct XML_ParserStruct));
1379 if (parser != NULL) {
1380 #endif
1381 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1382 mtemp->malloc_fcn = malloc;
1383 mtemp->realloc_fcn = realloc;
1384 mtemp->free_fcn = free;
1385 }
1386 } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0
1387
1388 if (! parser)
1389 return parser;
1390
1391 #if XML_GE == 1
1392 // Initialize .m_alloc_tracker
1393 memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
1394 if (parentParser == NULL) {
1395 parser->m_alloc_tracker.debugLevel
1396 = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
1397 parser->m_alloc_tracker.maximumAmplificationFactor
1398 = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
1399 parser->m_alloc_tracker.activationThresholdBytes
1400 = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;
1401
1402 // NOTE: This initialization needs to come this early because these fields
1403 // are read by allocation tracking code
1404 parser->m_parentParser = NULL;
1405 parser->m_accounting.countBytesDirect = 0;
1406 } else {
1407 parser->m_parentParser = parentParser;
1408 }
1409
1410 // Record XML_ParserStruct allocation we did a few lines up before
1411 const XML_Parser rootParser = getRootParserOf(parser, NULL);
1412 assert(rootParser->m_parentParser == NULL);
1413 assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
1414 rootParser->m_alloc_tracker.bytesAllocated += increase;
1415
1416 // Report on allocation
1417 if (rootParser->m_alloc_tracker.debugLevel >= 2) {
1418 if (rootParser->m_alloc_tracker.bytesAllocated
1419 > rootParser->m_alloc_tracker.peakBytesAllocated) {
1420 rootParser->m_alloc_tracker.peakBytesAllocated
1421 = rootParser->m_alloc_tracker.bytesAllocated;
1422 }
1423
1424 expat_heap_stat(rootParser, '+', increase,
1425 rootParser->m_alloc_tracker.bytesAllocated,
1426 rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
1427 }
1428 #else
1429 parser->m_parentParser = NULL;
1430 #endif // XML_GE == 1
1431
1432 parser->m_buffer = NULL;
1433 parser->m_bufferLim = NULL;
1434
1435 parser->m_attsSize = INIT_ATTS_SIZE;
1436 parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1437 if (parser->m_atts == NULL) {
1438 FREE(parser, parser);
1439 return NULL;
1440 }
1441 #ifdef XML_ATTR_INFO
1442 parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1443 if (parser->m_attInfo == NULL) {
1444 FREE(parser, parser->m_atts);
1445 FREE(parser, parser);
1446 return NULL;
1447 }
1448 #endif
1449 parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1450 if (parser->m_dataBuf == NULL) {
1451 FREE(parser, parser->m_atts);
1452 #ifdef XML_ATTR_INFO
1453 FREE(parser, parser->m_attInfo);
1454 #endif
1455 FREE(parser, parser);
1456 return NULL;
1457 }
1458 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1459
1460 if (dtd)
1461 parser->m_dtd = dtd;
1462 else {
1463 parser->m_dtd = dtdCreate(parser);
1464 if (parser->m_dtd == NULL) {
1465 FREE(parser, parser->m_dataBuf);
1466 FREE(parser, parser->m_atts);
1467 #ifdef XML_ATTR_INFO
1468 FREE(parser, parser->m_attInfo);
1469 #endif
1470 FREE(parser, parser);
1471 return NULL;
1472 }
1473 }
1474
1475 parser->m_freeBindingList = NULL;
1476 parser->m_freeTagList = NULL;
1477 parser->m_freeInternalEntities = NULL;
1478 parser->m_freeAttributeEntities = NULL;
1479 parser->m_freeValueEntities = NULL;
1480
1481 parser->m_groupSize = 0;
1482 parser->m_groupConnector = NULL;
1483
1484 parser->m_unknownEncodingHandler = NULL;
1485 parser->m_unknownEncodingHandlerData = NULL;
1486
1487 parser->m_namespaceSeparator = ASCII_EXCL;
1488 parser->m_ns = XML_FALSE;
1489 parser->m_ns_triplets = XML_FALSE;
1490
1491 parser->m_nsAtts = NULL;
1492 parser->m_nsAttsVersion = 0;
1493 parser->m_nsAttsPower = 0;
1494
1495 parser->m_protocolEncodingName = NULL;
1496
1497 poolInit(&parser->m_tempPool, parser);
1498 poolInit(&parser->m_temp2Pool, parser);
1499 parserInit(parser, encodingName);
1500
1501 if (encodingName && ! parser->m_protocolEncodingName) {
1502 if (dtd) {
1503 // We need to stop the upcoming call to XML_ParserFree from happily
1504 // destroying parser->m_dtd because the DTD is shared with the parent
1505 // parser and the only guard that keeps XML_ParserFree from destroying
1506 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1507 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1508 parser->m_dtd = NULL;
1509 }
1510 XML_ParserFree(parser);
1511 return NULL;
1512 }
1513
1514 if (nameSep) {
1515 parser->m_ns = XML_TRUE;
1516 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1517 parser->m_namespaceSeparator = *nameSep;
1518 } else {
1519 parser->m_internalEncoding = XmlGetInternalEncoding();
1520 }
1521
1522 return parser;
1523 }
1524
1525 static void
1526 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1527 parser->m_processor = prologInitProcessor;
1528 XmlPrologStateInit(&parser->m_prologState);
1529 if (encodingName != NULL) {
1530 parser->m_protocolEncodingName = copyString(encodingName, parser);
1531 }
1532 parser->m_curBase = NULL;
1533 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1534 parser->m_userData = NULL;
1535 parser->m_handlerArg = NULL;
1536 parser->m_startElementHandler = NULL;
1537 parser->m_endElementHandler = NULL;
1538 parser->m_characterDataHandler = NULL;
1539 parser->m_processingInstructionHandler = NULL;
1540 parser->m_commentHandler = NULL;
1541 parser->m_startCdataSectionHandler = NULL;
1542 parser->m_endCdataSectionHandler = NULL;
1543 parser->m_defaultHandler = NULL;
1544 parser->m_startDoctypeDeclHandler = NULL;
1545 parser->m_endDoctypeDeclHandler = NULL;
1546 parser->m_unparsedEntityDeclHandler = NULL;
1547 parser->m_notationDeclHandler = NULL;
1548 parser->m_startNamespaceDeclHandler = NULL;
1549 parser->m_endNamespaceDeclHandler = NULL;
1550 parser->m_notStandaloneHandler = NULL;
1551 parser->m_externalEntityRefHandler = NULL;
1552 parser->m_externalEntityRefHandlerArg = parser;
1553 parser->m_skippedEntityHandler = NULL;
1554 parser->m_elementDeclHandler = NULL;
1555 parser->m_attlistDeclHandler = NULL;
1556 parser->m_entityDeclHandler = NULL;
1557 parser->m_xmlDeclHandler = NULL;
1558 parser->m_bufferPtr = parser->m_buffer;
1559 parser->m_bufferEnd = parser->m_buffer;
1560 parser->m_parseEndByteIndex = 0;
1561 parser->m_parseEndPtr = NULL;
1562 parser->m_partialTokenBytesBefore = 0;
1563 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1564 parser->m_lastBufferRequestSize = 0;
1565 parser->m_declElementType = NULL;
1566 parser->m_declAttributeId = NULL;
1567 parser->m_declEntity = NULL;
1568 parser->m_doctypeName = NULL;
1569 parser->m_doctypeSysid = NULL;
1570 parser->m_doctypePubid = NULL;
1571 parser->m_declAttributeType = NULL;
1572 parser->m_declNotationName = NULL;
1573 parser->m_declNotationPublicId = NULL;
1574 parser->m_declAttributeIsCdata = XML_FALSE;
1575 parser->m_declAttributeIsId = XML_FALSE;
1576 memset(&parser->m_position, 0, sizeof(POSITION));
1577 parser->m_errorCode = XML_ERROR_NONE;
1578 parser->m_eventPtr = NULL;
1579 parser->m_eventEndPtr = NULL;
1580 parser->m_positionPtr = NULL;
1581 parser->m_openInternalEntities = NULL;
1582 parser->m_openAttributeEntities = NULL;
1583 parser->m_openValueEntities = NULL;
1584 parser->m_defaultExpandInternalEntities = XML_TRUE;
1585 parser->m_tagLevel = 0;
1586 parser->m_tagStack = NULL;
1587 parser->m_inheritedBindings = NULL;
1588 parser->m_nSpecifiedAtts = 0;
1589 parser->m_unknownEncodingMem = NULL;
1590 parser->m_unknownEncodingRelease = NULL;
1591 parser->m_unknownEncodingData = NULL;
1592 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1593 // Reentry can only be triggered inside m_processor calls
1594 parser->m_reenter = XML_FALSE;
1595 #ifdef XML_DTD
1596 parser->m_isParamEntity = XML_FALSE;
1597 parser->m_useForeignDTD = XML_FALSE;
1598 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1599 #endif
1600 parser->m_hash_secret_salt = 0;
1601
1602 #if XML_GE == 1
1603 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1604 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1605 parser->m_accounting.maximumAmplificationFactor
1606 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1607 parser->m_accounting.activationThresholdBytes
1608 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1609
1610 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1611 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1612 #endif
1613 }
1614
1615 /* moves list of bindings to m_freeBindingList */
1616 static void FASTCALL
1617 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1618 while (bindings) {
1619 BINDING *b = bindings;
1620 bindings = bindings->nextTagBinding;
1621 b->nextTagBinding = parser->m_freeBindingList;
1622 parser->m_freeBindingList = b;
1623 }
1624 }
1625
1626 XML_Bool XMLCALL
1627 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1628 TAG *tStk;
1629 OPEN_INTERNAL_ENTITY *openEntityList;
1630
1631 if (parser == NULL)
1632 return XML_FALSE;
1633
1634 if (parser->m_parentParser)
1635 return XML_FALSE;
1636 /* move m_tagStack to m_freeTagList */
1637 tStk = parser->m_tagStack;
1638 while (tStk) {
1639 TAG *tag = tStk;
1640 tStk = tStk->parent;
1641 tag->parent = parser->m_freeTagList;
1642 moveToFreeBindingList(parser, tag->bindings);
1643 tag->bindings = NULL;
1644 parser->m_freeTagList = tag;
1645 }
1646 /* move m_openInternalEntities to m_freeInternalEntities */
1647 openEntityList = parser->m_openInternalEntities;
1648 while (openEntityList) {
1649 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1650 openEntityList = openEntity->next;
1651 openEntity->next = parser->m_freeInternalEntities;
1652 parser->m_freeInternalEntities = openEntity;
1653 }
1654 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1655 * for attributes) */
1656 openEntityList = parser->m_openAttributeEntities;
1657 while (openEntityList) {
1658 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1659 openEntityList = openEntity->next;
1660 openEntity->next = parser->m_freeAttributeEntities;
1661 parser->m_freeAttributeEntities = openEntity;
1662 }
1663 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1664 * for value entities) */
1665 openEntityList = parser->m_openValueEntities;
1666 while (openEntityList) {
1667 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1668 openEntityList = openEntity->next;
1669 openEntity->next = parser->m_freeValueEntities;
1670 parser->m_freeValueEntities = openEntity;
1671 }
1672 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1673 FREE(parser, parser->m_unknownEncodingMem);
1674 if (parser->m_unknownEncodingRelease)
1675 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1676 poolClear(&parser->m_tempPool);
1677 poolClear(&parser->m_temp2Pool);
1678 FREE(parser, (void *)parser->m_protocolEncodingName);
1679 parser->m_protocolEncodingName = NULL;
1680 parserInit(parser, encodingName);
1681 dtdReset(parser->m_dtd, parser);
1682 return XML_TRUE;
1683 }
1684
1685 static XML_Bool
1686 parserBusy(XML_Parser parser) {
1687 switch (parser->m_parsingStatus.parsing) {
1688 case XML_PARSING:
1689 case XML_SUSPENDED:
1690 return XML_TRUE;
1691 case XML_INITIALIZED:
1692 case XML_FINISHED:
1693 default:
1694 return XML_FALSE;
1695 }
1696 }
1697
1698 enum XML_Status XMLCALL
1699 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1700 if (parser == NULL)
1701 return XML_STATUS_ERROR;
1702 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1703 XXX There's no way for the caller to determine which of the
1704 XXX possible error cases caused the XML_STATUS_ERROR return.
1705 */
1706 if (parserBusy(parser))
1707 return XML_STATUS_ERROR;
1708
1709 /* Get rid of any previous encoding name */
1710 FREE(parser, (void *)parser->m_protocolEncodingName);
1711
1712 if (encodingName == NULL)
1713 /* No new encoding name */
1714 parser->m_protocolEncodingName = NULL;
1715 else {
1716 /* Copy the new encoding name into allocated memory */
1717 parser->m_protocolEncodingName = copyString(encodingName, parser);
1718 if (! parser->m_protocolEncodingName)
1719 return XML_STATUS_ERROR;
1720 }
1721 return XML_STATUS_OK;
1722 }
1723
1724 XML_Parser XMLCALL
1725 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1726 const XML_Char *encodingName) {
1727 XML_Parser parser = oldParser;
1728 DTD *newDtd = NULL;
1729 DTD *oldDtd;
1730 XML_StartElementHandler oldStartElementHandler;
1731 XML_EndElementHandler oldEndElementHandler;
1732 XML_CharacterDataHandler oldCharacterDataHandler;
1733 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1734 XML_CommentHandler oldCommentHandler;
1735 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1736 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1737 XML_DefaultHandler oldDefaultHandler;
1738 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1739 XML_NotationDeclHandler oldNotationDeclHandler;
1740 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1741 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1742 XML_NotStandaloneHandler oldNotStandaloneHandler;
1743 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1744 XML_SkippedEntityHandler oldSkippedEntityHandler;
1745 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1746 XML_ElementDeclHandler oldElementDeclHandler;
1747 XML_AttlistDeclHandler oldAttlistDeclHandler;
1748 XML_EntityDeclHandler oldEntityDeclHandler;
1749 XML_XmlDeclHandler oldXmlDeclHandler;
1750 ELEMENT_TYPE *oldDeclElementType;
1751
1752 void *oldUserData;
1753 void *oldHandlerArg;
1754 XML_Bool oldDefaultExpandInternalEntities;
1755 XML_Parser oldExternalEntityRefHandlerArg;
1756 #ifdef XML_DTD
1757 enum XML_ParamEntityParsing oldParamEntityParsing;
1758 int oldInEntityValue;
1759 #endif
1760 XML_Bool oldns_triplets;
1761 /* Note that the new parser shares the same hash secret as the old
1762 parser, so that dtdCopy and copyEntityTable can lookup values
1763 from hash tables associated with either parser without us having
1764 to worry which hash secrets each table has.
1765 */
1766 unsigned long oldhash_secret_salt;
1767 XML_Bool oldReparseDeferralEnabled;
1768
1769 /* Validate the oldParser parameter before we pull everything out of it */
1770 if (oldParser == NULL)
1771 return NULL;
1772
1773 /* Stash the original parser contents on the stack */
1774 oldDtd = parser->m_dtd;
1775 oldStartElementHandler = parser->m_startElementHandler;
1776 oldEndElementHandler = parser->m_endElementHandler;
1777 oldCharacterDataHandler = parser->m_characterDataHandler;
1778 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1779 oldCommentHandler = parser->m_commentHandler;
1780 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1781 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1782 oldDefaultHandler = parser->m_defaultHandler;
1783 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1784 oldNotationDeclHandler = parser->m_notationDeclHandler;
1785 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1786 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1787 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1788 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1789 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1790 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1791 oldElementDeclHandler = parser->m_elementDeclHandler;
1792 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1793 oldEntityDeclHandler = parser->m_entityDeclHandler;
1794 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1795 oldDeclElementType = parser->m_declElementType;
1796
1797 oldUserData = parser->m_userData;
1798 oldHandlerArg = parser->m_handlerArg;
1799 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1800 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1801 #ifdef XML_DTD
1802 oldParamEntityParsing = parser->m_paramEntityParsing;
1803 oldInEntityValue = parser->m_prologState.inEntityValue;
1804 #endif
1805 oldns_triplets = parser->m_ns_triplets;
1806 /* Note that the new parser shares the same hash secret as the old
1807 parser, so that dtdCopy and copyEntityTable can lookup values
1808 from hash tables associated with either parser without us having
1809 to worry which hash secrets each table has.
1810 */
1811 oldhash_secret_salt = parser->m_hash_secret_salt;
1812 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1813
1814 #ifdef XML_DTD
1815 if (! context)
1816 newDtd = oldDtd;
1817 #endif /* XML_DTD */
1818
1819 /* Note that the magical uses of the pre-processor to make field
1820 access look more like C++ require that `parser' be overwritten
1821 here. This makes this function more painful to follow than it
1822 would be otherwise.
1823 */
1824 if (parser->m_ns) {
1825 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1826 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
1827 } else {
1828 parser
1829 = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
1830 }
1831
1832 if (! parser)
1833 return NULL;
1834
1835 parser->m_startElementHandler = oldStartElementHandler;
1836 parser->m_endElementHandler = oldEndElementHandler;
1837 parser->m_characterDataHandler = oldCharacterDataHandler;
1838 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1839 parser->m_commentHandler = oldCommentHandler;
1840 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1841 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1842 parser->m_defaultHandler = oldDefaultHandler;
1843 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1844 parser->m_notationDeclHandler = oldNotationDeclHandler;
1845 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1846 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1847 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1848 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1849 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1850 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1851 parser->m_elementDeclHandler = oldElementDeclHandler;
1852 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1853 parser->m_entityDeclHandler = oldEntityDeclHandler;
1854 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1855 parser->m_declElementType = oldDeclElementType;
1856 parser->m_userData = oldUserData;
1857 if (oldUserData == oldHandlerArg)
1858 parser->m_handlerArg = parser->m_userData;
1859 else
1860 parser->m_handlerArg = parser;
1861 if (oldExternalEntityRefHandlerArg != oldParser)
1862 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1863 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1864 parser->m_ns_triplets = oldns_triplets;
1865 parser->m_hash_secret_salt = oldhash_secret_salt;
1866 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1867 parser->m_parentParser = oldParser;
1868 #ifdef XML_DTD
1869 parser->m_paramEntityParsing = oldParamEntityParsing;
1870 parser->m_prologState.inEntityValue = oldInEntityValue;
1871 if (context) {
1872 #endif /* XML_DTD */
1873 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
1874 || ! setContext(parser, context)) {
1875 XML_ParserFree(parser);
1876 return NULL;
1877 }
1878 parser->m_processor = externalEntityInitProcessor;
1879 #ifdef XML_DTD
1880 } else {
1881 /* The DTD instance referenced by parser->m_dtd is shared between the
1882 document's root parser and external PE parsers, therefore one does not
1883 need to call setContext. In addition, one also *must* not call
1884 setContext, because this would overwrite existing prefix->binding
1885 pointers in parser->m_dtd with ones that get destroyed with the external
1886 PE parser. This would leave those prefixes with dangling pointers.
1887 */
1888 parser->m_isParamEntity = XML_TRUE;
1889 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1890 parser->m_processor = externalParEntInitProcessor;
1891 }
1892 #endif /* XML_DTD */
1893 return parser;
1894 }
1895
1896 static void FASTCALL
1897 destroyBindings(BINDING *bindings, XML_Parser parser) {
1898 for (;;) {
1899 BINDING *b = bindings;
1900 if (! b)
1901 break;
1902 bindings = b->nextTagBinding;
1903 FREE(parser, b->uri);
1904 FREE(parser, b);
1905 }
1906 }
1907
1908 void XMLCALL
1909 XML_ParserFree(XML_Parser parser) {
1910 TAG *tagList;
1911 OPEN_INTERNAL_ENTITY *entityList;
1912 if (parser == NULL)
1913 return;
1914 /* free m_tagStack and m_freeTagList */
1915 tagList = parser->m_tagStack;
1916 for (;;) {
1917 TAG *p;
1918 if (tagList == NULL) {
1919 if (parser->m_freeTagList == NULL)
1920 break;
1921 tagList = parser->m_freeTagList;
1922 parser->m_freeTagList = NULL;
1923 }
1924 p = tagList;
1925 tagList = tagList->parent;
1926 FREE(parser, p->buf);
1927 destroyBindings(p->bindings, parser);
1928 FREE(parser, p);
1929 }
1930 /* free m_openInternalEntities and m_freeInternalEntities */
1931 entityList = parser->m_openInternalEntities;
1932 for (;;) {
1933 OPEN_INTERNAL_ENTITY *openEntity;
1934 if (entityList == NULL) {
1935 if (parser->m_freeInternalEntities == NULL)
1936 break;
1937 entityList = parser->m_freeInternalEntities;
1938 parser->m_freeInternalEntities = NULL;
1939 }
1940 openEntity = entityList;
1941 entityList = entityList->next;
1942 FREE(parser, openEntity);
1943 }
1944 /* free m_openAttributeEntities and m_freeAttributeEntities */
1945 entityList = parser->m_openAttributeEntities;
1946 for (;;) {
1947 OPEN_INTERNAL_ENTITY *openEntity;
1948 if (entityList == NULL) {
1949 if (parser->m_freeAttributeEntities == NULL)
1950 break;
1951 entityList = parser->m_freeAttributeEntities;
1952 parser->m_freeAttributeEntities = NULL;
1953 }
1954 openEntity = entityList;
1955 entityList = entityList->next;
1956 FREE(parser, openEntity);
1957 }
1958 /* free m_openValueEntities and m_freeValueEntities */
1959 entityList = parser->m_openValueEntities;
1960 for (;;) {
1961 OPEN_INTERNAL_ENTITY *openEntity;
1962 if (entityList == NULL) {
1963 if (parser->m_freeValueEntities == NULL)
1964 break;
1965 entityList = parser->m_freeValueEntities;
1966 parser->m_freeValueEntities = NULL;
1967 }
1968 openEntity = entityList;
1969 entityList = entityList->next;
1970 FREE(parser, openEntity);
1971 }
1972 destroyBindings(parser->m_freeBindingList, parser);
1973 destroyBindings(parser->m_inheritedBindings, parser);
1974 poolDestroy(&parser->m_tempPool);
1975 poolDestroy(&parser->m_temp2Pool);
1976 FREE(parser, (void *)parser->m_protocolEncodingName);
1977 #ifdef XML_DTD
1978 /* external parameter entity parsers share the DTD structure
1979 parser->m_dtd with the root parser, so we must not destroy it
1980 */
1981 if (! parser->m_isParamEntity && parser->m_dtd)
1982 #else
1983 if (parser->m_dtd)
1984 #endif /* XML_DTD */
1985 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser);
1986 FREE(parser, parser->m_atts);
1987 #ifdef XML_ATTR_INFO
1988 FREE(parser, parser->m_attInfo);
1989 #endif
1990 FREE(parser, parser->m_groupConnector);
1991 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
1992 // is not being allocated with MALLOC(..) but with plain
1993 // .malloc_fcn(..).
1994 parser->m_mem.free_fcn(parser->m_buffer);
1995 FREE(parser, parser->m_dataBuf);
1996 FREE(parser, parser->m_nsAtts);
1997 FREE(parser, parser->m_unknownEncodingMem);
1998 if (parser->m_unknownEncodingRelease)
1999 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
2000 FREE(parser, parser);
2001 }
2002
2003 void XMLCALL
2004 XML_UseParserAsHandlerArg(XML_Parser parser) {
2005 if (parser != NULL)
2006 parser->m_handlerArg = parser;
2007 }
2008
2009 enum XML_Error XMLCALL
2010 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
2011 if (parser == NULL)
2012 return XML_ERROR_INVALID_ARGUMENT;
2013 #ifdef XML_DTD
2014 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2015 if (parserBusy(parser))
2016 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
2017 parser->m_useForeignDTD = useDTD;
2018 return XML_ERROR_NONE;
2019 #else
2020 UNUSED_P(useDTD);
2021 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
2022 #endif
2023 }
2024
2025 void XMLCALL
2026 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
2027 if (parser == NULL)
2028 return;
2029 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2030 if (parserBusy(parser))
2031 return;
2032 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
2033 }
2034
2035 void XMLCALL
2036 XML_SetUserData(XML_Parser parser, void *p) {
2037 if (parser == NULL)
2038 return;
2039 if (parser->m_handlerArg == parser->m_userData)
2040 parser->m_handlerArg = parser->m_userData = p;
2041 else
2042 parser->m_userData = p;
2043 }
2044
2045 enum XML_Status XMLCALL
2046 XML_SetBase(XML_Parser parser, const XML_Char *p) {
2047 if (parser == NULL)
2048 return XML_STATUS_ERROR;
2049 if (p) {
2050 p = poolCopyString(&parser->m_dtd->pool, p);
2051 if (! p)
2052 return XML_STATUS_ERROR;
2053 parser->m_curBase = p;
2054 } else
2055 parser->m_curBase = NULL;
2056 return XML_STATUS_OK;
2057 }
2058
2059 const XML_Char *XMLCALL
2060 XML_GetBase(XML_Parser parser) {
2061 if (parser == NULL)
2062 return NULL;
2063 return parser->m_curBase;
2064 }
2065
2066 int XMLCALL
2067 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
2068 if (parser == NULL)
2069 return -1;
2070 return parser->m_nSpecifiedAtts;
2071 }
2072
2073 int XMLCALL
2074 XML_GetIdAttributeIndex(XML_Parser parser) {
2075 if (parser == NULL)
2076 return -1;
2077 return parser->m_idAttIndex;
2078 }
2079
2080 #ifdef XML_ATTR_INFO
2081 const XML_AttrInfo *XMLCALL
2082 XML_GetAttributeInfo(XML_Parser parser) {
2083 if (parser == NULL)
2084 return NULL;
2085 return parser->m_attInfo;
2086 }
2087 #endif
2088
2089 void XMLCALL
2090 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
2091 XML_EndElementHandler end) {
2092 if (parser == NULL)
2093 return;
2094 parser->m_startElementHandler = start;
2095 parser->m_endElementHandler = end;
2096 }
2097
2098 void XMLCALL
2099 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
2100 if (parser != NULL)
2101 parser->m_startElementHandler = start;
2102 }
2103
2104 void XMLCALL
2105 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
2106 if (parser != NULL)
2107 parser->m_endElementHandler = end;
2108 }
2109
2110 void XMLCALL
2111 XML_SetCharacterDataHandler(XML_Parser parser,
2112 XML_CharacterDataHandler handler) {
2113 if (parser != NULL)
2114 parser->m_characterDataHandler = handler;
2115 }
2116
2117 void XMLCALL
2118 XML_SetProcessingInstructionHandler(XML_Parser parser,
2119 XML_ProcessingInstructionHandler handler) {
2120 if (parser != NULL)
2121 parser->m_processingInstructionHandler = handler;
2122 }
2123
2124 void XMLCALL
2125 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
2126 if (parser != NULL)
2127 parser->m_commentHandler = handler;
2128 }
2129
2130 void XMLCALL
2131 XML_SetCdataSectionHandler(XML_Parser parser,
2132 XML_StartCdataSectionHandler start,
2133 XML_EndCdataSectionHandler end) {
2134 if (parser == NULL)
2135 return;
2136 parser->m_startCdataSectionHandler = start;
2137 parser->m_endCdataSectionHandler = end;
2138 }
2139
2140 void XMLCALL
2141 XML_SetStartCdataSectionHandler(XML_Parser parser,
2142 XML_StartCdataSectionHandler start) {
2143 if (parser != NULL)
2144 parser->m_startCdataSectionHandler = start;
2145 }
2146
2147 void XMLCALL
2148 XML_SetEndCdataSectionHandler(XML_Parser parser,
2149 XML_EndCdataSectionHandler end) {
2150 if (parser != NULL)
2151 parser->m_endCdataSectionHandler = end;
2152 }
2153
2154 void XMLCALL
2155 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
2156 if (parser == NULL)
2157 return;
2158 parser->m_defaultHandler = handler;
2159 parser->m_defaultExpandInternalEntities = XML_FALSE;
2160 }
2161
2162 void XMLCALL
2163 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
2164 if (parser == NULL)
2165 return;
2166 parser->m_defaultHandler = handler;
2167 parser->m_defaultExpandInternalEntities = XML_TRUE;
2168 }
2169
2170 void XMLCALL
2171 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
2172 XML_EndDoctypeDeclHandler end) {
2173 if (parser == NULL)
2174 return;
2175 parser->m_startDoctypeDeclHandler = start;
2176 parser->m_endDoctypeDeclHandler = end;
2177 }
2178
2179 void XMLCALL
2180 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
2181 XML_StartDoctypeDeclHandler start) {
2182 if (parser != NULL)
2183 parser->m_startDoctypeDeclHandler = start;
2184 }
2185
2186 void XMLCALL
2187 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
2188 if (parser != NULL)
2189 parser->m_endDoctypeDeclHandler = end;
2190 }
2191
2192 void XMLCALL
2193 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
2194 XML_UnparsedEntityDeclHandler handler) {
2195 if (parser != NULL)
2196 parser->m_unparsedEntityDeclHandler = handler;
2197 }
2198
2199 void XMLCALL
2200 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
2201 if (parser != NULL)
2202 parser->m_notationDeclHandler = handler;
2203 }
2204
2205 void XMLCALL
2206 XML_SetNamespaceDeclHandler(XML_Parser parser,
2207 XML_StartNamespaceDeclHandler start,
2208 XML_EndNamespaceDeclHandler end) {
2209 if (parser == NULL)
2210 return;
2211 parser->m_startNamespaceDeclHandler = start;
2212 parser->m_endNamespaceDeclHandler = end;
2213 }
2214
2215 void XMLCALL
2216 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
2217 XML_StartNamespaceDeclHandler start) {
2218 if (parser != NULL)
2219 parser->m_startNamespaceDeclHandler = start;
2220 }
2221
2222 void XMLCALL
2223 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
2224 XML_EndNamespaceDeclHandler end) {
2225 if (parser != NULL)
2226 parser->m_endNamespaceDeclHandler = end;
2227 }
2228
2229 void XMLCALL
2230 XML_SetNotStandaloneHandler(XML_Parser parser,
2231 XML_NotStandaloneHandler handler) {
2232 if (parser != NULL)
2233 parser->m_notStandaloneHandler = handler;
2234 }
2235
2236 void XMLCALL
2237 XML_SetExternalEntityRefHandler(XML_Parser parser,
2238 XML_ExternalEntityRefHandler handler) {
2239 if (parser != NULL)
2240 parser->m_externalEntityRefHandler = handler;
2241 }
2242
2243 void XMLCALL
2244 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
2245 if (parser == NULL)
2246 return;
2247 if (arg)
2248 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
2249 else
2250 parser->m_externalEntityRefHandlerArg = parser;
2251 }
2252
2253 void XMLCALL
2254 XML_SetSkippedEntityHandler(XML_Parser parser,
2255 XML_SkippedEntityHandler handler) {
2256 if (parser != NULL)
2257 parser->m_skippedEntityHandler = handler;
2258 }
2259
2260 void XMLCALL
2261 XML_SetUnknownEncodingHandler(XML_Parser parser,
2262 XML_UnknownEncodingHandler handler, void *data) {
2263 if (parser == NULL)
2264 return;
2265 parser->m_unknownEncodingHandler = handler;
2266 parser->m_unknownEncodingHandlerData = data;
2267 }
2268
2269 void XMLCALL
2270 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
2271 if (parser != NULL)
2272 parser->m_elementDeclHandler = eldecl;
2273 }
2274
2275 void XMLCALL
2276 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
2277 if (parser != NULL)
2278 parser->m_attlistDeclHandler = attdecl;
2279 }
2280
2281 void XMLCALL
2282 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
2283 if (parser != NULL)
2284 parser->m_entityDeclHandler = handler;
2285 }
2286
2287 void XMLCALL
2288 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
2289 if (parser != NULL)
2290 parser->m_xmlDeclHandler = handler;
2291 }
2292
2293 int XMLCALL
2294 XML_SetParamEntityParsing(XML_Parser parser,
2295 enum XML_ParamEntityParsing peParsing) {
2296 if (parser == NULL)
2297 return 0;
2298 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2299 if (parserBusy(parser))
2300 return 0;
2301 #ifdef XML_DTD
2302 parser->m_paramEntityParsing = peParsing;
2303 return 1;
2304 #else
2305 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2306 #endif
2307 }
2308
2309 int XMLCALL
2310 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2311 if (parser == NULL)
2312 return 0;
2313
2314 const XML_Parser rootParser = getRootParserOf(parser, NULL);
2315 assert(! rootParser->m_parentParser);
2316
2317 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2318 if (parserBusy(rootParser))
2319 return 0;
2320 rootParser->m_hash_secret_salt = hash_salt;
2321 return 1;
2322 }
2323
2324 enum XML_Status XMLCALL
2325 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2326 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2327 if (parser != NULL)
2328 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2329 return XML_STATUS_ERROR;
2330 }
2331 switch (parser->m_parsingStatus.parsing) {
2332 case XML_SUSPENDED:
2333 parser->m_errorCode = XML_ERROR_SUSPENDED;
2334 return XML_STATUS_ERROR;
2335 case XML_FINISHED:
2336 parser->m_errorCode = XML_ERROR_FINISHED;
2337 return XML_STATUS_ERROR;
2338 case XML_INITIALIZED:
2339 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2340 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2341 return XML_STATUS_ERROR;
2342 }
2343 /* fall through */
2344 default:
2345 parser->m_parsingStatus.parsing = XML_PARSING;
2346 }
2347
2348 #if XML_CONTEXT_BYTES == 0
2349 if (parser->m_bufferPtr == parser->m_bufferEnd) {
2350 const char *end;
2351 int nLeftOver;
2352 enum XML_Status result;
2353 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2354 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2355 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2356 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2357 parser->m_processor = errorProcessor;
2358 return XML_STATUS_ERROR;
2359 }
2360 // though this isn't a buffer request, we assume that `len` is the app's
2361 // preferred buffer fill size, and therefore save it here.
2362 parser->m_lastBufferRequestSize = len;
2363 parser->m_parseEndByteIndex += len;
2364 parser->m_positionPtr = s;
2365 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2366
2367 parser->m_errorCode
2368 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2369
2370 if (parser->m_errorCode != XML_ERROR_NONE) {
2371 parser->m_eventEndPtr = parser->m_eventPtr;
2372 parser->m_processor = errorProcessor;
2373 return XML_STATUS_ERROR;
2374 } else {
2375 switch (parser->m_parsingStatus.parsing) {
2376 case XML_SUSPENDED:
2377 result = XML_STATUS_SUSPENDED;
2378 break;
2379 case XML_INITIALIZED:
2380 case XML_PARSING:
2381 if (isFinal) {
2382 parser->m_parsingStatus.parsing = XML_FINISHED;
2383 return XML_STATUS_OK;
2384 }
2385 /* fall through */
2386 default:
2387 result = XML_STATUS_OK;
2388 }
2389 }
2390
2391 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2392 &parser->m_position);
2393 nLeftOver = s + len - end;
2394 if (nLeftOver) {
2395 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2396 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2397 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2398 parser->m_parsingStatus.parsing = XML_PARSING;
2399 void *const temp = XML_GetBuffer(parser, nLeftOver);
2400 parser->m_parsingStatus.parsing = originalStatus;
2401 // GetBuffer may have overwritten this, but we want to remember what the
2402 // app requested, not how many bytes were left over after parsing.
2403 parser->m_lastBufferRequestSize = len;
2404 if (temp == NULL) {
2405 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2406 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2407 parser->m_processor = errorProcessor;
2408 return XML_STATUS_ERROR;
2409 }
2410 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2411 // don't have any data to preserve, and can copy straight into the start
2412 // of the buffer rather than the GetBuffer return pointer (which may be
2413 // pointing further into the allocated buffer).
2414 memcpy(parser->m_buffer, end, nLeftOver);
2415 }
2416 parser->m_bufferPtr = parser->m_buffer;
2417 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2418 parser->m_positionPtr = parser->m_bufferPtr;
2419 parser->m_parseEndPtr = parser->m_bufferEnd;
2420 parser->m_eventPtr = parser->m_bufferPtr;
2421 parser->m_eventEndPtr = parser->m_bufferPtr;
2422 return result;
2423 }
2424 #endif /* XML_CONTEXT_BYTES == 0 */
2425 void *buff = XML_GetBuffer(parser, len);
2426 if (buff == NULL)
2427 return XML_STATUS_ERROR;
2428 if (len > 0) {
2429 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2430 memcpy(buff, s, len);
2431 }
2432 return XML_ParseBuffer(parser, len, isFinal);
2433 }
2434
2435 enum XML_Status XMLCALL
2436 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2437 const char *start;
2438 enum XML_Status result = XML_STATUS_OK;
2439
2440 if (parser == NULL)
2441 return XML_STATUS_ERROR;
2442
2443 if (len < 0) {
2444 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2445 return XML_STATUS_ERROR;
2446 }
2447
2448 switch (parser->m_parsingStatus.parsing) {
2449 case XML_SUSPENDED:
2450 parser->m_errorCode = XML_ERROR_SUSPENDED;
2451 return XML_STATUS_ERROR;
2452 case XML_FINISHED:
2453 parser->m_errorCode = XML_ERROR_FINISHED;
2454 return XML_STATUS_ERROR;
2455 case XML_INITIALIZED:
2456 /* Has someone called XML_GetBuffer successfully before? */
2457 if (! parser->m_bufferPtr) {
2458 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2459 return XML_STATUS_ERROR;
2460 }
2461
2462 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2463 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2464 return XML_STATUS_ERROR;
2465 }
2466 /* fall through */
2467 default:
2468 parser->m_parsingStatus.parsing = XML_PARSING;
2469 }
2470
2471 start = parser->m_bufferPtr;
2472 parser->m_positionPtr = start;
2473 parser->m_bufferEnd += len;
2474 parser->m_parseEndPtr = parser->m_bufferEnd;
2475 parser->m_parseEndByteIndex += len;
2476 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2477
2478 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2479 &parser->m_bufferPtr);
2480
2481 if (parser->m_errorCode != XML_ERROR_NONE) {
2482 parser->m_eventEndPtr = parser->m_eventPtr;
2483 parser->m_processor = errorProcessor;
2484 return XML_STATUS_ERROR;
2485 } else {
2486 switch (parser->m_parsingStatus.parsing) {
2487 case XML_SUSPENDED:
2488 result = XML_STATUS_SUSPENDED;
2489 break;
2490 case XML_INITIALIZED:
2491 case XML_PARSING:
2492 if (isFinal) {
2493 parser->m_parsingStatus.parsing = XML_FINISHED;
2494 return result;
2495 }
2496 default:; /* should not happen */
2497 }
2498 }
2499
2500 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2501 parser->m_bufferPtr, &parser->m_position);
2502 parser->m_positionPtr = parser->m_bufferPtr;
2503 return result;
2504 }
2505
2506 void *XMLCALL
2507 XML_GetBuffer(XML_Parser parser, int len) {
2508 if (parser == NULL)
2509 return NULL;
2510 if (len < 0) {
2511 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2512 return NULL;
2513 }
2514 switch (parser->m_parsingStatus.parsing) {
2515 case XML_SUSPENDED:
2516 parser->m_errorCode = XML_ERROR_SUSPENDED;
2517 return NULL;
2518 case XML_FINISHED:
2519 parser->m_errorCode = XML_ERROR_FINISHED;
2520 return NULL;
2521 default:;
2522 }
2523
2524 // whether or not the request succeeds, `len` seems to be the app's preferred
2525 // buffer fill size; remember it.
2526 parser->m_lastBufferRequestSize = len;
2527 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2528 || parser->m_buffer == NULL) {
2529 #if XML_CONTEXT_BYTES > 0
2530 int keep;
2531 #endif /* XML_CONTEXT_BYTES > 0 */
2532 /* Do not invoke signed arithmetic overflow: */
2533 int neededSize = (int)((unsigned)len
2534 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2535 parser->m_bufferEnd, parser->m_bufferPtr));
2536 if (neededSize < 0) {
2537 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2538 return NULL;
2539 }
2540 #if XML_CONTEXT_BYTES > 0
2541 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2542 if (keep > XML_CONTEXT_BYTES)
2543 keep = XML_CONTEXT_BYTES;
2544 /* Detect and prevent integer overflow */
2545 if (keep > INT_MAX - neededSize) {
2546 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2547 return NULL;
2548 }
2549 neededSize += keep;
2550 #endif /* XML_CONTEXT_BYTES > 0 */
2551 if (parser->m_buffer && parser->m_bufferPtr
2552 && neededSize
2553 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2554 #if XML_CONTEXT_BYTES > 0
2555 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2556 int offset
2557 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2558 - keep;
2559 /* The buffer pointers cannot be NULL here; we have at least some bytes
2560 * in the buffer */
2561 memmove(parser->m_buffer, &parser->m_buffer[offset],
2562 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2563 parser->m_bufferEnd -= offset;
2564 parser->m_bufferPtr -= offset;
2565 }
2566 #else
2567 memmove(parser->m_buffer, parser->m_bufferPtr,
2568 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2569 parser->m_bufferEnd
2570 = parser->m_buffer
2571 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2572 parser->m_bufferPtr = parser->m_buffer;
2573 #endif /* XML_CONTEXT_BYTES > 0 */
2574 } else {
2575 char *newBuf;
2576 int bufferSize
2577 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2578 if (bufferSize == 0)
2579 bufferSize = INIT_BUFFER_SIZE;
2580 do {
2581 /* Do not invoke signed arithmetic overflow: */
2582 bufferSize = (int)(2U * (unsigned)bufferSize);
2583 } while (bufferSize < neededSize && bufferSize > 0);
2584 if (bufferSize <= 0) {
2585 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2586 return NULL;
2587 }
2588 // NOTE: We are avoiding MALLOC(..) here to leave limiting
2589 // the input size to the application using Expat.
2590 newBuf = parser->m_mem.malloc_fcn(bufferSize);
2591 if (newBuf == 0) {
2592 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2593 return NULL;
2594 }
2595 parser->m_bufferLim = newBuf + bufferSize;
2596 #if XML_CONTEXT_BYTES > 0
2597 if (parser->m_bufferPtr) {
2598 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2599 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2600 + keep);
2601 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2602 // is not being allocated with MALLOC(..) but with plain
2603 // .malloc_fcn(..).
2604 parser->m_mem.free_fcn(parser->m_buffer);
2605 parser->m_buffer = newBuf;
2606 parser->m_bufferEnd
2607 = parser->m_buffer
2608 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2609 + keep;
2610 parser->m_bufferPtr = parser->m_buffer + keep;
2611 } else {
2612 /* This must be a brand new buffer with no data in it yet */
2613 parser->m_bufferEnd = newBuf;
2614 parser->m_bufferPtr = parser->m_buffer = newBuf;
2615 }
2616 #else
2617 if (parser->m_bufferPtr) {
2618 memcpy(newBuf, parser->m_bufferPtr,
2619 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2620 // NOTE: We are avoiding FREE(..) here because parser->m_buffer
2621 // is not being allocated with MALLOC(..) but with plain
2622 // .malloc_fcn(..).
2623 parser->m_mem.free_fcn(parser->m_buffer);
2624 parser->m_bufferEnd
2625 = newBuf
2626 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2627 } else {
2628 /* This must be a brand new buffer with no data in it yet */
2629 parser->m_bufferEnd = newBuf;
2630 }
2631 parser->m_bufferPtr = parser->m_buffer = newBuf;
2632 #endif /* XML_CONTEXT_BYTES > 0 */
2633 }
2634 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2635 parser->m_positionPtr = NULL;
2636 }
2637 return parser->m_bufferEnd;
2638 }
2639
2640 static void
2641 triggerReenter(XML_Parser parser) {
2642 parser->m_reenter = XML_TRUE;
2643 }
2644
2645 enum XML_Status XMLCALL
2646 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2647 if (parser == NULL)
2648 return XML_STATUS_ERROR;
2649 switch (parser->m_parsingStatus.parsing) {
2650 case XML_INITIALIZED:
2651 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2652 return XML_STATUS_ERROR;
2653 case XML_SUSPENDED:
2654 if (resumable) {
2655 parser->m_errorCode = XML_ERROR_SUSPENDED;
2656 return XML_STATUS_ERROR;
2657 }
2658 parser->m_parsingStatus.parsing = XML_FINISHED;
2659 break;
2660 case XML_FINISHED:
2661 parser->m_errorCode = XML_ERROR_FINISHED;
2662 return XML_STATUS_ERROR;
2663 case XML_PARSING:
2664 if (resumable) {
2665 #ifdef XML_DTD
2666 if (parser->m_isParamEntity) {
2667 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2668 return XML_STATUS_ERROR;
2669 }
2670 #endif
2671 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2672 } else
2673 parser->m_parsingStatus.parsing = XML_FINISHED;
2674 break;
2675 default:
2676 assert(0);
2677 }
2678 return XML_STATUS_OK;
2679 }
2680
2681 enum XML_Status XMLCALL
2682 XML_ResumeParser(XML_Parser parser) {
2683 enum XML_Status result = XML_STATUS_OK;
2684
2685 if (parser == NULL)
2686 return XML_STATUS_ERROR;
2687 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2688 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2689 return XML_STATUS_ERROR;
2690 }
2691 parser->m_parsingStatus.parsing = XML_PARSING;
2692
2693 parser->m_errorCode = callProcessor(
2694 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2695
2696 if (parser->m_errorCode != XML_ERROR_NONE) {
2697 parser->m_eventEndPtr = parser->m_eventPtr;
2698 parser->m_processor = errorProcessor;
2699 return XML_STATUS_ERROR;
2700 } else {
2701 switch (parser->m_parsingStatus.parsing) {
2702 case XML_SUSPENDED:
2703 result = XML_STATUS_SUSPENDED;
2704 break;
2705 case XML_INITIALIZED:
2706 case XML_PARSING:
2707 if (parser->m_parsingStatus.finalBuffer) {
2708 parser->m_parsingStatus.parsing = XML_FINISHED;
2709 return result;
2710 }
2711 default:;
2712 }
2713 }
2714
2715 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2716 parser->m_bufferPtr, &parser->m_position);
2717 parser->m_positionPtr = parser->m_bufferPtr;
2718 return result;
2719 }
2720
2721 void XMLCALL
2722 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2723 if (parser == NULL)
2724 return;
2725 assert(status != NULL);
2726 *status = parser->m_parsingStatus;
2727 }
2728
2729 enum XML_Error XMLCALL
2730 XML_GetErrorCode(XML_Parser parser) {
2731 if (parser == NULL)
2732 return XML_ERROR_INVALID_ARGUMENT;
2733 return parser->m_errorCode;
2734 }
2735
2736 XML_Index XMLCALL
2737 XML_GetCurrentByteIndex(XML_Parser parser) {
2738 if (parser == NULL)
2739 return -1;
2740 if (parser->m_eventPtr)
2741 return (XML_Index)(parser->m_parseEndByteIndex
2742 - (parser->m_parseEndPtr - parser->m_eventPtr));
2743 return -1;
2744 }
2745
2746 int XMLCALL
2747 XML_GetCurrentByteCount(XML_Parser parser) {
2748 if (parser == NULL)
2749 return 0;
2750 if (parser->m_eventEndPtr && parser->m_eventPtr)
2751 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2752 return 0;
2753 }
2754
2755 const char *XMLCALL
2756 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2757 #if XML_CONTEXT_BYTES > 0
2758 if (parser == NULL)
2759 return NULL;
2760 if (parser->m_eventPtr && parser->m_buffer) {
2761 if (offset != NULL)
2762 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2763 if (size != NULL)
2764 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2765 return parser->m_buffer;
2766 }
2767 #else
2768 (void)parser;
2769 (void)offset;
2770 (void)size;
2771 #endif /* XML_CONTEXT_BYTES > 0 */
2772 return (const char *)0;
2773 }
2774
2775 XML_Size XMLCALL
2776 XML_GetCurrentLineNumber(XML_Parser parser) {
2777 if (parser == NULL)
2778 return 0;
2779 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2780 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2781 parser->m_eventPtr, &parser->m_position);
2782 parser->m_positionPtr = parser->m_eventPtr;
2783 }
2784 return parser->m_position.lineNumber + 1;
2785 }
2786
2787 XML_Size XMLCALL
2788 XML_GetCurrentColumnNumber(XML_Parser parser) {
2789 if (parser == NULL)
2790 return 0;
2791 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2792 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2793 parser->m_eventPtr, &parser->m_position);
2794 parser->m_positionPtr = parser->m_eventPtr;
2795 }
2796 return parser->m_position.columnNumber;
2797 }
2798
2799 void XMLCALL
2800 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2801 if (parser == NULL)
2802 return;
2803
2804 // NOTE: We are avoiding FREE(..) here because the content model
2805 // has been created using plain .malloc_fcn(..) rather than MALLOC(..).
2806 parser->m_mem.free_fcn(model);
2807 }
2808
2809 void *XMLCALL
2810 XML_MemMalloc(XML_Parser parser, size_t size) {
2811 if (parser == NULL)
2812 return NULL;
2813
2814 // NOTE: We are avoiding MALLOC(..) here to not include
2815 // user allocations with allocation tracking and limiting.
2816 return parser->m_mem.malloc_fcn(size);
2817 }
2818
2819 void *XMLCALL
2820 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2821 if (parser == NULL)
2822 return NULL;
2823
2824 // NOTE: We are avoiding REALLOC(..) here to not include
2825 // user allocations with allocation tracking and limiting.
2826 return parser->m_mem.realloc_fcn(ptr, size);
2827 }
2828
2829 void XMLCALL
2830 XML_MemFree(XML_Parser parser, void *ptr) {
2831 if (parser == NULL)
2832 return;
2833
2834 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and
2835 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..)
2836 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally.
2837 parser->m_mem.free_fcn(ptr);
2838 }
2839
2840 void XMLCALL
2841 XML_DefaultCurrent(XML_Parser parser) {
2842 if (parser == NULL)
2843 return;
2844 if (parser->m_defaultHandler) {
2845 if (parser->m_openInternalEntities)
2846 reportDefault(parser, parser->m_internalEncoding,
2847 parser->m_openInternalEntities->internalEventPtr,
2848 parser->m_openInternalEntities->internalEventEndPtr);
2849 else
2850 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2851 parser->m_eventEndPtr);
2852 }
2853 }
2854
2855 const XML_LChar *XMLCALL
2856 XML_ErrorString(enum XML_Error code) {
2857 switch (code) {
2858 case XML_ERROR_NONE:
2859 return NULL;
2860 case XML_ERROR_NO_MEMORY:
2861 return XML_L("out of memory");
2862 case XML_ERROR_SYNTAX:
2863 return XML_L("syntax error");
2864 case XML_ERROR_NO_ELEMENTS:
2865 return XML_L("no element found");
2866 case XML_ERROR_INVALID_TOKEN:
2867 return XML_L("not well-formed (invalid token)");
2868 case XML_ERROR_UNCLOSED_TOKEN:
2869 return XML_L("unclosed token");
2870 case XML_ERROR_PARTIAL_CHAR:
2871 return XML_L("partial character");
2872 case XML_ERROR_TAG_MISMATCH:
2873 return XML_L("mismatched tag");
2874 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2875 return XML_L("duplicate attribute");
2876 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2877 return XML_L("junk after document element");
2878 case XML_ERROR_PARAM_ENTITY_REF:
2879 return XML_L("illegal parameter entity reference");
2880 case XML_ERROR_UNDEFINED_ENTITY:
2881 return XML_L("undefined entity");
2882 case XML_ERROR_RECURSIVE_ENTITY_REF:
2883 return XML_L("recursive entity reference");
2884 case XML_ERROR_ASYNC_ENTITY:
2885 return XML_L("asynchronous entity");
2886 case XML_ERROR_BAD_CHAR_REF:
2887 return XML_L("reference to invalid character number");
2888 case XML_ERROR_BINARY_ENTITY_REF:
2889 return XML_L("reference to binary entity");
2890 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2891 return XML_L("reference to external entity in attribute");
2892 case XML_ERROR_MISPLACED_XML_PI:
2893 return XML_L("XML or text declaration not at start of entity");
2894 case XML_ERROR_UNKNOWN_ENCODING:
2895 return XML_L("unknown encoding");
2896 case XML_ERROR_INCORRECT_ENCODING:
2897 return XML_L("encoding specified in XML declaration is incorrect");
2898 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2899 return XML_L("unclosed CDATA section");
2900 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2901 return XML_L("error in processing external entity reference");
2902 case XML_ERROR_NOT_STANDALONE:
2903 return XML_L("document is not standalone");
2904 case XML_ERROR_UNEXPECTED_STATE:
2905 return XML_L("unexpected parser state - please send a bug report");
2906 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2907 return XML_L("entity declared in parameter entity");
2908 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2909 return XML_L("requested feature requires XML_DTD support in Expat");
2910 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2911 return XML_L("cannot change setting once parsing has begun");
2912 /* Added in 1.95.7. */
2913 case XML_ERROR_UNBOUND_PREFIX:
2914 return XML_L("unbound prefix");
2915 /* Added in 1.95.8. */
2916 case XML_ERROR_UNDECLARING_PREFIX:
2917 return XML_L("must not undeclare prefix");
2918 case XML_ERROR_INCOMPLETE_PE:
2919 return XML_L("incomplete markup in parameter entity");
2920 case XML_ERROR_XML_DECL:
2921 return XML_L("XML declaration not well-formed");
2922 case XML_ERROR_TEXT_DECL:
2923 return XML_L("text declaration not well-formed");
2924 case XML_ERROR_PUBLICID:
2925 return XML_L("illegal character(s) in public id");
2926 case XML_ERROR_SUSPENDED:
2927 return XML_L("parser suspended");
2928 case XML_ERROR_NOT_SUSPENDED:
2929 return XML_L("parser not suspended");
2930 case XML_ERROR_ABORTED:
2931 return XML_L("parsing aborted");
2932 case XML_ERROR_FINISHED:
2933 return XML_L("parsing finished");
2934 case XML_ERROR_SUSPEND_PE:
2935 return XML_L("cannot suspend in external parameter entity");
2936 /* Added in 2.0.0. */
2937 case XML_ERROR_RESERVED_PREFIX_XML:
2938 return XML_L(
2939 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2940 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2941 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2942 case XML_ERROR_RESERVED_NAMESPACE_URI:
2943 return XML_L(
2944 "prefix must not be bound to one of the reserved namespace names");
2945 /* Added in 2.2.5. */
2946 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2947 return XML_L("invalid argument");
2948 /* Added in 2.3.0. */
2949 case XML_ERROR_NO_BUFFER:
2950 return XML_L(
2951 "a successful prior call to function XML_GetBuffer is required");
2952 /* Added in 2.4.0. */
2953 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2954 return XML_L(
2955 "limit on input amplification factor (from DTD and entities) breached");
2956 /* Added in 2.6.4. */
2957 case XML_ERROR_NOT_STARTED:
2958 return XML_L("parser not started");
2959 }
2960 return NULL;
2961 }
2962
2963 const XML_LChar *XMLCALL
2964 XML_ExpatVersion(void) {
2965 /* V1 is used to string-ize the version number. However, it would
2966 string-ize the actual version macro *names* unless we get them
2967 substituted before being passed to V1. CPP is defined to expand
2968 a macro, then rescan for more expansions. Thus, we use V2 to expand
2969 the version macros, then CPP will expand the resulting V1() macro
2970 with the correct numerals. */
2971 /* ### I'm assuming cpp is portable in this respect... */
2972
2973 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2974 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2975
2976 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2977
2978 #undef V1
2979 #undef V2
2980 }
2981
2982 XML_Expat_Version XMLCALL
2983 XML_ExpatVersionInfo(void) {
2984 XML_Expat_Version version;
2985
2986 version.major = XML_MAJOR_VERSION;
2987 version.minor = XML_MINOR_VERSION;
2988 version.micro = XML_MICRO_VERSION;
2989
2990 return version;
2991 }
2992
2993 const XML_Feature *XMLCALL
2994 XML_GetFeatureList(void) {
2995 static const XML_Feature features[] = {
2996 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2997 sizeof(XML_Char)},
2998 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2999 sizeof(XML_LChar)},
3000 #ifdef XML_UNICODE
3001 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
3002 #endif
3003 #ifdef XML_UNICODE_WCHAR_T
3004 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
3005 #endif
3006 #ifdef XML_DTD
3007 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
3008 #endif
3009 #if XML_CONTEXT_BYTES > 0
3010 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
3011 XML_CONTEXT_BYTES},
3012 #endif
3013 #ifdef XML_MIN_SIZE
3014 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
3015 #endif
3016 #ifdef XML_NS
3017 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
3018 #endif
3019 #ifdef XML_LARGE_SIZE
3020 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
3021 #endif
3022 #ifdef XML_ATTR_INFO
3023 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
3024 #endif
3025 #if XML_GE == 1
3026 /* Added in Expat 2.4.0 for XML_DTD defined and
3027 * added in Expat 2.6.0 for XML_GE == 1. */
3028 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
3029 XML_L("XML_BLAP_MAX_AMP"),
3030 (long int)
3031 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
3032 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
3033 XML_L("XML_BLAP_ACT_THRES"),
3034 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
3035 /* Added in Expat 2.6.0. */
3036 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
3037 /* Added in Expat 2.7.2. */
3038 {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
3039 XML_L("XML_AT_MAX_AMP"),
3040 (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
3041 {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
3042 XML_L("XML_AT_ACT_THRES"),
3043 (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
3044 #endif
3045 {XML_FEATURE_END, NULL, 0}};
3046
3047 return features;
3048 }
3049
3050 #if XML_GE == 1
3051 XML_Bool XMLCALL
3052 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
3053 XML_Parser parser, float maximumAmplificationFactor) {
3054 if ((parser == NULL) || (parser->m_parentParser != NULL)
3055 || isnan(maximumAmplificationFactor)
3056 || (maximumAmplificationFactor < 1.0f)) {
3057 return XML_FALSE;
3058 }
3059 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
3060 return XML_TRUE;
3061 }
3062
3063 XML_Bool XMLCALL
3064 XML_SetBillionLaughsAttackProtectionActivationThreshold(
3065 XML_Parser parser, unsigned long long activationThresholdBytes) {
3066 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3067 return XML_FALSE;
3068 }
3069 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
3070 return XML_TRUE;
3071 }
3072
3073 XML_Bool XMLCALL
3074 XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
3075 float maximumAmplificationFactor) {
3076 if ((parser == NULL) || (parser->m_parentParser != NULL)
3077 || isnan(maximumAmplificationFactor)
3078 || (maximumAmplificationFactor < 1.0f)) {
3079 return XML_FALSE;
3080 }
3081 parser->m_alloc_tracker.maximumAmplificationFactor
3082 = maximumAmplificationFactor;
3083 return XML_TRUE;
3084 }
3085
3086 XML_Bool XMLCALL
3087 XML_SetAllocTrackerActivationThreshold(
3088 XML_Parser parser, unsigned long long activationThresholdBytes) {
3089 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
3090 return XML_FALSE;
3091 }
3092 parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
3093 return XML_TRUE;
3094 }
3095 #endif /* XML_GE == 1 */
3096
3097 XML_Bool XMLCALL
3098 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
3099 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
3100 parser->m_reparseDeferralEnabled = enabled;
3101 return XML_TRUE;
3102 }
3103 return XML_FALSE;
3104 }
3105
3106 /* Initially tag->rawName always points into the parse buffer;
3107 for those TAG instances opened while the current parse buffer was
3108 processed, and not yet closed, we need to store tag->rawName in a more
3109 permanent location, since the parse buffer is about to be discarded.
3110 */
3111 static XML_Bool
3112 storeRawNames(XML_Parser parser) {
3113 TAG *tag = parser->m_tagStack;
3114 while (tag) {
3115 size_t bufSize;
3116 size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
3117 size_t rawNameLen;
3118 char *rawNameBuf = tag->buf + nameLen;
3119 /* Stop if already stored. Since m_tagStack is a stack, we can stop
3120 at the first entry that has already been copied; everything
3121 below it in the stack is already been accounted for in a
3122 previous call to this function.
3123 */
3124 if (tag->rawName == rawNameBuf)
3125 break;
3126 /* For reuse purposes we need to ensure that the
3127 size of tag->buf is a multiple of sizeof(XML_Char).
3128 */
3129 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
3130 /* Detect and prevent integer overflow. */
3131 if (rawNameLen > (size_t)INT_MAX - nameLen)
3132 return XML_FALSE;
3133 bufSize = nameLen + rawNameLen;
3134 if (bufSize > (size_t)(tag->bufEnd - tag->buf)) {
3135 char *temp = REALLOC(parser, tag->buf, bufSize);
3136 if (temp == NULL)
3137 return XML_FALSE;
3138 /* if tag->name.str points to tag->buf (only when namespace
3139 processing is off) then we have to update it
3140 */
3141 if (tag->name.str == (XML_Char *)tag->buf)
3142 tag->name.str = (XML_Char *)temp;
3143 /* if tag->name.localPart is set (when namespace processing is on)
3144 then update it as well, since it will always point into tag->buf
3145 */
3146 if (tag->name.localPart)
3147 tag->name.localPart
3148 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
3149 tag->buf = temp;
3150 tag->bufEnd = temp + bufSize;
3151 rawNameBuf = temp + nameLen;
3152 }
3153 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
3154 tag->rawName = rawNameBuf;
3155 tag = tag->parent;
3156 }
3157 return XML_TRUE;
3158 }
3159
3160 static enum XML_Error PTRCALL
3161 contentProcessor(XML_Parser parser, const char *start, const char *end,
3162 const char **endPtr) {
3163 enum XML_Error result = doContent(
3164 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
3165 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3166 XML_ACCOUNT_DIRECT);
3167 if (result == XML_ERROR_NONE) {
3168 if (! storeRawNames(parser))
3169 return XML_ERROR_NO_MEMORY;
3170 }
3171 return result;
3172 }
3173
3174 static enum XML_Error PTRCALL
3175 externalEntityInitProcessor(XML_Parser parser, const char *start,
3176 const char *end, const char **endPtr) {
3177 enum XML_Error result = initializeEncoding(parser);
3178 if (result != XML_ERROR_NONE)
3179 return result;
3180 parser->m_processor = externalEntityInitProcessor2;
3181 return externalEntityInitProcessor2(parser, start, end, endPtr);
3182 }
3183
3184 static enum XML_Error PTRCALL
3185 externalEntityInitProcessor2(XML_Parser parser, const char *start,
3186 const char *end, const char **endPtr) {
3187 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3188 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
3189 switch (tok) {
3190 case XML_TOK_BOM:
3191 #if XML_GE == 1
3192 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
3193 XML_ACCOUNT_DIRECT)) {
3194 accountingOnAbort(parser);
3195 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3196 }
3197 #endif /* XML_GE == 1 */
3198
3199 /* If we are at the end of the buffer, this would cause the next stage,
3200 i.e. externalEntityInitProcessor3, to pass control directly to
3201 doContent (by detecting XML_TOK_NONE) without processing any xml text
3202 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
3203 */
3204 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
3205 *endPtr = next;
3206 return XML_ERROR_NONE;
3207 }
3208 start = next;
3209 break;
3210 case XML_TOK_PARTIAL:
3211 if (! parser->m_parsingStatus.finalBuffer) {
3212 *endPtr = start;
3213 return XML_ERROR_NONE;
3214 }
3215 parser->m_eventPtr = start;
3216 return XML_ERROR_UNCLOSED_TOKEN;
3217 case XML_TOK_PARTIAL_CHAR:
3218 if (! parser->m_parsingStatus.finalBuffer) {
3219 *endPtr = start;
3220 return XML_ERROR_NONE;
3221 }
3222 parser->m_eventPtr = start;
3223 return XML_ERROR_PARTIAL_CHAR;
3224 }
3225 parser->m_processor = externalEntityInitProcessor3;
3226 return externalEntityInitProcessor3(parser, start, end, endPtr);
3227 }
3228
3229 static enum XML_Error PTRCALL
3230 externalEntityInitProcessor3(XML_Parser parser, const char *start,
3231 const char *end, const char **endPtr) {
3232 int tok;
3233 const char *next = start; /* XmlContentTok doesn't always set the last arg */
3234 parser->m_eventPtr = start;
3235 tok = XmlContentTok(parser->m_encoding, start, end, &next);
3236 /* Note: These bytes are accounted later in:
3237 - processXmlDecl
3238 - externalEntityContentProcessor
3239 */
3240 parser->m_eventEndPtr = next;
3241
3242 switch (tok) {
3243 case XML_TOK_XML_DECL: {
3244 enum XML_Error result;
3245 result = processXmlDecl(parser, 1, start, next);
3246 if (result != XML_ERROR_NONE)
3247 return result;
3248 switch (parser->m_parsingStatus.parsing) {
3249 case XML_SUSPENDED:
3250 *endPtr = next;
3251 return XML_ERROR_NONE;
3252 case XML_FINISHED:
3253 return XML_ERROR_ABORTED;
3254 case XML_PARSING:
3255 if (parser->m_reenter) {
3256 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
3257 }
3258 /* Fall through */
3259 default:
3260 start = next;
3261 }
3262 } break;
3263 case XML_TOK_PARTIAL:
3264 if (! parser->m_parsingStatus.finalBuffer) {
3265 *endPtr = start;
3266 return XML_ERROR_NONE;
3267 }
3268 return XML_ERROR_UNCLOSED_TOKEN;
3269 case XML_TOK_PARTIAL_CHAR:
3270 if (! parser->m_parsingStatus.finalBuffer) {
3271 *endPtr = start;
3272 return XML_ERROR_NONE;
3273 }
3274 return XML_ERROR_PARTIAL_CHAR;
3275 }
3276 parser->m_processor = externalEntityContentProcessor;
3277 parser->m_tagLevel = 1;
3278 return externalEntityContentProcessor(parser, start, end, endPtr);
3279 }
3280
3281 static enum XML_Error PTRCALL
3282 externalEntityContentProcessor(XML_Parser parser, const char *start,
3283 const char *end, const char **endPtr) {
3284 enum XML_Error result
3285 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
3286 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
3287 XML_ACCOUNT_ENTITY_EXPANSION);
3288 if (result == XML_ERROR_NONE) {
3289 if (! storeRawNames(parser))
3290 return XML_ERROR_NO_MEMORY;
3291 }
3292 return result;
3293 }
3294
3295 static enum XML_Error
3296 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
3297 const char *s, const char *end, const char **nextPtr,
3298 XML_Bool haveMore, enum XML_Account account) {
3299 /* save one level of indirection */
3300 DTD *const dtd = parser->m_dtd;
3301
3302 const char **eventPP;
3303 const char **eventEndPP;
3304 if (enc == parser->m_encoding) {
3305 eventPP = &parser->m_eventPtr;
3306 eventEndPP = &parser->m_eventEndPtr;
3307 } else {
3308 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3309 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3310 }
3311 *eventPP = s;
3312
3313 for (;;) {
3314 const char *next = s; /* XmlContentTok doesn't always set the last arg */
3315 int tok = XmlContentTok(enc, s, end, &next);
3316 #if XML_GE == 1
3317 const char *accountAfter
3318 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
3319 ? (haveMore ? s /* i.e. 0 bytes */ : end)
3320 : next;
3321 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
3322 account)) {
3323 accountingOnAbort(parser);
3324 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3325 }
3326 #endif
3327 *eventEndPP = next;
3328 switch (tok) {
3329 case XML_TOK_TRAILING_CR:
3330 if (haveMore) {
3331 *nextPtr = s;
3332 return XML_ERROR_NONE;
3333 }
3334 *eventEndPP = end;
3335 if (parser->m_characterDataHandler) {
3336 XML_Char c = 0xA;
3337 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3338 } else if (parser->m_defaultHandler)
3339 reportDefault(parser, enc, s, end);
3340 /* We are at the end of the final buffer, should we check for
3341 XML_SUSPENDED, XML_FINISHED?
3342 */
3343 if (startTagLevel == 0)
3344 return XML_ERROR_NO_ELEMENTS;
3345 if (parser->m_tagLevel != startTagLevel)
3346 return XML_ERROR_ASYNC_ENTITY;
3347 *nextPtr = end;
3348 return XML_ERROR_NONE;
3349 case XML_TOK_NONE:
3350 if (haveMore) {
3351 *nextPtr = s;
3352 return XML_ERROR_NONE;
3353 }
3354 if (startTagLevel > 0) {
3355 if (parser->m_tagLevel != startTagLevel)
3356 return XML_ERROR_ASYNC_ENTITY;
3357 *nextPtr = s;
3358 return XML_ERROR_NONE;
3359 }
3360 return XML_ERROR_NO_ELEMENTS;
3361 case XML_TOK_INVALID:
3362 *eventPP = next;
3363 return XML_ERROR_INVALID_TOKEN;
3364 case XML_TOK_PARTIAL:
3365 if (haveMore) {
3366 *nextPtr = s;
3367 return XML_ERROR_NONE;
3368 }
3369 return XML_ERROR_UNCLOSED_TOKEN;
3370 case XML_TOK_PARTIAL_CHAR:
3371 if (haveMore) {
3372 *nextPtr = s;
3373 return XML_ERROR_NONE;
3374 }
3375 return XML_ERROR_PARTIAL_CHAR;
3376 case XML_TOK_ENTITY_REF: {
3377 const XML_Char *name;
3378 ENTITY *entity;
3379 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3380 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3381 if (ch) {
3382 #if XML_GE == 1
3383 /* NOTE: We are replacing 4-6 characters original input for 1 character
3384 * so there is no amplification and hence recording without
3385 * protection. */
3386 accountingDiffTolerated(parser, tok, (char *)&ch,
3387 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3388 XML_ACCOUNT_ENTITY_EXPANSION);
3389 #endif /* XML_GE == 1 */
3390 if (parser->m_characterDataHandler)
3391 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3392 else if (parser->m_defaultHandler)
3393 reportDefault(parser, enc, s, next);
3394 break;
3395 }
3396 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3397 next - enc->minBytesPerChar);
3398 if (! name)
3399 return XML_ERROR_NO_MEMORY;
3400 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3401 poolDiscard(&dtd->pool);
3402 /* First, determine if a check for an existing declaration is needed;
3403 if yes, check that the entity exists, and that it is internal,
3404 otherwise call the skipped entity or default handler.
3405 */
3406 if (! dtd->hasParamEntityRefs || dtd->standalone) {
3407 if (! entity)
3408 return XML_ERROR_UNDEFINED_ENTITY;
3409 else if (! entity->is_internal)
3410 return XML_ERROR_ENTITY_DECLARED_IN_PE;
3411 } else if (! entity) {
3412 if (parser->m_skippedEntityHandler)
3413 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3414 else if (parser->m_defaultHandler)
3415 reportDefault(parser, enc, s, next);
3416 break;
3417 }
3418 if (entity->open)
3419 return XML_ERROR_RECURSIVE_ENTITY_REF;
3420 if (entity->notation)
3421 return XML_ERROR_BINARY_ENTITY_REF;
3422 if (entity->textPtr) {
3423 enum XML_Error result;
3424 if (! parser->m_defaultExpandInternalEntities) {
3425 if (parser->m_skippedEntityHandler)
3426 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3427 0);
3428 else if (parser->m_defaultHandler)
3429 reportDefault(parser, enc, s, next);
3430 break;
3431 }
3432 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3433 if (result != XML_ERROR_NONE)
3434 return result;
3435 } else if (parser->m_externalEntityRefHandler) {
3436 const XML_Char *context;
3437 entity->open = XML_TRUE;
3438 context = getContext(parser);
3439 entity->open = XML_FALSE;
3440 if (! context)
3441 return XML_ERROR_NO_MEMORY;
3442 if (! parser->m_externalEntityRefHandler(
3443 parser->m_externalEntityRefHandlerArg, context, entity->base,
3444 entity->systemId, entity->publicId))
3445 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3446 poolDiscard(&parser->m_tempPool);
3447 } else if (parser->m_defaultHandler)
3448 reportDefault(parser, enc, s, next);
3449 break;
3450 }
3451 case XML_TOK_START_TAG_NO_ATTS:
3452 /* fall through */
3453 case XML_TOK_START_TAG_WITH_ATTS: {
3454 TAG *tag;
3455 enum XML_Error result;
3456 XML_Char *toPtr;
3457 if (parser->m_freeTagList) {
3458 tag = parser->m_freeTagList;
3459 parser->m_freeTagList = parser->m_freeTagList->parent;
3460 } else {
3461 tag = MALLOC(parser, sizeof(TAG));
3462 if (! tag)
3463 return XML_ERROR_NO_MEMORY;
3464 tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE);
3465 if (! tag->buf) {
3466 FREE(parser, tag);
3467 return XML_ERROR_NO_MEMORY;
3468 }
3469 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3470 }
3471 tag->bindings = NULL;
3472 tag->parent = parser->m_tagStack;
3473 parser->m_tagStack = tag;
3474 tag->name.localPart = NULL;
3475 tag->name.prefix = NULL;
3476 tag->rawName = s + enc->minBytesPerChar;
3477 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3478 ++parser->m_tagLevel;
3479 {
3480 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3481 const char *fromPtr = tag->rawName;
3482 toPtr = (XML_Char *)tag->buf;
3483 for (;;) {
3484 int bufSize;
3485 int convLen;
3486 const enum XML_Convert_Result convert_res
3487 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3488 (ICHAR *)tag->bufEnd - 1);
3489 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3490 if ((fromPtr >= rawNameEnd)
3491 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3492 tag->name.strLen = convLen;
3493 break;
3494 }
3495 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3496 {
3497 char *temp = REALLOC(parser, tag->buf, bufSize);
3498 if (temp == NULL)
3499 return XML_ERROR_NO_MEMORY;
3500 tag->buf = temp;
3501 tag->bufEnd = temp + bufSize;
3502 toPtr = (XML_Char *)temp + convLen;
3503 }
3504 }
3505 }
3506 tag->name.str = (XML_Char *)tag->buf;
3507 *toPtr = XML_T('\0');
3508 result
3509 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3510 if (result)
3511 return result;
3512 if (parser->m_startElementHandler)
3513 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3514 (const XML_Char **)parser->m_atts);
3515 else if (parser->m_defaultHandler)
3516 reportDefault(parser, enc, s, next);
3517 poolClear(&parser->m_tempPool);
3518 break;
3519 }
3520 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3521 /* fall through */
3522 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3523 const char *rawName = s + enc->minBytesPerChar;
3524 enum XML_Error result;
3525 BINDING *bindings = NULL;
3526 XML_Bool noElmHandlers = XML_TRUE;
3527 TAG_NAME name;
3528 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3529 rawName + XmlNameLength(enc, rawName));
3530 if (! name.str)
3531 return XML_ERROR_NO_MEMORY;
3532 poolFinish(&parser->m_tempPool);
3533 result = storeAtts(parser, enc, s, &name, &bindings,
3534 XML_ACCOUNT_NONE /* token spans whole start tag */);
3535 if (result != XML_ERROR_NONE) {
3536 freeBindings(parser, bindings);
3537 return result;
3538 }
3539 poolFinish(&parser->m_tempPool);
3540 if (parser->m_startElementHandler) {
3541 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3542 (const XML_Char **)parser->m_atts);
3543 noElmHandlers = XML_FALSE;
3544 }
3545 if (parser->m_endElementHandler) {
3546 if (parser->m_startElementHandler)
3547 *eventPP = *eventEndPP;
3548 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3549 noElmHandlers = XML_FALSE;
3550 }
3551 if (noElmHandlers && parser->m_defaultHandler)
3552 reportDefault(parser, enc, s, next);
3553 poolClear(&parser->m_tempPool);
3554 freeBindings(parser, bindings);
3555 }
3556 if ((parser->m_tagLevel == 0)
3557 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3558 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3559 || (parser->m_parsingStatus.parsing == XML_PARSING
3560 && parser->m_reenter))
3561 parser->m_processor = epilogProcessor;
3562 else
3563 return epilogProcessor(parser, next, end, nextPtr);
3564 }
3565 break;
3566 case XML_TOK_END_TAG:
3567 if (parser->m_tagLevel == startTagLevel)
3568 return XML_ERROR_ASYNC_ENTITY;
3569 else {
3570 int len;
3571 const char *rawName;
3572 TAG *tag = parser->m_tagStack;
3573 rawName = s + enc->minBytesPerChar * 2;
3574 len = XmlNameLength(enc, rawName);
3575 if (len != tag->rawNameLength
3576 || memcmp(tag->rawName, rawName, len) != 0) {
3577 *eventPP = rawName;
3578 return XML_ERROR_TAG_MISMATCH;
3579 }
3580 parser->m_tagStack = tag->parent;
3581 tag->parent = parser->m_freeTagList;
3582 parser->m_freeTagList = tag;
3583 --parser->m_tagLevel;
3584 if (parser->m_endElementHandler) {
3585 const XML_Char *localPart;
3586 const XML_Char *prefix;
3587 XML_Char *uri;
3588 localPart = tag->name.localPart;
3589 if (parser->m_ns && localPart) {
3590 /* localPart and prefix may have been overwritten in
3591 tag->name.str, since this points to the binding->uri
3592 buffer which gets reused; so we have to add them again
3593 */
3594 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3595 /* don't need to check for space - already done in storeAtts() */
3596 while (*localPart)
3597 *uri++ = *localPart++;
3598 prefix = tag->name.prefix;
3599 if (parser->m_ns_triplets && prefix) {
3600 *uri++ = parser->m_namespaceSeparator;
3601 while (*prefix)
3602 *uri++ = *prefix++;
3603 }
3604 *uri = XML_T('\0');
3605 }
3606 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3607 } else if (parser->m_defaultHandler)
3608 reportDefault(parser, enc, s, next);
3609 while (tag->bindings) {
3610 BINDING *b = tag->bindings;
3611 if (parser->m_endNamespaceDeclHandler)
3612 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3613 b->prefix->name);
3614 tag->bindings = tag->bindings->nextTagBinding;
3615 b->nextTagBinding = parser->m_freeBindingList;
3616 parser->m_freeBindingList = b;
3617 b->prefix->binding = b->prevPrefixBinding;
3618 }
3619 if ((parser->m_tagLevel == 0)
3620 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3621 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3622 || (parser->m_parsingStatus.parsing == XML_PARSING
3623 && parser->m_reenter))
3624 parser->m_processor = epilogProcessor;
3625 else
3626 return epilogProcessor(parser, next, end, nextPtr);
3627 }
3628 }
3629 break;
3630 case XML_TOK_CHAR_REF: {
3631 int n = XmlCharRefNumber(enc, s);
3632 if (n < 0)
3633 return XML_ERROR_BAD_CHAR_REF;
3634 if (parser->m_characterDataHandler) {
3635 XML_Char buf[XML_ENCODE_MAX];
3636 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3637 XmlEncode(n, (ICHAR *)buf));
3638 } else if (parser->m_defaultHandler)
3639 reportDefault(parser, enc, s, next);
3640 } break;
3641 case XML_TOK_XML_DECL:
3642 return XML_ERROR_MISPLACED_XML_PI;
3643 case XML_TOK_DATA_NEWLINE:
3644 if (parser->m_characterDataHandler) {
3645 XML_Char c = 0xA;
3646 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3647 } else if (parser->m_defaultHandler)
3648 reportDefault(parser, enc, s, next);
3649 break;
3650 case XML_TOK_CDATA_SECT_OPEN: {
3651 enum XML_Error result;
3652 if (parser->m_startCdataSectionHandler)
3653 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3654 /* BEGIN disabled code */
3655 /* Suppose you doing a transformation on a document that involves
3656 changing only the character data. You set up a defaultHandler
3657 and a characterDataHandler. The defaultHandler simply copies
3658 characters through. The characterDataHandler does the
3659 transformation and writes the characters out escaping them as
3660 necessary. This case will fail to work if we leave out the
3661 following two lines (because & and < inside CDATA sections will
3662 be incorrectly escaped).
3663
3664 However, now we have a start/endCdataSectionHandler, so it seems
3665 easier to let the user deal with this.
3666 */
3667 else if ((0) && parser->m_characterDataHandler)
3668 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3669 0);
3670 /* END disabled code */
3671 else if (parser->m_defaultHandler)
3672 reportDefault(parser, enc, s, next);
3673 result
3674 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3675 if (result != XML_ERROR_NONE)
3676 return result;
3677 else if (! next) {
3678 parser->m_processor = cdataSectionProcessor;
3679 return result;
3680 }
3681 } break;
3682 case XML_TOK_TRAILING_RSQB:
3683 if (haveMore) {
3684 *nextPtr = s;
3685 return XML_ERROR_NONE;
3686 }
3687 if (parser->m_characterDataHandler) {
3688 if (MUST_CONVERT(enc, s)) {
3689 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3690 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3691 parser->m_characterDataHandler(
3692 parser->m_handlerArg, parser->m_dataBuf,
3693 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3694 } else
3695 parser->m_characterDataHandler(
3696 parser->m_handlerArg, (const XML_Char *)s,
3697 (int)((const XML_Char *)end - (const XML_Char *)s));
3698 } else if (parser->m_defaultHandler)
3699 reportDefault(parser, enc, s, end);
3700 /* We are at the end of the final buffer, should we check for
3701 XML_SUSPENDED, XML_FINISHED?
3702 */
3703 if (startTagLevel == 0) {
3704 *eventPP = end;
3705 return XML_ERROR_NO_ELEMENTS;
3706 }
3707 if (parser->m_tagLevel != startTagLevel) {
3708 *eventPP = end;
3709 return XML_ERROR_ASYNC_ENTITY;
3710 }
3711 *nextPtr = end;
3712 return XML_ERROR_NONE;
3713 case XML_TOK_DATA_CHARS: {
3714 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3715 if (charDataHandler) {
3716 if (MUST_CONVERT(enc, s)) {
3717 for (;;) {
3718 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3719 const enum XML_Convert_Result convert_res = XmlConvert(
3720 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3721 *eventEndPP = s;
3722 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3723 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3724 if ((convert_res == XML_CONVERT_COMPLETED)
3725 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3726 break;
3727 *eventPP = s;
3728 }
3729 } else
3730 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3731 (int)((const XML_Char *)next - (const XML_Char *)s));
3732 } else if (parser->m_defaultHandler)
3733 reportDefault(parser, enc, s, next);
3734 } break;
3735 case XML_TOK_PI:
3736 if (! reportProcessingInstruction(parser, enc, s, next))
3737 return XML_ERROR_NO_MEMORY;
3738 break;
3739 case XML_TOK_COMMENT:
3740 if (! reportComment(parser, enc, s, next))
3741 return XML_ERROR_NO_MEMORY;
3742 break;
3743 default:
3744 /* All of the tokens produced by XmlContentTok() have their own
3745 * explicit cases, so this default is not strictly necessary.
3746 * However it is a useful safety net, so we retain the code and
3747 * simply exclude it from the coverage tests.
3748 *
3749 * LCOV_EXCL_START
3750 */
3751 if (parser->m_defaultHandler)
3752 reportDefault(parser, enc, s, next);
3753 break;
3754 /* LCOV_EXCL_STOP */
3755 }
3756 switch (parser->m_parsingStatus.parsing) {
3757 case XML_SUSPENDED:
3758 *eventPP = next;
3759 *nextPtr = next;
3760 return XML_ERROR_NONE;
3761 case XML_FINISHED:
3762 *eventPP = next;
3763 return XML_ERROR_ABORTED;
3764 case XML_PARSING:
3765 if (parser->m_reenter) {
3766 *nextPtr = next;
3767 return XML_ERROR_NONE;
3768 }
3769 /* Fall through */
3770 default:;
3771 *eventPP = s = next;
3772 }
3773 }
3774 /* not reached */
3775 }
3776
3777 /* This function does not call free() on the allocated memory, merely
3778 * moving it to the parser's m_freeBindingList where it can be freed or
3779 * reused as appropriate.
3780 */
3781 static void
3782 freeBindings(XML_Parser parser, BINDING *bindings) {
3783 while (bindings) {
3784 BINDING *b = bindings;
3785
3786 /* m_startNamespaceDeclHandler will have been called for this
3787 * binding in addBindings(), so call the end handler now.
3788 */
3789 if (parser->m_endNamespaceDeclHandler)
3790 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3791
3792 bindings = bindings->nextTagBinding;
3793 b->nextTagBinding = parser->m_freeBindingList;
3794 parser->m_freeBindingList = b;
3795 b->prefix->binding = b->prevPrefixBinding;
3796 }
3797 }
3798
3799 /* Precondition: all arguments must be non-NULL;
3800 Purpose:
3801 - normalize attributes
3802 - check attributes for well-formedness
3803 - generate namespace aware attribute names (URI, prefix)
3804 - build list of attributes for startElementHandler
3805 - default attributes
3806 - process namespace declarations (check and report them)
3807 - generate namespace aware element name (URI, prefix)
3808 */
3809 static enum XML_Error
3810 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3811 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3812 enum XML_Account account) {
3813 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3814 ELEMENT_TYPE *elementType;
3815 int nDefaultAtts;
3816 const XML_Char **appAtts; /* the attribute list for the application */
3817 int attIndex = 0;
3818 int prefixLen;
3819 int i;
3820 int n;
3821 XML_Char *uri;
3822 int nPrefixes = 0;
3823 BINDING *binding;
3824 const XML_Char *localPart;
3825
3826 /* lookup the element type name */
3827 elementType
3828 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3829 if (! elementType) {
3830 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3831 if (! name)
3832 return XML_ERROR_NO_MEMORY;
3833 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3834 sizeof(ELEMENT_TYPE));
3835 if (! elementType)
3836 return XML_ERROR_NO_MEMORY;
3837 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3838 return XML_ERROR_NO_MEMORY;
3839 }
3840 nDefaultAtts = elementType->nDefaultAtts;
3841
3842 /* get the attributes from the tokenizer */
3843 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3844
3845 /* Detect and prevent integer overflow */
3846 if (n > INT_MAX - nDefaultAtts) {
3847 return XML_ERROR_NO_MEMORY;
3848 }
3849
3850 if (n + nDefaultAtts > parser->m_attsSize) {
3851 int oldAttsSize = parser->m_attsSize;
3852 ATTRIBUTE *temp;
3853 #ifdef XML_ATTR_INFO
3854 XML_AttrInfo *temp2;
3855 #endif
3856
3857 /* Detect and prevent integer overflow */
3858 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3859 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3860 return XML_ERROR_NO_MEMORY;
3861 }
3862
3863 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3864
3865 /* Detect and prevent integer overflow.
3866 * The preprocessor guard addresses the "always false" warning
3867 * from -Wtype-limits on platforms where
3868 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3869 #if UINT_MAX >= SIZE_MAX
3870 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3871 parser->m_attsSize = oldAttsSize;
3872 return XML_ERROR_NO_MEMORY;
3873 }
3874 #endif
3875
3876 temp = REALLOC(parser, parser->m_atts,
3877 parser->m_attsSize * sizeof(ATTRIBUTE));
3878 if (temp == NULL) {
3879 parser->m_attsSize = oldAttsSize;
3880 return XML_ERROR_NO_MEMORY;
3881 }
3882 parser->m_atts = temp;
3883 #ifdef XML_ATTR_INFO
3884 /* Detect and prevent integer overflow.
3885 * The preprocessor guard addresses the "always false" warning
3886 * from -Wtype-limits on platforms where
3887 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3888 # if UINT_MAX >= SIZE_MAX
3889 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3890 parser->m_attsSize = oldAttsSize;
3891 return XML_ERROR_NO_MEMORY;
3892 }
3893 # endif
3894
3895 temp2 = REALLOC(parser, parser->m_attInfo,
3896 parser->m_attsSize * sizeof(XML_AttrInfo));
3897 if (temp2 == NULL) {
3898 parser->m_attsSize = oldAttsSize;
3899 return XML_ERROR_NO_MEMORY;
3900 }
3901 parser->m_attInfo = temp2;
3902 #endif
3903 if (n > oldAttsSize)
3904 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3905 }
3906
3907 appAtts = (const XML_Char **)parser->m_atts;
3908 for (i = 0; i < n; i++) {
3909 ATTRIBUTE *currAtt = &parser->m_atts[i];
3910 #ifdef XML_ATTR_INFO
3911 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3912 #endif
3913 /* add the name and value to the attribute list */
3914 ATTRIBUTE_ID *attId
3915 = getAttributeId(parser, enc, currAtt->name,
3916 currAtt->name + XmlNameLength(enc, currAtt->name));
3917 if (! attId)
3918 return XML_ERROR_NO_MEMORY;
3919 #ifdef XML_ATTR_INFO
3920 currAttInfo->nameStart
3921 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3922 currAttInfo->nameEnd
3923 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3924 currAttInfo->valueStart = parser->m_parseEndByteIndex
3925 - (parser->m_parseEndPtr - currAtt->valuePtr);
3926 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3927 - (parser->m_parseEndPtr - currAtt->valueEnd);
3928 #endif
3929 /* Detect duplicate attributes by their QNames. This does not work when
3930 namespace processing is turned on and different prefixes for the same
3931 namespace are used. For this case we have a check further down.
3932 */
3933 if ((attId->name)[-1]) {
3934 if (enc == parser->m_encoding)
3935 parser->m_eventPtr = parser->m_atts[i].name;
3936 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3937 }
3938 (attId->name)[-1] = 1;
3939 appAtts[attIndex++] = attId->name;
3940 if (! parser->m_atts[i].normalized) {
3941 enum XML_Error result;
3942 XML_Bool isCdata = XML_TRUE;
3943
3944 /* figure out whether declared as other than CDATA */
3945 if (attId->maybeTokenized) {
3946 int j;
3947 for (j = 0; j < nDefaultAtts; j++) {
3948 if (attId == elementType->defaultAtts[j].id) {
3949 isCdata = elementType->defaultAtts[j].isCdata;
3950 break;
3951 }
3952 }
3953 }
3954
3955 /* normalize the attribute value */
3956 result = storeAttributeValue(
3957 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3958 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3959 if (result)
3960 return result;
3961 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3962 poolFinish(&parser->m_tempPool);
3963 } else {
3964 /* the value did not need normalizing */
3965 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3966 parser->m_atts[i].valuePtr,
3967 parser->m_atts[i].valueEnd);
3968 if (appAtts[attIndex] == 0)
3969 return XML_ERROR_NO_MEMORY;
3970 poolFinish(&parser->m_tempPool);
3971 }
3972 /* handle prefixed attribute names */
3973 if (attId->prefix) {
3974 if (attId->xmlns) {
3975 /* deal with namespace declarations here */
3976 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3977 appAtts[attIndex], bindingsPtr);
3978 if (result)
3979 return result;
3980 --attIndex;
3981 } else {
3982 /* deal with other prefixed names later */
3983 attIndex++;
3984 nPrefixes++;
3985 (attId->name)[-1] = 2;
3986 }
3987 } else
3988 attIndex++;
3989 }
3990
3991 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3992 parser->m_nSpecifiedAtts = attIndex;
3993 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3994 for (i = 0; i < attIndex; i += 2)
3995 if (appAtts[i] == elementType->idAtt->name) {
3996 parser->m_idAttIndex = i;
3997 break;
3998 }
3999 } else
4000 parser->m_idAttIndex = -1;
4001
4002 /* do attribute defaulting */
4003 for (i = 0; i < nDefaultAtts; i++) {
4004 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
4005 if (! (da->id->name)[-1] && da->value) {
4006 if (da->id->prefix) {
4007 if (da->id->xmlns) {
4008 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
4009 da->value, bindingsPtr);
4010 if (result)
4011 return result;
4012 } else {
4013 (da->id->name)[-1] = 2;
4014 nPrefixes++;
4015 appAtts[attIndex++] = da->id->name;
4016 appAtts[attIndex++] = da->value;
4017 }
4018 } else {
4019 (da->id->name)[-1] = 1;
4020 appAtts[attIndex++] = da->id->name;
4021 appAtts[attIndex++] = da->value;
4022 }
4023 }
4024 }
4025 appAtts[attIndex] = 0;
4026
4027 /* expand prefixed attribute names, check for duplicates,
4028 and clear flags that say whether attributes were specified */
4029 i = 0;
4030 if (nPrefixes) {
4031 unsigned int j; /* hash table index */
4032 unsigned long version = parser->m_nsAttsVersion;
4033
4034 /* Detect and prevent invalid shift */
4035 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
4036 return XML_ERROR_NO_MEMORY;
4037 }
4038
4039 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
4040 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
4041 /* size of hash table must be at least 2 * (# of prefixed attributes) */
4042 if ((nPrefixes << 1)
4043 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
4044 NS_ATT *temp;
4045 /* hash table size must also be a power of 2 and >= 8 */
4046 while (nPrefixes >> parser->m_nsAttsPower++)
4047 ;
4048 if (parser->m_nsAttsPower < 3)
4049 parser->m_nsAttsPower = 3;
4050
4051 /* Detect and prevent invalid shift */
4052 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
4053 /* Restore actual size of memory in m_nsAtts */
4054 parser->m_nsAttsPower = oldNsAttsPower;
4055 return XML_ERROR_NO_MEMORY;
4056 }
4057
4058 nsAttsSize = 1u << parser->m_nsAttsPower;
4059
4060 /* Detect and prevent integer overflow.
4061 * The preprocessor guard addresses the "always false" warning
4062 * from -Wtype-limits on platforms where
4063 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4064 #if UINT_MAX >= SIZE_MAX
4065 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
4066 /* Restore actual size of memory in m_nsAtts */
4067 parser->m_nsAttsPower = oldNsAttsPower;
4068 return XML_ERROR_NO_MEMORY;
4069 }
4070 #endif
4071
4072 temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
4073 if (! temp) {
4074 /* Restore actual size of memory in m_nsAtts */
4075 parser->m_nsAttsPower = oldNsAttsPower;
4076 return XML_ERROR_NO_MEMORY;
4077 }
4078 parser->m_nsAtts = temp;
4079 version = 0; /* force re-initialization of m_nsAtts hash table */
4080 }
4081 /* using a version flag saves us from initializing m_nsAtts every time */
4082 if (! version) { /* initialize version flags when version wraps around */
4083 version = INIT_ATTS_VERSION;
4084 for (j = nsAttsSize; j != 0;)
4085 parser->m_nsAtts[--j].version = version;
4086 }
4087 parser->m_nsAttsVersion = --version;
4088
4089 /* expand prefixed names and check for duplicates */
4090 for (; i < attIndex; i += 2) {
4091 const XML_Char *s = appAtts[i];
4092 if (s[-1] == 2) { /* prefixed */
4093 ATTRIBUTE_ID *id;
4094 const BINDING *b;
4095 unsigned long uriHash;
4096 struct siphash sip_state;
4097 struct sipkey sip_key;
4098
4099 copy_salt_to_sipkey(parser, &sip_key);
4100 sip24_init(&sip_state, &sip_key);
4101
4102 ((XML_Char *)s)[-1] = 0; /* clear flag */
4103 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
4104 if (! id || ! id->prefix) {
4105 /* This code is walking through the appAtts array, dealing
4106 * with (in this case) a prefixed attribute name. To be in
4107 * the array, the attribute must have already been bound, so
4108 * has to have passed through the hash table lookup once
4109 * already. That implies that an entry for it already
4110 * exists, so the lookup above will return a pointer to
4111 * already allocated memory. There is no opportunaity for
4112 * the allocator to fail, so the condition above cannot be
4113 * fulfilled.
4114 *
4115 * Since it is difficult to be certain that the above
4116 * analysis is complete, we retain the test and merely
4117 * remove the code from coverage tests.
4118 */
4119 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4120 }
4121 b = id->prefix->binding;
4122 if (! b)
4123 return XML_ERROR_UNBOUND_PREFIX;
4124
4125 for (j = 0; j < (unsigned int)b->uriLen; j++) {
4126 const XML_Char c = b->uri[j];
4127 if (! poolAppendChar(&parser->m_tempPool, c))
4128 return XML_ERROR_NO_MEMORY;
4129 }
4130
4131 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
4132
4133 while (*s++ != XML_T(ASCII_COLON))
4134 ;
4135
4136 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
4137
4138 do { /* copies null terminator */
4139 if (! poolAppendChar(&parser->m_tempPool, *s))
4140 return XML_ERROR_NO_MEMORY;
4141 } while (*s++);
4142
4143 uriHash = (unsigned long)sip24_final(&sip_state);
4144
4145 { /* Check hash table for duplicate of expanded name (uriName).
4146 Derived from code in lookup(parser, HASH_TABLE *table, ...).
4147 */
4148 unsigned char step = 0;
4149 unsigned long mask = nsAttsSize - 1;
4150 j = uriHash & mask; /* index into hash table */
4151 while (parser->m_nsAtts[j].version == version) {
4152 /* for speed we compare stored hash values first */
4153 if (uriHash == parser->m_nsAtts[j].hash) {
4154 const XML_Char *s1 = poolStart(&parser->m_tempPool);
4155 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
4156 /* s1 is null terminated, but not s2 */
4157 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
4158 ;
4159 if (*s1 == 0)
4160 return XML_ERROR_DUPLICATE_ATTRIBUTE;
4161 }
4162 if (! step)
4163 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
4164 j < step ? (j += nsAttsSize - step) : (j -= step);
4165 }
4166 }
4167
4168 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
4169 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
4170 s = b->prefix->name;
4171 do {
4172 if (! poolAppendChar(&parser->m_tempPool, *s))
4173 return XML_ERROR_NO_MEMORY;
4174 } while (*s++);
4175 }
4176
4177 /* store expanded name in attribute list */
4178 s = poolStart(&parser->m_tempPool);
4179 poolFinish(&parser->m_tempPool);
4180 appAtts[i] = s;
4181
4182 /* fill empty slot with new version, uriName and hash value */
4183 parser->m_nsAtts[j].version = version;
4184 parser->m_nsAtts[j].hash = uriHash;
4185 parser->m_nsAtts[j].uriName = s;
4186
4187 if (! --nPrefixes) {
4188 i += 2;
4189 break;
4190 }
4191 } else /* not prefixed */
4192 ((XML_Char *)s)[-1] = 0; /* clear flag */
4193 }
4194 }
4195 /* clear flags for the remaining attributes */
4196 for (; i < attIndex; i += 2)
4197 ((XML_Char *)(appAtts[i]))[-1] = 0;
4198 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
4199 binding->attId->name[-1] = 0;
4200
4201 if (! parser->m_ns)
4202 return XML_ERROR_NONE;
4203
4204 /* expand the element type name */
4205 if (elementType->prefix) {
4206 binding = elementType->prefix->binding;
4207 if (! binding)
4208 return XML_ERROR_UNBOUND_PREFIX;
4209 localPart = tagNamePtr->str;
4210 while (*localPart++ != XML_T(ASCII_COLON))
4211 ;
4212 } else if (dtd->defaultPrefix.binding) {
4213 binding = dtd->defaultPrefix.binding;
4214 localPart = tagNamePtr->str;
4215 } else
4216 return XML_ERROR_NONE;
4217 prefixLen = 0;
4218 if (parser->m_ns_triplets && binding->prefix->name) {
4219 while (binding->prefix->name[prefixLen++])
4220 ; /* prefixLen includes null terminator */
4221 }
4222 tagNamePtr->localPart = localPart;
4223 tagNamePtr->uriLen = binding->uriLen;
4224 tagNamePtr->prefix = binding->prefix->name;
4225 tagNamePtr->prefixLen = prefixLen;
4226 for (i = 0; localPart[i++];)
4227 ; /* i includes null terminator */
4228
4229 /* Detect and prevent integer overflow */
4230 if (binding->uriLen > INT_MAX - prefixLen
4231 || i > INT_MAX - (binding->uriLen + prefixLen)) {
4232 return XML_ERROR_NO_MEMORY;
4233 }
4234
4235 n = i + binding->uriLen + prefixLen;
4236 if (n > binding->uriAlloc) {
4237 TAG *p;
4238
4239 /* Detect and prevent integer overflow */
4240 if (n > INT_MAX - EXPAND_SPARE) {
4241 return XML_ERROR_NO_MEMORY;
4242 }
4243 /* Detect and prevent integer overflow.
4244 * The preprocessor guard addresses the "always false" warning
4245 * from -Wtype-limits on platforms where
4246 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4247 #if UINT_MAX >= SIZE_MAX
4248 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4249 return XML_ERROR_NO_MEMORY;
4250 }
4251 #endif
4252
4253 uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
4254 if (! uri)
4255 return XML_ERROR_NO_MEMORY;
4256 binding->uriAlloc = n + EXPAND_SPARE;
4257 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
4258 for (p = parser->m_tagStack; p; p = p->parent)
4259 if (p->name.str == binding->uri)
4260 p->name.str = uri;
4261 FREE(parser, binding->uri);
4262 binding->uri = uri;
4263 }
4264 /* if m_namespaceSeparator != '\0' then uri includes it already */
4265 uri = binding->uri + binding->uriLen;
4266 memcpy(uri, localPart, i * sizeof(XML_Char));
4267 /* we always have a namespace separator between localPart and prefix */
4268 if (prefixLen) {
4269 uri += i - 1;
4270 *uri = parser->m_namespaceSeparator; /* replace null terminator */
4271 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
4272 }
4273 tagNamePtr->str = binding->uri;
4274 return XML_ERROR_NONE;
4275 }
4276
4277 static XML_Bool
4278 is_rfc3986_uri_char(XML_Char candidate) {
4279 // For the RFC 3986 ANBF grammar see
4280 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
4281
4282 switch (candidate) {
4283 // From rule "ALPHA" (uppercase half)
4284 case 'A':
4285 case 'B':
4286 case 'C':
4287 case 'D':
4288 case 'E':
4289 case 'F':
4290 case 'G':
4291 case 'H':
4292 case 'I':
4293 case 'J':
4294 case 'K':
4295 case 'L':
4296 case 'M':
4297 case 'N':
4298 case 'O':
4299 case 'P':
4300 case 'Q':
4301 case 'R':
4302 case 'S':
4303 case 'T':
4304 case 'U':
4305 case 'V':
4306 case 'W':
4307 case 'X':
4308 case 'Y':
4309 case 'Z':
4310
4311 // From rule "ALPHA" (lowercase half)
4312 case 'a':
4313 case 'b':
4314 case 'c':
4315 case 'd':
4316 case 'e':
4317 case 'f':
4318 case 'g':
4319 case 'h':
4320 case 'i':
4321 case 'j':
4322 case 'k':
4323 case 'l':
4324 case 'm':
4325 case 'n':
4326 case 'o':
4327 case 'p':
4328 case 'q':
4329 case 'r':
4330 case 's':
4331 case 't':
4332 case 'u':
4333 case 'v':
4334 case 'w':
4335 case 'x':
4336 case 'y':
4337 case 'z':
4338
4339 // From rule "DIGIT"
4340 case '0':
4341 case '1':
4342 case '2':
4343 case '3':
4344 case '4':
4345 case '5':
4346 case '6':
4347 case '7':
4348 case '8':
4349 case '9':
4350
4351 // From rule "pct-encoded"
4352 case '%':
4353
4354 // From rule "unreserved"
4355 case '-':
4356 case '.':
4357 case '_':
4358 case '~':
4359
4360 // From rule "gen-delims"
4361 case ':':
4362 case '/':
4363 case '?':
4364 case '#':
4365 case '[':
4366 case ']':
4367 case '@':
4368
4369 // From rule "sub-delims"
4370 case '!':
4371 case '$':
4372 case '&':
4373 case '\'':
4374 case '(':
4375 case ')':
4376 case '*':
4377 case '+':
4378 case ',':
4379 case ';':
4380 case '=':
4381 return XML_TRUE;
4382
4383 default:
4384 return XML_FALSE;
4385 }
4386 }
4387
4388 /* addBinding() overwrites the value of prefix->binding without checking.
4389 Therefore one must keep track of the old value outside of addBinding().
4390 */
4391 static enum XML_Error
4392 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4393 const XML_Char *uri, BINDING **bindingsPtr) {
4394 // "http://www.w3.org/XML/1998/namespace"
4395 static const XML_Char xmlNamespace[]
4396 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4397 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4398 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4399 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4400 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4401 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4402 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4403 ASCII_e, '\0'};
4404 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4405 // "http://www.w3.org/2000/xmlns/"
4406 static const XML_Char xmlnsNamespace[]
4407 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4408 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4409 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4410 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4411 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4412 static const int xmlnsLen
4413 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4414
4415 XML_Bool mustBeXML = XML_FALSE;
4416 XML_Bool isXML = XML_TRUE;
4417 XML_Bool isXMLNS = XML_TRUE;
4418
4419 BINDING *b;
4420 int len;
4421
4422 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4423 if (*uri == XML_T('\0') && prefix->name)
4424 return XML_ERROR_UNDECLARING_PREFIX;
4425
4426 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4427 && prefix->name[1] == XML_T(ASCII_m)
4428 && prefix->name[2] == XML_T(ASCII_l)) {
4429 /* Not allowed to bind xmlns */
4430 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4431 && prefix->name[5] == XML_T('\0'))
4432 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4433
4434 if (prefix->name[3] == XML_T('\0'))
4435 mustBeXML = XML_TRUE;
4436 }
4437
4438 for (len = 0; uri[len]; len++) {
4439 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4440 isXML = XML_FALSE;
4441
4442 if (! mustBeXML && isXMLNS
4443 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4444 isXMLNS = XML_FALSE;
4445
4446 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4447 // today (and is not REQUIRED to do so with regard to the XML 1.0
4448 // namespaces specification) we have to at least make sure, that
4449 // the application on top of Expat (that is likely splitting expanded
4450 // element names ("qualified names") of form
4451 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4452 // in its element handler code) cannot be confused by an attacker
4453 // putting additional namespace separator characters into namespace
4454 // declarations. That would be ambiguous and not to be expected.
4455 //
4456 // While the HTML API docs of function XML_ParserCreateNS have been
4457 // advising against use of a namespace separator character that can
4458 // appear in a URI for >20 years now, some widespread applications
4459 // are using URI characters (':' (colon) in particular) for a
4460 // namespace separator, in practice. To keep these applications
4461 // functional, we only reject namespaces URIs containing the
4462 // application-chosen namespace separator if the chosen separator
4463 // is a non-URI character with regard to RFC 3986.
4464 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4465 && ! is_rfc3986_uri_char(uri[len])) {
4466 return XML_ERROR_SYNTAX;
4467 }
4468 }
4469 isXML = isXML && len == xmlLen;
4470 isXMLNS = isXMLNS && len == xmlnsLen;
4471
4472 if (mustBeXML != isXML)
4473 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4474 : XML_ERROR_RESERVED_NAMESPACE_URI;
4475
4476 if (isXMLNS)
4477 return XML_ERROR_RESERVED_NAMESPACE_URI;
4478
4479 if (parser->m_namespaceSeparator)
4480 len++;
4481 if (parser->m_freeBindingList) {
4482 b = parser->m_freeBindingList;
4483 if (len > b->uriAlloc) {
4484 /* Detect and prevent integer overflow */
4485 if (len > INT_MAX - EXPAND_SPARE) {
4486 return XML_ERROR_NO_MEMORY;
4487 }
4488
4489 /* Detect and prevent integer overflow.
4490 * The preprocessor guard addresses the "always false" warning
4491 * from -Wtype-limits on platforms where
4492 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4493 #if UINT_MAX >= SIZE_MAX
4494 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4495 return XML_ERROR_NO_MEMORY;
4496 }
4497 #endif
4498
4499 XML_Char *temp
4500 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4501 if (temp == NULL)
4502 return XML_ERROR_NO_MEMORY;
4503 b->uri = temp;
4504 b->uriAlloc = len + EXPAND_SPARE;
4505 }
4506 parser->m_freeBindingList = b->nextTagBinding;
4507 } else {
4508 b = MALLOC(parser, sizeof(BINDING));
4509 if (! b)
4510 return XML_ERROR_NO_MEMORY;
4511
4512 /* Detect and prevent integer overflow */
4513 if (len > INT_MAX - EXPAND_SPARE) {
4514 return XML_ERROR_NO_MEMORY;
4515 }
4516 /* Detect and prevent integer overflow.
4517 * The preprocessor guard addresses the "always false" warning
4518 * from -Wtype-limits on platforms where
4519 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4520 #if UINT_MAX >= SIZE_MAX
4521 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4522 return XML_ERROR_NO_MEMORY;
4523 }
4524 #endif
4525
4526 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4527 if (! b->uri) {
4528 FREE(parser, b);
4529 return XML_ERROR_NO_MEMORY;
4530 }
4531 b->uriAlloc = len + EXPAND_SPARE;
4532 }
4533 b->uriLen = len;
4534 memcpy(b->uri, uri, len * sizeof(XML_Char));
4535 if (parser->m_namespaceSeparator)
4536 b->uri[len - 1] = parser->m_namespaceSeparator;
4537 b->prefix = prefix;
4538 b->attId = attId;
4539 b->prevPrefixBinding = prefix->binding;
4540 /* NULL binding when default namespace undeclared */
4541 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4542 prefix->binding = NULL;
4543 else
4544 prefix->binding = b;
4545 b->nextTagBinding = *bindingsPtr;
4546 *bindingsPtr = b;
4547 /* if attId == NULL then we are not starting a namespace scope */
4548 if (attId && parser->m_startNamespaceDeclHandler)
4549 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4550 prefix->binding ? uri : 0);
4551 return XML_ERROR_NONE;
4552 }
4553
4554 /* The idea here is to avoid using stack for each CDATA section when
4555 the whole file is parsed with one call.
4556 */
4557 static enum XML_Error PTRCALL
4558 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4559 const char **endPtr) {
4560 enum XML_Error result = doCdataSection(
4561 parser, parser->m_encoding, &start, end, endPtr,
4562 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4563 if (result != XML_ERROR_NONE)
4564 return result;
4565 if (start) {
4566 if (parser->m_parentParser) { /* we are parsing an external entity */
4567 parser->m_processor = externalEntityContentProcessor;
4568 return externalEntityContentProcessor(parser, start, end, endPtr);
4569 } else {
4570 parser->m_processor = contentProcessor;
4571 return contentProcessor(parser, start, end, endPtr);
4572 }
4573 }
4574 return result;
4575 }
4576
/* Tokenize the body of a CDATA section starting at *startPtr and report it
   to the registered handlers, one token at a time.

   parser    - parser whose handlers, buffers and event pointers are used
   enc       - encoding used to tokenize the input
   startPtr  - in: where scanning starts; out: gets set to non-null (the end
               of the closing token) if the section is closed, and to null
               if the section is not yet closed
   end       - end of the available input
   nextPtr   - out: position to resume from; written when the section
               closes, when more input is awaited, and when the parser was
               suspended
   haveMore  - if true, running out of input is not an error: *nextPtr is
               set to the start of the unfinished token and XML_ERROR_NONE
               is returned
   account   - forwarded to accountingDiffTolerated(); unused unless
               XML_GE == 1

   Returns XML_ERROR_NONE or the error code describing why scanning stopped.
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  /* Select the event pointers to maintain: the parser's own pair when
     tokenizing with the parser's encoding, otherwise the pair that belongs
     to the entry at the head of m_openInternalEntities. */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  /* Report "not yet closed" unless the closing token is found below. */
  *startPtr = NULL;

  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#if XML_GE == 1
    /* Account for the bytes [s, next) of this token before acting on it. */
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      /* End of the section: notify, hand the resume position back through
         both startPtr and nextPtr, and stop scanning. */
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      *startPtr = next;
      *nextPtr = next;
      /* The handler called above may have stopped the parser. */
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* A line break is always passed on as the single character 0xA. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert [s, next) through the parser's data buffer and deliver
             it chunk by chunk. XmlConvert() advances both s and dataPtr;
             the loop ends once it reports the conversion as completed or
             the input as incomplete. */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            /* More chunks follow: the event now starts at the first
               not yet converted input byte. */
            *eventPP = s;
          }
        } else
          /* No conversion needed: hand the input bytes over directly. */
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      /* Input ends inside a character: wait for more data if possible. */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      /* Input ends before the section was closed: wait for more data if
         possible, otherwise the section is unterminated. */
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* Only reached after a data token was delivered: honour a parsing
       status change before moving on to the next token. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *eventPP = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      *eventPP = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
      }
      /* Fall through */
    default:;
      /* Advance to the next token. */
      *eventPP = s = next;
    }
  }
  /* not reached */
}
4705
4706 #ifdef XML_DTD
4707
4708 /* The idea here is to avoid using stack for each IGNORE section when
4709 the whole file is parsed with one call.
4710 */
4711 static enum XML_Error PTRCALL
4712 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4713 const char **endPtr) {
4714 enum XML_Error result
4715 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4716 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4717 if (result != XML_ERROR_NONE)
4718 return result;
4719 if (start) {
4720 parser->m_processor = prologProcessor;
4721 return prologProcessor(parser, start, end, endPtr);
4722 }
4723 return result;
4724 }
4725
4726 /* startPtr gets set to non-null is the section is closed, and to null
4727 if the section is not yet closed.
4728 */
4729 static enum XML_Error
4730 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4731 const char *end, const char **nextPtr, XML_Bool haveMore) {
4732 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4733 int tok;
4734 const char *s = *startPtr;
4735 const char **eventPP;
4736 const char **eventEndPP;
4737 if (enc == parser->m_encoding) {
4738 eventPP = &parser->m_eventPtr;
4739 *eventPP = s;
4740 eventEndPP = &parser->m_eventEndPtr;
4741 } else {
4742 /* It's not entirely clear, but it seems the following two lines
4743 * of code cannot be executed. The only occasions on which 'enc'
4744 * is not 'encoding' are when this function is called
4745 * from the internal entity processing, and IGNORE sections are an
4746 * error in internal entities.
4747 *
4748 * Since it really isn't clear that this is true, we keep the code
4749 * and just remove it from our coverage tests.
4750 *
4751 * LCOV_EXCL_START
4752 */
4753 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4754 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4755 /* LCOV_EXCL_STOP */
4756 }
4757 *eventPP = s;
4758 *startPtr = NULL;
4759 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4760 # if XML_GE == 1
4761 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4762 XML_ACCOUNT_DIRECT)) {
4763 accountingOnAbort(parser);
4764 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4765 }
4766 # endif
4767 *eventEndPP = next;
4768 switch (tok) {
4769 case XML_TOK_IGNORE_SECT:
4770 if (parser->m_defaultHandler)
4771 reportDefault(parser, enc, s, next);
4772 *startPtr = next;
4773 *nextPtr = next;
4774 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4775 return XML_ERROR_ABORTED;
4776 else
4777 return XML_ERROR_NONE;
4778 case XML_TOK_INVALID:
4779 *eventPP = next;
4780 return XML_ERROR_INVALID_TOKEN;
4781 case XML_TOK_PARTIAL_CHAR:
4782 if (haveMore) {
4783 *nextPtr = s;
4784 return XML_ERROR_NONE;
4785 }
4786 return XML_ERROR_PARTIAL_CHAR;
4787 case XML_TOK_PARTIAL:
4788 case XML_TOK_NONE:
4789 if (haveMore) {
4790 *nextPtr = s;
4791 return XML_ERROR_NONE;
4792 }
4793 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4794 default:
4795 /* All of the tokens that XmlIgnoreSectionTok() returns have
4796 * explicit cases to handle them, so this default case is never
4797 * executed. We keep it as a safety net anyway, and remove it
4798 * from our test coverage statistics.
4799 *
4800 * LCOV_EXCL_START
4801 */
4802 *eventPP = next;
4803 return XML_ERROR_UNEXPECTED_STATE;
4804 /* LCOV_EXCL_STOP */
4805 }
4806 /* not reached */
4807 }
4808
4809 #endif /* XML_DTD */
4810
4811 static enum XML_Error
4812 initializeEncoding(XML_Parser parser) {
4813 const char *s;
4814 #ifdef XML_UNICODE
4815 char encodingBuf[128];
4816 /* See comments about `protocolEncodingName` in parserInit() */
4817 if (! parser->m_protocolEncodingName)
4818 s = NULL;
4819 else {
4820 int i;
4821 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4822 if (i == sizeof(encodingBuf) - 1
4823 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4824 encodingBuf[0] = '\0';
4825 break;
4826 }
4827 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4828 }
4829 encodingBuf[i] = '\0';
4830 s = encodingBuf;
4831 }
4832 #else
4833 s = parser->m_protocolEncodingName;
4834 #endif
4835 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4836 &parser->m_initEncoding, &parser->m_encoding, s))
4837 return XML_ERROR_NONE;
4838 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4839 }
4840
4841 static enum XML_Error
4842 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4843 const char *next) {
4844 const char *encodingName = NULL;
4845 const XML_Char *storedEncName = NULL;
4846 const ENCODING *newEncoding = NULL;
4847 const char *version = NULL;
4848 const char *versionend = NULL;
4849 const XML_Char *storedversion = NULL;
4850 int standalone = -1;
4851
4852 #if XML_GE == 1
4853 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4854 XML_ACCOUNT_DIRECT)) {
4855 accountingOnAbort(parser);
4856 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4857 }
4858 #endif
4859
4860 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4861 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4862 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4863 if (isGeneralTextEntity)
4864 return XML_ERROR_TEXT_DECL;
4865 else
4866 return XML_ERROR_XML_DECL;
4867 }
4868 if (! isGeneralTextEntity && standalone == 1) {
4869 parser->m_dtd->standalone = XML_TRUE;
4870 #ifdef XML_DTD
4871 if (parser->m_paramEntityParsing
4872 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4873 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4874 #endif /* XML_DTD */
4875 }
4876 if (parser->m_xmlDeclHandler) {
4877 if (encodingName != NULL) {
4878 storedEncName = poolStoreString(
4879 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4880 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4881 if (! storedEncName)
4882 return XML_ERROR_NO_MEMORY;
4883 poolFinish(&parser->m_temp2Pool);
4884 }
4885 if (version) {
4886 storedversion
4887 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4888 versionend - parser->m_encoding->minBytesPerChar);
4889 if (! storedversion)
4890 return XML_ERROR_NO_MEMORY;
4891 }
4892 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4893 standalone);
4894 } else if (parser->m_defaultHandler)
4895 reportDefault(parser, parser->m_encoding, s, next);
4896 if (parser->m_protocolEncodingName == NULL) {
4897 if (newEncoding) {
4898 /* Check that the specified encoding does not conflict with what
4899 * the parser has already deduced. Do we have the same number
4900 * of bytes in the smallest representation of a character? If
4901 * this is UTF-16, is it the same endianness?
4902 */
4903 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4904 || (newEncoding->minBytesPerChar == 2
4905 && newEncoding != parser->m_encoding)) {
4906 parser->m_eventPtr = encodingName;
4907 return XML_ERROR_INCORRECT_ENCODING;
4908 }
4909 parser->m_encoding = newEncoding;
4910 } else if (encodingName) {
4911 enum XML_Error result;
4912 if (! storedEncName) {
4913 storedEncName = poolStoreString(
4914 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4915 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4916 if (! storedEncName)
4917 return XML_ERROR_NO_MEMORY;
4918 }
4919 result = handleUnknownEncoding(parser, storedEncName);
4920 poolClear(&parser->m_temp2Pool);
4921 if (result == XML_ERROR_UNKNOWN_ENCODING)
4922 parser->m_eventPtr = encodingName;
4923 return result;
4924 }
4925 }
4926
4927 if (storedEncName || storedversion)
4928 poolClear(&parser->m_temp2Pool);
4929
4930 return XML_ERROR_NONE;
4931 }
4932
4933 static enum XML_Error
4934 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4935 if (parser->m_unknownEncodingHandler) {
4936 XML_Encoding info;
4937 int i;
4938 for (i = 0; i < 256; i++)
4939 info.map[i] = -1;
4940 info.convert = NULL;
4941 info.data = NULL;
4942 info.release = NULL;
4943 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4944 encodingName, &info)) {
4945 ENCODING *enc;
4946 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4947 if (! parser->m_unknownEncodingMem) {
4948 if (info.release)
4949 info.release(info.data);
4950 return XML_ERROR_NO_MEMORY;
4951 }
4952 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4953 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4954 if (enc) {
4955 parser->m_unknownEncodingData = info.data;
4956 parser->m_unknownEncodingRelease = info.release;
4957 parser->m_encoding = enc;
4958 return XML_ERROR_NONE;
4959 }
4960 }
4961 if (info.release != NULL)
4962 info.release(info.data);
4963 }
4964 return XML_ERROR_UNKNOWN_ENCODING;
4965 }
4966
4967 static enum XML_Error PTRCALL
4968 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4969 const char **nextPtr) {
4970 enum XML_Error result = initializeEncoding(parser);
4971 if (result != XML_ERROR_NONE)
4972 return result;
4973 parser->m_processor = prologProcessor;
4974 return prologProcessor(parser, s, end, nextPtr);
4975 }
4976
4977 #ifdef XML_DTD
4978
4979 static enum XML_Error PTRCALL
4980 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4981 const char **nextPtr) {
4982 enum XML_Error result = initializeEncoding(parser);
4983 if (result != XML_ERROR_NONE)
4984 return result;
4985
4986 /* we know now that XML_Parse(Buffer) has been called,
4987 so we consider the external parameter entity read */
4988 parser->m_dtd->paramEntityRead = XML_TRUE;
4989
4990 if (parser->m_prologState.inEntityValue) {
4991 parser->m_processor = entityValueInitProcessor;
4992 return entityValueInitProcessor(parser, s, end, nextPtr);
4993 } else {
4994 parser->m_processor = externalParEntProcessor;
4995 return externalParEntProcessor(parser, s, end, nextPtr);
4996 }
4997 }
4998
4999 static enum XML_Error PTRCALL
5000 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
5001 const char **nextPtr) {
5002 int tok;
5003 const char *start = s;
5004 const char *next = start;
5005 parser->m_eventPtr = start;
5006
5007 for (;;) {
5008 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
5009 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
5010 - storeEntityValue
5011 - processXmlDecl
5012 */
5013 parser->m_eventEndPtr = next;
5014 if (tok <= 0) {
5015 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5016 *nextPtr = s;
5017 return XML_ERROR_NONE;
5018 }
5019 switch (tok) {
5020 case XML_TOK_INVALID:
5021 return XML_ERROR_INVALID_TOKEN;
5022 case XML_TOK_PARTIAL:
5023 return XML_ERROR_UNCLOSED_TOKEN;
5024 case XML_TOK_PARTIAL_CHAR:
5025 return XML_ERROR_PARTIAL_CHAR;
5026 case XML_TOK_NONE: /* start == end */
5027 default:
5028 break;
5029 }
5030 /* found end of entity value - can store it now */
5031 return storeEntityValue(parser, parser->m_encoding, s, end,
5032 XML_ACCOUNT_DIRECT, NULL);
5033 } else if (tok == XML_TOK_XML_DECL) {
5034 enum XML_Error result;
5035 result = processXmlDecl(parser, 0, start, next);
5036 if (result != XML_ERROR_NONE)
5037 return result;
5038 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
5039 * that to happen, a parameter entity parsing handler must have attempted
5040 * to suspend the parser, which fails and raises an error. The parser can
5041 * be aborted, but can't be suspended.
5042 */
5043 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5044 return XML_ERROR_ABORTED;
5045 *nextPtr = next;
5046 /* stop scanning for text declaration - we found one */
5047 parser->m_processor = entityValueProcessor;
5048 return entityValueProcessor(parser, next, end, nextPtr);
5049 }
5050 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
5051 must move s and nextPtr forward to consume the BOM.
5052
5053 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
5054 would leave the BOM in the buffer and return. On the next call to this
5055 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
5056 is not valid to have multiple BOMs.
5057 */
5058 else if (tok == XML_TOK_BOM) {
5059 # if XML_GE == 1
5060 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5061 XML_ACCOUNT_DIRECT)) {
5062 accountingOnAbort(parser);
5063 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5064 }
5065 # endif
5066
5067 *nextPtr = next;
5068 s = next;
5069 }
5070 /* If we get this token, we have the start of what might be a
5071 normal tag, but not a declaration (i.e. it doesn't begin with
5072 "<!"). In a DTD context, that isn't legal.
5073 */
5074 else if (tok == XML_TOK_INSTANCE_START) {
5075 *nextPtr = next;
5076 return XML_ERROR_SYNTAX;
5077 }
5078 start = next;
5079 parser->m_eventPtr = start;
5080 }
5081 }
5082
5083 static enum XML_Error PTRCALL
5084 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
5085 const char **nextPtr) {
5086 const char *next = s;
5087 int tok;
5088
5089 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5090 if (tok <= 0) {
5091 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5092 *nextPtr = s;
5093 return XML_ERROR_NONE;
5094 }
5095 switch (tok) {
5096 case XML_TOK_INVALID:
5097 return XML_ERROR_INVALID_TOKEN;
5098 case XML_TOK_PARTIAL:
5099 return XML_ERROR_UNCLOSED_TOKEN;
5100 case XML_TOK_PARTIAL_CHAR:
5101 return XML_ERROR_PARTIAL_CHAR;
5102 case XML_TOK_NONE: /* start == end */
5103 default:
5104 break;
5105 }
5106 }
5107 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
5108 However, when parsing an external subset, doProlog will not accept a BOM
5109 as valid, and report a syntax error, so we have to skip the BOM, and
5110 account for the BOM bytes.
5111 */
5112 else if (tok == XML_TOK_BOM) {
5113 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5114 XML_ACCOUNT_DIRECT)) {
5115 accountingOnAbort(parser);
5116 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5117 }
5118
5119 s = next;
5120 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5121 }
5122
5123 parser->m_processor = prologProcessor;
5124 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5125 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5126 XML_ACCOUNT_DIRECT);
5127 }
5128
5129 static enum XML_Error PTRCALL
5130 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
5131 const char **nextPtr) {
5132 const char *start = s;
5133 const char *next = s;
5134 const ENCODING *enc = parser->m_encoding;
5135 int tok;
5136
5137 for (;;) {
5138 tok = XmlPrologTok(enc, start, end, &next);
5139 /* Note: These bytes are accounted later in:
5140 - storeEntityValue
5141 */
5142 if (tok <= 0) {
5143 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
5144 *nextPtr = s;
5145 return XML_ERROR_NONE;
5146 }
5147 switch (tok) {
5148 case XML_TOK_INVALID:
5149 return XML_ERROR_INVALID_TOKEN;
5150 case XML_TOK_PARTIAL:
5151 return XML_ERROR_UNCLOSED_TOKEN;
5152 case XML_TOK_PARTIAL_CHAR:
5153 return XML_ERROR_PARTIAL_CHAR;
5154 case XML_TOK_NONE: /* start == end */
5155 default:
5156 break;
5157 }
5158 /* found end of entity value - can store it now */
5159 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
5160 }
5161 start = next;
5162 }
5163 }
5164
5165 #endif /* XML_DTD */
5166
5167 static enum XML_Error PTRCALL
5168 prologProcessor(XML_Parser parser, const char *s, const char *end,
5169 const char **nextPtr) {
5170 const char *next = s;
5171 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5172 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5173 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5174 XML_ACCOUNT_DIRECT);
5175 }
5176
5177 static enum XML_Error
5178 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
5179 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
5180 XML_Bool allowClosingDoctype, enum XML_Account account) {
5181 #ifdef XML_DTD
5182 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
5183 #endif /* XML_DTD */
5184 static const XML_Char atypeCDATA[]
5185 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
5186 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
5187 static const XML_Char atypeIDREF[]
5188 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
5189 static const XML_Char atypeIDREFS[]
5190 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
5191 static const XML_Char atypeENTITY[]
5192 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
5193 static const XML_Char atypeENTITIES[]
5194 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
5195 ASCII_I, ASCII_E, ASCII_S, '\0'};
5196 static const XML_Char atypeNMTOKEN[]
5197 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
5198 static const XML_Char atypeNMTOKENS[]
5199 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
5200 ASCII_E, ASCII_N, ASCII_S, '\0'};
5201 static const XML_Char notationPrefix[]
5202 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
5203 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
5204 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
5205 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
5206
5207 #ifndef XML_DTD
5208 UNUSED_P(account);
5209 #endif
5210
5211 /* save one level of indirection */
5212 DTD *const dtd = parser->m_dtd;
5213
5214 const char **eventPP;
5215 const char **eventEndPP;
5216 enum XML_Content_Quant quant;
5217
5218 if (enc == parser->m_encoding) {
5219 eventPP = &parser->m_eventPtr;
5220 eventEndPP = &parser->m_eventEndPtr;
5221 } else {
5222 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5223 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5224 }
5225
5226 for (;;) {
5227 int role;
5228 XML_Bool handleDefault = XML_TRUE;
5229 *eventPP = s;
5230 *eventEndPP = next;
5231 if (tok <= 0) {
5232 if (haveMore && tok != XML_TOK_INVALID) {
5233 *nextPtr = s;
5234 return XML_ERROR_NONE;
5235 }
5236 switch (tok) {
5237 case XML_TOK_INVALID:
5238 *eventPP = next;
5239 return XML_ERROR_INVALID_TOKEN;
5240 case XML_TOK_PARTIAL:
5241 return XML_ERROR_UNCLOSED_TOKEN;
5242 case XML_TOK_PARTIAL_CHAR:
5243 return XML_ERROR_PARTIAL_CHAR;
5244 case -XML_TOK_PROLOG_S:
5245 tok = -tok;
5246 break;
5247 case XML_TOK_NONE:
5248 #ifdef XML_DTD
5249 /* for internal PE NOT referenced between declarations */
5250 if (enc != parser->m_encoding
5251 && ! parser->m_openInternalEntities->betweenDecl) {
5252 *nextPtr = s;
5253 return XML_ERROR_NONE;
5254 }
5255 /* WFC: PE Between Declarations - must check that PE contains
5256 complete markup, not only for external PEs, but also for
5257 internal PEs if the reference occurs between declarations.
5258 */
5259 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5260 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
5261 == XML_ROLE_ERROR)
5262 return XML_ERROR_INCOMPLETE_PE;
5263 *nextPtr = s;
5264 return XML_ERROR_NONE;
5265 }
5266 #endif /* XML_DTD */
5267 return XML_ERROR_NO_ELEMENTS;
5268 default:
5269 tok = -tok;
5270 next = end;
5271 break;
5272 }
5273 }
5274 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
5275 #if XML_GE == 1
5276 switch (role) {
5277 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
5278 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
5279 # ifdef XML_DTD
5280 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
5281 # endif
5282 break;
5283 default:
5284 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
5285 accountingOnAbort(parser);
5286 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5287 }
5288 }
5289 #endif
5290 switch (role) {
5291 case XML_ROLE_XML_DECL: {
5292 enum XML_Error result = processXmlDecl(parser, 0, s, next);
5293 if (result != XML_ERROR_NONE)
5294 return result;
5295 enc = parser->m_encoding;
5296 handleDefault = XML_FALSE;
5297 } break;
5298 case XML_ROLE_DOCTYPE_NAME:
5299 if (parser->m_startDoctypeDeclHandler) {
5300 parser->m_doctypeName
5301 = poolStoreString(&parser->m_tempPool, enc, s, next);
5302 if (! parser->m_doctypeName)
5303 return XML_ERROR_NO_MEMORY;
5304 poolFinish(&parser->m_tempPool);
5305 parser->m_doctypePubid = NULL;
5306 handleDefault = XML_FALSE;
5307 }
5308 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
5309 break;
5310 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
5311 if (parser->m_startDoctypeDeclHandler) {
5312 parser->m_startDoctypeDeclHandler(
5313 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5314 parser->m_doctypePubid, 1);
5315 parser->m_doctypeName = NULL;
5316 poolClear(&parser->m_tempPool);
5317 handleDefault = XML_FALSE;
5318 }
5319 break;
5320 #ifdef XML_DTD
5321 case XML_ROLE_TEXT_DECL: {
5322 enum XML_Error result = processXmlDecl(parser, 1, s, next);
5323 if (result != XML_ERROR_NONE)
5324 return result;
5325 enc = parser->m_encoding;
5326 handleDefault = XML_FALSE;
5327 } break;
5328 #endif /* XML_DTD */
5329 case XML_ROLE_DOCTYPE_PUBLIC_ID:
5330 #ifdef XML_DTD
5331 parser->m_useForeignDTD = XML_FALSE;
5332 parser->m_declEntity = (ENTITY *)lookup(
5333 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5334 if (! parser->m_declEntity)
5335 return XML_ERROR_NO_MEMORY;
5336 #endif /* XML_DTD */
5337 dtd->hasParamEntityRefs = XML_TRUE;
5338 if (parser->m_startDoctypeDeclHandler) {
5339 XML_Char *pubId;
5340 if (! XmlIsPublicId(enc, s, next, eventPP))
5341 return XML_ERROR_PUBLICID;
5342 pubId = poolStoreString(&parser->m_tempPool, enc,
5343 s + enc->minBytesPerChar,
5344 next - enc->minBytesPerChar);
5345 if (! pubId)
5346 return XML_ERROR_NO_MEMORY;
5347 normalizePublicId(pubId);
5348 poolFinish(&parser->m_tempPool);
5349 parser->m_doctypePubid = pubId;
5350 handleDefault = XML_FALSE;
5351 goto alreadyChecked;
5352 }
5353 /* fall through */
5354 case XML_ROLE_ENTITY_PUBLIC_ID:
5355 if (! XmlIsPublicId(enc, s, next, eventPP))
5356 return XML_ERROR_PUBLICID;
5357 alreadyChecked:
5358 if (dtd->keepProcessing && parser->m_declEntity) {
5359 XML_Char *tem
5360 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5361 next - enc->minBytesPerChar);
5362 if (! tem)
5363 return XML_ERROR_NO_MEMORY;
5364 normalizePublicId(tem);
5365 parser->m_declEntity->publicId = tem;
5366 poolFinish(&dtd->pool);
5367 /* Don't suppress the default handler if we fell through from
5368 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5369 */
5370 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5371 handleDefault = XML_FALSE;
5372 }
5373 break;
5374 case XML_ROLE_DOCTYPE_CLOSE:
5375 if (allowClosingDoctype != XML_TRUE) {
5376 /* Must not close doctype from within expanded parameter entities */
5377 return XML_ERROR_INVALID_TOKEN;
5378 }
5379
5380 if (parser->m_doctypeName) {
5381 parser->m_startDoctypeDeclHandler(
5382 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5383 parser->m_doctypePubid, 0);
5384 poolClear(&parser->m_tempPool);
5385 handleDefault = XML_FALSE;
5386 }
5387 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5388 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5389 was not set, indicating an external subset
5390 */
5391 #ifdef XML_DTD
5392 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5393 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5394 dtd->hasParamEntityRefs = XML_TRUE;
5395 if (parser->m_paramEntityParsing
5396 && parser->m_externalEntityRefHandler) {
5397 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5398 externalSubsetName, sizeof(ENTITY));
5399 if (! entity) {
5400 /* The external subset name "#" will have already been
5401 * inserted into the hash table at the start of the
5402 * external entity parsing, so no allocation will happen
5403 * and lookup() cannot fail.
5404 */
5405 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5406 }
5407 if (parser->m_useForeignDTD)
5408 entity->base = parser->m_curBase;
5409 dtd->paramEntityRead = XML_FALSE;
5410 if (! parser->m_externalEntityRefHandler(
5411 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5412 entity->systemId, entity->publicId))
5413 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5414 if (dtd->paramEntityRead) {
5415 if (! dtd->standalone && parser->m_notStandaloneHandler
5416 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5417 return XML_ERROR_NOT_STANDALONE;
5418 }
5419 /* if we didn't read the foreign DTD then this means that there
5420 is no external subset and we must reset dtd->hasParamEntityRefs
5421 */
5422 else if (! parser->m_doctypeSysid)
5423 dtd->hasParamEntityRefs = hadParamEntityRefs;
5424 /* end of DTD - no need to update dtd->keepProcessing */
5425 }
5426 parser->m_useForeignDTD = XML_FALSE;
5427 }
5428 #endif /* XML_DTD */
5429 if (parser->m_endDoctypeDeclHandler) {
5430 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5431 handleDefault = XML_FALSE;
5432 }
5433 break;
5434 case XML_ROLE_INSTANCE_START:
5435 #ifdef XML_DTD
5436 /* if there is no DOCTYPE declaration then now is the
5437 last chance to read the foreign DTD
5438 */
5439 if (parser->m_useForeignDTD) {
5440 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5441 dtd->hasParamEntityRefs = XML_TRUE;
5442 if (parser->m_paramEntityParsing
5443 && parser->m_externalEntityRefHandler) {
5444 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5445 externalSubsetName, sizeof(ENTITY));
5446 if (! entity)
5447 return XML_ERROR_NO_MEMORY;
5448 entity->base = parser->m_curBase;
5449 dtd->paramEntityRead = XML_FALSE;
5450 if (! parser->m_externalEntityRefHandler(
5451 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5452 entity->systemId, entity->publicId))
5453 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5454 if (dtd->paramEntityRead) {
5455 if (! dtd->standalone && parser->m_notStandaloneHandler
5456 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5457 return XML_ERROR_NOT_STANDALONE;
5458 }
5459 /* if we didn't read the foreign DTD then this means that there
5460 is no external subset and we must reset dtd->hasParamEntityRefs
5461 */
5462 else
5463 dtd->hasParamEntityRefs = hadParamEntityRefs;
5464 /* end of DTD - no need to update dtd->keepProcessing */
5465 }
5466 }
5467 #endif /* XML_DTD */
5468 parser->m_processor = contentProcessor;
5469 return contentProcessor(parser, s, end, nextPtr);
5470 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5471 parser->m_declElementType = getElementType(parser, enc, s, next);
5472 if (! parser->m_declElementType)
5473 return XML_ERROR_NO_MEMORY;
5474 goto checkAttListDeclHandler;
5475 case XML_ROLE_ATTRIBUTE_NAME:
5476 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5477 if (! parser->m_declAttributeId)
5478 return XML_ERROR_NO_MEMORY;
5479 parser->m_declAttributeIsCdata = XML_FALSE;
5480 parser->m_declAttributeType = NULL;
5481 parser->m_declAttributeIsId = XML_FALSE;
5482 goto checkAttListDeclHandler;
5483 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5484 parser->m_declAttributeIsCdata = XML_TRUE;
5485 parser->m_declAttributeType = atypeCDATA;
5486 goto checkAttListDeclHandler;
5487 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5488 parser->m_declAttributeIsId = XML_TRUE;
5489 parser->m_declAttributeType = atypeID;
5490 goto checkAttListDeclHandler;
5491 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5492 parser->m_declAttributeType = atypeIDREF;
5493 goto checkAttListDeclHandler;
5494 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5495 parser->m_declAttributeType = atypeIDREFS;
5496 goto checkAttListDeclHandler;
5497 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5498 parser->m_declAttributeType = atypeENTITY;
5499 goto checkAttListDeclHandler;
5500 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5501 parser->m_declAttributeType = atypeENTITIES;
5502 goto checkAttListDeclHandler;
5503 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5504 parser->m_declAttributeType = atypeNMTOKEN;
5505 goto checkAttListDeclHandler;
5506 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5507 parser->m_declAttributeType = atypeNMTOKENS;
5508 checkAttListDeclHandler:
5509 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5510 handleDefault = XML_FALSE;
5511 break;
5512 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5513 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5514 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5515 const XML_Char *prefix;
5516 if (parser->m_declAttributeType) {
5517 prefix = enumValueSep;
5518 } else {
5519 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5520 : enumValueStart);
5521 }
5522 if (! poolAppendString(&parser->m_tempPool, prefix))
5523 return XML_ERROR_NO_MEMORY;
5524 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5525 return XML_ERROR_NO_MEMORY;
5526 parser->m_declAttributeType = parser->m_tempPool.start;
5527 handleDefault = XML_FALSE;
5528 }
5529 break;
5530 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5531 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5532 if (dtd->keepProcessing) {
5533 if (! defineAttribute(parser->m_declElementType,
5534 parser->m_declAttributeId,
5535 parser->m_declAttributeIsCdata,
5536 parser->m_declAttributeIsId, 0, parser))
5537 return XML_ERROR_NO_MEMORY;
5538 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5539 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5540 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5541 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5542 /* Enumerated or Notation type */
5543 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5544 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5545 return XML_ERROR_NO_MEMORY;
5546 parser->m_declAttributeType = parser->m_tempPool.start;
5547 poolFinish(&parser->m_tempPool);
5548 }
5549 *eventEndPP = s;
5550 parser->m_attlistDeclHandler(
5551 parser->m_handlerArg, parser->m_declElementType->name,
5552 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5553 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5554 handleDefault = XML_FALSE;
5555 }
5556 }
5557 poolClear(&parser->m_tempPool);
5558 break;
5559 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5560 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5561 if (dtd->keepProcessing) {
5562 const XML_Char *attVal;
5563 enum XML_Error result = storeAttributeValue(
5564 parser, enc, parser->m_declAttributeIsCdata,
5565 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5566 XML_ACCOUNT_NONE);
5567 if (result)
5568 return result;
5569 attVal = poolStart(&dtd->pool);
5570 poolFinish(&dtd->pool);
5571 /* ID attributes aren't allowed to have a default */
5572 if (! defineAttribute(
5573 parser->m_declElementType, parser->m_declAttributeId,
5574 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5575 return XML_ERROR_NO_MEMORY;
5576 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5577 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5578 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5579 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5580 /* Enumerated or Notation type */
5581 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5582 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5583 return XML_ERROR_NO_MEMORY;
5584 parser->m_declAttributeType = parser->m_tempPool.start;
5585 poolFinish(&parser->m_tempPool);
5586 }
5587 *eventEndPP = s;
5588 parser->m_attlistDeclHandler(
5589 parser->m_handlerArg, parser->m_declElementType->name,
5590 parser->m_declAttributeId->name, parser->m_declAttributeType,
5591 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5592 poolClear(&parser->m_tempPool);
5593 handleDefault = XML_FALSE;
5594 }
5595 }
5596 break;
5597 case XML_ROLE_ENTITY_VALUE:
5598 if (dtd->keepProcessing) {
5599 #if XML_GE == 1
5600 // This will store the given replacement text in
5601 // parser->m_declEntity->textPtr.
5602 enum XML_Error result = callStoreEntityValue(
5603 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5604 XML_ACCOUNT_NONE);
5605 if (parser->m_declEntity) {
5606 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5607 parser->m_declEntity->textLen
5608 = (int)(poolLength(&dtd->entityValuePool));
5609 poolFinish(&dtd->entityValuePool);
5610 if (parser->m_entityDeclHandler) {
5611 *eventEndPP = s;
5612 parser->m_entityDeclHandler(
5613 parser->m_handlerArg, parser->m_declEntity->name,
5614 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5615 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5616 handleDefault = XML_FALSE;
5617 }
5618 } else
5619 poolDiscard(&dtd->entityValuePool);
5620 if (result != XML_ERROR_NONE)
5621 return result;
5622 #else
5623 // This will store "&entity123;" in parser->m_declEntity->textPtr
5624 // to end up as "&entity123;" in the handler.
5625 if (parser->m_declEntity != NULL) {
5626 const enum XML_Error result
5627 = storeSelfEntityValue(parser, parser->m_declEntity);
5628 if (result != XML_ERROR_NONE)
5629 return result;
5630
5631 if (parser->m_entityDeclHandler) {
5632 *eventEndPP = s;
5633 parser->m_entityDeclHandler(
5634 parser->m_handlerArg, parser->m_declEntity->name,
5635 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5636 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5637 handleDefault = XML_FALSE;
5638 }
5639 }
5640 #endif
5641 }
5642 break;
5643 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5644 #ifdef XML_DTD
5645 parser->m_useForeignDTD = XML_FALSE;
5646 #endif /* XML_DTD */
5647 dtd->hasParamEntityRefs = XML_TRUE;
5648 if (parser->m_startDoctypeDeclHandler) {
5649 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5650 s + enc->minBytesPerChar,
5651 next - enc->minBytesPerChar);
5652 if (parser->m_doctypeSysid == NULL)
5653 return XML_ERROR_NO_MEMORY;
5654 poolFinish(&parser->m_tempPool);
5655 handleDefault = XML_FALSE;
5656 }
5657 #ifdef XML_DTD
5658 else
5659 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5660 for the case where no parser->m_startDoctypeDeclHandler is set */
5661 parser->m_doctypeSysid = externalSubsetName;
5662 #endif /* XML_DTD */
5663 if (! dtd->standalone
5664 #ifdef XML_DTD
5665 && ! parser->m_paramEntityParsing
5666 #endif /* XML_DTD */
5667 && parser->m_notStandaloneHandler
5668 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5669 return XML_ERROR_NOT_STANDALONE;
5670 #ifndef XML_DTD
5671 break;
5672 #else /* XML_DTD */
5673 if (! parser->m_declEntity) {
5674 parser->m_declEntity = (ENTITY *)lookup(
5675 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5676 if (! parser->m_declEntity)
5677 return XML_ERROR_NO_MEMORY;
5678 parser->m_declEntity->publicId = NULL;
5679 }
5680 #endif /* XML_DTD */
5681 /* fall through */
5682 case XML_ROLE_ENTITY_SYSTEM_ID:
5683 if (dtd->keepProcessing && parser->m_declEntity) {
5684 parser->m_declEntity->systemId
5685 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5686 next - enc->minBytesPerChar);
5687 if (! parser->m_declEntity->systemId)
5688 return XML_ERROR_NO_MEMORY;
5689 parser->m_declEntity->base = parser->m_curBase;
5690 poolFinish(&dtd->pool);
5691 /* Don't suppress the default handler if we fell through from
5692 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5693 */
5694 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5695 handleDefault = XML_FALSE;
5696 }
5697 break;
5698 case XML_ROLE_ENTITY_COMPLETE:
5699 #if XML_GE == 0
5700 // This will store "&entity123;" in entity->textPtr
5701 // to end up as "&entity123;" in the handler.
5702 if (parser->m_declEntity != NULL) {
5703 const enum XML_Error result
5704 = storeSelfEntityValue(parser, parser->m_declEntity);
5705 if (result != XML_ERROR_NONE)
5706 return result;
5707 }
5708 #endif
5709 if (dtd->keepProcessing && parser->m_declEntity
5710 && parser->m_entityDeclHandler) {
5711 *eventEndPP = s;
5712 parser->m_entityDeclHandler(
5713 parser->m_handlerArg, parser->m_declEntity->name,
5714 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5715 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5716 handleDefault = XML_FALSE;
5717 }
5718 break;
5719 case XML_ROLE_ENTITY_NOTATION_NAME:
5720 if (dtd->keepProcessing && parser->m_declEntity) {
5721 parser->m_declEntity->notation
5722 = poolStoreString(&dtd->pool, enc, s, next);
5723 if (! parser->m_declEntity->notation)
5724 return XML_ERROR_NO_MEMORY;
5725 poolFinish(&dtd->pool);
5726 if (parser->m_unparsedEntityDeclHandler) {
5727 *eventEndPP = s;
5728 parser->m_unparsedEntityDeclHandler(
5729 parser->m_handlerArg, parser->m_declEntity->name,
5730 parser->m_declEntity->base, parser->m_declEntity->systemId,
5731 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5732 handleDefault = XML_FALSE;
5733 } else if (parser->m_entityDeclHandler) {
5734 *eventEndPP = s;
5735 parser->m_entityDeclHandler(
5736 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5737 parser->m_declEntity->base, parser->m_declEntity->systemId,
5738 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5739 handleDefault = XML_FALSE;
5740 }
5741 }
5742 break;
5743 case XML_ROLE_GENERAL_ENTITY_NAME: {
5744 if (XmlPredefinedEntityName(enc, s, next)) {
5745 parser->m_declEntity = NULL;
5746 break;
5747 }
5748 if (dtd->keepProcessing) {
5749 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5750 if (! name)
5751 return XML_ERROR_NO_MEMORY;
5752 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5753 name, sizeof(ENTITY));
5754 if (! parser->m_declEntity)
5755 return XML_ERROR_NO_MEMORY;
5756 if (parser->m_declEntity->name != name) {
5757 poolDiscard(&dtd->pool);
5758 parser->m_declEntity = NULL;
5759 } else {
5760 poolFinish(&dtd->pool);
5761 parser->m_declEntity->publicId = NULL;
5762 parser->m_declEntity->is_param = XML_FALSE;
5763 /* if we have a parent parser or are reading an internal parameter
5764 entity, then the entity declaration is not considered "internal"
5765 */
5766 parser->m_declEntity->is_internal
5767 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5768 if (parser->m_entityDeclHandler)
5769 handleDefault = XML_FALSE;
5770 }
5771 } else {
5772 poolDiscard(&dtd->pool);
5773 parser->m_declEntity = NULL;
5774 }
5775 } break;
5776 case XML_ROLE_PARAM_ENTITY_NAME:
5777 #ifdef XML_DTD
5778 if (dtd->keepProcessing) {
5779 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5780 if (! name)
5781 return XML_ERROR_NO_MEMORY;
5782 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5783 name, sizeof(ENTITY));
5784 if (! parser->m_declEntity)
5785 return XML_ERROR_NO_MEMORY;
5786 if (parser->m_declEntity->name != name) {
5787 poolDiscard(&dtd->pool);
5788 parser->m_declEntity = NULL;
5789 } else {
5790 poolFinish(&dtd->pool);
5791 parser->m_declEntity->publicId = NULL;
5792 parser->m_declEntity->is_param = XML_TRUE;
5793 /* if we have a parent parser or are reading an internal parameter
5794 entity, then the entity declaration is not considered "internal"
5795 */
5796 parser->m_declEntity->is_internal
5797 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5798 if (parser->m_entityDeclHandler)
5799 handleDefault = XML_FALSE;
5800 }
5801 } else {
5802 poolDiscard(&dtd->pool);
5803 parser->m_declEntity = NULL;
5804 }
5805 #else /* not XML_DTD */
5806 parser->m_declEntity = NULL;
5807 #endif /* XML_DTD */
5808 break;
5809 case XML_ROLE_NOTATION_NAME:
5810 parser->m_declNotationPublicId = NULL;
5811 parser->m_declNotationName = NULL;
5812 if (parser->m_notationDeclHandler) {
5813 parser->m_declNotationName
5814 = poolStoreString(&parser->m_tempPool, enc, s, next);
5815 if (! parser->m_declNotationName)
5816 return XML_ERROR_NO_MEMORY;
5817 poolFinish(&parser->m_tempPool);
5818 handleDefault = XML_FALSE;
5819 }
5820 break;
5821 case XML_ROLE_NOTATION_PUBLIC_ID:
5822 if (! XmlIsPublicId(enc, s, next, eventPP))
5823 return XML_ERROR_PUBLICID;
5824 if (parser
5825 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5826 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5827 s + enc->minBytesPerChar,
5828 next - enc->minBytesPerChar);
5829 if (! tem)
5830 return XML_ERROR_NO_MEMORY;
5831 normalizePublicId(tem);
5832 parser->m_declNotationPublicId = tem;
5833 poolFinish(&parser->m_tempPool);
5834 handleDefault = XML_FALSE;
5835 }
5836 break;
5837 case XML_ROLE_NOTATION_SYSTEM_ID:
5838 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5839 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5840 s + enc->minBytesPerChar,
5841 next - enc->minBytesPerChar);
5842 if (! systemId)
5843 return XML_ERROR_NO_MEMORY;
5844 *eventEndPP = s;
5845 parser->m_notationDeclHandler(
5846 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5847 systemId, parser->m_declNotationPublicId);
5848 handleDefault = XML_FALSE;
5849 }
5850 poolClear(&parser->m_tempPool);
5851 break;
5852 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5853 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5854 *eventEndPP = s;
5855 parser->m_notationDeclHandler(
5856 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5857 0, parser->m_declNotationPublicId);
5858 handleDefault = XML_FALSE;
5859 }
5860 poolClear(&parser->m_tempPool);
5861 break;
5862 case XML_ROLE_ERROR:
5863 switch (tok) {
5864 case XML_TOK_PARAM_ENTITY_REF:
5865 /* PE references in internal subset are
5866 not allowed within declarations. */
5867 return XML_ERROR_PARAM_ENTITY_REF;
5868 case XML_TOK_XML_DECL:
5869 return XML_ERROR_MISPLACED_XML_PI;
5870 default:
5871 return XML_ERROR_SYNTAX;
5872 }
5873 #ifdef XML_DTD
5874 case XML_ROLE_IGNORE_SECT: {
5875 enum XML_Error result;
5876 if (parser->m_defaultHandler)
5877 reportDefault(parser, enc, s, next);
5878 handleDefault = XML_FALSE;
5879 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5880 if (result != XML_ERROR_NONE)
5881 return result;
5882 else if (! next) {
5883 parser->m_processor = ignoreSectionProcessor;
5884 return result;
5885 }
5886 } break;
5887 #endif /* XML_DTD */
5888 case XML_ROLE_GROUP_OPEN:
5889 if (parser->m_prologState.level >= parser->m_groupSize) {
5890 if (parser->m_groupSize) {
5891 {
5892 /* Detect and prevent integer overflow */
5893 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5894 return XML_ERROR_NO_MEMORY;
5895 }
5896
5897 char *const new_connector = REALLOC(
5898 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5899 if (new_connector == NULL) {
5900 parser->m_groupSize /= 2;
5901 return XML_ERROR_NO_MEMORY;
5902 }
5903 parser->m_groupConnector = new_connector;
5904 }
5905
5906 if (dtd->scaffIndex) {
5907 /* Detect and prevent integer overflow.
5908 * The preprocessor guard addresses the "always false" warning
5909 * from -Wtype-limits on platforms where
5910 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5911 #if UINT_MAX >= SIZE_MAX
5912 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5913 return XML_ERROR_NO_MEMORY;
5914 }
5915 #endif
5916
5917 int *const new_scaff_index = REALLOC(
5918 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5919 if (new_scaff_index == NULL)
5920 return XML_ERROR_NO_MEMORY;
5921 dtd->scaffIndex = new_scaff_index;
5922 }
5923 } else {
5924 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32);
5925 if (! parser->m_groupConnector) {
5926 parser->m_groupSize = 0;
5927 return XML_ERROR_NO_MEMORY;
5928 }
5929 }
5930 }
5931 parser->m_groupConnector[parser->m_prologState.level] = 0;
5932 if (dtd->in_eldecl) {
5933 int myindex = nextScaffoldPart(parser);
5934 if (myindex < 0)
5935 return XML_ERROR_NO_MEMORY;
5936 assert(dtd->scaffIndex != NULL);
5937 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5938 dtd->scaffLevel++;
5939 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5940 if (parser->m_elementDeclHandler)
5941 handleDefault = XML_FALSE;
5942 }
5943 break;
5944 case XML_ROLE_GROUP_SEQUENCE:
5945 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5946 return XML_ERROR_SYNTAX;
5947 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5948 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5949 handleDefault = XML_FALSE;
5950 break;
5951 case XML_ROLE_GROUP_CHOICE:
5952 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5953 return XML_ERROR_SYNTAX;
5954 if (dtd->in_eldecl
5955 && ! parser->m_groupConnector[parser->m_prologState.level]
5956 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5957 != XML_CTYPE_MIXED)) {
5958 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5959 = XML_CTYPE_CHOICE;
5960 if (parser->m_elementDeclHandler)
5961 handleDefault = XML_FALSE;
5962 }
5963 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5964 break;
5965 case XML_ROLE_PARAM_ENTITY_REF:
5966 #ifdef XML_DTD
5967 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5968 dtd->hasParamEntityRefs = XML_TRUE;
5969 if (! parser->m_paramEntityParsing)
5970 dtd->keepProcessing = dtd->standalone;
5971 else {
5972 const XML_Char *name;
5973 ENTITY *entity;
5974 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5975 next - enc->minBytesPerChar);
5976 if (! name)
5977 return XML_ERROR_NO_MEMORY;
5978 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5979 poolDiscard(&dtd->pool);
5980 /* first, determine if a check for an existing declaration is needed;
5981 if yes, check that the entity exists, and that it is internal,
5982 otherwise call the skipped entity handler
5983 */
5984 if (parser->m_prologState.documentEntity
5985 && (dtd->standalone ? ! parser->m_openInternalEntities
5986 : ! dtd->hasParamEntityRefs)) {
5987 if (! entity)
5988 return XML_ERROR_UNDEFINED_ENTITY;
5989 else if (! entity->is_internal) {
5990 /* It's hard to exhaustively search the code to be sure,
5991 * but there doesn't seem to be a way of executing the
5992 * following line. There are two cases:
5993 *
5994 * If 'standalone' is false, the DTD must have no
5995 * parameter entities or we wouldn't have passed the outer
5996 * 'if' statement. That means the only entity in the hash
5997 * table is the external subset name "#" which cannot be
5998 * given as a parameter entity name in XML syntax, so the
5999 * lookup must have returned NULL and we don't even reach
6000 * the test for an internal entity.
6001 *
6002 * If 'standalone' is true, it does not seem to be
6003 * possible to create entities taking this code path that
6004 * are not internal entities, so fail the test above.
6005 *
6006 * Because this analysis is very uncertain, the code is
6007 * being left in place and merely removed from the
6008 * coverage test statistics.
6009 */
6010 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
6011 }
6012 } else if (! entity) {
6013 dtd->keepProcessing = dtd->standalone;
6014 /* cannot report skipped entities in declarations */
6015 if ((role == XML_ROLE_PARAM_ENTITY_REF)
6016 && parser->m_skippedEntityHandler) {
6017 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
6018 handleDefault = XML_FALSE;
6019 }
6020 break;
6021 }
6022 if (entity->open)
6023 return XML_ERROR_RECURSIVE_ENTITY_REF;
6024 if (entity->textPtr) {
6025 enum XML_Error result;
6026 XML_Bool betweenDecl
6027 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
6028 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
6029 if (result != XML_ERROR_NONE)
6030 return result;
6031 handleDefault = XML_FALSE;
6032 break;
6033 }
6034 if (parser->m_externalEntityRefHandler) {
6035 dtd->paramEntityRead = XML_FALSE;
6036 entity->open = XML_TRUE;
6037 entityTrackingOnOpen(parser, entity, __LINE__);
6038 if (! parser->m_externalEntityRefHandler(
6039 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6040 entity->systemId, entity->publicId)) {
6041 entityTrackingOnClose(parser, entity, __LINE__);
6042 entity->open = XML_FALSE;
6043 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6044 }
6045 entityTrackingOnClose(parser, entity, __LINE__);
6046 entity->open = XML_FALSE;
6047 handleDefault = XML_FALSE;
6048 if (! dtd->paramEntityRead) {
6049 dtd->keepProcessing = dtd->standalone;
6050 break;
6051 }
6052 } else {
6053 dtd->keepProcessing = dtd->standalone;
6054 break;
6055 }
6056 }
6057 #endif /* XML_DTD */
6058 if (! dtd->standalone && parser->m_notStandaloneHandler
6059 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
6060 return XML_ERROR_NOT_STANDALONE;
6061 break;
6062
6063 /* Element declaration stuff */
6064
6065 case XML_ROLE_ELEMENT_NAME:
6066 if (parser->m_elementDeclHandler) {
6067 parser->m_declElementType = getElementType(parser, enc, s, next);
6068 if (! parser->m_declElementType)
6069 return XML_ERROR_NO_MEMORY;
6070 dtd->scaffLevel = 0;
6071 dtd->scaffCount = 0;
6072 dtd->in_eldecl = XML_TRUE;
6073 handleDefault = XML_FALSE;
6074 }
6075 break;
6076
6077 case XML_ROLE_CONTENT_ANY:
6078 case XML_ROLE_CONTENT_EMPTY:
6079 if (dtd->in_eldecl) {
6080 if (parser->m_elementDeclHandler) {
// NOTE: We are avoiding MALLOC(..) here so that
6082 // applications that are not using XML_FreeContentModel but
6083 // plain free(..) or .free_fcn() to free the content model's
6084 // memory are safe.
6085 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content));
6086 if (! content)
6087 return XML_ERROR_NO_MEMORY;
6088 content->quant = XML_CQUANT_NONE;
6089 content->name = NULL;
6090 content->numchildren = 0;
6091 content->children = NULL;
6092 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
6093 : XML_CTYPE_EMPTY);
6094 *eventEndPP = s;
6095 parser->m_elementDeclHandler(
6096 parser->m_handlerArg, parser->m_declElementType->name, content);
6097 handleDefault = XML_FALSE;
6098 }
6099 dtd->in_eldecl = XML_FALSE;
6100 }
6101 break;
6102
6103 case XML_ROLE_CONTENT_PCDATA:
6104 if (dtd->in_eldecl) {
6105 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
6106 = XML_CTYPE_MIXED;
6107 if (parser->m_elementDeclHandler)
6108 handleDefault = XML_FALSE;
6109 }
6110 break;
6111
6112 case XML_ROLE_CONTENT_ELEMENT:
6113 quant = XML_CQUANT_NONE;
6114 goto elementContent;
6115 case XML_ROLE_CONTENT_ELEMENT_OPT:
6116 quant = XML_CQUANT_OPT;
6117 goto elementContent;
6118 case XML_ROLE_CONTENT_ELEMENT_REP:
6119 quant = XML_CQUANT_REP;
6120 goto elementContent;
6121 case XML_ROLE_CONTENT_ELEMENT_PLUS:
6122 quant = XML_CQUANT_PLUS;
6123 elementContent:
6124 if (dtd->in_eldecl) {
6125 ELEMENT_TYPE *el;
6126 const XML_Char *name;
6127 size_t nameLen;
6128 const char *nxt
6129 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
6130 int myindex = nextScaffoldPart(parser);
6131 if (myindex < 0)
6132 return XML_ERROR_NO_MEMORY;
6133 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
6134 dtd->scaffold[myindex].quant = quant;
6135 el = getElementType(parser, enc, s, nxt);
6136 if (! el)
6137 return XML_ERROR_NO_MEMORY;
6138 name = el->name;
6139 dtd->scaffold[myindex].name = name;
6140 nameLen = 0;
6141 while (name[nameLen++])
6142 ;
6143
6144 /* Detect and prevent integer overflow */
6145 if (nameLen > UINT_MAX - dtd->contentStringLen) {
6146 return XML_ERROR_NO_MEMORY;
6147 }
6148
6149 dtd->contentStringLen += (unsigned)nameLen;
6150 if (parser->m_elementDeclHandler)
6151 handleDefault = XML_FALSE;
6152 }
6153 break;
6154
6155 case XML_ROLE_GROUP_CLOSE:
6156 quant = XML_CQUANT_NONE;
6157 goto closeGroup;
6158 case XML_ROLE_GROUP_CLOSE_OPT:
6159 quant = XML_CQUANT_OPT;
6160 goto closeGroup;
6161 case XML_ROLE_GROUP_CLOSE_REP:
6162 quant = XML_CQUANT_REP;
6163 goto closeGroup;
6164 case XML_ROLE_GROUP_CLOSE_PLUS:
6165 quant = XML_CQUANT_PLUS;
6166 closeGroup:
6167 if (dtd->in_eldecl) {
6168 if (parser->m_elementDeclHandler)
6169 handleDefault = XML_FALSE;
6170 dtd->scaffLevel--;
6171 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
6172 if (dtd->scaffLevel == 0) {
6173 if (! handleDefault) {
6174 XML_Content *model = build_model(parser);
6175 if (! model)
6176 return XML_ERROR_NO_MEMORY;
6177 *eventEndPP = s;
6178 parser->m_elementDeclHandler(
6179 parser->m_handlerArg, parser->m_declElementType->name, model);
6180 }
6181 dtd->in_eldecl = XML_FALSE;
6182 dtd->contentStringLen = 0;
6183 }
6184 }
6185 break;
6186 /* End element declaration stuff */
6187
6188 case XML_ROLE_PI:
6189 if (! reportProcessingInstruction(parser, enc, s, next))
6190 return XML_ERROR_NO_MEMORY;
6191 handleDefault = XML_FALSE;
6192 break;
6193 case XML_ROLE_COMMENT:
6194 if (! reportComment(parser, enc, s, next))
6195 return XML_ERROR_NO_MEMORY;
6196 handleDefault = XML_FALSE;
6197 break;
6198 case XML_ROLE_NONE:
6199 switch (tok) {
6200 case XML_TOK_BOM:
6201 handleDefault = XML_FALSE;
6202 break;
6203 }
6204 break;
6205 case XML_ROLE_DOCTYPE_NONE:
6206 if (parser->m_startDoctypeDeclHandler)
6207 handleDefault = XML_FALSE;
6208 break;
6209 case XML_ROLE_ENTITY_NONE:
6210 if (dtd->keepProcessing && parser->m_entityDeclHandler)
6211 handleDefault = XML_FALSE;
6212 break;
6213 case XML_ROLE_NOTATION_NONE:
6214 if (parser->m_notationDeclHandler)
6215 handleDefault = XML_FALSE;
6216 break;
6217 case XML_ROLE_ATTLIST_NONE:
6218 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
6219 handleDefault = XML_FALSE;
6220 break;
6221 case XML_ROLE_ELEMENT_NONE:
6222 if (parser->m_elementDeclHandler)
6223 handleDefault = XML_FALSE;
6224 break;
6225 } /* end of big switch */
6226
6227 if (handleDefault && parser->m_defaultHandler)
6228 reportDefault(parser, enc, s, next);
6229
6230 switch (parser->m_parsingStatus.parsing) {
6231 case XML_SUSPENDED:
6232 *nextPtr = next;
6233 return XML_ERROR_NONE;
6234 case XML_FINISHED:
6235 return XML_ERROR_ABORTED;
6236 case XML_PARSING:
6237 if (parser->m_reenter) {
6238 *nextPtr = next;
6239 return XML_ERROR_NONE;
6240 }
6241 /* Fall through */
6242 default:
6243 s = next;
6244 tok = XmlPrologTok(enc, s, end, &next);
6245 }
6246 }
6247 /* not reached */
6248 }
6249
6250 static enum XML_Error PTRCALL
6251 epilogProcessor(XML_Parser parser, const char *s, const char *end,
6252 const char **nextPtr) {
6253 parser->m_processor = epilogProcessor;
6254 parser->m_eventPtr = s;
6255 for (;;) {
6256 const char *next = NULL;
6257 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
6258 #if XML_GE == 1
6259 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
6260 XML_ACCOUNT_DIRECT)) {
6261 accountingOnAbort(parser);
6262 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6263 }
6264 #endif
6265 parser->m_eventEndPtr = next;
6266 switch (tok) {
6267 /* report partial linebreak - it might be the last token */
6268 case -XML_TOK_PROLOG_S:
6269 if (parser->m_defaultHandler) {
6270 reportDefault(parser, parser->m_encoding, s, next);
6271 if (parser->m_parsingStatus.parsing == XML_FINISHED)
6272 return XML_ERROR_ABORTED;
6273 }
6274 *nextPtr = next;
6275 return XML_ERROR_NONE;
6276 case XML_TOK_NONE:
6277 *nextPtr = s;
6278 return XML_ERROR_NONE;
6279 case XML_TOK_PROLOG_S:
6280 if (parser->m_defaultHandler)
6281 reportDefault(parser, parser->m_encoding, s, next);
6282 break;
6283 case XML_TOK_PI:
6284 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
6285 return XML_ERROR_NO_MEMORY;
6286 break;
6287 case XML_TOK_COMMENT:
6288 if (! reportComment(parser, parser->m_encoding, s, next))
6289 return XML_ERROR_NO_MEMORY;
6290 break;
6291 case XML_TOK_INVALID:
6292 parser->m_eventPtr = next;
6293 return XML_ERROR_INVALID_TOKEN;
6294 case XML_TOK_PARTIAL:
6295 if (! parser->m_parsingStatus.finalBuffer) {
6296 *nextPtr = s;
6297 return XML_ERROR_NONE;
6298 }
6299 return XML_ERROR_UNCLOSED_TOKEN;
6300 case XML_TOK_PARTIAL_CHAR:
6301 if (! parser->m_parsingStatus.finalBuffer) {
6302 *nextPtr = s;
6303 return XML_ERROR_NONE;
6304 }
6305 return XML_ERROR_PARTIAL_CHAR;
6306 default:
6307 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
6308 }
6309 switch (parser->m_parsingStatus.parsing) {
6310 case XML_SUSPENDED:
6311 parser->m_eventPtr = next;
6312 *nextPtr = next;
6313 return XML_ERROR_NONE;
6314 case XML_FINISHED:
6315 parser->m_eventPtr = next;
6316 return XML_ERROR_ABORTED;
6317 case XML_PARSING:
6318 if (parser->m_reenter) {
6319 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
6320 }
6321 /* Fall through */
6322 default:;
6323 parser->m_eventPtr = s = next;
6324 }
6325 }
6326 }
6327
6328 static enum XML_Error
6329 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
6330 enum EntityType type) {
6331 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
6332 switch (type) {
6333 case ENTITY_INTERNAL:
6334 parser->m_processor = internalEntityProcessor;
6335 openEntityList = &parser->m_openInternalEntities;
6336 freeEntityList = &parser->m_freeInternalEntities;
6337 break;
6338 case ENTITY_ATTRIBUTE:
6339 openEntityList = &parser->m_openAttributeEntities;
6340 freeEntityList = &parser->m_freeAttributeEntities;
6341 break;
6342 case ENTITY_VALUE:
6343 openEntityList = &parser->m_openValueEntities;
6344 freeEntityList = &parser->m_freeValueEntities;
6345 break;
6346 /* default case serves merely as a safety net in case of a
6347 * wrong entityType. Therefore we exclude the following lines
6348 * from the test coverage.
6349 *
6350 * LCOV_EXCL_START
6351 */
6352 default:
6353 // Should not reach here
6354 assert(0);
6355 /* LCOV_EXCL_STOP */
6356 }
6357
6358 if (*freeEntityList) {
6359 openEntity = *freeEntityList;
6360 *freeEntityList = openEntity->next;
6361 } else {
6362 openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6363 if (! openEntity)
6364 return XML_ERROR_NO_MEMORY;
6365 }
6366 entity->open = XML_TRUE;
6367 entity->hasMore = XML_TRUE;
6368 #if XML_GE == 1
6369 entityTrackingOnOpen(parser, entity, __LINE__);
6370 #endif
6371 entity->processed = 0;
6372 openEntity->next = *openEntityList;
6373 *openEntityList = openEntity;
6374 openEntity->entity = entity;
6375 openEntity->type = type;
6376 openEntity->startTagLevel = parser->m_tagLevel;
6377 openEntity->betweenDecl = betweenDecl;
6378 openEntity->internalEventPtr = NULL;
6379 openEntity->internalEventEndPtr = NULL;
6380
6381 // Only internal entities make use of the reenter flag
6382 // therefore no need to set it for other entity types
6383 if (type == ENTITY_INTERNAL) {
6384 triggerReenter(parser);
6385 }
6386 return XML_ERROR_NONE;
6387 }
6388
/* Processor-style callback that advances the expansion of the innermost
 * open internal entity, i.e. the head of parser->m_openInternalEntities.
 * (Presumably installed as parser->m_processor while such entities are
 * open; this function itself switches the processor back once the list
 * is empty.)
 *
 * Each call performs exactly one of two steps and then returns:
 *
 *  1. While entity->hasMore is set, the unconsumed part of the entity's
 *     replacement text (from offset entity->processed up to textLen) is
 *     parsed with doProlog() for parameter entities or doContent()
 *     otherwise.  If parsing stopped short of the end because the parser
 *     was suspended or a re-enter was requested, the offset reached is
 *     stored in entity->processed.  Otherwise hasMore is cleared and a
 *     re-enter is triggered.
 *
 *  2. With hasMore cleared, the entity is closed: it is marked as not
 *     open, unlinked from m_openInternalEntities, and its
 *     OPEN_INTERNAL_ENTITY record is pushed onto m_freeInternalEntities.
 *     If no open internal entity remains, the processor is reset to
 *     prologProcessor (parameter entity) or contentProcessor.
 *
 * The arguments s, end and nextPtr are not used.
 *
 * Returns XML_ERROR_UNEXPECTED_STATE if no internal entity is open, the
 * error reported by doProlog()/doContent() if any, and XML_ERROR_NONE
 * otherwise.
 */
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
                        const char **nextPtr) {
  UNUSED_P(s);
  UNUSED_P(end);
  UNUSED_P(nextPtr);
  ENTITY *entity;
  const char *textStart, *textEnd;
  const char *next;
  enum XML_Error result;
  OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
  if (! openEntity)
    return XML_ERROR_UNEXPECTED_STATE;

  entity = openEntity->entity;

  // Step 1: there is still replacement text to parse.
  // Every path through this block returns.
  if (entity->hasMore) {
    /* Resume right behind what earlier calls already consumed */
    textStart = ((const char *)entity->textPtr) + entity->processed;
    textEnd = (const char *)(entity->textPtr + entity->textLen);
    /* Set a safe default value in case 'next' does not get set */
    next = textStart;

    if (entity->is_param) {
      int tok
          = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
      result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                        tok, next, &next, XML_FALSE, XML_FALSE,
                        XML_ACCOUNT_ENTITY_EXPANSION);
    } else {
      result = doContent(parser, openEntity->startTagLevel,
                         parser->m_internalEncoding, textStart, textEnd, &next,
                         XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
    }

    if (result != XML_ERROR_NONE)
      return result;
    // Check if entity is complete, if not, mark down how much of it is
    // processed
    if (textEnd != next
        && (parser->m_parsingStatus.parsing == XML_SUSPENDED
            || (parser->m_parsingStatus.parsing == XML_PARSING
                && parser->m_reenter))) {
      entity->processed = (int)(next - (const char *)entity->textPtr);
      return result;
    }

    // Entity is complete. We cannot close it here since we need to first
    // process its possible inner entities (which are added to the
    // m_openInternalEntities during doProlog or doContent calls above)
    entity->hasMore = XML_FALSE;
    triggerReenter(parser);
    return result;
  } // End of entity processing, "if" block will return here

  // Step 2: remove fully processed openEntity from open entity list.
#if XML_GE == 1
  entityTrackingOnClose(parser, entity, __LINE__);
#endif
  // openEntity is m_openInternalEntities' head, as we set it at the start of
  // this function and we skipped doProlog and doContent calls with hasMore set
  // to false. This means we can directly remove the head of
  // m_openInternalEntities
  assert(parser->m_openInternalEntities == openEntity);
  entity->open = XML_FALSE;
  parser->m_openInternalEntities = parser->m_openInternalEntities->next;

  /* put openEntity back in list of free instances */
  openEntity->next = parser->m_freeInternalEntities;
  parser->m_freeInternalEntities = openEntity;

  /* Last open internal entity is gone: hand control back to the regular
     processor matching the kind of entity that was just closed */
  if (parser->m_openInternalEntities == NULL) {
    parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
  }
  triggerReenter(parser);
  return XML_ERROR_NONE;
}
6466
6467 static enum XML_Error PTRCALL
6468 errorProcessor(XML_Parser parser, const char *s, const char *end,
6469 const char **nextPtr) {
6470 UNUSED_P(s);
6471 UNUSED_P(end);
6472 UNUSED_P(nextPtr);
6473 return parser->m_errorCode;
6474 }
6475
/* Stores the value of an attribute, given as raw text [ptr, end) in
 * encoding enc, as a NUL-terminated string in pool.
 *
 * The work is done iteratively instead of recursively: as long as no
 * attribute entity is open, appendAttributeValue() consumes the raw
 * text; whenever it opens an entity (which then shows up as head of
 * parser->m_openAttributeEntities), the loop switches to feeding that
 * entity's replacement text to appendAttributeValue() until it is used
 * up, closes the entity, and resumes with whatever is next.
 *
 * For non-CDATA attributes (isCdata false) a single trailing space
 * (0x20) is removed from the stored value before termination.
 *
 * Returns XML_ERROR_NONE on success, XML_ERROR_NO_MEMORY if the
 * terminator cannot be appended, or the first error reported by
 * appendAttributeValue().
 */
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                    const char *ptr, const char *end, STRING_POOL *pool,
                    enum XML_Account account) {
  const char *next = ptr;
  enum XML_Error result = XML_ERROR_NONE;

  while (1) {
    if (! parser->m_openAttributeEntities) {
      /* No entity open: continue with the attribute's own text */
      result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
                                    account, &next);
    } else {
      OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
      /* Defensive only: this branch is entered solely when the list head
         is non-NULL, so the check below cannot trigger */
      if (! openEntity)
        return XML_ERROR_UNEXPECTED_STATE;

      ENTITY *const entity = openEntity->entity;
      const char *const textStart
          = ((const char *)entity->textPtr) + entity->processed;
      const char *const textEnd
          = (const char *)(entity->textPtr + entity->textLen);
      /* Set a safe default value in case 'next' does not get set */
      const char *nextInEntity = textStart;
      if (entity->hasMore) {
        result = appendAttributeValue(
            parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
            pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
        if (result != XML_ERROR_NONE)
          break;
        // Check if entity is complete, if not, mark down how much of it is
        // processed. A XML_SUSPENDED check here is not required as
        // appendAttributeValue will never suspend the parser.
        if (textEnd != nextInEntity) {
          entity->processed
              = (int)(nextInEntity - (const char *)entity->textPtr);
          continue;
        }

        // Entity is complete. We cannot close it here since we need to first
        // process its possible inner entities (which are added to the
        // m_openAttributeEntities during appendAttributeValue)
        entity->hasMore = XML_FALSE;
        continue;
      } // End of entity processing, "if" block skips the rest

      // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
      entityTrackingOnClose(parser, entity, __LINE__);
#endif
      // openEntity is m_openAttributeEntities' head, since we read it at the
      // start of this loop iteration and because we skipped the
      // appendAttributeValue call with hasMore set to false. This means we
      // can directly remove the head of m_openAttributeEntities
      assert(parser->m_openAttributeEntities == openEntity);
      entity->open = XML_FALSE;
      parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;

      /* put openEntity back in list of free instances */
      openEntity->next = parser->m_freeAttributeEntities;
      parser->m_freeAttributeEntities = openEntity;
    }

    // Break if an error occurred or there is nothing left to process
    if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
      break;
    }
  }

  if (result)
    return result;
  /* Non-CDATA normalization: drop one trailing space, if present */
  if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
    poolChop(pool);
  if (! poolAppendChar(pool, XML_T('\0')))
    return XML_ERROR_NO_MEMORY;
  return XML_ERROR_NONE;
}
6552
/* Tokenizes attribute value text [ptr, end) with XmlAttributeValueTok()
 * and appends its normalized form to pool.
 *
 * Per token:
 *  - data characters are appended as they are;
 *  - character references are decoded (XmlCharRefNumber + XmlEncode);
 *  - whitespace tokens (attribute-value space, newline, trailing CR) are
 *    appended as a single 0x20; for non-CDATA attributes a space is
 *    dropped when the pool is empty or already ends in 0x20;
 *  - references to predefined entities are replaced by their character;
 *  - any other general entity reference is looked up in
 *    dtd->generalEntities, validated, and then opened by means of
 *    processEntity(..., ENTITY_ATTRIBUTE).  In that case the function
 *    returns immediately, with *nextPtr pointing behind the reference,
 *    and leaves processing of the entity's text to the caller.
 *
 * When the input is exhausted (XML_TOK_NONE), *nextPtr is set to the end
 * of the consumed input.  nextPtr may be NULL.
 *
 * Returns XML_ERROR_NONE on success, otherwise the error matching the
 * failing token, entity check, allocation, or accounting limit.
 */
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                     const char *ptr, const char *end, STRING_POOL *pool,
                     enum XML_Account account, const char **nextPtr) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
  UNUSED_P(account);
#endif

  for (;;) {
    const char *next
        = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    switch (tok) {
    case XML_TOK_NONE:
      /* All input consumed: tell the caller how far we got */
      if (nextPtr) {
        *nextPtr = next;
      }
      return XML_ERROR_NONE;
    case XML_TOK_INVALID:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = ptr;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_CHAR_REF: {
      XML_Char buf[XML_ENCODE_MAX];
      int i;
      int n = XmlCharRefNumber(enc, ptr);
      if (n < 0) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BAD_CHAR_REF;
      }
      /* A referenced space is subject to the same collapsing as a
         literal one for non-CDATA attributes */
      if (! isCdata && n == 0x20 /* space */
          && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      n = XmlEncode(n, (ICHAR *)buf);
      /* The XmlEncode() functions can never return 0 here.  That
       * error return happens if the code point passed in is either
       * negative or greater than or equal to 0x110000.  The
       * XmlCharRefNumber() functions will all return a number
       * strictly less than 0x110000 or a negative value if an error
       * occurred.  The negative value is intercepted above, so
       * XmlEncode() is never passed a value it might return an
       * error for.
       */
      for (i = 0; i < n; i++) {
        if (! poolAppendChar(pool, buf[i]))
          return XML_ERROR_NO_MEMORY;
      }
    } break;
    case XML_TOK_DATA_CHARS:
      if (! poolAppend(pool, enc, ptr, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_TRAILING_CR:
      /* Consume just the CR, then treat it like any other whitespace */
      next = ptr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_ATTRIBUTE_VALUE_S:
    case XML_TOK_DATA_NEWLINE:
      /* Non-CDATA: skip leading whitespace and collapse runs of it */
      if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      if (! poolAppendChar(pool, 0x20))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      bool checkEntityDecl;
      /* The name sits between the leading '&' and the trailing ';',
         each one minBytesPerChar wide */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (! poolAppendChar(pool, ch))
          return XML_ERROR_NO_MEMORY;
        break;
      }
      /* The name is needed for the lookup only, hence the discard */
      name = poolStoreString(&parser->m_temp2Pool, enc,
                             ptr + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&parser->m_temp2Pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal.
      */
      if (pool == &dtd->pool) /* are we called from prolog? */
        checkEntityDecl =
#ifdef XML_DTD
            parser->m_prologState.documentEntity &&
#endif /* XML_DTD */
            (dtd->standalone ? ! parser->m_openInternalEntities
                             : ! dtd->hasParamEntityRefs);
      else /* if (pool == &parser->m_tempPool): we are called from content */
        checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
      if (checkEntityDecl) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        /* Cannot report skipped entity here - see comments on
           parser->m_skippedEntityHandler.
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        */
        /* Cannot call the default handler because this would be
           out of sync with the call to the startElementHandler.
        if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
          reportDefault(parser, enc, ptr, next);
        */
        break;
      }
      if (entity->open) {
        if (enc == parser->m_encoding) {
          /* It does not appear that this line can be executed.
           *
           * The "if (entity->open)" check catches recursive entity
           * definitions.  In order to be called with an open
           * entity, it must have gone through this code before and
           * been through the recursive call to
           * appendAttributeValue() some lines below.  That call
           * sets the local encoding ("enc") to the parser's
           * internal encoding (internal_utf8 or internal_utf16),
           * which can never be the same as the principle encoding.
           * It doesn't appear there is another code path that gets
           * here with entity->open being TRUE.
           *
           * Since it is not certain that this logic is watertight,
           * we keep the line and merely exclude it from coverage
           * tests.
           */
          parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
        }
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      }
      if (entity->notation) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BINARY_ENTITY_REF;
      }
      if (! entity->textPtr) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
      } else {
        /* Open the entity and return; its replacement text is not
           processed here but left to the caller, which finds it via
           the open attribute entity list */
        enum XML_Error result;
        result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
        if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
          *nextPtr = next;
        }
        return result;
      }
    } break;
    default:
      /* The only token returned by XmlAttributeValueTok() that does
       * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
       * Getting that would require an entity name to contain an
       * incomplete XML character (e.g. \xE2\x82); however previous
       * tokenisers will have already recognised and rejected such
       * names before XmlAttributeValueTok() gets a look-in.  This
       * default case should be retained as a safety net, but the code
       * excluded from coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = ptr;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }
    ptr = next;
  }
  /* not reached */
}
6745
6746 #if XML_GE == 1
/* Tokenizes entity value text [entityTextPtr, entityTextEnd) with
 * XmlEntityValueTok() and appends the result to dtd->entityValuePool.
 *
 * Per token:
 *  - data characters and general entity references are appended
 *    verbatim (general references are not expanded here);
 *  - newlines and a trailing CR are appended as a single 0xA;
 *  - character references are decoded (XmlCharRefNumber + XmlEncode);
 *  - parameter entity references (with XML_DTD, and only if parsing a
 *    parameter entity or text in a non-document encoding) are resolved:
 *    external ones through the external entity ref handler, internal
 *    ones by opening them with processEntity(..., ENTITY_VALUE), in
 *    which case the function returns and leaves processing of the
 *    entity's text to the caller.  Otherwise they are rejected with
 *    XML_ERROR_PARAM_ENTITY_REF.
 *
 * All exits after the initial pool setup go through endEntityValue,
 * which restores m_prologState.inEntityValue (XML_DTD only) and, if
 * nextPtr is non-NULL, stores the current 'next' position in *nextPtr.
 *
 * Returns XML_ERROR_NONE on success, otherwise the error matching the
 * failing token, entity check, allocation, or accounting limit.
 */
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
                 const char *entityTextPtr, const char *entityTextEnd,
                 enum XML_Account account, const char **nextPtr) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  STRING_POOL *pool = &(dtd->entityValuePool);
  enum XML_Error result = XML_ERROR_NONE;
#  ifdef XML_DTD
  int oldInEntityValue = parser->m_prologState.inEntityValue;
  parser->m_prologState.inEntityValue = 1;
#  else
  UNUSED_P(account);
#  endif /* XML_DTD */
  /* never return Null for the value argument in EntityDeclHandler,
     since this would indicate an external entity; therefore we
     have to make sure that entityValuePool.start is not null */
  /* NOTE(review): this early return skips the endEntityValue epilogue,
     so inEntityValue stays set and *nextPtr untouched on this path */
  if (! pool->blocks) {
    if (! poolGrow(pool))
      return XML_ERROR_NO_MEMORY;
  }

  /* Declared outside the loop because the epilogue reports it */
  const char *next;
  for (;;) {
    next
        = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
    int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);

    if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      goto endEntityValue;
    }

    switch (tok) {
    case XML_TOK_PARAM_ENTITY_REF:
#  ifdef XML_DTD
      if (parser->m_isParamEntity || enc != parser->m_encoding) {
        const XML_Char *name;
        ENTITY *entity;
        /* The name sits between the leading and the trailing delimiter,
           each one minBytesPerChar wide; it is needed for the lookup
           only, hence the discard below */
        name = poolStoreString(&parser->m_tempPool, enc,
                               entityTextPtr + enc->minBytesPerChar,
                               next - enc->minBytesPerChar);
        if (! name) {
          result = XML_ERROR_NO_MEMORY;
          goto endEntityValue;
        }
        entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
        poolDiscard(&parser->m_tempPool);
        if (! entity) {
          /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
          /* cannot report skipped entity here - see comments on
             parser->m_skippedEntityHandler
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
          */
          dtd->keepProcessing = dtd->standalone;
          goto endEntityValue;
        }
        /* A reference to an entity that is being expanded, or to the
           entity currently being declared, is a recursion */
        if (entity->open || (entity == parser->m_declEntity)) {
          if (enc == parser->m_encoding)
            parser->m_eventPtr = entityTextPtr;
          result = XML_ERROR_RECURSIVE_ENTITY_REF;
          goto endEntityValue;
        }
        if (entity->systemId) {
          /* External parameter entity: delegate to the application */
          if (parser->m_externalEntityRefHandler) {
            dtd->paramEntityRead = XML_FALSE;
            entity->open = XML_TRUE;
            entityTrackingOnOpen(parser, entity, __LINE__);
            if (! parser->m_externalEntityRefHandler(
                    parser->m_externalEntityRefHandlerArg, 0, entity->base,
                    entity->systemId, entity->publicId)) {
              entityTrackingOnClose(parser, entity, __LINE__);
              entity->open = XML_FALSE;
              result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
              goto endEntityValue;
            }
            entityTrackingOnClose(parser, entity, __LINE__);
            entity->open = XML_FALSE;
            if (! dtd->paramEntityRead)
              dtd->keepProcessing = dtd->standalone;
          } else
            dtd->keepProcessing = dtd->standalone;
        } else {
          /* Internal parameter entity: open it and return; its text is
             not processed here but left to the caller, which finds it
             via the open value entity list */
          result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
          goto endEntityValue;
        }
        break;
      }
#  endif /* XML_DTD */
      /* In the internal subset, PE references are not legal
         within markup declarations, e.g entity values in this case. */
      parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_PARAM_ENTITY_REF;
      goto endEntityValue;
    case XML_TOK_NONE:
      /* All input consumed */
      result = XML_ERROR_NONE;
      goto endEntityValue;
    case XML_TOK_ENTITY_REF:
    case XML_TOK_DATA_CHARS:
      /* General entity references are stored unexpanded, like data */
      if (! poolAppend(pool, enc, entityTextPtr, next)) {
        result = XML_ERROR_NO_MEMORY;
        goto endEntityValue;
      }
      break;
    case XML_TOK_TRAILING_CR:
      /* Consume just the CR, then treat it like a newline */
      next = entityTextPtr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_DATA_NEWLINE:
      if (pool->end == pool->ptr && ! poolGrow(pool)) {
        result = XML_ERROR_NO_MEMORY;
        goto endEntityValue;
      }
      *(pool->ptr)++ = 0xA;
      break;
    case XML_TOK_CHAR_REF: {
      XML_Char buf[XML_ENCODE_MAX];
      int i;
      int n = XmlCharRefNumber(enc, entityTextPtr);
      if (n < 0) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = entityTextPtr;
        result = XML_ERROR_BAD_CHAR_REF;
        goto endEntityValue;
      }
      n = XmlEncode(n, (ICHAR *)buf);
      /* The XmlEncode() functions can never return 0 here.  That
       * error return happens if the code point passed in is either
       * negative or greater than or equal to 0x110000.  The
       * XmlCharRefNumber() functions will all return a number
       * strictly less than 0x110000 or a negative value if an error
       * occurred.  The negative value is intercepted above, so
       * XmlEncode() is never passed a value it might return an
       * error for.
       */
      for (i = 0; i < n; i++) {
        if (pool->end == pool->ptr && ! poolGrow(pool)) {
          result = XML_ERROR_NO_MEMORY;
          goto endEntityValue;
        }
        *(pool->ptr)++ = buf[i];
      }
    } break;
    case XML_TOK_PARTIAL:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_INVALID_TOKEN;
      goto endEntityValue;
    case XML_TOK_INVALID:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = next;
      result = XML_ERROR_INVALID_TOKEN;
      goto endEntityValue;
    default:
      /* This default case should be unnecessary -- all the tokens
       * that XmlEntityValueTok() can return have their own explicit
       * cases -- but should be retained for safety.  We do however
       * exclude it from the coverage statistics.
       *
       * LCOV_EXCL_START
       */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_UNEXPECTED_STATE;
      goto endEntityValue;
      /* LCOV_EXCL_STOP */
    }
    entityTextPtr = next;
  }
endEntityValue:
#  ifdef XML_DTD
  parser->m_prologState.inEntityValue = oldInEntityValue;
#  endif /* XML_DTD */
  // If 'nextPtr' is given, it should be updated during the processing
  if (nextPtr != NULL) {
    *nextPtr = next;
  }
  return result;
}
6927
/* Stores the value of an entity, given as raw text
 * [entityTextPtr, entityTextEnd) in encoding enc, by driving
 * storeEntityValue() iteratively instead of recursively.
 *
 * As long as no value entity is open, storeEntityValue() consumes the
 * raw text; whenever it opens an entity (which then shows up as head of
 * parser->m_openValueEntities), the loop switches to feeding that
 * entity's replacement text to storeEntityValue() until it is used up,
 * closes the entity, and resumes with whatever is next.
 *
 * Returns XML_ERROR_NONE on success or the first error reported by
 * storeEntityValue().
 */
static enum XML_Error
callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
                     const char *entityTextPtr, const char *entityTextEnd,
                     enum XML_Account account) {
  const char *next = entityTextPtr;
  enum XML_Error result = XML_ERROR_NONE;
  while (1) {
    if (! parser->m_openValueEntities) {
      /* No entity open: continue with the entity value's own text */
      result
          = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
    } else {
      OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
      /* Defensive only: this branch is entered solely when the list head
         is non-NULL, so the check below cannot trigger */
      if (! openEntity)
        return XML_ERROR_UNEXPECTED_STATE;

      ENTITY *const entity = openEntity->entity;
      const char *const textStart
          = ((const char *)entity->textPtr) + entity->processed;
      const char *const textEnd
          = (const char *)(entity->textPtr + entity->textLen);
      /* Set a safe default value in case 'next' does not get set */
      const char *nextInEntity = textStart;
      if (entity->hasMore) {
        result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
                                  textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
                                  &nextInEntity);
        if (result != XML_ERROR_NONE)
          break;
        // Check if entity is complete, if not, mark down how much of it is
        // processed. No XML_SUSPENDED check is made here.
        // NOTE(review): the original remark named appendAttributeValue as
        // never suspending the parser, but the callee here is
        // storeEntityValue -- confirm the same holds for it.
        if (textEnd != nextInEntity) {
          entity->processed
              = (int)(nextInEntity - (const char *)entity->textPtr);
          continue;
        }

        // Entity is complete. We cannot close it here since we need to first
        // process its possible inner entities (which are added to the
        // m_openValueEntities during storeEntityValue)
        entity->hasMore = XML_FALSE;
        continue;
      } // End of entity processing, "if" block skips the rest

      // Remove fully processed openEntity from open entity list.
#  if XML_GE == 1
      entityTrackingOnClose(parser, entity, __LINE__);
#  endif
      // openEntity is m_openValueEntities' head, since we read it at the
      // start of this loop iteration and because we skipped the
      // storeEntityValue call with hasMore set to false. This means we can
      // directly remove the head of m_openValueEntities
      assert(parser->m_openValueEntities == openEntity);
      entity->open = XML_FALSE;
      parser->m_openValueEntities = parser->m_openValueEntities->next;

      /* put openEntity back in list of free instances */
      openEntity->next = parser->m_freeValueEntities;
      parser->m_freeValueEntities = openEntity;
    }

    // Break if an error occurred or there is nothing left to process
    if (result
        || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
      break;
    }
  }

  return result;
}
6998
6999 #else /* XML_GE == 0 */
7000
7001 static enum XML_Error
7002 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
7003 // This will store "&entity123;" in entity->textPtr
7004 // to end up as "&entity123;" in the handler.
7005 const char *const entity_start = "&";
7006 const char *const entity_end = ";";
7007
7008 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
7009 if (! poolAppendString(pool, entity_start)
7010 || ! poolAppendString(pool, entity->name)
7011 || ! poolAppendString(pool, entity_end)) {
7012 poolDiscard(pool);
7013 return XML_ERROR_NO_MEMORY;
7014 }
7015
7016 entity->textPtr = poolStart(pool);
7017 entity->textLen = (int)(poolLength(pool));
7018 poolFinish(pool);
7019
7020 return XML_ERROR_NONE;
7021 }
7022
7023 #endif /* XML_GE == 0 */
7024
7025 static void FASTCALL
7026 normalizeLines(XML_Char *s) {
7027 XML_Char *p;
7028 for (;; s++) {
7029 if (*s == XML_T('\0'))
7030 return;
7031 if (*s == 0xD)
7032 break;
7033 }
7034 p = s;
7035 do {
7036 if (*s == 0xD) {
7037 *p++ = 0xA;
7038 if (*++s == 0xA)
7039 s++;
7040 } else
7041 *p++ = *s++;
7042 } while (*s);
7043 *p = XML_T('\0');
7044 }
7045
7046 static int
7047 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
7048 const char *start, const char *end) {
7049 const XML_Char *target;
7050 XML_Char *data;
7051 const char *tem;
7052 if (! parser->m_processingInstructionHandler) {
7053 if (parser->m_defaultHandler)
7054 reportDefault(parser, enc, start, end);
7055 return 1;
7056 }
7057 start += enc->minBytesPerChar * 2;
7058 tem = start + XmlNameLength(enc, start);
7059 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
7060 if (! target)
7061 return 0;
7062 poolFinish(&parser->m_tempPool);
7063 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
7064 end - enc->minBytesPerChar * 2);
7065 if (! data)
7066 return 0;
7067 normalizeLines(data);
7068 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
7069 poolClear(&parser->m_tempPool);
7070 return 1;
7071 }
7072
7073 static int
7074 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
7075 const char *end) {
7076 XML_Char *data;
7077 if (! parser->m_commentHandler) {
7078 if (parser->m_defaultHandler)
7079 reportDefault(parser, enc, start, end);
7080 return 1;
7081 }
7082 data = poolStoreString(&parser->m_tempPool, enc,
7083 start + enc->minBytesPerChar * 4,
7084 end - enc->minBytesPerChar * 3);
7085 if (! data)
7086 return 0;
7087 normalizeLines(data);
7088 parser->m_commentHandler(parser->m_handlerArg, data);
7089 poolClear(&parser->m_tempPool);
7090 return 1;
7091 }
7092
7093 static void
7094 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
7095 const char *end) {
7096 if (MUST_CONVERT(enc, s)) {
7097 enum XML_Convert_Result convert_res;
7098 const char **eventPP;
7099 const char **eventEndPP;
7100 if (enc == parser->m_encoding) {
7101 eventPP = &parser->m_eventPtr;
7102 eventEndPP = &parser->m_eventEndPtr;
7103 } else {
7104 /* To get here, two things must be true; the parser must be
7105 * using a character encoding that is not the same as the
7106 * encoding passed in, and the encoding passed in must need
7107 * conversion to the internal format (UTF-8 unless XML_UNICODE
7108 * is defined). The only occasions on which the encoding passed
7109 * in is not the same as the parser's encoding are when it is
7110 * the internal encoding (e.g. a previously defined parameter
7111 * entity, already converted to internal format). This by
7112 * definition doesn't need conversion, so the whole branch never
7113 * gets executed.
7114 *
7115 * For safety's sake we don't delete these lines and merely
7116 * exclude them from coverage statistics.
7117 *
7118 * LCOV_EXCL_START
7119 */
7120 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
7121 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
7122 /* LCOV_EXCL_STOP */
7123 }
7124 do {
7125 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
7126 convert_res
7127 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
7128 *eventEndPP = s;
7129 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
7130 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
7131 *eventPP = s;
7132 } while ((convert_res != XML_CONVERT_COMPLETED)
7133 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
7134 } else
7135 parser->m_defaultHandler(
7136 parser->m_handlerArg, (const XML_Char *)s,
7137 (int)((const XML_Char *)end - (const XML_Char *)s));
7138 }
7139
7140 static int
7141 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
7142 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
7143 DEFAULT_ATTRIBUTE *att;
7144 if (value || isId) {
7145 /* The handling of default attributes gets messed up if we have
7146 a default which duplicates a non-default. */
7147 int i;
7148 for (i = 0; i < type->nDefaultAtts; i++)
7149 if (attId == type->defaultAtts[i].id)
7150 return 1;
7151 if (isId && ! type->idAtt && ! attId->xmlns)
7152 type->idAtt = attId;
7153 }
7154 if (type->nDefaultAtts == type->allocDefaultAtts) {
7155 if (type->allocDefaultAtts == 0) {
7156 type->allocDefaultAtts = 8;
7157 type->defaultAtts
7158 = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7159 if (! type->defaultAtts) {
7160 type->allocDefaultAtts = 0;
7161 return 0;
7162 }
7163 } else {
7164 DEFAULT_ATTRIBUTE *temp;
7165
7166 /* Detect and prevent integer overflow */
7167 if (type->allocDefaultAtts > INT_MAX / 2) {
7168 return 0;
7169 }
7170
7171 int count = type->allocDefaultAtts * 2;
7172
7173 /* Detect and prevent integer overflow.
7174 * The preprocessor guard addresses the "always false" warning
7175 * from -Wtype-limits on platforms where
7176 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7177 #if UINT_MAX >= SIZE_MAX
7178 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
7179 return 0;
7180 }
7181 #endif
7182
7183 temp = REALLOC(parser, type->defaultAtts,
7184 (count * sizeof(DEFAULT_ATTRIBUTE)));
7185 if (temp == NULL)
7186 return 0;
7187 type->allocDefaultAtts = count;
7188 type->defaultAtts = temp;
7189 }
7190 }
7191 att = type->defaultAtts + type->nDefaultAtts;
7192 att->id = attId;
7193 att->value = value;
7194 att->isCdata = isCdata;
7195 if (! isCdata)
7196 attId->maybeTokenized = XML_TRUE;
7197 type->nDefaultAtts += 1;
7198 return 1;
7199 }
7200
7201 static int
7202 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
7203 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7204 const XML_Char *name;
7205 for (name = elementType->name; *name; name++) {
7206 if (*name == XML_T(ASCII_COLON)) {
7207 PREFIX *prefix;
7208 const XML_Char *s;
7209 for (s = elementType->name; s != name; s++) {
7210 if (! poolAppendChar(&dtd->pool, *s))
7211 return 0;
7212 }
7213 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7214 return 0;
7215 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
7216 sizeof(PREFIX));
7217 if (! prefix)
7218 return 0;
7219 if (prefix->name == poolStart(&dtd->pool))
7220 poolFinish(&dtd->pool);
7221 else
7222 poolDiscard(&dtd->pool);
7223 elementType->prefix = prefix;
7224 break;
7225 }
7226 }
7227 return 1;
7228 }
7229
7230 static ATTRIBUTE_ID *
7231 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
7232 const char *end) {
7233 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7234 ATTRIBUTE_ID *id;
7235 const XML_Char *name;
7236 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7237 return NULL;
7238 name = poolStoreString(&dtd->pool, enc, start, end);
7239 if (! name)
7240 return NULL;
7241 /* skip quotation mark - its storage will be reused (like in name[-1]) */
7242 ++name;
7243 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
7244 sizeof(ATTRIBUTE_ID));
7245 if (! id)
7246 return NULL;
7247 if (id->name != name)
7248 poolDiscard(&dtd->pool);
7249 else {
7250 poolFinish(&dtd->pool);
7251 if (! parser->m_ns)
7252 ;
7253 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
7254 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
7255 && name[4] == XML_T(ASCII_s)
7256 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
7257 if (name[5] == XML_T('\0'))
7258 id->prefix = &dtd->defaultPrefix;
7259 else
7260 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
7261 sizeof(PREFIX));
7262 id->xmlns = XML_TRUE;
7263 } else {
7264 int i;
7265 for (i = 0; name[i]; i++) {
7266 /* attributes without prefix are *not* in the default namespace */
7267 if (name[i] == XML_T(ASCII_COLON)) {
7268 int j;
7269 for (j = 0; j < i; j++) {
7270 if (! poolAppendChar(&dtd->pool, name[j]))
7271 return NULL;
7272 }
7273 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
7274 return NULL;
7275 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
7276 poolStart(&dtd->pool), sizeof(PREFIX));
7277 if (! id->prefix)
7278 return NULL;
7279 if (id->prefix->name == poolStart(&dtd->pool))
7280 poolFinish(&dtd->pool);
7281 else
7282 poolDiscard(&dtd->pool);
7283 break;
7284 }
7285 }
7286 }
7287 }
7288 return id;
7289 }
7290
7291 #define CONTEXT_SEP XML_T(ASCII_FF)
7292
7293 static const XML_Char *
7294 getContext(XML_Parser parser) {
7295 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7296 HASH_TABLE_ITER iter;
7297 XML_Bool needSep = XML_FALSE;
7298
7299 if (dtd->defaultPrefix.binding) {
7300 int i;
7301 int len;
7302 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7303 return NULL;
7304 len = dtd->defaultPrefix.binding->uriLen;
7305 if (parser->m_namespaceSeparator)
7306 len--;
7307 for (i = 0; i < len; i++) {
7308 if (! poolAppendChar(&parser->m_tempPool,
7309 dtd->defaultPrefix.binding->uri[i])) {
7310 /* Because of memory caching, I don't believe this line can be
7311 * executed.
7312 *
7313 * This is part of a loop copying the default prefix binding
7314 * URI into the parser's temporary string pool. Previously,
7315 * that URI was copied into the same string pool, with a
7316 * terminating NUL character, as part of setContext(). When
7317 * the pool was cleared, that leaves a block definitely big
7318 * enough to hold the URI on the free block list of the pool.
7319 * The URI copy in getContext() therefore cannot run out of
7320 * memory.
7321 *
7322 * If the pool is used between the setContext() and
7323 * getContext() calls, the worst it can do is leave a bigger
7324 * block on the front of the free list. Given that this is
7325 * all somewhat inobvious and program logic can be changed, we
7326 * don't delete the line but we do exclude it from the test
7327 * coverage statistics.
7328 */
7329 return NULL; /* LCOV_EXCL_LINE */
7330 }
7331 }
7332 needSep = XML_TRUE;
7333 }
7334
7335 hashTableIterInit(&iter, &(dtd->prefixes));
7336 for (;;) {
7337 int i;
7338 int len;
7339 const XML_Char *s;
7340 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
7341 if (! prefix)
7342 break;
7343 if (! prefix->binding) {
7344 /* This test appears to be (justifiable) paranoia. There does
7345 * not seem to be a way of injecting a prefix without a binding
7346 * that doesn't get errored long before this function is called.
7347 * The test should remain for safety's sake, so we instead
7348 * exclude the following line from the coverage statistics.
7349 */
7350 continue; /* LCOV_EXCL_LINE */
7351 }
7352 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7353 return NULL;
7354 for (s = prefix->name; *s; s++)
7355 if (! poolAppendChar(&parser->m_tempPool, *s))
7356 return NULL;
7357 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7358 return NULL;
7359 len = prefix->binding->uriLen;
7360 if (parser->m_namespaceSeparator)
7361 len--;
7362 for (i = 0; i < len; i++)
7363 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7364 return NULL;
7365 needSep = XML_TRUE;
7366 }
7367
7368 hashTableIterInit(&iter, &(dtd->generalEntities));
7369 for (;;) {
7370 const XML_Char *s;
7371 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7372 if (! e)
7373 break;
7374 if (! e->open)
7375 continue;
7376 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7377 return NULL;
7378 for (s = e->name; *s; s++)
7379 if (! poolAppendChar(&parser->m_tempPool, *s))
7380 return 0;
7381 needSep = XML_TRUE;
7382 }
7383
7384 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7385 return NULL;
7386 return parser->m_tempPool.start;
7387 }
7388
7389 static XML_Bool
7390 setContext(XML_Parser parser, const XML_Char *context) {
7391 if (context == NULL) {
7392 return XML_FALSE;
7393 }
7394
7395 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7396 const XML_Char *s = context;
7397
7398 while (*context != XML_T('\0')) {
7399 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7400 ENTITY *e;
7401 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7402 return XML_FALSE;
7403 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7404 poolStart(&parser->m_tempPool), 0);
7405 if (e)
7406 e->open = XML_TRUE;
7407 if (*s != XML_T('\0'))
7408 s++;
7409 context = s;
7410 poolDiscard(&parser->m_tempPool);
7411 } else if (*s == XML_T(ASCII_EQUALS)) {
7412 PREFIX *prefix;
7413 if (poolLength(&parser->m_tempPool) == 0)
7414 prefix = &dtd->defaultPrefix;
7415 else {
7416 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7417 return XML_FALSE;
7418 prefix
7419 = (PREFIX *)lookup(parser, &dtd->prefixes,
7420 poolStart(&parser->m_tempPool), sizeof(PREFIX));
7421 if (! prefix)
7422 return XML_FALSE;
7423 if (prefix->name == poolStart(&parser->m_tempPool)) {
7424 prefix->name = poolCopyString(&dtd->pool, prefix->name);
7425 if (! prefix->name)
7426 return XML_FALSE;
7427 }
7428 poolDiscard(&parser->m_tempPool);
7429 }
7430 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7431 context++)
7432 if (! poolAppendChar(&parser->m_tempPool, *context))
7433 return XML_FALSE;
7434 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7435 return XML_FALSE;
7436 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7437 &parser->m_inheritedBindings)
7438 != XML_ERROR_NONE)
7439 return XML_FALSE;
7440 poolDiscard(&parser->m_tempPool);
7441 if (*context != XML_T('\0'))
7442 ++context;
7443 s = context;
7444 } else {
7445 if (! poolAppendChar(&parser->m_tempPool, *s))
7446 return XML_FALSE;
7447 s++;
7448 }
7449 }
7450 return XML_TRUE;
7451 }
7452
7453 static void FASTCALL
7454 normalizePublicId(XML_Char *publicId) {
7455 XML_Char *p = publicId;
7456 XML_Char *s;
7457 for (s = publicId; *s; s++) {
7458 switch (*s) {
7459 case 0x20:
7460 case 0xD:
7461 case 0xA:
7462 if (p != publicId && p[-1] != 0x20)
7463 *p++ = 0x20;
7464 break;
7465 default:
7466 *p++ = *s;
7467 }
7468 }
7469 if (p != publicId && p[-1] == 0x20)
7470 --p;
7471 *p = XML_T('\0');
7472 }
7473
7474 static DTD *
7475 dtdCreate(XML_Parser parser) {
7476 DTD *p = MALLOC(parser, sizeof(DTD));
7477 if (p == NULL)
7478 return p;
7479 poolInit(&(p->pool), parser);
7480 poolInit(&(p->entityValuePool), parser);
7481 hashTableInit(&(p->generalEntities), parser);
7482 hashTableInit(&(p->elementTypes), parser);
7483 hashTableInit(&(p->attributeIds), parser);
7484 hashTableInit(&(p->prefixes), parser);
7485 #ifdef XML_DTD
7486 p->paramEntityRead = XML_FALSE;
7487 hashTableInit(&(p->paramEntities), parser);
7488 #endif /* XML_DTD */
7489 p->defaultPrefix.name = NULL;
7490 p->defaultPrefix.binding = NULL;
7491
7492 p->in_eldecl = XML_FALSE;
7493 p->scaffIndex = NULL;
7494 p->scaffold = NULL;
7495 p->scaffLevel = 0;
7496 p->scaffSize = 0;
7497 p->scaffCount = 0;
7498 p->contentStringLen = 0;
7499
7500 p->keepProcessing = XML_TRUE;
7501 p->hasParamEntityRefs = XML_FALSE;
7502 p->standalone = XML_FALSE;
7503 return p;
7504 }
7505
7506 static void
7507 dtdReset(DTD *p, XML_Parser parser) {
7508 HASH_TABLE_ITER iter;
7509 hashTableIterInit(&iter, &(p->elementTypes));
7510 for (;;) {
7511 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7512 if (! e)
7513 break;
7514 if (e->allocDefaultAtts != 0)
7515 FREE(parser, e->defaultAtts);
7516 }
7517 hashTableClear(&(p->generalEntities));
7518 #ifdef XML_DTD
7519 p->paramEntityRead = XML_FALSE;
7520 hashTableClear(&(p->paramEntities));
7521 #endif /* XML_DTD */
7522 hashTableClear(&(p->elementTypes));
7523 hashTableClear(&(p->attributeIds));
7524 hashTableClear(&(p->prefixes));
7525 poolClear(&(p->pool));
7526 poolClear(&(p->entityValuePool));
7527 p->defaultPrefix.name = NULL;
7528 p->defaultPrefix.binding = NULL;
7529
7530 p->in_eldecl = XML_FALSE;
7531
7532 FREE(parser, p->scaffIndex);
7533 p->scaffIndex = NULL;
7534 FREE(parser, p->scaffold);
7535 p->scaffold = NULL;
7536
7537 p->scaffLevel = 0;
7538 p->scaffSize = 0;
7539 p->scaffCount = 0;
7540 p->contentStringLen = 0;
7541
7542 p->keepProcessing = XML_TRUE;
7543 p->hasParamEntityRefs = XML_FALSE;
7544 p->standalone = XML_FALSE;
7545 }
7546
7547 static void
7548 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) {
7549 HASH_TABLE_ITER iter;
7550 hashTableIterInit(&iter, &(p->elementTypes));
7551 for (;;) {
7552 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7553 if (! e)
7554 break;
7555 if (e->allocDefaultAtts != 0)
7556 FREE(parser, e->defaultAtts);
7557 }
7558 hashTableDestroy(&(p->generalEntities));
7559 #ifdef XML_DTD
7560 hashTableDestroy(&(p->paramEntities));
7561 #endif /* XML_DTD */
7562 hashTableDestroy(&(p->elementTypes));
7563 hashTableDestroy(&(p->attributeIds));
7564 hashTableDestroy(&(p->prefixes));
7565 poolDestroy(&(p->pool));
7566 poolDestroy(&(p->entityValuePool));
7567 if (isDocEntity) {
7568 FREE(parser, p->scaffIndex);
7569 FREE(parser, p->scaffold);
7570 }
7571 FREE(parser, p);
7572 }
7573
7574 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7575 The new DTD has already been initialized.
7576 */
7577 static int
7578 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7579 XML_Parser parser) {
7580 HASH_TABLE_ITER iter;
7581
7582 /* Copy the prefix table. */
7583
7584 hashTableIterInit(&iter, &(oldDtd->prefixes));
7585 for (;;) {
7586 const XML_Char *name;
7587 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7588 if (! oldP)
7589 break;
7590 name = poolCopyString(&(newDtd->pool), oldP->name);
7591 if (! name)
7592 return 0;
7593 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7594 return 0;
7595 }
7596
7597 hashTableIterInit(&iter, &(oldDtd->attributeIds));
7598
7599 /* Copy the attribute id table. */
7600
7601 for (;;) {
7602 ATTRIBUTE_ID *newA;
7603 const XML_Char *name;
7604 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7605
7606 if (! oldA)
7607 break;
7608 /* Remember to allocate the scratch byte before the name. */
7609 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7610 return 0;
7611 name = poolCopyString(&(newDtd->pool), oldA->name);
7612 if (! name)
7613 return 0;
7614 ++name;
7615 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7616 sizeof(ATTRIBUTE_ID));
7617 if (! newA)
7618 return 0;
7619 newA->maybeTokenized = oldA->maybeTokenized;
7620 if (oldA->prefix) {
7621 newA->xmlns = oldA->xmlns;
7622 if (oldA->prefix == &oldDtd->defaultPrefix)
7623 newA->prefix = &newDtd->defaultPrefix;
7624 else
7625 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7626 oldA->prefix->name, 0);
7627 }
7628 }
7629
7630 /* Copy the element type table. */
7631
7632 hashTableIterInit(&iter, &(oldDtd->elementTypes));
7633
7634 for (;;) {
7635 int i;
7636 ELEMENT_TYPE *newE;
7637 const XML_Char *name;
7638 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7639 if (! oldE)
7640 break;
7641 name = poolCopyString(&(newDtd->pool), oldE->name);
7642 if (! name)
7643 return 0;
7644 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7645 sizeof(ELEMENT_TYPE));
7646 if (! newE)
7647 return 0;
7648 if (oldE->nDefaultAtts) {
7649 /* Detect and prevent integer overflow.
7650 * The preprocessor guard addresses the "always false" warning
7651 * from -Wtype-limits on platforms where
7652 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7653 #if UINT_MAX >= SIZE_MAX
7654 if ((size_t)oldE->nDefaultAtts
7655 > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7656 return 0;
7657 }
7658 #endif
7659 newE->defaultAtts
7660 = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7661 if (! newE->defaultAtts) {
7662 return 0;
7663 }
7664 }
7665 if (oldE->idAtt)
7666 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7667 oldE->idAtt->name, 0);
7668 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7669 if (oldE->prefix)
7670 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7671 oldE->prefix->name, 0);
7672 for (i = 0; i < newE->nDefaultAtts; i++) {
7673 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7674 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7675 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7676 if (oldE->defaultAtts[i].value) {
7677 newE->defaultAtts[i].value
7678 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7679 if (! newE->defaultAtts[i].value)
7680 return 0;
7681 } else
7682 newE->defaultAtts[i].value = NULL;
7683 }
7684 }
7685
7686 /* Copy the entity tables. */
7687 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7688 &(oldDtd->generalEntities)))
7689 return 0;
7690
7691 #ifdef XML_DTD
7692 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7693 &(oldDtd->paramEntities)))
7694 return 0;
7695 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7696 #endif /* XML_DTD */
7697
7698 newDtd->keepProcessing = oldDtd->keepProcessing;
7699 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7700 newDtd->standalone = oldDtd->standalone;
7701
7702 /* Don't want deep copying for scaffolding */
7703 newDtd->in_eldecl = oldDtd->in_eldecl;
7704 newDtd->scaffold = oldDtd->scaffold;
7705 newDtd->contentStringLen = oldDtd->contentStringLen;
7706 newDtd->scaffSize = oldDtd->scaffSize;
7707 newDtd->scaffLevel = oldDtd->scaffLevel;
7708 newDtd->scaffIndex = oldDtd->scaffIndex;
7709
7710 return 1;
7711 } /* End dtdCopy */
7712
7713 static int
7714 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7715 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7716 HASH_TABLE_ITER iter;
7717 const XML_Char *cachedOldBase = NULL;
7718 const XML_Char *cachedNewBase = NULL;
7719
7720 hashTableIterInit(&iter, oldTable);
7721
7722 for (;;) {
7723 ENTITY *newE;
7724 const XML_Char *name;
7725 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7726 if (! oldE)
7727 break;
7728 name = poolCopyString(newPool, oldE->name);
7729 if (! name)
7730 return 0;
7731 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7732 if (! newE)
7733 return 0;
7734 if (oldE->systemId) {
7735 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7736 if (! tem)
7737 return 0;
7738 newE->systemId = tem;
7739 if (oldE->base) {
7740 if (oldE->base == cachedOldBase)
7741 newE->base = cachedNewBase;
7742 else {
7743 cachedOldBase = oldE->base;
7744 tem = poolCopyString(newPool, cachedOldBase);
7745 if (! tem)
7746 return 0;
7747 cachedNewBase = newE->base = tem;
7748 }
7749 }
7750 if (oldE->publicId) {
7751 tem = poolCopyString(newPool, oldE->publicId);
7752 if (! tem)
7753 return 0;
7754 newE->publicId = tem;
7755 }
7756 } else {
7757 const XML_Char *tem
7758 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7759 if (! tem)
7760 return 0;
7761 newE->textPtr = tem;
7762 newE->textLen = oldE->textLen;
7763 }
7764 if (oldE->notation) {
7765 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7766 if (! tem)
7767 return 0;
7768 newE->notation = tem;
7769 }
7770 newE->is_param = oldE->is_param;
7771 newE->is_internal = oldE->is_internal;
7772 }
7773 return 1;
7774 }
7775
7776 #define INIT_POWER 6
7777
7778 static XML_Bool FASTCALL
7779 keyeq(KEY s1, KEY s2) {
7780 for (; *s1 == *s2; s1++, s2++)
7781 if (*s1 == 0)
7782 return XML_TRUE;
7783 return XML_FALSE;
7784 }
7785
7786 static size_t
7787 keylen(KEY s) {
7788 size_t len = 0;
7789 for (; *s; s++, len++)
7790 ;
7791 return len;
7792 }
7793
7794 static void
7795 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7796 key->k[0] = 0;
7797 key->k[1] = get_hash_secret_salt(parser);
7798 }
7799
7800 static unsigned long FASTCALL
7801 hash(XML_Parser parser, KEY s) {
7802 struct siphash state;
7803 struct sipkey key;
7804 (void)sip24_valid;
7805 copy_salt_to_sipkey(parser, &key);
7806 sip24_init(&state, &key);
7807 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7808 return (unsigned long)sip24_final(&state);
7809 }
7810
7811 static NAMED *
7812 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7813 size_t i;
7814 if (table->size == 0) {
7815 size_t tsize;
7816 if (! createSize)
7817 return NULL;
7818 table->power = INIT_POWER;
7819 /* table->size is a power of 2 */
7820 table->size = (size_t)1 << INIT_POWER;
7821 tsize = table->size * sizeof(NAMED *);
7822 table->v = MALLOC(table->parser, tsize);
7823 if (! table->v) {
7824 table->size = 0;
7825 return NULL;
7826 }
7827 memset(table->v, 0, tsize);
7828 i = hash(parser, name) & ((unsigned long)table->size - 1);
7829 } else {
7830 unsigned long h = hash(parser, name);
7831 unsigned long mask = (unsigned long)table->size - 1;
7832 unsigned char step = 0;
7833 i = h & mask;
7834 while (table->v[i]) {
7835 if (keyeq(name, table->v[i]->name))
7836 return table->v[i];
7837 if (! step)
7838 step = PROBE_STEP(h, mask, table->power);
7839 i < step ? (i += table->size - step) : (i -= step);
7840 }
7841 if (! createSize)
7842 return NULL;
7843
7844 /* check for overflow (table is half full) */
7845 if (table->used >> (table->power - 1)) {
7846 unsigned char newPower = table->power + 1;
7847
7848 /* Detect and prevent invalid shift */
7849 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7850 return NULL;
7851 }
7852
7853 size_t newSize = (size_t)1 << newPower;
7854 unsigned long newMask = (unsigned long)newSize - 1;
7855
7856 /* Detect and prevent integer overflow */
7857 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7858 return NULL;
7859 }
7860
7861 size_t tsize = newSize * sizeof(NAMED *);
7862 NAMED **newV = MALLOC(table->parser, tsize);
7863 if (! newV)
7864 return NULL;
7865 memset(newV, 0, tsize);
7866 for (i = 0; i < table->size; i++)
7867 if (table->v[i]) {
7868 unsigned long newHash = hash(parser, table->v[i]->name);
7869 size_t j = newHash & newMask;
7870 step = 0;
7871 while (newV[j]) {
7872 if (! step)
7873 step = PROBE_STEP(newHash, newMask, newPower);
7874 j < step ? (j += newSize - step) : (j -= step);
7875 }
7876 newV[j] = table->v[i];
7877 }
7878 FREE(table->parser, table->v);
7879 table->v = newV;
7880 table->power = newPower;
7881 table->size = newSize;
7882 i = h & newMask;
7883 step = 0;
7884 while (table->v[i]) {
7885 if (! step)
7886 step = PROBE_STEP(h, newMask, newPower);
7887 i < step ? (i += newSize - step) : (i -= step);
7888 }
7889 }
7890 }
7891 table->v[i] = MALLOC(table->parser, createSize);
7892 if (! table->v[i])
7893 return NULL;
7894 memset(table->v[i], 0, createSize);
7895 table->v[i]->name = name;
7896 (table->used)++;
7897 return table->v[i];
7898 }
7899
7900 static void FASTCALL
7901 hashTableClear(HASH_TABLE *table) {
7902 size_t i;
7903 for (i = 0; i < table->size; i++) {
7904 FREE(table->parser, table->v[i]);
7905 table->v[i] = NULL;
7906 }
7907 table->used = 0;
7908 }
7909
7910 static void FASTCALL
7911 hashTableDestroy(HASH_TABLE *table) {
7912 size_t i;
7913 for (i = 0; i < table->size; i++)
7914 FREE(table->parser, table->v[i]);
7915 FREE(table->parser, table->v);
7916 }
7917
7918 static void FASTCALL
7919 hashTableInit(HASH_TABLE *p, XML_Parser parser) {
7920 p->power = 0;
7921 p->size = 0;
7922 p->used = 0;
7923 p->v = NULL;
7924 p->parser = parser;
7925 }
7926
7927 static void FASTCALL
7928 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7929 iter->p = table->v;
7930 iter->end = iter->p ? iter->p + table->size : NULL;
7931 }
7932
7933 static NAMED *FASTCALL
7934 hashTableIterNext(HASH_TABLE_ITER *iter) {
7935 while (iter->p != iter->end) {
7936 NAMED *tem = *(iter->p)++;
7937 if (tem)
7938 return tem;
7939 }
7940 return NULL;
7941 }
7942
7943 static void FASTCALL
7944 poolInit(STRING_POOL *pool, XML_Parser parser) {
7945 pool->blocks = NULL;
7946 pool->freeBlocks = NULL;
7947 pool->start = NULL;
7948 pool->ptr = NULL;
7949 pool->end = NULL;
7950 pool->parser = parser;
7951 }
7952
7953 static void FASTCALL
7954 poolClear(STRING_POOL *pool) {
7955 if (! pool->freeBlocks)
7956 pool->freeBlocks = pool->blocks;
7957 else {
7958 BLOCK *p = pool->blocks;
7959 while (p) {
7960 BLOCK *tem = p->next;
7961 p->next = pool->freeBlocks;
7962 pool->freeBlocks = p;
7963 p = tem;
7964 }
7965 }
7966 pool->blocks = NULL;
7967 pool->start = NULL;
7968 pool->ptr = NULL;
7969 pool->end = NULL;
7970 }
7971
7972 static void FASTCALL
7973 poolDestroy(STRING_POOL *pool) {
7974 BLOCK *p = pool->blocks;
7975 while (p) {
7976 BLOCK *tem = p->next;
7977 FREE(pool->parser, p);
7978 p = tem;
7979 }
7980 p = pool->freeBlocks;
7981 while (p) {
7982 BLOCK *tem = p->next;
7983 FREE(pool->parser, p);
7984 p = tem;
7985 }
7986 }
7987
7988 static XML_Char *
7989 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7990 const char *end) {
7991 if (! pool->ptr && ! poolGrow(pool))
7992 return NULL;
7993 for (;;) {
7994 const enum XML_Convert_Result convert_res = XmlConvert(
7995 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7996 if ((convert_res == XML_CONVERT_COMPLETED)
7997 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7998 break;
7999 if (! poolGrow(pool))
8000 return NULL;
8001 }
8002 return pool->start;
8003 }
8004
8005 static const XML_Char *FASTCALL
8006 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
8007 do {
8008 if (! poolAppendChar(pool, *s))
8009 return NULL;
8010 } while (*s++);
8011 s = pool->start;
8012 poolFinish(pool);
8013 return s;
8014 }
8015
8016 static const XML_Char *
8017 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
8018 if (! pool->ptr && ! poolGrow(pool)) {
8019 /* The following line is unreachable given the current usage of
8020 * poolCopyStringN(). Currently it is called from exactly one
8021 * place to copy the text of a simple general entity. By that
8022 * point, the name of the entity is already stored in the pool, so
8023 * pool->ptr cannot be NULL.
8024 *
8025 * If poolCopyStringN() is used elsewhere as it well might be,
8026 * this line may well become executable again. Regardless, this
8027 * sort of check shouldn't be removed lightly, so we just exclude
8028 * it from the coverage statistics.
8029 */
8030 return NULL; /* LCOV_EXCL_LINE */
8031 }
8032 for (; n > 0; --n, s++) {
8033 if (! poolAppendChar(pool, *s))
8034 return NULL;
8035 }
8036 s = pool->start;
8037 poolFinish(pool);
8038 return s;
8039 }
8040
8041 static const XML_Char *FASTCALL
8042 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
8043 while (*s) {
8044 if (! poolAppendChar(pool, *s))
8045 return NULL;
8046 s++;
8047 }
8048 return pool->start;
8049 }
8050
8051 static XML_Char *
8052 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
8053 const char *end) {
8054 if (! poolAppend(pool, enc, ptr, end))
8055 return NULL;
8056 if (pool->ptr == pool->end && ! poolGrow(pool))
8057 return NULL;
8058 *(pool->ptr)++ = 0;
8059 return pool->start;
8060 }
8061
8062 static size_t
8063 poolBytesToAllocateFor(int blockSize) {
8064 /* Unprotected math would be:
8065 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
8066 **
8067 ** Detect overflow, avoiding _signed_ overflow undefined behavior
8068 ** For a + b * c we check b * c in isolation first, so that addition of a
8069 ** on top has no chance of making us accept a small non-negative number
8070 */
8071 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
8072
8073 if (blockSize <= 0)
8074 return 0;
8075
8076 if (blockSize > (int)(INT_MAX / stretch))
8077 return 0;
8078
8079 {
8080 const int stretchedBlockSize = blockSize * (int)stretch;
8081 const int bytesToAllocate
8082 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
8083 if (bytesToAllocate < 0)
8084 return 0;
8085
8086 return (size_t)bytesToAllocate;
8087 }
8088 }
8089
8090 static XML_Bool FASTCALL
8091 poolGrow(STRING_POOL *pool) {
8092 if (pool->freeBlocks) {
8093 if (pool->start == 0) {
8094 pool->blocks = pool->freeBlocks;
8095 pool->freeBlocks = pool->freeBlocks->next;
8096 pool->blocks->next = NULL;
8097 pool->start = pool->blocks->s;
8098 pool->end = pool->start + pool->blocks->size;
8099 pool->ptr = pool->start;
8100 return XML_TRUE;
8101 }
8102 if (pool->end - pool->start < pool->freeBlocks->size) {
8103 BLOCK *tem = pool->freeBlocks->next;
8104 pool->freeBlocks->next = pool->blocks;
8105 pool->blocks = pool->freeBlocks;
8106 pool->freeBlocks = tem;
8107 memcpy(pool->blocks->s, pool->start,
8108 (pool->end - pool->start) * sizeof(XML_Char));
8109 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
8110 pool->start = pool->blocks->s;
8111 pool->end = pool->start + pool->blocks->size;
8112 return XML_TRUE;
8113 }
8114 }
8115 if (pool->blocks && pool->start == pool->blocks->s) {
8116 BLOCK *temp;
8117 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
8118 size_t bytesToAllocate;
8119
8120 /* NOTE: Needs to be calculated prior to calling `realloc`
8121 to avoid dangling pointers: */
8122 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
8123
8124 if (blockSize < 0) {
8125 /* This condition traps a situation where either more than
8126 * INT_MAX/2 bytes have already been allocated. This isn't
8127 * readily testable, since it is unlikely that an average
8128 * machine will have that much memory, so we exclude it from the
8129 * coverage statistics.
8130 */
8131 return XML_FALSE; /* LCOV_EXCL_LINE */
8132 }
8133
8134 bytesToAllocate = poolBytesToAllocateFor(blockSize);
8135 if (bytesToAllocate == 0)
8136 return XML_FALSE;
8137
8138 temp = REALLOC(pool->parser, pool->blocks, (unsigned)bytesToAllocate);
8139 if (temp == NULL)
8140 return XML_FALSE;
8141 pool->blocks = temp;
8142 pool->blocks->size = blockSize;
8143 pool->ptr = pool->blocks->s + offsetInsideBlock;
8144 pool->start = pool->blocks->s;
8145 pool->end = pool->start + blockSize;
8146 } else {
8147 BLOCK *tem;
8148 int blockSize = (int)(pool->end - pool->start);
8149 size_t bytesToAllocate;
8150
8151 if (blockSize < 0) {
8152 /* This condition traps a situation where either more than
8153 * INT_MAX bytes have already been allocated (which is prevented
8154 * by various pieces of program logic, not least this one, never
8155 * mind the unlikelihood of actually having that much memory) or
8156 * the pool control fields have been corrupted (which could
8157 * conceivably happen in an extremely buggy user handler
8158 * function). Either way it isn't readily testable, so we
8159 * exclude it from the coverage statistics.
8160 */
8161 return XML_FALSE; /* LCOV_EXCL_LINE */
8162 }
8163
8164 if (blockSize < INIT_BLOCK_SIZE)
8165 blockSize = INIT_BLOCK_SIZE;
8166 else {
8167 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
8168 if ((int)((unsigned)blockSize * 2U) < 0) {
8169 return XML_FALSE;
8170 }
8171 blockSize *= 2;
8172 }
8173
8174 bytesToAllocate = poolBytesToAllocateFor(blockSize);
8175 if (bytesToAllocate == 0)
8176 return XML_FALSE;
8177
8178 tem = MALLOC(pool->parser, bytesToAllocate);
8179 if (! tem)
8180 return XML_FALSE;
8181 tem->size = blockSize;
8182 tem->next = pool->blocks;
8183 pool->blocks = tem;
8184 if (pool->ptr != pool->start)
8185 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
8186 pool->ptr = tem->s + (pool->ptr - pool->start);
8187 pool->start = tem->s;
8188 pool->end = tem->s + blockSize;
8189 }
8190 return XML_TRUE;
8191 }
8192
8193 static int FASTCALL
8194 nextScaffoldPart(XML_Parser parser) {
8195 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8196 CONTENT_SCAFFOLD *me;
8197 int next;
8198
8199 if (! dtd->scaffIndex) {
8200 /* Detect and prevent integer overflow.
8201 * The preprocessor guard addresses the "always false" warning
8202 * from -Wtype-limits on platforms where
8203 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8204 #if UINT_MAX >= SIZE_MAX
8205 if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
8206 return -1;
8207 }
8208 #endif
8209 dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int));
8210 if (! dtd->scaffIndex)
8211 return -1;
8212 dtd->scaffIndex[0] = 0;
8213 }
8214
8215 // Will casting to int be safe further down?
8216 if (dtd->scaffCount > INT_MAX) {
8217 return -1;
8218 }
8219
8220 if (dtd->scaffCount >= dtd->scaffSize) {
8221 CONTENT_SCAFFOLD *temp;
8222 if (dtd->scaffold) {
8223 /* Detect and prevent integer overflow */
8224 if (dtd->scaffSize > UINT_MAX / 2u) {
8225 return -1;
8226 }
8227 /* Detect and prevent integer overflow.
8228 * The preprocessor guard addresses the "always false" warning
8229 * from -Wtype-limits on platforms where
8230 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8231 #if UINT_MAX >= SIZE_MAX
8232 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
8233 return -1;
8234 }
8235 #endif
8236
8237 temp = REALLOC(parser, dtd->scaffold,
8238 dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
8239 if (temp == NULL)
8240 return -1;
8241 dtd->scaffSize *= 2;
8242 } else {
8243 temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD));
8244 if (temp == NULL)
8245 return -1;
8246 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
8247 }
8248 dtd->scaffold = temp;
8249 }
8250 next = (int)dtd->scaffCount++;
8251 me = &dtd->scaffold[next];
8252 if (dtd->scaffLevel) {
8253 CONTENT_SCAFFOLD *parent
8254 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
8255 if (parent->lastchild) {
8256 dtd->scaffold[parent->lastchild].nextsib = next;
8257 }
8258 if (! parent->childcnt)
8259 parent->firstchild = next;
8260 parent->lastchild = next;
8261 parent->childcnt++;
8262 }
8263 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
8264 return next;
8265 }
8266
8267 static XML_Content *
8268 build_model(XML_Parser parser) {
8269 /* Function build_model transforms the existing parser->m_dtd->scaffold
8270 * array of CONTENT_SCAFFOLD tree nodes into a new array of
8271 * XML_Content tree nodes followed by a gapless list of zero-terminated
8272 * strings. */
8273 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8274 XML_Content *ret;
8275 XML_Char *str; /* the current string writing location */
8276
8277 /* Detect and prevent integer overflow.
8278 * The preprocessor guard addresses the "always false" warning
8279 * from -Wtype-limits on platforms where
8280 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
8281 #if UINT_MAX >= SIZE_MAX
8282 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
8283 return NULL;
8284 }
8285 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
8286 return NULL;
8287 }
8288 #endif
8289 if (dtd->scaffCount * sizeof(XML_Content)
8290 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
8291 return NULL;
8292 }
8293
8294 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
8295 + (dtd->contentStringLen * sizeof(XML_Char)));
8296
8297 // NOTE: We are avoiding MALLOC(..) here to so that
8298 // applications that are not using XML_FreeContentModel but plain
8299 // free(..) or .free_fcn() to free the content model's memory are safe.
8300 ret = parser->m_mem.malloc_fcn(allocsize);
8301 if (! ret)
8302 return NULL;
8303
8304 /* What follows is an iterative implementation (of what was previously done
8305 * recursively in a dedicated function called "build_node". The old recursive
8306 * build_node could be forced into stack exhaustion from input as small as a
8307 * few megabyte, and so that was a security issue. Hence, a function call
8308 * stack is avoided now by resolving recursion.)
8309 *
8310 * The iterative approach works as follows:
8311 *
8312 * - We have two writing pointers, both walking up the result array; one does
8313 * the work, the other creates "jobs" for its colleague to do, and leads
8314 * the way:
8315 *
8316 * - The faster one, pointer jobDest, always leads and writes "what job
8317 * to do" by the other, once they reach that place in the
8318 * array: leader "jobDest" stores the source node array index (relative
8319 * to array dtd->scaffold) in field "numchildren".
8320 *
8321 * - The slower one, pointer dest, looks at the value stored in the
8322 * "numchildren" field (which actually holds a source node array index
8323 * at that time) and puts the real data from dtd->scaffold in.
8324 *
8325 * - Before the loop starts, jobDest writes source array index 0
8326 * (where the root node is located) so that dest will have something to do
8327 * when it starts operation.
8328 *
8329 * - Whenever nodes with children are encountered, jobDest appends
8330 * them as new jobs, in order. As a result, tree node siblings are
8331 * adjacent in the resulting array, for example:
8332 *
8333 * [0] root, has two children
8334 * [1] first child of 0, has three children
8335 * [3] first child of 1, does not have children
8336 * [4] second child of 1, does not have children
8337 * [5] third child of 1, does not have children
8338 * [2] second child of 0, does not have children
8339 *
8340 * Or (the same data) presented in flat array view:
8341 *
8342 * [0] root, has two children
8343 *
8344 * [1] first child of 0, has three children
8345 * [2] second child of 0, does not have children
8346 *
8347 * [3] first child of 1, does not have children
8348 * [4] second child of 1, does not have children
8349 * [5] third child of 1, does not have children
8350 *
8351 * - The algorithm repeats until all target array indices have been processed.
8352 */
8353 XML_Content *dest = ret; /* tree node writing location, moves upwards */
8354 XML_Content *const destLimit = &ret[dtd->scaffCount];
8355 XML_Content *jobDest = ret; /* next free writing location in target array */
8356 str = (XML_Char *)&ret[dtd->scaffCount];
8357
8358 /* Add the starting job, the root node (index 0) of the source tree */
8359 (jobDest++)->numchildren = 0;
8360
8361 for (; dest < destLimit; dest++) {
8362 /* Retrieve source tree array index from job storage */
8363 const int src_node = (int)dest->numchildren;
8364
8365 /* Convert item */
8366 dest->type = dtd->scaffold[src_node].type;
8367 dest->quant = dtd->scaffold[src_node].quant;
8368 if (dest->type == XML_CTYPE_NAME) {
8369 const XML_Char *src;
8370 dest->name = str;
8371 src = dtd->scaffold[src_node].name;
8372 for (;;) {
8373 *str++ = *src;
8374 if (! *src)
8375 break;
8376 src++;
8377 }
8378 dest->numchildren = 0;
8379 dest->children = NULL;
8380 } else {
8381 unsigned int i;
8382 int cn;
8383 dest->name = NULL;
8384 dest->numchildren = dtd->scaffold[src_node].childcnt;
8385 dest->children = jobDest;
8386
8387 /* Append scaffold indices of children to array */
8388 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
8389 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
8390 (jobDest++)->numchildren = (unsigned int)cn;
8391 }
8392 }
8393
8394 return ret;
8395 }
8396
8397 static ELEMENT_TYPE *
8398 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8399 const char *end) {
8400 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8401 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8402 ELEMENT_TYPE *ret;
8403
8404 if (! name)
8405 return NULL;
8406 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8407 sizeof(ELEMENT_TYPE));
8408 if (! ret)
8409 return NULL;
8410 if (ret->name != name)
8411 poolDiscard(&dtd->pool);
8412 else {
8413 poolFinish(&dtd->pool);
8414 if (! setElementTypePrefix(parser, ret))
8415 return NULL;
8416 }
8417 return ret;
8418 }
8419
8420 static XML_Char *
8421 copyString(const XML_Char *s, XML_Parser parser) {
8422 size_t charsRequired = 0;
8423 XML_Char *result;
8424
8425 /* First determine how long the string is */
8426 while (s[charsRequired] != 0) {
8427 charsRequired++;
8428 }
8429 /* Include the terminator */
8430 charsRequired++;
8431
8432 /* Now allocate space for the copy */
8433 result = MALLOC(parser, charsRequired * sizeof(XML_Char));
8434 if (result == NULL)
8435 return NULL;
8436 /* Copy the original into place */
8437 memcpy(result, s, charsRequired * sizeof(XML_Char));
8438 return result;
8439 }
8440
8441 #if XML_GE == 1
8442
8443 static float
8444 accountingGetCurrentAmplification(XML_Parser rootParser) {
8445 // 1.........1.........12 => 22
8446 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8447 const XmlBigCount countBytesOutput
8448 = rootParser->m_accounting.countBytesDirect
8449 + rootParser->m_accounting.countBytesIndirect;
8450 const float amplificationFactor
8451 = rootParser->m_accounting.countBytesDirect
8452 ? ((float)countBytesOutput
8453 / (float)(rootParser->m_accounting.countBytesDirect))
8454 : ((float)(lenOfShortestInclude
8455 + rootParser->m_accounting.countBytesIndirect)
8456 / (float)lenOfShortestInclude);
8457 assert(! rootParser->m_parentParser);
8458 return amplificationFactor;
8459 }
8460
8461 static void
8462 accountingReportStats(XML_Parser originParser, const char *epilog) {
8463 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8464 assert(! rootParser->m_parentParser);
8465
8466 if (rootParser->m_accounting.debugLevel == 0u) {
8467 return;
8468 }
8469
8470 const float amplificationFactor
8471 = accountingGetCurrentAmplification(rootParser);
8472 fprintf(stderr,
8473 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8474 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8475 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8476 rootParser->m_accounting.countBytesIndirect,
8477 (double)amplificationFactor, epilog);
8478 }
8479
8480 static void
8481 accountingOnAbort(XML_Parser originParser) {
8482 accountingReportStats(originParser, " ABORTING\n");
8483 }
8484
8485 static void
8486 accountingReportDiff(XML_Parser rootParser,
8487 unsigned int levelsAwayFromRootParser, const char *before,
8488 const char *after, ptrdiff_t bytesMore, int source_line,
8489 enum XML_Account account) {
8490 assert(! rootParser->m_parentParser);
8491
8492 fprintf(stderr,
8493 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8494 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8495 levelsAwayFromRootParser, source_line, 10, "");
8496
8497 const char ellipis[] = "[..]";
8498 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8499 const unsigned int contextLength = 10;
8500
8501 /* Note: Performance is of no concern here */
8502 const char *walker = before;
8503 if ((rootParser->m_accounting.debugLevel >= 3u)
8504 || (after - before)
8505 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8506 for (; walker < after; walker++) {
8507 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8508 }
8509 } else {
8510 for (; walker < before + contextLength; walker++) {
8511 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8512 }
8513 fprintf(stderr, ellipis);
8514 walker = after - contextLength;
8515 for (; walker < after; walker++) {
8516 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8517 }
8518 }
8519 fprintf(stderr, "\"\n");
8520 }
8521
8522 static XML_Bool
8523 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8524 const char *after, int source_line,
8525 enum XML_Account account) {
8526 /* Note: We need to check the token type *first* to be sure that
8527 * we can even access variable <after>, safely.
8528 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8529 switch (tok) {
8530 case XML_TOK_INVALID:
8531 case XML_TOK_PARTIAL:
8532 case XML_TOK_PARTIAL_CHAR:
8533 case XML_TOK_NONE:
8534 return XML_TRUE;
8535 }
8536
8537 if (account == XML_ACCOUNT_NONE)
8538 return XML_TRUE; /* because these bytes have been accounted for, already */
8539
8540 unsigned int levelsAwayFromRootParser;
8541 const XML_Parser rootParser
8542 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8543 assert(! rootParser->m_parentParser);
8544
8545 const int isDirect
8546 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8547 const ptrdiff_t bytesMore = after - before;
8548
8549 XmlBigCount *const additionTarget
8550 = isDirect ? &rootParser->m_accounting.countBytesDirect
8551 : &rootParser->m_accounting.countBytesIndirect;
8552
8553 /* Detect and avoid integer overflow */
8554 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8555 return XML_FALSE;
8556 *additionTarget += bytesMore;
8557
8558 const XmlBigCount countBytesOutput
8559 = rootParser->m_accounting.countBytesDirect
8560 + rootParser->m_accounting.countBytesIndirect;
8561 const float amplificationFactor
8562 = accountingGetCurrentAmplification(rootParser);
8563 const XML_Bool tolerated
8564 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8565 || (amplificationFactor
8566 <= rootParser->m_accounting.maximumAmplificationFactor);
8567
8568 if (rootParser->m_accounting.debugLevel >= 2u) {
8569 accountingReportStats(rootParser, "");
8570 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8571 bytesMore, source_line, account);
8572 }
8573
8574 return tolerated;
8575 }
8576
8577 unsigned long long
8578 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8579 if (! parser)
8580 return 0;
8581 return parser->m_accounting.countBytesDirect;
8582 }
8583
8584 unsigned long long
8585 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8586 if (! parser)
8587 return 0;
8588 return parser->m_accounting.countBytesIndirect;
8589 }
8590
8591 static void
8592 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8593 const char *action, int sourceLine) {
8594 assert(! rootParser->m_parentParser);
8595 if (rootParser->m_entity_stats.debugLevel == 0u)
8596 return;
8597
8598 # if defined(XML_UNICODE)
8599 const char *const entityName = "[..]";
8600 # else
8601 const char *const entityName = entity->name;
8602 # endif
8603
8604 fprintf(
8605 stderr,
8606 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8607 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8608 rootParser->m_entity_stats.currentDepth,
8609 rootParser->m_entity_stats.maximumDepthSeen,
8610 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8611 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8612 sourceLine);
8613 }
8614
8615 static void
8616 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8617 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8618 assert(! rootParser->m_parentParser);
8619
8620 rootParser->m_entity_stats.countEverOpened++;
8621 rootParser->m_entity_stats.currentDepth++;
8622 if (rootParser->m_entity_stats.currentDepth
8623 > rootParser->m_entity_stats.maximumDepthSeen) {
8624 rootParser->m_entity_stats.maximumDepthSeen++;
8625 }
8626
8627 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8628 }
8629
8630 static void
8631 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8632 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8633 assert(! rootParser->m_parentParser);
8634
8635 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8636 rootParser->m_entity_stats.currentDepth--;
8637 }
8638
8639 #endif /* XML_GE == 1 */
8640
8641 static XML_Parser
8642 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8643 XML_Parser rootParser = parser;
8644 unsigned int stepsTakenUpwards = 0;
8645 while (rootParser->m_parentParser) {
8646 rootParser = rootParser->m_parentParser;
8647 stepsTakenUpwards++;
8648 }
8649 assert(! rootParser->m_parentParser);
8650 if (outLevelDiff != NULL) {
8651 *outLevelDiff = stepsTakenUpwards;
8652 }
8653 return rootParser;
8654 }
8655
8656 #if XML_GE == 1
8657
/* Map a byte value to a constant, zero-terminated, printable string.
 *
 * - 0 maps to "\0", 9 to "\t", 10 to "\n" and 13 to "\r" (each as a
 *   backslash followed by one character).
 * - The double quote and the backslash are returned with a leading
 *   backslash.
 * - All other values from 32 to 126 map to the one-character string with
 *   that character value.
 * - Every remaining value maps to "\x" followed by its value in upper-case
 *   hexadecimal without leading zeros (e.g. "\x1", "\x7F", "\xFF").
 *
 * The returned pointer refers to static storage and must not be freed.
 */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /*   4 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /*  12 */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /*  20 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /*  28 */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",
      /*  36 */ "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",
      /*  44 */ ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",
      /*  52 */ "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",
      /*  60 */ "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",
      /*  68 */ "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",
      /*  76 */ "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",
      /*  84 */ "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",
      /*  92 */ "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",
      /* 100 */ "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",
      /* 108 */ "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",
      /* 116 */ "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",
      /* 124 */ "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 132 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 140 */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 148 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 156 */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 164 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 172 */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 180 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 188 */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 196 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 204 */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 212 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 220 */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 228 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 236 */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 244 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 252 */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  // LCOV_EXCL_START
  if ((size_t)c >= sizeof(printable) / sizeof(printable[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP

  return printable[c];
}
9181
9182 #endif /* XML_GE == 1 */
9183
/* Read a debug level from the environment variable named variableName.
 *
 * Returns defaultDebugLevel if the variable is not set, or if its value is
 * not accepted: strtoul() (base 10) must consume at least one character,
 * must consume the whole string, and must not set errno.  In the rejected
 * case errno is reset to 0 before returning.  Otherwise the parsed value
 * is returned.
 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *parseEnd = NULL;
  errno = 0; /* so that a failure inside strtoul can be told apart */
  const unsigned long parsed = strtoul(text, &parseEnd, 10);

  const int accepted
      = (errno == 0) && (parseEnd != text) && (parseEnd[0] == '\0');
  if (! accepted) {
    errno = 0;
    return defaultDebugLevel;
  }

  return parsed;
}
9202