1 /* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
44 Licensed under the MIT license:
45
46 Permission is hereby granted, free of charge, to any person obtaining
47 a copy of this software and associated documentation files (the
48 "Software"), to deal in the Software without restriction, including
49 without limitation the rights to use, copy, modify, merge, publish,
50 distribute, sublicense, and/or sell copies of the Software, and to permit
51 persons to whom the Software is furnished to do so, subject to the
52 following conditions:
53
54 The above copyright notice and this permission notice shall be included
55 in all copies or substantial portions of the Software.
56
57 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
58 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
59 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
60 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
61 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
62 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
63 USE OR OTHER DEALINGS IN THE SOFTWARE.
64 */
65
66 #define XML_BUILDING_EXPAT 1
67
68 #include "expat_config.h"
69
70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
71 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
72 #endif
73
74 #if defined(XML_DTD) && XML_GE == 0
75 # error Either undefine XML_DTD or define XML_GE to 1.
76 #endif
77
78 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
79 || (XML_CONTEXT_BYTES + 0 < 0)
80 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
81 #endif
82
83 #if defined(HAVE_SYSCALL_GETRANDOM)
84 # if ! defined(_GNU_SOURCE)
85 # define _GNU_SOURCE 1 /* syscall prototype */
86 # endif
87 #endif
88
89 #ifdef _WIN32
90 /* force stdlib to define rand_s() */
91 # if ! defined(_CRT_RAND_S)
92 # define _CRT_RAND_S
93 # endif
94 #endif
95
96 #include <stdbool.h>
97 #include <stddef.h>
98 #include <string.h> /* memset(), memcpy() */
99 #include <assert.h>
100 #include <limits.h> /* UINT_MAX */
101 #include <stdio.h> /* fprintf */
102 #include <stdlib.h> /* getenv, rand_s */
103 #include <stdint.h> /* uintptr_t */
104 #include <math.h> /* isnan */
105
106 #ifdef _WIN32
107 # define getpid GetCurrentProcessId
108 #else
109 # include <sys/time.h> /* gettimeofday() */
110 # include <sys/types.h> /* getpid() */
111 # include <unistd.h> /* getpid() */
112 # include <fcntl.h> /* O_RDONLY */
113 # include <errno.h>
114 #endif
115
116 #ifdef _WIN32
117 # include "winconfig.h"
118 #endif
119
120 #include "ascii.h"
121 #include "expat.h"
122 #include "siphash.h"
123
124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
125 # if defined(HAVE_GETRANDOM)
126 # include <sys/random.h> /* getrandom */
127 # else
128 # include <unistd.h> /* syscall */
129 # include <sys/syscall.h> /* SYS_getrandom */
130 # endif
131 # if ! defined(GRND_NONBLOCK)
132 # define GRND_NONBLOCK 0x0001
133 # endif /* defined(GRND_NONBLOCK) */
134 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
135
136 #if defined(HAVE_LIBBSD) \
137 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
138 # include <bsd/stdlib.h>
139 #endif
140
141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
142 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
143 #endif
144
145 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
146 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
147 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
148 && ! defined(XML_POOR_ENTROPY)
149 # error You do not have support for any sources of high quality entropy \
150 enabled. For end user security, that is probably not what you want. \
151 \
152 Your options include: \
153 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
154 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
155 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
156 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
157 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
158 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
159 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
160 * Windows >=Vista (rand_s): _WIN32. \
161 \
162 If insist on not using any of these, bypass this error by defining \
163 XML_POOR_ENTROPY; you have been warned. \
164 \
165 If you have reasons to patch this detection code away or need changes \
166 to the build system, please open a bug. Thank you!
167 #endif
168
/* Bind the generic internal-encoding names to one concrete flavour.
   With XML_UNICODE defined they map to the UTF-16 helpers and ICHAR is
   unsigned short; otherwise they map to the UTF-8 helpers and ICHAR is
   char. */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* True when enc is not flagged as UTF-16, or when the address s has its low
   bit set (i.e. s is at an odd address). */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* True when enc is not flagged as UTF-8; s is not used in this flavour. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
186
/* Without XML_NS, every *NS encoding entry point used in this file is
   aliased to its non-namespace counterpart. */
#ifndef XML_NS

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
/* Drop the flavour-specific alias defined earlier before re-pointing it. */
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
196
/* XML_T(x) and XML_L(x) adapt a literal x to the XML_Char flavour:
   - XML_UNICODE + XML_UNICODE_WCHAR_T: XML_T casts to const wchar_t and
     XML_L pastes an L prefix onto x;
   - XML_UNICODE only: XML_T casts to const unsigned short, XML_L is x;
   - otherwise both expand to x unchanged.
   Note that the cast forms do not parenthesise x. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
213
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Adds sz - 1 and then clears the low bits with ~(sz - 1).
   sz is evaluated twice, so it must be free of side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
216
/* Do safe (NULL-aware) pointer arithmetic: yields (p) - (q) when both
   pointers are non-NULL and 0 otherwise.  Each argument may be evaluated
   twice, so pass side-effect-free expressions. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
219
/* Smaller of a and b; yields b when the two compare equal.  Each argument
   may be evaluated twice, so pass side-effect-free expressions. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
221
222 #include "internal.h"
223 #include "xmltok.h"
224 #include "xmlrole.h"
225
/* Hash-table key type: a pointer to constant XML_Char data. */
typedef const XML_Char *KEY;

/* Minimal hash-table entry consisting of just the key.
   NOTE(review): ENTITY, PREFIX, ELEMENT_TYPE and ATTRIBUTE_ID in this file
   all start with a name pointer, presumably so that they can be handled
   through a NAMED * as returned by lookup() -- confirm against lookup(). */
typedef struct {
  KEY name;
} NAMED;
231
/* Hash table holding pointers to NAMED entries.
   NOTE(review): the per-field notes are inferred from the field names and
   from the probing comment on SECOND_HASH; confirm against lookup(). */
typedef struct {
  NAMED **v;           /* array of entry pointers */
  unsigned char power; /* presumably log2 of size */
  size_t size;         /* table size; the probing comment states it is a
                          power of 2 and that mask = size - 1 */
  size_t used;         /* presumably the number of occupied slots */
  const XML_Memory_Handling_Suite *mem; /* memory suite associated with the
                                           table (see hashTableInit()) */
} HASH_TABLE;
239
240 static size_t keylen(KEY s);
241
242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
243
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH keeps the bits of hash outside mask, shifts them right by
   (power - 1) and masks the result with (mask >> 2).  PROBE_STEP sets the
   lowest bit of that value and converts it to unsigned char, so the step
   is additionally truncated to 8 bits.  Both macros evaluate mask more than
   once.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
256
/* Cursor over the slots of a HASH_TABLE; set up by hashTableIterInit() and
   advanced by hashTableIterNext().
   NOTE(review): p / end presumably delimit the not-yet-visited slots of the
   table's v array -- confirm against hashTableIterInit(). */
typedef struct {
  NAMED **p;
  NAMED **end;
} HASH_TABLE_ITER;
261
/* Initial sizes and sentinel values used by the parser.
   NOTE(review): units (bytes vs. elements) and the code that consumes each
   constant are not visible in this part of the file. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
270
/* One namespace binding of a prefix to a URI.  A BINDING is a node in two
   singly linked chains at once: nextTagBinding and prevPrefixBinding.
   NOTE(review): per-field notes are inferred from names -- confirm against
   addBinding() / freeBindings(). */
typedef struct binding {
  struct prefix *prefix;             /* prefix this binding refers to */
  struct binding *nextTagBinding;    /* next node in the per-tag chain */
  struct binding *prevPrefixBinding; /* presumably the binding this one
                                        shadows for the same prefix */
  const struct attribute_id *attId;  /* presumably the declaring attribute */
  XML_Char *uri;
  int uriLen;   /* presumably length of uri in XML_Chars -- confirm */
  int uriAlloc; /* presumably allocated capacity of uri -- confirm */
} BINDING;
280
/* A namespace prefix: its name plus a pointer to a BINDING.
   NOTE(review): binding is presumably the currently active binding, with
   older ones reachable via BINDING.prevPrefixBinding -- confirm. */
typedef struct prefix {
  const XML_Char *name;
  BINDING *binding;
} PREFIX;
285
/* Element name as stored in a TAG (TAG.name, "tagName in the API encoding"):
   the full string plus pointers to its local part and prefix, each with a
   length.
   NOTE(review): the lengths are presumably counted in XML_Chars, and
   localPart / prefix presumably point into or alongside str -- confirm
   against storeAtts(). */
typedef struct {
  const XML_Char *str;
  const XML_Char *localPart;
  const XML_Char *prefix;
  int strLen;
  int uriLen;
  int prefixLen;
} TAG_NAME;
294
/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() call in which the element is open, the memory for the
   'raw' version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list (see m_tagStack and m_freeTagList in
   struct XML_ParserStruct).
*/
typedef struct tag {
  struct tag *parent;  /* parent of this element */
  const char *rawName; /* tagName in the original encoding */
  int rawNameLength;   /* NOTE(review): presumably in bytes, since rawName is
                          a char pointer -- confirm */
  TAG_NAME name;       /* tagName in the API encoding */
  char *buf;           /* buffer for name components */
  char *bufEnd;        /* end of the buffer */
  BINDING *bindings;   /* head of a BINDING chain for this element */
} TAG;
317
/* A declared entity.  The struct starts with the name pointer, matching the
   layout of NAMED. */
typedef struct {
  const XML_Char *name;
  const XML_Char *textPtr;
  int textLen;   /* length in XML_Chars */
  int processed; /* # of processed bytes - when suspended */
  const XML_Char *systemId;
  const XML_Char *base;
  const XML_Char *publicId;
  const XML_Char *notation;
  XML_Bool open;
  XML_Bool hasMore; /* true if entity has not been completely processed */
  /* An entity can be open while being already completely processed (hasMore ==
    XML_FALSE). The reason is the delayed closing of entities until their inner
    entities are processed and closed */
  XML_Bool is_param;
  XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
335
/* One node of the temporary structure used while building an element
   content model (see the "scaffolding" fields of DTD).
   NOTE(review): firstchild / lastchild / nextsib are ints rather than
   pointers, so they are presumably indices into DTD.scaffold -- confirm
   against nextScaffoldPart() and build_model(). */
typedef struct {
  enum XML_Content_Type type;
  enum XML_Content_Quant quant;
  const XML_Char *name;
  int firstchild;
  int lastchild;
  int childcnt;
  int nextsib;
} CONTENT_SCAFFOLD;

/* NOTE(review): presumably the initial number of CONTENT_SCAFFOLD slots
   allocated -- confirm against nextScaffoldPart(). */
#define INIT_SCAFFOLD_ELEMENTS 32
347
/* One chunk of STRING_POOL storage, linked through next.
   NOTE(review): s is declared with a single element but is presumably the
   start of a larger trailing array whose capacity is recorded in size --
   confirm against poolGrow(). */
typedef struct block {
  struct block *next;
  int size;
  XML_Char s[1];
} BLOCK;
353
/* String pool built from BLOCKs.  The pool* macros in this file operate on
   start, ptr and end: poolLength is ptr - start, poolAppendChar stores at
   ptr and calls poolGrow() once ptr has reached end, poolFinish moves start
   up to ptr, and poolDiscard moves ptr back to start. */
typedef struct {
  BLOCK *blocks;       /* NOTE(review): presumably blocks currently in use */
  BLOCK *freeBlocks;   /* NOTE(review): presumably blocks kept for reuse */
  const XML_Char *end; /* limit that ptr is compared against before a store */
  XML_Char *ptr;       /* next write position */
  XML_Char *start;     /* start of the string currently being built */
  const XML_Memory_Handling_Suite *mem;
} STRING_POOL;
362
/* Identity of an attribute name.
   The XML_Char before the name is used to determine whether
   an attribute has been specified (so name points one XML_Char past the
   start of its storage). */
typedef struct attribute_id {
  XML_Char *name;
  PREFIX *prefix;
  XML_Bool maybeTokenized;
  XML_Bool xmlns; /* NOTE(review): presumably set for xmlns / xmlns:*
                     attributes -- confirm against getAttributeId() */
} ATTRIBUTE_ID;
371
/* A defaulted attribute of an element type: the attribute's identity, a
   CDATA flag and a value pointer.  Instances are held in
   ELEMENT_TYPE.defaultAtts. */
typedef struct {
  const ATTRIBUTE_ID *id;
  XML_Bool isCdata;
  const XML_Char *value;
} DEFAULT_ATTRIBUTE;
377
/* Entry of the parser's m_nsAtts table.
   NOTE(review): version is presumably compared with m_nsAttsVersion to tell
   live entries from stale ones, and hash / uriName presumably identify an
   expanded attribute name -- confirm against storeAtts(). */
typedef struct {
  unsigned long version;
  unsigned long hash;
  const XML_Char *uriName;
} NS_ATT;
383
/* A declared element type.  The struct starts with the name pointer,
   matching the layout of NAMED. */
typedef struct {
  const XML_Char *name;
  PREFIX *prefix;
  const ATTRIBUTE_ID *idAtt; /* NOTE(review): presumably the attribute
                                declared with type ID, if any -- confirm */
  int nDefaultAtts;          /* NOTE(review): presumably entries in use in
                                defaultAtts */
  int allocDefaultAtts;      /* NOTE(review): presumably allocated capacity
                                of defaultAtts */
  DEFAULT_ATTRIBUTE *defaultAtts;
} ELEMENT_TYPE;
392
/* Everything the parser records about the document type: four hash tables
   (plus a fifth for parameter entities when XML_DTD is defined), two string
   pools, a few status flags, the default prefix and the scaffolding used
   while an element content model is being built. */
typedef struct {
  HASH_TABLE generalEntities;
  HASH_TABLE elementTypes;
  HASH_TABLE attributeIds;
  HASH_TABLE prefixes;
  STRING_POOL pool;
  STRING_POOL entityValuePool;
  /* false once a parameter entity reference has been skipped */
  XML_Bool keepProcessing;
  /* true once an internal or external PE reference has been encountered;
     this includes the reference to an external subset */
  XML_Bool hasParamEntityRefs;
  XML_Bool standalone;
#ifdef XML_DTD
  /* indicates if external PE has been read */
  XML_Bool paramEntityRead;
  HASH_TABLE paramEntities;
#endif /* XML_DTD */
  PREFIX defaultPrefix;
  /* === scaffolding for building content model === */
  XML_Bool in_eldecl;
  CONTENT_SCAFFOLD *scaffold;
  unsigned contentStringLen;
  unsigned scaffSize;  /* NOTE(review): presumably allocated scaffold slots */
  unsigned scaffCount; /* NOTE(review): presumably scaffold slots in use */
  int scaffLevel;
  int *scaffIndex;
} DTD;
421
/* Kind tag stored in OPEN_INTERNAL_ENTITY.type and passed to processEntity().
   NOTE(review): the three values line up with the parser's three open/free
   list pairs (m_openInternalEntities, m_openAttributeEntities,
   m_openValueEntities) -- presumably selecting which list is used; confirm
   against processEntity(). */
enum EntityType {
  ENTITY_INTERNAL,
  ENTITY_ATTRIBUTE,
  ENTITY_VALUE,
};
427
/* Node of a singly linked list (via next) describing an entity that is
   currently open.  The parser keeps such lists in its m_open*Entities /
   m_free*Entities members. */
typedef struct open_internal_entity {
  const char *internalEventPtr;
  const char *internalEventEndPtr;
  struct open_internal_entity *next;
  ENTITY *entity;
  int startTagLevel;
  XML_Bool betweenDecl; /* WFC: PE Between Declarations */
  enum EntityType type;
} OPEN_INTERNAL_ENTITY;
437
/* Tells the byte-accounting code how to classify a span of input; passed as
   the `account` argument of doProlog(), doContent(), storeAtts() and
   related functions declared in this file. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
444
445 #if XML_GE == 1
/* Counter type used for the byte totals in ACCOUNTING. */
typedef unsigned long long XmlBigCount;

/* Byte-accounting state; only compiled when XML_GE == 1.
   NOTE(review): countBytesDirect / countBytesIndirect presumably correspond
   to XML_ACCOUNT_DIRECT and XML_ACCOUNT_ENTITY_EXPANSION respectively, and
   the last two fields presumably hold the configured amplification limits --
   confirm against accountingDiffTolerated(). */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  unsigned long debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;
454
/* Entity-tracking counters; only compiled when XML_GE == 1.
   NOTE(review): presumably maintained by entityTrackingOnOpen() and
   entityTrackingOnClose() -- their bodies are not visible here. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  unsigned long debugLevel;
} ENTITY_STATS;
461 #endif /* XML_GE == 1 */
462
/* Function type shared by all the *Processor functions declared below
   (prologProcessor, contentProcessor, ...).  The parser's m_processor member
   holds a pointer to one of them.  A processor receives the parser, two
   pointers named start and end, and an out-parameter endPtr, and returns an
   XML_Error code.
   NOTE(review): start / end presumably delimit the input still to be
   processed, and *endPtr presumably reports how far processing got --
   confirm against a processor body. */
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
                                         const char *end, const char **endPtr);
465
466 static Processor prologProcessor;
467 static Processor prologInitProcessor;
468 static Processor contentProcessor;
469 static Processor cdataSectionProcessor;
470 #ifdef XML_DTD
471 static Processor ignoreSectionProcessor;
472 static Processor externalParEntProcessor;
473 static Processor externalParEntInitProcessor;
474 static Processor entityValueProcessor;
475 static Processor entityValueInitProcessor;
476 #endif /* XML_DTD */
477 static Processor epilogProcessor;
478 static Processor errorProcessor;
479 static Processor externalEntityInitProcessor;
480 static Processor externalEntityInitProcessor2;
481 static Processor externalEntityInitProcessor3;
482 static Processor externalEntityContentProcessor;
483 static Processor internalEntityProcessor;
484
485 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
486 const XML_Char *encodingName);
487 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
488 const char *s, const char *next);
489 static enum XML_Error initializeEncoding(XML_Parser parser);
490 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
491 const char *s, const char *end, int tok,
492 const char *next, const char **nextPtr,
493 XML_Bool haveMore, XML_Bool allowClosingDoctype,
494 enum XML_Account account);
495 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
496 XML_Bool betweenDecl, enum EntityType type);
497 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
498 const ENCODING *enc, const char *start,
499 const char *end, const char **endPtr,
500 XML_Bool haveMore, enum XML_Account account);
501 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
502 const char **startPtr, const char *end,
503 const char **nextPtr, XML_Bool haveMore,
504 enum XML_Account account);
505 #ifdef XML_DTD
506 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
507 const char **startPtr, const char *end,
508 const char **nextPtr, XML_Bool haveMore);
509 #endif /* XML_DTD */
510
511 static void freeBindings(XML_Parser parser, BINDING *bindings);
512 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
513 const char *attStr, TAG_NAME *tagNamePtr,
514 BINDING **bindingsPtr,
515 enum XML_Account account);
516 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
517 const ATTRIBUTE_ID *attId, const XML_Char *uri,
518 BINDING **bindingsPtr);
519 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
520 XML_Bool isCdata, XML_Bool isId,
521 const XML_Char *value, XML_Parser parser);
522 static enum XML_Error storeAttributeValue(XML_Parser parser,
523 const ENCODING *enc, XML_Bool isCdata,
524 const char *ptr, const char *end,
525 STRING_POOL *pool,
526 enum XML_Account account);
527 static enum XML_Error
528 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
529 const char *ptr, const char *end, STRING_POOL *pool,
530 enum XML_Account account, const char **nextPtr);
531 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
532 const char *start, const char *end);
533 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
534 #if XML_GE == 1
535 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
536 const char *start, const char *end,
537 enum XML_Account account,
538 const char **nextPtr);
539 static enum XML_Error callStoreEntityValue(XML_Parser parser,
540 const ENCODING *enc,
541 const char *start, const char *end,
542 enum XML_Account account);
543 #else
544 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
545 #endif
546 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
547 const char *start, const char *end);
548 static int reportComment(XML_Parser parser, const ENCODING *enc,
549 const char *start, const char *end);
550 static void reportDefault(XML_Parser parser, const ENCODING *enc,
551 const char *start, const char *end);
552
553 static const XML_Char *getContext(XML_Parser parser);
554 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
555
556 static void FASTCALL normalizePublicId(XML_Char *s);
557
558 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
559 /* do not call if m_parentParser != NULL */
560 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
561 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
562 const XML_Memory_Handling_Suite *ms);
563 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
564 const XML_Memory_Handling_Suite *ms);
565 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
566 STRING_POOL *newPool, const HASH_TABLE *oldTable);
567 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
568 size_t createSize);
569 static void FASTCALL hashTableInit(HASH_TABLE *table,
570 const XML_Memory_Handling_Suite *ms);
571 static void FASTCALL hashTableClear(HASH_TABLE *table);
572 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
573 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
574 const HASH_TABLE *table);
575 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
576
577 static void FASTCALL poolInit(STRING_POOL *pool,
578 const XML_Memory_Handling_Suite *ms);
579 static void FASTCALL poolClear(STRING_POOL *pool);
580 static void FASTCALL poolDestroy(STRING_POOL *pool);
581 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
582 const char *ptr, const char *end);
583 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
584 const char *ptr, const char *end);
585 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
586 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
587 const XML_Char *s);
588 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
589 int n);
590 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
591 const XML_Char *s);
592
593 static int FASTCALL nextScaffoldPart(XML_Parser parser);
594 static XML_Content *build_model(XML_Parser parser);
595 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
596 const char *ptr, const char *end);
597
598 static XML_Char *copyString(const XML_Char *s,
599 const XML_Memory_Handling_Suite *memsuite);
600
601 static unsigned long generate_hash_secret_salt(XML_Parser parser);
602 static XML_Bool startParsing(XML_Parser parser);
603
604 static XML_Parser parserCreate(const XML_Char *encodingName,
605 const XML_Memory_Handling_Suite *memsuite,
606 const XML_Char *nameSep, DTD *dtd);
607
608 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
609
610 #if XML_GE == 1
611 static float accountingGetCurrentAmplification(XML_Parser rootParser);
612 static void accountingReportStats(XML_Parser originParser, const char *epilog);
613 static void accountingOnAbort(XML_Parser originParser);
614 static void accountingReportDiff(XML_Parser rootParser,
615 unsigned int levelsAwayFromRootParser,
616 const char *before, const char *after,
617 ptrdiff_t bytesMore, int source_line,
618 enum XML_Account account);
619 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
620 const char *before, const char *after,
621 int source_line,
622 enum XML_Account account);
623
624 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
625 const char *action, int sourceLine);
626 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
627 int sourceLine);
628 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
629 int sourceLine);
630
631 static XML_Parser getRootParserOf(XML_Parser parser,
632 unsigned int *outLevelDiff);
633 #endif /* XML_GE == 1 */
634
635 static unsigned long getDebugLevel(const char *variableName,
636 unsigned long defaultDebugLevel);
637
/* Small accessors/mutators for a STRING_POOL.  `pool` is a pointer to the
   pool; it is evaluated more than once by several of these macros, so it
   must be free of side effects.  Every use of a macro argument is
   parenthesised so that any pointer expression (not just a plain identifier)
   can be passed.

   poolStart      - start of the string currently being built
   poolLength     - ptr - start, i.e. number of XML_Chars written so far
   poolChop       - drops the last character by decrementing ptr
   poolLastChar   - lvalue of the character just before ptr
   poolDiscard    - abandons the current string: ptr is reset to start
   poolFinish     - commits the current string: start is moved up to ptr
   poolAppendChar - stores c at ptr and advances ptr, calling poolGrow()
                    first when ptr has reached end; evaluates to 1 on
                    success and to 0 when poolGrow() reports failure */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
648
/* Global default for reparse deferral, initialised to XML_TRUE.  It is
   declared const unless XML_TESTING is defined, in which case it stays
   writable so the test runner can change it. */
#if ! defined(XML_TESTING)
const
#endif
    XML_Bool g_reparseDeferralEnabledDefault
    = XML_TRUE; // write ONLY in runtests.c
#if defined(XML_TESTING)
/* Counter that exists only in XML_TESTING builds. */
unsigned int g_bytesScanned = 0; // used for testing only
#endif
657
/* Complete state of one parser instance (the object behind XML_Parser).
   Roughly in declaration order it holds: user data, the parse buffer and the
   memory suite, the registered handler callbacks, encoding state, the current
   processor and error/position bookkeeping, open/free entity lists,
   declaration scratch fields used while reading the DTD, the DTD itself, the
   tag stack and namespace bindings, attribute storage, temporary string
   pools, and (when XML_GE == 1) accounting and entity statistics. */
struct XML_ParserStruct {
  /* The first member must be m_userData so that the XML_GetUserData
     macro works. */
  void *m_userData;
  void *m_handlerArg;

  // How the four parse buffer pointers below relate in time and space:
  //
  //   m_buffer <= m_bufferPtr <= m_bufferEnd  <= m_bufferLim
  //   |           |              |               |
  //   <--parsed-->|              |               |
  //               <---parsing--->|               |
  //                              <--unoccupied-->|
  //   <---------total-malloced/realloced-------->|

  char *m_buffer; // malloc/realloc base pointer of parse buffer
  // Memory suite used by the MALLOC / REALLOC / FREE macros.
  const XML_Memory_Handling_Suite m_mem;
  const char *m_bufferPtr; // first character to be parsed
  char *m_bufferEnd;       // past last character to be parsed
  const char *m_bufferLim; // allocated end of m_buffer

  XML_Index m_parseEndByteIndex;
  const char *m_parseEndPtr;
  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
  XML_Bool m_reparseDeferralEnabled;
  int m_lastBufferRequestSize;
  XML_Char *m_dataBuf;
  XML_Char *m_dataBufEnd;
  /* Registered callbacks. */
  XML_StartElementHandler m_startElementHandler;
  XML_EndElementHandler m_endElementHandler;
  XML_CharacterDataHandler m_characterDataHandler;
  XML_ProcessingInstructionHandler m_processingInstructionHandler;
  XML_CommentHandler m_commentHandler;
  XML_StartCdataSectionHandler m_startCdataSectionHandler;
  XML_EndCdataSectionHandler m_endCdataSectionHandler;
  XML_DefaultHandler m_defaultHandler;
  XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
  XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
  XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
  XML_NotationDeclHandler m_notationDeclHandler;
  XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
  XML_NotStandaloneHandler m_notStandaloneHandler;
  XML_ExternalEntityRefHandler m_externalEntityRefHandler;
  XML_Parser m_externalEntityRefHandlerArg;
  XML_SkippedEntityHandler m_skippedEntityHandler;
  XML_UnknownEncodingHandler m_unknownEncodingHandler;
  XML_ElementDeclHandler m_elementDeclHandler;
  XML_AttlistDeclHandler m_attlistDeclHandler;
  XML_EntityDeclHandler m_entityDeclHandler;
  XML_XmlDeclHandler m_xmlDeclHandler;
  /* Encoding state. */
  const ENCODING *m_encoding;
  INIT_ENCODING m_initEncoding;
  const ENCODING *m_internalEncoding;
  const XML_Char *m_protocolEncodingName;
  XML_Bool m_ns;
  XML_Bool m_ns_triplets;
  void *m_unknownEncodingMem;
  void *m_unknownEncodingData;
  void *m_unknownEncodingHandlerData;
  void(XMLCALL *m_unknownEncodingRelease)(void *);
  PROLOG_STATE m_prologState;
  /* Pointer to one of the Processor functions declared in this file. */
  Processor *m_processor;
  enum XML_Error m_errorCode;
  const char *m_eventPtr;
  const char *m_eventEndPtr;
  const char *m_positionPtr;
  /* One open list and one free list per EntityType value. */
  OPEN_INTERNAL_ENTITY *m_openInternalEntities;
  OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
  OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
  OPEN_INTERNAL_ENTITY *m_openValueEntities;
  OPEN_INTERNAL_ENTITY *m_freeValueEntities;
  XML_Bool m_defaultExpandInternalEntities;
  int m_tagLevel;
  /* Scratch fields for the declaration currently being read. */
  ENTITY *m_declEntity;
  const XML_Char *m_doctypeName;
  const XML_Char *m_doctypeSysid;
  const XML_Char *m_doctypePubid;
  const XML_Char *m_declAttributeType;
  const XML_Char *m_declNotationName;
  const XML_Char *m_declNotationPublicId;
  ELEMENT_TYPE *m_declElementType;
  ATTRIBUTE_ID *m_declAttributeId;
  XML_Bool m_declAttributeIsCdata;
  XML_Bool m_declAttributeIsId;
  DTD *m_dtd;
  const XML_Char *m_curBase;
  /* Open-element stack and the free list TAG objects are recycled through. */
  TAG *m_tagStack;
  TAG *m_freeTagList;
  BINDING *m_inheritedBindings;
  BINDING *m_freeBindingList;
  int m_attsSize;
  int m_nSpecifiedAtts;
  int m_idAttIndex;
  ATTRIBUTE *m_atts;
  NS_ATT *m_nsAtts;
  unsigned long m_nsAttsVersion;
  unsigned char m_nsAttsPower;
#ifdef XML_ATTR_INFO
  XML_AttrInfo *m_attInfo;
#endif
  POSITION m_position;
  STRING_POOL m_tempPool;
  STRING_POOL m_temp2Pool;
  char *m_groupConnector;
  unsigned int m_groupSize;
  XML_Char m_namespaceSeparator;
  XML_Parser m_parentParser;
  XML_ParsingStatus m_parsingStatus;
#ifdef XML_DTD
  XML_Bool m_isParamEntity;
  XML_Bool m_useForeignDTD;
  enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
  unsigned long m_hash_secret_salt;
#if XML_GE == 1
  ACCOUNTING m_accounting;
  ENTITY_STATS m_entity_stats;
#endif
  XML_Bool m_reenter;
};
780
/* Allocation shorthands that dispatch through the memory suite stored in
   the given parser (parser->m_mem).  `parser` is a pointer to the parser
   struct; it is parenthesised in the expansion so that any pointer
   expression, not only a plain identifier, expands correctly.
     MALLOC(parser, s)     -> malloc_fcn(s)
     REALLOC(parser, p, s) -> realloc_fcn(p, s)
     FREE(parser, p)       -> free_fcn(p) */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
784
785 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)786 XML_ParserCreate(const XML_Char *encodingName) {
787 return XML_ParserCreate_MM(encodingName, NULL, NULL);
788 }
789
790 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)791 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
792 XML_Char tmp[2] = {nsSep, 0};
793 return XML_ParserCreate_MM(encodingName, NULL, tmp);
794 }
795
// "xml=http://www.w3.org/XML/1998/namespace"
//
// Zero-terminated XML_Char array spelling out the string above one character
// at a time via the ASCII_* constants from ascii.h instead of a string
// literal.
// NOTE(review): presumably done so the value is the same for every XML_Char
// flavour and execution character set; the consumer is not visible in this
// part of the file.
static const XML_Char implicitContext[]
    = {ASCII_x,     ASCII_m,     ASCII_l,      ASCII_EQUALS, ASCII_h,
       ASCII_t,     ASCII_t,     ASCII_p,      ASCII_COLON,  ASCII_SLASH,
       ASCII_SLASH, ASCII_w,     ASCII_w,      ASCII_w,      ASCII_PERIOD,
       ASCII_w,     ASCII_3,     ASCII_PERIOD, ASCII_o,      ASCII_r,
       ASCII_g,     ASCII_SLASH, ASCII_X,      ASCII_M,      ASCII_L,
       ASCII_SLASH, ASCII_1,     ASCII_9,      ASCII_9,      ASCII_8,
       ASCII_SLASH, ASCII_n,     ASCII_a,      ASCII_m,      ASCII_e,
       ASCII_s,     ASCII_p,     ASCII_a,      ASCII_c,      ASCII_e,
       '\0'};
807
808 /* To avoid warnings about unused functions: */
809 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
810
811 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
812
/* Obtain entropy on Linux 3.17+.

   Fills target with count bytes from getrandom() (or the raw SYS_getrandom
   syscall when HAVE_GETRANDOM is not defined), passing GRND_NONBLOCK.

   Returns 1 when all count bytes were written and 0 otherwise.

   The call is repeated after a short result and after a failure with
   errno == EINTR.  errno is consulted only when the call itself reported
   failure, because a successful call leaves errno untouched and a stale
   value must not influence the outcome.  A result of 0 bytes ends the loop
   (returning 0) so that the function can never spin without progress. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds both ssize_t (getrandom) and long (syscall) results */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        return 1;
      }
      continue; /* short result: ask for the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR) {
      continue; /* interrupted by a signal: retry */
    }

    return 0; /* hard failure, or no progress */
  }
}
840
841 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
842
843 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
844
/* Extract entropy from /dev/urandom.

   Opens /dev/urandom read-only and reads until count bytes have been stored
   in target, then closes the descriptor.

   Returns 1 when all count bytes were read and 0 otherwise (including when
   the device cannot be opened).

   read() may legitimately return fewer bytes than requested, so a short read
   is followed by another read for the remainder.  errno is consulted only
   when read() itself reported failure; a successful read leaves errno
   untouched and a stale value must not influence the outcome.  A read of 0
   bytes ends the loop so that the function can never spin without
   progress. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      /* short read: go round again for the remainder */
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      break; /* no progress, or a failure other than an interrupted call */
    }
  }

  close(fd);
  return success;
}
872
873 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
874
875 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
876
877 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
878
/* Fill target with count random bytes drawn from arc4random().
   Each 32-bit word is split into bytes, least significant byte first;
   surplus bytes of the last word are discarded. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (filled < count);
         byteIndex++) {
      out[filled] = (uint8_t)(word >> (byteIndex * 8));
      filled++;
    }
  }
}
894
895 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
896
897 #ifdef _WIN32
898
899 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
900 as it didn't declare it in its header prior to version 5.3.0 of its
901 runtime package (mingwrt, containing stdlib.h). The upstream fix
902 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
903 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
904 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
905 __declspec(dllimport) int rand_s(unsigned int *);
906 # endif
907
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target with count bytes; returns 1 on success and 0 as soon as
 * rand_s() reports an error.  Each word is split into bytes, least
 * significant byte first; surplus bytes of the last word are dropped.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t byteIndex;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    for (byteIndex = 0; (byteIndex < sizeof(word)) && (filled < count);
         byteIndex++) {
      out[filled] = (uint8_t)(word >> (byteIndex * 8));
      filled++;
    }
  }
  return 1; /* success */
}
931
932 #endif /* _WIN32 */
933
934 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
935
/* Derive a few bits of entropy from the current time.  Only used by
   generate_hash_secret_salt() as part of its low quality fallback. */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Verified in debug builds only; the cast keeps NDEBUG builds free
     of an unused-variable warning. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
958
959 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
960
/* Pass-through helper: returns entropy unchanged.  When the debug level
   read from EXPAT_ENTROPY_DEBUG is at least 1, it first reports the
   value and the provider named by label on stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  /* two hex digits per byte, zero padded */
  const int hexDigits = (int)sizeof(entropy) * 2;

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, (unsigned long)sizeof(entropy));
  return entropy;
}
969
970 static unsigned long
generate_hash_secret_salt(XML_Parser parser)971 generate_hash_secret_salt(XML_Parser parser) {
972 unsigned long entropy;
973 (void)parser;
974
975 /* "Failproof" high quality providers: */
976 #if defined(HAVE_ARC4RANDOM_BUF)
977 arc4random_buf(&entropy, sizeof(entropy));
978 return ENTROPY_DEBUG("arc4random_buf", entropy);
979 #elif defined(HAVE_ARC4RANDOM)
980 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
981 return ENTROPY_DEBUG("arc4random", entropy);
982 #else
983 /* Try high quality providers first .. */
984 # ifdef _WIN32
985 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
986 return ENTROPY_DEBUG("rand_s", entropy);
987 }
988 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
989 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
990 return ENTROPY_DEBUG("getrandom", entropy);
991 }
992 # endif
993 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
994 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
995 return ENTROPY_DEBUG("/dev/urandom", entropy);
996 }
997 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
998 /* .. and self-made low quality for backup: */
999
1000 /* Process ID is 0 bits entropy if attacker has local access */
1001 entropy = gather_time_entropy() ^ getpid();
1002
1003 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1004 if (sizeof(unsigned long) == 4) {
1005 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
1006 } else {
1007 return ENTROPY_DEBUG("fallback(8)",
1008 entropy * (unsigned long)2305843009213693951ULL);
1009 }
1010 #endif
1011 }
1012
1013 static unsigned long
get_hash_secret_salt(XML_Parser parser)1014 get_hash_secret_salt(XML_Parser parser) {
1015 if (parser->m_parentParser != NULL)
1016 return get_hash_secret_salt(parser->m_parentParser);
1017 return parser->m_hash_secret_salt;
1018 }
1019
1020 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1021 callProcessor(XML_Parser parser, const char *start, const char *end,
1022 const char **endPtr) {
1023 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1024
1025 if (parser->m_reparseDeferralEnabled
1026 && ! parser->m_parsingStatus.finalBuffer) {
1027 // Heuristic: don't try to parse a partial token again until the amount of
1028 // available data has increased significantly.
1029 const size_t had_before = parser->m_partialTokenBytesBefore;
1030 // ...but *do* try anyway if we're close to causing a reallocation.
1031 size_t available_buffer
1032 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1033 #if XML_CONTEXT_BYTES > 0
1034 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1035 #endif
1036 available_buffer
1037 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1038 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1039 const bool enough
1040 = (have_now >= 2 * had_before)
1041 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1042
1043 if (! enough) {
1044 *endPtr = start; // callers may expect this to be set
1045 return XML_ERROR_NONE;
1046 }
1047 }
1048 #if defined(XML_TESTING)
1049 g_bytesScanned += (unsigned)have_now;
1050 #endif
1051 // Run in a loop to eliminate dangerous recursion depths
1052 enum XML_Error ret;
1053 *endPtr = start;
1054 while (1) {
1055 // Use endPtr as the new start in each iteration, since it will
1056 // be set to the next start point by m_processor.
1057 ret = parser->m_processor(parser, *endPtr, end, endPtr);
1058
1059 // Make parsing status (and in particular XML_SUSPENDED) take
1060 // precedence over re-enter flag when they disagree
1061 if (parser->m_parsingStatus.parsing != XML_PARSING) {
1062 parser->m_reenter = XML_FALSE;
1063 }
1064
1065 if (! parser->m_reenter) {
1066 break;
1067 }
1068
1069 parser->m_reenter = XML_FALSE;
1070 if (ret != XML_ERROR_NONE)
1071 return ret;
1072 }
1073
1074 if (ret == XML_ERROR_NONE) {
1075 // if we consumed nothing, remember what we had on this parse attempt.
1076 if (*endPtr == start) {
1077 parser->m_partialTokenBytesBefore = have_now;
1078 } else {
1079 parser->m_partialTokenBytesBefore = 0;
1080 }
1081 }
1082 return ret;
1083 }
1084
1085 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1086 startParsing(XML_Parser parser) {
1087 /* hash functions must be initialized before setContext() is called */
1088 if (parser->m_hash_secret_salt == 0)
1089 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1090 if (parser->m_ns) {
1091 /* implicit context only set for root parser, since child
1092 parsers (i.e. external entity parsers) will inherit it
1093 */
1094 return setContext(parser, implicitContext);
1095 }
1096 return XML_TRUE;
1097 }
1098
1099 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1100 XML_ParserCreate_MM(const XML_Char *encodingName,
1101 const XML_Memory_Handling_Suite *memsuite,
1102 const XML_Char *nameSep) {
1103 return parserCreate(encodingName, memsuite, nameSep, NULL);
1104 }
1105
1106 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1107 parserCreate(const XML_Char *encodingName,
1108 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1109 DTD *dtd) {
1110 XML_Parser parser;
1111
1112 if (memsuite) {
1113 XML_Memory_Handling_Suite *mtemp;
1114 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1115 if (parser != NULL) {
1116 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1117 mtemp->malloc_fcn = memsuite->malloc_fcn;
1118 mtemp->realloc_fcn = memsuite->realloc_fcn;
1119 mtemp->free_fcn = memsuite->free_fcn;
1120 }
1121 } else {
1122 XML_Memory_Handling_Suite *mtemp;
1123 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1124 if (parser != NULL) {
1125 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1126 mtemp->malloc_fcn = malloc;
1127 mtemp->realloc_fcn = realloc;
1128 mtemp->free_fcn = free;
1129 }
1130 }
1131
1132 if (! parser)
1133 return parser;
1134
1135 parser->m_buffer = NULL;
1136 parser->m_bufferLim = NULL;
1137
1138 parser->m_attsSize = INIT_ATTS_SIZE;
1139 parser->m_atts
1140 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1141 if (parser->m_atts == NULL) {
1142 FREE(parser, parser);
1143 return NULL;
1144 }
1145 #ifdef XML_ATTR_INFO
1146 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1147 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1148 if (parser->m_attInfo == NULL) {
1149 FREE(parser, parser->m_atts);
1150 FREE(parser, parser);
1151 return NULL;
1152 }
1153 #endif
1154 parser->m_dataBuf
1155 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1156 if (parser->m_dataBuf == NULL) {
1157 FREE(parser, parser->m_atts);
1158 #ifdef XML_ATTR_INFO
1159 FREE(parser, parser->m_attInfo);
1160 #endif
1161 FREE(parser, parser);
1162 return NULL;
1163 }
1164 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1165
1166 if (dtd)
1167 parser->m_dtd = dtd;
1168 else {
1169 parser->m_dtd = dtdCreate(&parser->m_mem);
1170 if (parser->m_dtd == NULL) {
1171 FREE(parser, parser->m_dataBuf);
1172 FREE(parser, parser->m_atts);
1173 #ifdef XML_ATTR_INFO
1174 FREE(parser, parser->m_attInfo);
1175 #endif
1176 FREE(parser, parser);
1177 return NULL;
1178 }
1179 }
1180
1181 parser->m_freeBindingList = NULL;
1182 parser->m_freeTagList = NULL;
1183 parser->m_freeInternalEntities = NULL;
1184 parser->m_freeAttributeEntities = NULL;
1185 parser->m_freeValueEntities = NULL;
1186
1187 parser->m_groupSize = 0;
1188 parser->m_groupConnector = NULL;
1189
1190 parser->m_unknownEncodingHandler = NULL;
1191 parser->m_unknownEncodingHandlerData = NULL;
1192
1193 parser->m_namespaceSeparator = ASCII_EXCL;
1194 parser->m_ns = XML_FALSE;
1195 parser->m_ns_triplets = XML_FALSE;
1196
1197 parser->m_nsAtts = NULL;
1198 parser->m_nsAttsVersion = 0;
1199 parser->m_nsAttsPower = 0;
1200
1201 parser->m_protocolEncodingName = NULL;
1202
1203 poolInit(&parser->m_tempPool, &(parser->m_mem));
1204 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1205 parserInit(parser, encodingName);
1206
1207 if (encodingName && ! parser->m_protocolEncodingName) {
1208 if (dtd) {
1209 // We need to stop the upcoming call to XML_ParserFree from happily
1210 // destroying parser->m_dtd because the DTD is shared with the parent
1211 // parser and the only guard that keeps XML_ParserFree from destroying
1212 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1213 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1214 parser->m_dtd = NULL;
1215 }
1216 XML_ParserFree(parser);
1217 return NULL;
1218 }
1219
1220 if (nameSep) {
1221 parser->m_ns = XML_TRUE;
1222 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1223 parser->m_namespaceSeparator = *nameSep;
1224 } else {
1225 parser->m_internalEncoding = XmlGetInternalEncoding();
1226 }
1227
1228 return parser;
1229 }
1230
1231 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1232 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1233 parser->m_processor = prologInitProcessor;
1234 XmlPrologStateInit(&parser->m_prologState);
1235 if (encodingName != NULL) {
1236 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1237 }
1238 parser->m_curBase = NULL;
1239 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1240 parser->m_userData = NULL;
1241 parser->m_handlerArg = NULL;
1242 parser->m_startElementHandler = NULL;
1243 parser->m_endElementHandler = NULL;
1244 parser->m_characterDataHandler = NULL;
1245 parser->m_processingInstructionHandler = NULL;
1246 parser->m_commentHandler = NULL;
1247 parser->m_startCdataSectionHandler = NULL;
1248 parser->m_endCdataSectionHandler = NULL;
1249 parser->m_defaultHandler = NULL;
1250 parser->m_startDoctypeDeclHandler = NULL;
1251 parser->m_endDoctypeDeclHandler = NULL;
1252 parser->m_unparsedEntityDeclHandler = NULL;
1253 parser->m_notationDeclHandler = NULL;
1254 parser->m_startNamespaceDeclHandler = NULL;
1255 parser->m_endNamespaceDeclHandler = NULL;
1256 parser->m_notStandaloneHandler = NULL;
1257 parser->m_externalEntityRefHandler = NULL;
1258 parser->m_externalEntityRefHandlerArg = parser;
1259 parser->m_skippedEntityHandler = NULL;
1260 parser->m_elementDeclHandler = NULL;
1261 parser->m_attlistDeclHandler = NULL;
1262 parser->m_entityDeclHandler = NULL;
1263 parser->m_xmlDeclHandler = NULL;
1264 parser->m_bufferPtr = parser->m_buffer;
1265 parser->m_bufferEnd = parser->m_buffer;
1266 parser->m_parseEndByteIndex = 0;
1267 parser->m_parseEndPtr = NULL;
1268 parser->m_partialTokenBytesBefore = 0;
1269 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1270 parser->m_lastBufferRequestSize = 0;
1271 parser->m_declElementType = NULL;
1272 parser->m_declAttributeId = NULL;
1273 parser->m_declEntity = NULL;
1274 parser->m_doctypeName = NULL;
1275 parser->m_doctypeSysid = NULL;
1276 parser->m_doctypePubid = NULL;
1277 parser->m_declAttributeType = NULL;
1278 parser->m_declNotationName = NULL;
1279 parser->m_declNotationPublicId = NULL;
1280 parser->m_declAttributeIsCdata = XML_FALSE;
1281 parser->m_declAttributeIsId = XML_FALSE;
1282 memset(&parser->m_position, 0, sizeof(POSITION));
1283 parser->m_errorCode = XML_ERROR_NONE;
1284 parser->m_eventPtr = NULL;
1285 parser->m_eventEndPtr = NULL;
1286 parser->m_positionPtr = NULL;
1287 parser->m_openInternalEntities = NULL;
1288 parser->m_openAttributeEntities = NULL;
1289 parser->m_openValueEntities = NULL;
1290 parser->m_defaultExpandInternalEntities = XML_TRUE;
1291 parser->m_tagLevel = 0;
1292 parser->m_tagStack = NULL;
1293 parser->m_inheritedBindings = NULL;
1294 parser->m_nSpecifiedAtts = 0;
1295 parser->m_unknownEncodingMem = NULL;
1296 parser->m_unknownEncodingRelease = NULL;
1297 parser->m_unknownEncodingData = NULL;
1298 parser->m_parentParser = NULL;
1299 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1300 // Reentry can only be triggered inside m_processor calls
1301 parser->m_reenter = XML_FALSE;
1302 #ifdef XML_DTD
1303 parser->m_isParamEntity = XML_FALSE;
1304 parser->m_useForeignDTD = XML_FALSE;
1305 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1306 #endif
1307 parser->m_hash_secret_salt = 0;
1308
1309 #if XML_GE == 1
1310 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1311 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1312 parser->m_accounting.maximumAmplificationFactor
1313 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1314 parser->m_accounting.activationThresholdBytes
1315 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1316
1317 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1318 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1319 #endif
1320 }
1321
1322 /* moves list of bindings to m_freeBindingList */
1323 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1324 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1325 while (bindings) {
1326 BINDING *b = bindings;
1327 bindings = bindings->nextTagBinding;
1328 b->nextTagBinding = parser->m_freeBindingList;
1329 parser->m_freeBindingList = b;
1330 }
1331 }
1332
1333 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1334 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1335 TAG *tStk;
1336 OPEN_INTERNAL_ENTITY *openEntityList;
1337
1338 if (parser == NULL)
1339 return XML_FALSE;
1340
1341 if (parser->m_parentParser)
1342 return XML_FALSE;
1343 /* move m_tagStack to m_freeTagList */
1344 tStk = parser->m_tagStack;
1345 while (tStk) {
1346 TAG *tag = tStk;
1347 tStk = tStk->parent;
1348 tag->parent = parser->m_freeTagList;
1349 moveToFreeBindingList(parser, tag->bindings);
1350 tag->bindings = NULL;
1351 parser->m_freeTagList = tag;
1352 }
1353 /* move m_openInternalEntities to m_freeInternalEntities */
1354 openEntityList = parser->m_openInternalEntities;
1355 while (openEntityList) {
1356 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1357 openEntityList = openEntity->next;
1358 openEntity->next = parser->m_freeInternalEntities;
1359 parser->m_freeInternalEntities = openEntity;
1360 }
1361 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1362 * for attributes) */
1363 openEntityList = parser->m_openAttributeEntities;
1364 while (openEntityList) {
1365 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1366 openEntityList = openEntity->next;
1367 openEntity->next = parser->m_freeAttributeEntities;
1368 parser->m_freeAttributeEntities = openEntity;
1369 }
1370 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1371 * for value entities) */
1372 openEntityList = parser->m_openValueEntities;
1373 while (openEntityList) {
1374 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1375 openEntityList = openEntity->next;
1376 openEntity->next = parser->m_freeValueEntities;
1377 parser->m_freeValueEntities = openEntity;
1378 }
1379 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1380 FREE(parser, parser->m_unknownEncodingMem);
1381 if (parser->m_unknownEncodingRelease)
1382 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1383 poolClear(&parser->m_tempPool);
1384 poolClear(&parser->m_temp2Pool);
1385 FREE(parser, (void *)parser->m_protocolEncodingName);
1386 parser->m_protocolEncodingName = NULL;
1387 parserInit(parser, encodingName);
1388 dtdReset(parser->m_dtd, &parser->m_mem);
1389 return XML_TRUE;
1390 }
1391
1392 static XML_Bool
parserBusy(XML_Parser parser)1393 parserBusy(XML_Parser parser) {
1394 switch (parser->m_parsingStatus.parsing) {
1395 case XML_PARSING:
1396 case XML_SUSPENDED:
1397 return XML_TRUE;
1398 case XML_INITIALIZED:
1399 case XML_FINISHED:
1400 default:
1401 return XML_FALSE;
1402 }
1403 }
1404
1405 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1406 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1407 if (parser == NULL)
1408 return XML_STATUS_ERROR;
1409 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1410 XXX There's no way for the caller to determine which of the
1411 XXX possible error cases caused the XML_STATUS_ERROR return.
1412 */
1413 if (parserBusy(parser))
1414 return XML_STATUS_ERROR;
1415
1416 /* Get rid of any previous encoding name */
1417 FREE(parser, (void *)parser->m_protocolEncodingName);
1418
1419 if (encodingName == NULL)
1420 /* No new encoding name */
1421 parser->m_protocolEncodingName = NULL;
1422 else {
1423 /* Copy the new encoding name into allocated memory */
1424 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1425 if (! parser->m_protocolEncodingName)
1426 return XML_STATUS_ERROR;
1427 }
1428 return XML_STATUS_OK;
1429 }
1430
1431 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1432 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1433 const XML_Char *encodingName) {
1434 XML_Parser parser = oldParser;
1435 DTD *newDtd = NULL;
1436 DTD *oldDtd;
1437 XML_StartElementHandler oldStartElementHandler;
1438 XML_EndElementHandler oldEndElementHandler;
1439 XML_CharacterDataHandler oldCharacterDataHandler;
1440 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1441 XML_CommentHandler oldCommentHandler;
1442 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1443 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1444 XML_DefaultHandler oldDefaultHandler;
1445 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1446 XML_NotationDeclHandler oldNotationDeclHandler;
1447 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1448 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1449 XML_NotStandaloneHandler oldNotStandaloneHandler;
1450 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1451 XML_SkippedEntityHandler oldSkippedEntityHandler;
1452 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1453 XML_ElementDeclHandler oldElementDeclHandler;
1454 XML_AttlistDeclHandler oldAttlistDeclHandler;
1455 XML_EntityDeclHandler oldEntityDeclHandler;
1456 XML_XmlDeclHandler oldXmlDeclHandler;
1457 ELEMENT_TYPE *oldDeclElementType;
1458
1459 void *oldUserData;
1460 void *oldHandlerArg;
1461 XML_Bool oldDefaultExpandInternalEntities;
1462 XML_Parser oldExternalEntityRefHandlerArg;
1463 #ifdef XML_DTD
1464 enum XML_ParamEntityParsing oldParamEntityParsing;
1465 int oldInEntityValue;
1466 #endif
1467 XML_Bool oldns_triplets;
1468 /* Note that the new parser shares the same hash secret as the old
1469 parser, so that dtdCopy and copyEntityTable can lookup values
1470 from hash tables associated with either parser without us having
1471 to worry which hash secrets each table has.
1472 */
1473 unsigned long oldhash_secret_salt;
1474 XML_Bool oldReparseDeferralEnabled;
1475
1476 /* Validate the oldParser parameter before we pull everything out of it */
1477 if (oldParser == NULL)
1478 return NULL;
1479
1480 /* Stash the original parser contents on the stack */
1481 oldDtd = parser->m_dtd;
1482 oldStartElementHandler = parser->m_startElementHandler;
1483 oldEndElementHandler = parser->m_endElementHandler;
1484 oldCharacterDataHandler = parser->m_characterDataHandler;
1485 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1486 oldCommentHandler = parser->m_commentHandler;
1487 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1488 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1489 oldDefaultHandler = parser->m_defaultHandler;
1490 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1491 oldNotationDeclHandler = parser->m_notationDeclHandler;
1492 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1493 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1494 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1495 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1496 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1497 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1498 oldElementDeclHandler = parser->m_elementDeclHandler;
1499 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1500 oldEntityDeclHandler = parser->m_entityDeclHandler;
1501 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1502 oldDeclElementType = parser->m_declElementType;
1503
1504 oldUserData = parser->m_userData;
1505 oldHandlerArg = parser->m_handlerArg;
1506 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1507 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1508 #ifdef XML_DTD
1509 oldParamEntityParsing = parser->m_paramEntityParsing;
1510 oldInEntityValue = parser->m_prologState.inEntityValue;
1511 #endif
1512 oldns_triplets = parser->m_ns_triplets;
1513 /* Note that the new parser shares the same hash secret as the old
1514 parser, so that dtdCopy and copyEntityTable can lookup values
1515 from hash tables associated with either parser without us having
1516 to worry which hash secrets each table has.
1517 */
1518 oldhash_secret_salt = parser->m_hash_secret_salt;
1519 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1520
1521 #ifdef XML_DTD
1522 if (! context)
1523 newDtd = oldDtd;
1524 #endif /* XML_DTD */
1525
1526 /* Note that the magical uses of the pre-processor to make field
1527 access look more like C++ require that `parser' be overwritten
1528 here. This makes this function more painful to follow than it
1529 would be otherwise.
1530 */
1531 if (parser->m_ns) {
1532 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1533 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1534 } else {
1535 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1536 }
1537
1538 if (! parser)
1539 return NULL;
1540
1541 parser->m_startElementHandler = oldStartElementHandler;
1542 parser->m_endElementHandler = oldEndElementHandler;
1543 parser->m_characterDataHandler = oldCharacterDataHandler;
1544 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1545 parser->m_commentHandler = oldCommentHandler;
1546 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1547 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1548 parser->m_defaultHandler = oldDefaultHandler;
1549 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1550 parser->m_notationDeclHandler = oldNotationDeclHandler;
1551 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1552 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1553 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1554 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1555 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1556 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1557 parser->m_elementDeclHandler = oldElementDeclHandler;
1558 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1559 parser->m_entityDeclHandler = oldEntityDeclHandler;
1560 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1561 parser->m_declElementType = oldDeclElementType;
1562 parser->m_userData = oldUserData;
1563 if (oldUserData == oldHandlerArg)
1564 parser->m_handlerArg = parser->m_userData;
1565 else
1566 parser->m_handlerArg = parser;
1567 if (oldExternalEntityRefHandlerArg != oldParser)
1568 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1569 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1570 parser->m_ns_triplets = oldns_triplets;
1571 parser->m_hash_secret_salt = oldhash_secret_salt;
1572 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1573 parser->m_parentParser = oldParser;
1574 #ifdef XML_DTD
1575 parser->m_paramEntityParsing = oldParamEntityParsing;
1576 parser->m_prologState.inEntityValue = oldInEntityValue;
1577 if (context) {
1578 #endif /* XML_DTD */
1579 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1580 || ! setContext(parser, context)) {
1581 XML_ParserFree(parser);
1582 return NULL;
1583 }
1584 parser->m_processor = externalEntityInitProcessor;
1585 #ifdef XML_DTD
1586 } else {
1587 /* The DTD instance referenced by parser->m_dtd is shared between the
1588 document's root parser and external PE parsers, therefore one does not
1589 need to call setContext. In addition, one also *must* not call
1590 setContext, because this would overwrite existing prefix->binding
1591 pointers in parser->m_dtd with ones that get destroyed with the external
1592 PE parser. This would leave those prefixes with dangling pointers.
1593 */
1594 parser->m_isParamEntity = XML_TRUE;
1595 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1596 parser->m_processor = externalParEntInitProcessor;
1597 }
1598 #endif /* XML_DTD */
1599 return parser;
1600 }
1601
1602 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1603 destroyBindings(BINDING *bindings, XML_Parser parser) {
1604 for (;;) {
1605 BINDING *b = bindings;
1606 if (! b)
1607 break;
1608 bindings = b->nextTagBinding;
1609 FREE(parser, b->uri);
1610 FREE(parser, b);
1611 }
1612 }
1613
1614 void XMLCALL
XML_ParserFree(XML_Parser parser)1615 XML_ParserFree(XML_Parser parser) {
1616 TAG *tagList;
1617 OPEN_INTERNAL_ENTITY *entityList;
1618 if (parser == NULL)
1619 return;
1620 /* free m_tagStack and m_freeTagList */
1621 tagList = parser->m_tagStack;
1622 for (;;) {
1623 TAG *p;
1624 if (tagList == NULL) {
1625 if (parser->m_freeTagList == NULL)
1626 break;
1627 tagList = parser->m_freeTagList;
1628 parser->m_freeTagList = NULL;
1629 }
1630 p = tagList;
1631 tagList = tagList->parent;
1632 FREE(parser, p->buf);
1633 destroyBindings(p->bindings, parser);
1634 FREE(parser, p);
1635 }
1636 /* free m_openInternalEntities and m_freeInternalEntities */
1637 entityList = parser->m_openInternalEntities;
1638 for (;;) {
1639 OPEN_INTERNAL_ENTITY *openEntity;
1640 if (entityList == NULL) {
1641 if (parser->m_freeInternalEntities == NULL)
1642 break;
1643 entityList = parser->m_freeInternalEntities;
1644 parser->m_freeInternalEntities = NULL;
1645 }
1646 openEntity = entityList;
1647 entityList = entityList->next;
1648 FREE(parser, openEntity);
1649 }
1650 /* free m_openAttributeEntities and m_freeAttributeEntities */
1651 entityList = parser->m_openAttributeEntities;
1652 for (;;) {
1653 OPEN_INTERNAL_ENTITY *openEntity;
1654 if (entityList == NULL) {
1655 if (parser->m_freeAttributeEntities == NULL)
1656 break;
1657 entityList = parser->m_freeAttributeEntities;
1658 parser->m_freeAttributeEntities = NULL;
1659 }
1660 openEntity = entityList;
1661 entityList = entityList->next;
1662 FREE(parser, openEntity);
1663 }
1664 /* free m_openValueEntities and m_freeValueEntities */
1665 entityList = parser->m_openValueEntities;
1666 for (;;) {
1667 OPEN_INTERNAL_ENTITY *openEntity;
1668 if (entityList == NULL) {
1669 if (parser->m_freeValueEntities == NULL)
1670 break;
1671 entityList = parser->m_freeValueEntities;
1672 parser->m_freeValueEntities = NULL;
1673 }
1674 openEntity = entityList;
1675 entityList = entityList->next;
1676 FREE(parser, openEntity);
1677 }
1678 destroyBindings(parser->m_freeBindingList, parser);
1679 destroyBindings(parser->m_inheritedBindings, parser);
1680 poolDestroy(&parser->m_tempPool);
1681 poolDestroy(&parser->m_temp2Pool);
1682 FREE(parser, (void *)parser->m_protocolEncodingName);
1683 #ifdef XML_DTD
1684 /* external parameter entity parsers share the DTD structure
1685 parser->m_dtd with the root parser, so we must not destroy it
1686 */
1687 if (! parser->m_isParamEntity && parser->m_dtd)
1688 #else
1689 if (parser->m_dtd)
1690 #endif /* XML_DTD */
1691 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1692 &parser->m_mem);
1693 FREE(parser, (void *)parser->m_atts);
1694 #ifdef XML_ATTR_INFO
1695 FREE(parser, (void *)parser->m_attInfo);
1696 #endif
1697 FREE(parser, parser->m_groupConnector);
1698 FREE(parser, parser->m_buffer);
1699 FREE(parser, parser->m_dataBuf);
1700 FREE(parser, parser->m_nsAtts);
1701 FREE(parser, parser->m_unknownEncodingMem);
1702 if (parser->m_unknownEncodingRelease)
1703 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1704 FREE(parser, parser);
1705 }
1706
1707 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1708 XML_UseParserAsHandlerArg(XML_Parser parser) {
1709 if (parser != NULL)
1710 parser->m_handlerArg = parser;
1711 }
1712
1713 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1714 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1715 if (parser == NULL)
1716 return XML_ERROR_INVALID_ARGUMENT;
1717 #ifdef XML_DTD
1718 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1719 if (parserBusy(parser))
1720 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1721 parser->m_useForeignDTD = useDTD;
1722 return XML_ERROR_NONE;
1723 #else
1724 UNUSED_P(useDTD);
1725 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1726 #endif
1727 }
1728
1729 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1730 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1731 if (parser == NULL)
1732 return;
1733 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1734 if (parserBusy(parser))
1735 return;
1736 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1737 }
1738
1739 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1740 XML_SetUserData(XML_Parser parser, void *p) {
1741 if (parser == NULL)
1742 return;
1743 if (parser->m_handlerArg == parser->m_userData)
1744 parser->m_handlerArg = parser->m_userData = p;
1745 else
1746 parser->m_userData = p;
1747 }
1748
1749 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1750 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1751 if (parser == NULL)
1752 return XML_STATUS_ERROR;
1753 if (p) {
1754 p = poolCopyString(&parser->m_dtd->pool, p);
1755 if (! p)
1756 return XML_STATUS_ERROR;
1757 parser->m_curBase = p;
1758 } else
1759 parser->m_curBase = NULL;
1760 return XML_STATUS_OK;
1761 }
1762
1763 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1764 XML_GetBase(XML_Parser parser) {
1765 if (parser == NULL)
1766 return NULL;
1767 return parser->m_curBase;
1768 }
1769
1770 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1771 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1772 if (parser == NULL)
1773 return -1;
1774 return parser->m_nSpecifiedAtts;
1775 }
1776
1777 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1778 XML_GetIdAttributeIndex(XML_Parser parser) {
1779 if (parser == NULL)
1780 return -1;
1781 return parser->m_idAttIndex;
1782 }
1783
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array (m_attInfo), or NULL for a NULL
   parser.  Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1792
1793 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1794 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1795 XML_EndElementHandler end) {
1796 if (parser == NULL)
1797 return;
1798 parser->m_startElementHandler = start;
1799 parser->m_endElementHandler = end;
1800 }
1801
1802 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1803 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1804 if (parser != NULL)
1805 parser->m_startElementHandler = start;
1806 }
1807
1808 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1809 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1810 if (parser != NULL)
1811 parser->m_endElementHandler = end;
1812 }
1813
1814 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1815 XML_SetCharacterDataHandler(XML_Parser parser,
1816 XML_CharacterDataHandler handler) {
1817 if (parser != NULL)
1818 parser->m_characterDataHandler = handler;
1819 }
1820
1821 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1822 XML_SetProcessingInstructionHandler(XML_Parser parser,
1823 XML_ProcessingInstructionHandler handler) {
1824 if (parser != NULL)
1825 parser->m_processingInstructionHandler = handler;
1826 }
1827
1828 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1829 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1830 if (parser != NULL)
1831 parser->m_commentHandler = handler;
1832 }
1833
1834 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1835 XML_SetCdataSectionHandler(XML_Parser parser,
1836 XML_StartCdataSectionHandler start,
1837 XML_EndCdataSectionHandler end) {
1838 if (parser == NULL)
1839 return;
1840 parser->m_startCdataSectionHandler = start;
1841 parser->m_endCdataSectionHandler = end;
1842 }
1843
1844 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1845 XML_SetStartCdataSectionHandler(XML_Parser parser,
1846 XML_StartCdataSectionHandler start) {
1847 if (parser != NULL)
1848 parser->m_startCdataSectionHandler = start;
1849 }
1850
1851 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1852 XML_SetEndCdataSectionHandler(XML_Parser parser,
1853 XML_EndCdataSectionHandler end) {
1854 if (parser != NULL)
1855 parser->m_endCdataSectionHandler = end;
1856 }
1857
1858 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1859 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1860 if (parser == NULL)
1861 return;
1862 parser->m_defaultHandler = handler;
1863 parser->m_defaultExpandInternalEntities = XML_FALSE;
1864 }
1865
1866 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1867 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1868 if (parser == NULL)
1869 return;
1870 parser->m_defaultHandler = handler;
1871 parser->m_defaultExpandInternalEntities = XML_TRUE;
1872 }
1873
1874 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1875 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1876 XML_EndDoctypeDeclHandler end) {
1877 if (parser == NULL)
1878 return;
1879 parser->m_startDoctypeDeclHandler = start;
1880 parser->m_endDoctypeDeclHandler = end;
1881 }
1882
1883 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1884 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1885 XML_StartDoctypeDeclHandler start) {
1886 if (parser != NULL)
1887 parser->m_startDoctypeDeclHandler = start;
1888 }
1889
1890 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1891 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1892 if (parser != NULL)
1893 parser->m_endDoctypeDeclHandler = end;
1894 }
1895
1896 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1897 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1898 XML_UnparsedEntityDeclHandler handler) {
1899 if (parser != NULL)
1900 parser->m_unparsedEntityDeclHandler = handler;
1901 }
1902
1903 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1904 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1905 if (parser != NULL)
1906 parser->m_notationDeclHandler = handler;
1907 }
1908
1909 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1910 XML_SetNamespaceDeclHandler(XML_Parser parser,
1911 XML_StartNamespaceDeclHandler start,
1912 XML_EndNamespaceDeclHandler end) {
1913 if (parser == NULL)
1914 return;
1915 parser->m_startNamespaceDeclHandler = start;
1916 parser->m_endNamespaceDeclHandler = end;
1917 }
1918
1919 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1920 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1921 XML_StartNamespaceDeclHandler start) {
1922 if (parser != NULL)
1923 parser->m_startNamespaceDeclHandler = start;
1924 }
1925
1926 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1927 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1928 XML_EndNamespaceDeclHandler end) {
1929 if (parser != NULL)
1930 parser->m_endNamespaceDeclHandler = end;
1931 }
1932
1933 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1934 XML_SetNotStandaloneHandler(XML_Parser parser,
1935 XML_NotStandaloneHandler handler) {
1936 if (parser != NULL)
1937 parser->m_notStandaloneHandler = handler;
1938 }
1939
1940 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1941 XML_SetExternalEntityRefHandler(XML_Parser parser,
1942 XML_ExternalEntityRefHandler handler) {
1943 if (parser != NULL)
1944 parser->m_externalEntityRefHandler = handler;
1945 }
1946
1947 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1948 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1949 if (parser == NULL)
1950 return;
1951 if (arg)
1952 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1953 else
1954 parser->m_externalEntityRefHandlerArg = parser;
1955 }
1956
1957 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1958 XML_SetSkippedEntityHandler(XML_Parser parser,
1959 XML_SkippedEntityHandler handler) {
1960 if (parser != NULL)
1961 parser->m_skippedEntityHandler = handler;
1962 }
1963
1964 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1965 XML_SetUnknownEncodingHandler(XML_Parser parser,
1966 XML_UnknownEncodingHandler handler, void *data) {
1967 if (parser == NULL)
1968 return;
1969 parser->m_unknownEncodingHandler = handler;
1970 parser->m_unknownEncodingHandlerData = data;
1971 }
1972
1973 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1974 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1975 if (parser != NULL)
1976 parser->m_elementDeclHandler = eldecl;
1977 }
1978
1979 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1980 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1981 if (parser != NULL)
1982 parser->m_attlistDeclHandler = attdecl;
1983 }
1984
1985 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1986 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1987 if (parser != NULL)
1988 parser->m_entityDeclHandler = handler;
1989 }
1990
1991 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1992 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1993 if (parser != NULL)
1994 parser->m_xmlDeclHandler = handler;
1995 }
1996
1997 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1998 XML_SetParamEntityParsing(XML_Parser parser,
1999 enum XML_ParamEntityParsing peParsing) {
2000 if (parser == NULL)
2001 return 0;
2002 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2003 if (parserBusy(parser))
2004 return 0;
2005 #ifdef XML_DTD
2006 parser->m_paramEntityParsing = peParsing;
2007 return 1;
2008 #else
2009 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2010 #endif
2011 }
2012
2013 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)2014 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2015 if (parser == NULL)
2016 return 0;
2017 if (parser->m_parentParser)
2018 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
2019 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2020 if (parserBusy(parser))
2021 return 0;
2022 parser->m_hash_secret_salt = hash_salt;
2023 return 1;
2024 }
2025
2026 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)2027 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2028 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2029 if (parser != NULL)
2030 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2031 return XML_STATUS_ERROR;
2032 }
2033 switch (parser->m_parsingStatus.parsing) {
2034 case XML_SUSPENDED:
2035 parser->m_errorCode = XML_ERROR_SUSPENDED;
2036 return XML_STATUS_ERROR;
2037 case XML_FINISHED:
2038 parser->m_errorCode = XML_ERROR_FINISHED;
2039 return XML_STATUS_ERROR;
2040 case XML_INITIALIZED:
2041 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2042 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2043 return XML_STATUS_ERROR;
2044 }
2045 /* fall through */
2046 default:
2047 parser->m_parsingStatus.parsing = XML_PARSING;
2048 }
2049
2050 #if XML_CONTEXT_BYTES == 0
2051 if (parser->m_bufferPtr == parser->m_bufferEnd) {
2052 const char *end;
2053 int nLeftOver;
2054 enum XML_Status result;
2055 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2056 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2057 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2058 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2059 parser->m_processor = errorProcessor;
2060 return XML_STATUS_ERROR;
2061 }
2062 // though this isn't a buffer request, we assume that `len` is the app's
2063 // preferred buffer fill size, and therefore save it here.
2064 parser->m_lastBufferRequestSize = len;
2065 parser->m_parseEndByteIndex += len;
2066 parser->m_positionPtr = s;
2067 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2068
2069 parser->m_errorCode
2070 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2071
2072 if (parser->m_errorCode != XML_ERROR_NONE) {
2073 parser->m_eventEndPtr = parser->m_eventPtr;
2074 parser->m_processor = errorProcessor;
2075 return XML_STATUS_ERROR;
2076 } else {
2077 switch (parser->m_parsingStatus.parsing) {
2078 case XML_SUSPENDED:
2079 result = XML_STATUS_SUSPENDED;
2080 break;
2081 case XML_INITIALIZED:
2082 case XML_PARSING:
2083 if (isFinal) {
2084 parser->m_parsingStatus.parsing = XML_FINISHED;
2085 return XML_STATUS_OK;
2086 }
2087 /* fall through */
2088 default:
2089 result = XML_STATUS_OK;
2090 }
2091 }
2092
2093 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2094 &parser->m_position);
2095 nLeftOver = s + len - end;
2096 if (nLeftOver) {
2097 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2098 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2099 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2100 parser->m_parsingStatus.parsing = XML_PARSING;
2101 void *const temp = XML_GetBuffer(parser, nLeftOver);
2102 parser->m_parsingStatus.parsing = originalStatus;
2103 // GetBuffer may have overwritten this, but we want to remember what the
2104 // app requested, not how many bytes were left over after parsing.
2105 parser->m_lastBufferRequestSize = len;
2106 if (temp == NULL) {
2107 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2108 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2109 parser->m_processor = errorProcessor;
2110 return XML_STATUS_ERROR;
2111 }
2112 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2113 // don't have any data to preserve, and can copy straight into the start
2114 // of the buffer rather than the GetBuffer return pointer (which may be
2115 // pointing further into the allocated buffer).
2116 memcpy(parser->m_buffer, end, nLeftOver);
2117 }
2118 parser->m_bufferPtr = parser->m_buffer;
2119 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2120 parser->m_positionPtr = parser->m_bufferPtr;
2121 parser->m_parseEndPtr = parser->m_bufferEnd;
2122 parser->m_eventPtr = parser->m_bufferPtr;
2123 parser->m_eventEndPtr = parser->m_bufferPtr;
2124 return result;
2125 }
2126 #endif /* XML_CONTEXT_BYTES == 0 */
2127 void *buff = XML_GetBuffer(parser, len);
2128 if (buff == NULL)
2129 return XML_STATUS_ERROR;
2130 if (len > 0) {
2131 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2132 memcpy(buff, s, len);
2133 }
2134 return XML_ParseBuffer(parser, len, isFinal);
2135 }
2136
2137 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2138 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2139 const char *start;
2140 enum XML_Status result = XML_STATUS_OK;
2141
2142 if (parser == NULL)
2143 return XML_STATUS_ERROR;
2144
2145 if (len < 0) {
2146 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2147 return XML_STATUS_ERROR;
2148 }
2149
2150 switch (parser->m_parsingStatus.parsing) {
2151 case XML_SUSPENDED:
2152 parser->m_errorCode = XML_ERROR_SUSPENDED;
2153 return XML_STATUS_ERROR;
2154 case XML_FINISHED:
2155 parser->m_errorCode = XML_ERROR_FINISHED;
2156 return XML_STATUS_ERROR;
2157 case XML_INITIALIZED:
2158 /* Has someone called XML_GetBuffer successfully before? */
2159 if (! parser->m_bufferPtr) {
2160 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2161 return XML_STATUS_ERROR;
2162 }
2163
2164 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2165 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2166 return XML_STATUS_ERROR;
2167 }
2168 /* fall through */
2169 default:
2170 parser->m_parsingStatus.parsing = XML_PARSING;
2171 }
2172
2173 start = parser->m_bufferPtr;
2174 parser->m_positionPtr = start;
2175 parser->m_bufferEnd += len;
2176 parser->m_parseEndPtr = parser->m_bufferEnd;
2177 parser->m_parseEndByteIndex += len;
2178 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2179
2180 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2181 &parser->m_bufferPtr);
2182
2183 if (parser->m_errorCode != XML_ERROR_NONE) {
2184 parser->m_eventEndPtr = parser->m_eventPtr;
2185 parser->m_processor = errorProcessor;
2186 return XML_STATUS_ERROR;
2187 } else {
2188 switch (parser->m_parsingStatus.parsing) {
2189 case XML_SUSPENDED:
2190 result = XML_STATUS_SUSPENDED;
2191 break;
2192 case XML_INITIALIZED:
2193 case XML_PARSING:
2194 if (isFinal) {
2195 parser->m_parsingStatus.parsing = XML_FINISHED;
2196 return result;
2197 }
2198 default:; /* should not happen */
2199 }
2200 }
2201
2202 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2203 parser->m_bufferPtr, &parser->m_position);
2204 parser->m_positionPtr = parser->m_bufferPtr;
2205 return result;
2206 }
2207
2208 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2209 XML_GetBuffer(XML_Parser parser, int len) {
2210 if (parser == NULL)
2211 return NULL;
2212 if (len < 0) {
2213 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2214 return NULL;
2215 }
2216 switch (parser->m_parsingStatus.parsing) {
2217 case XML_SUSPENDED:
2218 parser->m_errorCode = XML_ERROR_SUSPENDED;
2219 return NULL;
2220 case XML_FINISHED:
2221 parser->m_errorCode = XML_ERROR_FINISHED;
2222 return NULL;
2223 default:;
2224 }
2225
2226 // whether or not the request succeeds, `len` seems to be the app's preferred
2227 // buffer fill size; remember it.
2228 parser->m_lastBufferRequestSize = len;
2229 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2230 || parser->m_buffer == NULL) {
2231 #if XML_CONTEXT_BYTES > 0
2232 int keep;
2233 #endif /* XML_CONTEXT_BYTES > 0 */
2234 /* Do not invoke signed arithmetic overflow: */
2235 int neededSize = (int)((unsigned)len
2236 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2237 parser->m_bufferEnd, parser->m_bufferPtr));
2238 if (neededSize < 0) {
2239 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2240 return NULL;
2241 }
2242 #if XML_CONTEXT_BYTES > 0
2243 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2244 if (keep > XML_CONTEXT_BYTES)
2245 keep = XML_CONTEXT_BYTES;
2246 /* Detect and prevent integer overflow */
2247 if (keep > INT_MAX - neededSize) {
2248 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2249 return NULL;
2250 }
2251 neededSize += keep;
2252 #endif /* XML_CONTEXT_BYTES > 0 */
2253 if (parser->m_buffer && parser->m_bufferPtr
2254 && neededSize
2255 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2256 #if XML_CONTEXT_BYTES > 0
2257 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2258 int offset
2259 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2260 - keep;
2261 /* The buffer pointers cannot be NULL here; we have at least some bytes
2262 * in the buffer */
2263 memmove(parser->m_buffer, &parser->m_buffer[offset],
2264 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2265 parser->m_bufferEnd -= offset;
2266 parser->m_bufferPtr -= offset;
2267 }
2268 #else
2269 memmove(parser->m_buffer, parser->m_bufferPtr,
2270 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2271 parser->m_bufferEnd
2272 = parser->m_buffer
2273 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2274 parser->m_bufferPtr = parser->m_buffer;
2275 #endif /* XML_CONTEXT_BYTES > 0 */
2276 } else {
2277 char *newBuf;
2278 int bufferSize
2279 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2280 if (bufferSize == 0)
2281 bufferSize = INIT_BUFFER_SIZE;
2282 do {
2283 /* Do not invoke signed arithmetic overflow: */
2284 bufferSize = (int)(2U * (unsigned)bufferSize);
2285 } while (bufferSize < neededSize && bufferSize > 0);
2286 if (bufferSize <= 0) {
2287 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2288 return NULL;
2289 }
2290 newBuf = (char *)MALLOC(parser, bufferSize);
2291 if (newBuf == 0) {
2292 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2293 return NULL;
2294 }
2295 parser->m_bufferLim = newBuf + bufferSize;
2296 #if XML_CONTEXT_BYTES > 0
2297 if (parser->m_bufferPtr) {
2298 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2299 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2300 + keep);
2301 FREE(parser, parser->m_buffer);
2302 parser->m_buffer = newBuf;
2303 parser->m_bufferEnd
2304 = parser->m_buffer
2305 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2306 + keep;
2307 parser->m_bufferPtr = parser->m_buffer + keep;
2308 } else {
2309 /* This must be a brand new buffer with no data in it yet */
2310 parser->m_bufferEnd = newBuf;
2311 parser->m_bufferPtr = parser->m_buffer = newBuf;
2312 }
2313 #else
2314 if (parser->m_bufferPtr) {
2315 memcpy(newBuf, parser->m_bufferPtr,
2316 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2317 FREE(parser, parser->m_buffer);
2318 parser->m_bufferEnd
2319 = newBuf
2320 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2321 } else {
2322 /* This must be a brand new buffer with no data in it yet */
2323 parser->m_bufferEnd = newBuf;
2324 }
2325 parser->m_bufferPtr = parser->m_buffer = newBuf;
2326 #endif /* XML_CONTEXT_BYTES > 0 */
2327 }
2328 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2329 parser->m_positionPtr = NULL;
2330 }
2331 return parser->m_bufferEnd;
2332 }
2333
2334 static void
triggerReenter(XML_Parser parser)2335 triggerReenter(XML_Parser parser) {
2336 parser->m_reenter = XML_TRUE;
2337 }
2338
2339 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2340 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2341 if (parser == NULL)
2342 return XML_STATUS_ERROR;
2343 switch (parser->m_parsingStatus.parsing) {
2344 case XML_INITIALIZED:
2345 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2346 return XML_STATUS_ERROR;
2347 case XML_SUSPENDED:
2348 if (resumable) {
2349 parser->m_errorCode = XML_ERROR_SUSPENDED;
2350 return XML_STATUS_ERROR;
2351 }
2352 parser->m_parsingStatus.parsing = XML_FINISHED;
2353 break;
2354 case XML_FINISHED:
2355 parser->m_errorCode = XML_ERROR_FINISHED;
2356 return XML_STATUS_ERROR;
2357 case XML_PARSING:
2358 if (resumable) {
2359 #ifdef XML_DTD
2360 if (parser->m_isParamEntity) {
2361 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2362 return XML_STATUS_ERROR;
2363 }
2364 #endif
2365 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2366 } else
2367 parser->m_parsingStatus.parsing = XML_FINISHED;
2368 break;
2369 default:
2370 assert(0);
2371 }
2372 return XML_STATUS_OK;
2373 }
2374
2375 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2376 XML_ResumeParser(XML_Parser parser) {
2377 enum XML_Status result = XML_STATUS_OK;
2378
2379 if (parser == NULL)
2380 return XML_STATUS_ERROR;
2381 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2382 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2383 return XML_STATUS_ERROR;
2384 }
2385 parser->m_parsingStatus.parsing = XML_PARSING;
2386
2387 parser->m_errorCode = callProcessor(
2388 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2389
2390 if (parser->m_errorCode != XML_ERROR_NONE) {
2391 parser->m_eventEndPtr = parser->m_eventPtr;
2392 parser->m_processor = errorProcessor;
2393 return XML_STATUS_ERROR;
2394 } else {
2395 switch (parser->m_parsingStatus.parsing) {
2396 case XML_SUSPENDED:
2397 result = XML_STATUS_SUSPENDED;
2398 break;
2399 case XML_INITIALIZED:
2400 case XML_PARSING:
2401 if (parser->m_parsingStatus.finalBuffer) {
2402 parser->m_parsingStatus.parsing = XML_FINISHED;
2403 return result;
2404 }
2405 default:;
2406 }
2407 }
2408
2409 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2410 parser->m_bufferPtr, &parser->m_position);
2411 parser->m_positionPtr = parser->m_bufferPtr;
2412 return result;
2413 }
2414
2415 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2416 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2417 if (parser == NULL)
2418 return;
2419 assert(status != NULL);
2420 *status = parser->m_parsingStatus;
2421 }
2422
2423 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2424 XML_GetErrorCode(XML_Parser parser) {
2425 if (parser == NULL)
2426 return XML_ERROR_INVALID_ARGUMENT;
2427 return parser->m_errorCode;
2428 }
2429
2430 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2431 XML_GetCurrentByteIndex(XML_Parser parser) {
2432 if (parser == NULL)
2433 return -1;
2434 if (parser->m_eventPtr)
2435 return (XML_Index)(parser->m_parseEndByteIndex
2436 - (parser->m_parseEndPtr - parser->m_eventPtr));
2437 return -1;
2438 }
2439
2440 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2441 XML_GetCurrentByteCount(XML_Parser parser) {
2442 if (parser == NULL)
2443 return 0;
2444 if (parser->m_eventEndPtr && parser->m_eventPtr)
2445 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2446 return 0;
2447 }
2448
2449 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2450 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2451 #if XML_CONTEXT_BYTES > 0
2452 if (parser == NULL)
2453 return NULL;
2454 if (parser->m_eventPtr && parser->m_buffer) {
2455 if (offset != NULL)
2456 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2457 if (size != NULL)
2458 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2459 return parser->m_buffer;
2460 }
2461 #else
2462 (void)parser;
2463 (void)offset;
2464 (void)size;
2465 #endif /* XML_CONTEXT_BYTES > 0 */
2466 return (const char *)0;
2467 }
2468
2469 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2470 XML_GetCurrentLineNumber(XML_Parser parser) {
2471 if (parser == NULL)
2472 return 0;
2473 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2474 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2475 parser->m_eventPtr, &parser->m_position);
2476 parser->m_positionPtr = parser->m_eventPtr;
2477 }
2478 return parser->m_position.lineNumber + 1;
2479 }
2480
2481 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2482 XML_GetCurrentColumnNumber(XML_Parser parser) {
2483 if (parser == NULL)
2484 return 0;
2485 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2486 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2487 parser->m_eventPtr, &parser->m_position);
2488 parser->m_positionPtr = parser->m_eventPtr;
2489 }
2490 return parser->m_position.columnNumber;
2491 }
2492
2493 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2494 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2495 if (parser != NULL)
2496 FREE(parser, model);
2497 }
2498
2499 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2500 XML_MemMalloc(XML_Parser parser, size_t size) {
2501 if (parser == NULL)
2502 return NULL;
2503 return MALLOC(parser, size);
2504 }
2505
2506 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2507 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2508 if (parser == NULL)
2509 return NULL;
2510 return REALLOC(parser, ptr, size);
2511 }
2512
2513 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2514 XML_MemFree(XML_Parser parser, void *ptr) {
2515 if (parser != NULL)
2516 FREE(parser, ptr);
2517 }
2518
2519 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2520 XML_DefaultCurrent(XML_Parser parser) {
2521 if (parser == NULL)
2522 return;
2523 if (parser->m_defaultHandler) {
2524 if (parser->m_openInternalEntities)
2525 reportDefault(parser, parser->m_internalEncoding,
2526 parser->m_openInternalEntities->internalEventPtr,
2527 parser->m_openInternalEntities->internalEventEndPtr);
2528 else
2529 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2530 parser->m_eventEndPtr);
2531 }
2532 }
2533
2534 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2535 XML_ErrorString(enum XML_Error code) {
2536 switch (code) {
2537 case XML_ERROR_NONE:
2538 return NULL;
2539 case XML_ERROR_NO_MEMORY:
2540 return XML_L("out of memory");
2541 case XML_ERROR_SYNTAX:
2542 return XML_L("syntax error");
2543 case XML_ERROR_NO_ELEMENTS:
2544 return XML_L("no element found");
2545 case XML_ERROR_INVALID_TOKEN:
2546 return XML_L("not well-formed (invalid token)");
2547 case XML_ERROR_UNCLOSED_TOKEN:
2548 return XML_L("unclosed token");
2549 case XML_ERROR_PARTIAL_CHAR:
2550 return XML_L("partial character");
2551 case XML_ERROR_TAG_MISMATCH:
2552 return XML_L("mismatched tag");
2553 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2554 return XML_L("duplicate attribute");
2555 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2556 return XML_L("junk after document element");
2557 case XML_ERROR_PARAM_ENTITY_REF:
2558 return XML_L("illegal parameter entity reference");
2559 case XML_ERROR_UNDEFINED_ENTITY:
2560 return XML_L("undefined entity");
2561 case XML_ERROR_RECURSIVE_ENTITY_REF:
2562 return XML_L("recursive entity reference");
2563 case XML_ERROR_ASYNC_ENTITY:
2564 return XML_L("asynchronous entity");
2565 case XML_ERROR_BAD_CHAR_REF:
2566 return XML_L("reference to invalid character number");
2567 case XML_ERROR_BINARY_ENTITY_REF:
2568 return XML_L("reference to binary entity");
2569 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2570 return XML_L("reference to external entity in attribute");
2571 case XML_ERROR_MISPLACED_XML_PI:
2572 return XML_L("XML or text declaration not at start of entity");
2573 case XML_ERROR_UNKNOWN_ENCODING:
2574 return XML_L("unknown encoding");
2575 case XML_ERROR_INCORRECT_ENCODING:
2576 return XML_L("encoding specified in XML declaration is incorrect");
2577 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2578 return XML_L("unclosed CDATA section");
2579 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2580 return XML_L("error in processing external entity reference");
2581 case XML_ERROR_NOT_STANDALONE:
2582 return XML_L("document is not standalone");
2583 case XML_ERROR_UNEXPECTED_STATE:
2584 return XML_L("unexpected parser state - please send a bug report");
2585 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2586 return XML_L("entity declared in parameter entity");
2587 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2588 return XML_L("requested feature requires XML_DTD support in Expat");
2589 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2590 return XML_L("cannot change setting once parsing has begun");
2591 /* Added in 1.95.7. */
2592 case XML_ERROR_UNBOUND_PREFIX:
2593 return XML_L("unbound prefix");
2594 /* Added in 1.95.8. */
2595 case XML_ERROR_UNDECLARING_PREFIX:
2596 return XML_L("must not undeclare prefix");
2597 case XML_ERROR_INCOMPLETE_PE:
2598 return XML_L("incomplete markup in parameter entity");
2599 case XML_ERROR_XML_DECL:
2600 return XML_L("XML declaration not well-formed");
2601 case XML_ERROR_TEXT_DECL:
2602 return XML_L("text declaration not well-formed");
2603 case XML_ERROR_PUBLICID:
2604 return XML_L("illegal character(s) in public id");
2605 case XML_ERROR_SUSPENDED:
2606 return XML_L("parser suspended");
2607 case XML_ERROR_NOT_SUSPENDED:
2608 return XML_L("parser not suspended");
2609 case XML_ERROR_ABORTED:
2610 return XML_L("parsing aborted");
2611 case XML_ERROR_FINISHED:
2612 return XML_L("parsing finished");
2613 case XML_ERROR_SUSPEND_PE:
2614 return XML_L("cannot suspend in external parameter entity");
2615 /* Added in 2.0.0. */
2616 case XML_ERROR_RESERVED_PREFIX_XML:
2617 return XML_L(
2618 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2619 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2620 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2621 case XML_ERROR_RESERVED_NAMESPACE_URI:
2622 return XML_L(
2623 "prefix must not be bound to one of the reserved namespace names");
2624 /* Added in 2.2.5. */
2625 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2626 return XML_L("invalid argument");
2627 /* Added in 2.3.0. */
2628 case XML_ERROR_NO_BUFFER:
2629 return XML_L(
2630 "a successful prior call to function XML_GetBuffer is required");
2631 /* Added in 2.4.0. */
2632 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2633 return XML_L(
2634 "limit on input amplification factor (from DTD and entities) breached");
2635 /* Added in 2.6.4. */
2636 case XML_ERROR_NOT_STARTED:
2637 return XML_L("parser not started");
2638 }
2639 return NULL;
2640 }
2641
2642 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2643 XML_ExpatVersion(void) {
2644 /* V1 is used to string-ize the version number. However, it would
2645 string-ize the actual version macro *names* unless we get them
2646 substituted before being passed to V1. CPP is defined to expand
2647 a macro, then rescan for more expansions. Thus, we use V2 to expand
2648 the version macros, then CPP will expand the resulting V1() macro
2649 with the correct numerals. */
2650 /* ### I'm assuming cpp is portable in this respect... */
2651
2652 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2653 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2654
2655 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2656
2657 #undef V1
2658 #undef V2
2659 }
2660
2661 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2662 XML_ExpatVersionInfo(void) {
2663 XML_Expat_Version version;
2664
2665 version.major = XML_MAJOR_VERSION;
2666 version.minor = XML_MINOR_VERSION;
2667 version.micro = XML_MICRO_VERSION;
2668
2669 return version;
2670 }
2671
2672 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2673 XML_GetFeatureList(void) {
2674 static const XML_Feature features[] = {
2675 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2676 sizeof(XML_Char)},
2677 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2678 sizeof(XML_LChar)},
2679 #ifdef XML_UNICODE
2680 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2681 #endif
2682 #ifdef XML_UNICODE_WCHAR_T
2683 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2684 #endif
2685 #ifdef XML_DTD
2686 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2687 #endif
2688 #if XML_CONTEXT_BYTES > 0
2689 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2690 XML_CONTEXT_BYTES},
2691 #endif
2692 #ifdef XML_MIN_SIZE
2693 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2694 #endif
2695 #ifdef XML_NS
2696 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2697 #endif
2698 #ifdef XML_LARGE_SIZE
2699 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2700 #endif
2701 #ifdef XML_ATTR_INFO
2702 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2703 #endif
2704 #if XML_GE == 1
2705 /* Added in Expat 2.4.0 for XML_DTD defined and
2706 * added in Expat 2.6.0 for XML_GE == 1. */
2707 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2708 XML_L("XML_BLAP_MAX_AMP"),
2709 (long int)
2710 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2711 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2712 XML_L("XML_BLAP_ACT_THRES"),
2713 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2714 /* Added in Expat 2.6.0. */
2715 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2716 #endif
2717 {XML_FEATURE_END, NULL, 0}};
2718
2719 return features;
2720 }
2721
2722 #if XML_GE == 1
2723 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2724 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2725 XML_Parser parser, float maximumAmplificationFactor) {
2726 if ((parser == NULL) || (parser->m_parentParser != NULL)
2727 || isnan(maximumAmplificationFactor)
2728 || (maximumAmplificationFactor < 1.0f)) {
2729 return XML_FALSE;
2730 }
2731 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2732 return XML_TRUE;
2733 }
2734
2735 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2736 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2737 XML_Parser parser, unsigned long long activationThresholdBytes) {
2738 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2739 return XML_FALSE;
2740 }
2741 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2742 return XML_TRUE;
2743 }
2744 #endif /* XML_GE == 1 */
2745
2746 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2747 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2748 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2749 parser->m_reparseDeferralEnabled = enabled;
2750 return XML_TRUE;
2751 }
2752 return XML_FALSE;
2753 }
2754
2755 /* Initially tag->rawName always points into the parse buffer;
2756 for those TAG instances opened while the current parse buffer was
2757 processed, and not yet closed, we need to store tag->rawName in a more
2758 permanent location, since the parse buffer is about to be discarded.
2759 */
2760 static XML_Bool
storeRawNames(XML_Parser parser)2761 storeRawNames(XML_Parser parser) {
2762 TAG *tag = parser->m_tagStack;
2763 while (tag) {
2764 int bufSize;
2765 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2766 size_t rawNameLen;
2767 char *rawNameBuf = tag->buf + nameLen;
2768 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2769 at the first entry that has already been copied; everything
2770 below it in the stack is already been accounted for in a
2771 previous call to this function.
2772 */
2773 if (tag->rawName == rawNameBuf)
2774 break;
2775 /* For reuse purposes we need to ensure that the
2776 size of tag->buf is a multiple of sizeof(XML_Char).
2777 */
2778 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2779 /* Detect and prevent integer overflow. */
2780 if (rawNameLen > (size_t)INT_MAX - nameLen)
2781 return XML_FALSE;
2782 bufSize = nameLen + (int)rawNameLen;
2783 if (bufSize > tag->bufEnd - tag->buf) {
2784 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2785 if (temp == NULL)
2786 return XML_FALSE;
2787 /* if tag->name.str points to tag->buf (only when namespace
2788 processing is off) then we have to update it
2789 */
2790 if (tag->name.str == (XML_Char *)tag->buf)
2791 tag->name.str = (XML_Char *)temp;
2792 /* if tag->name.localPart is set (when namespace processing is on)
2793 then update it as well, since it will always point into tag->buf
2794 */
2795 if (tag->name.localPart)
2796 tag->name.localPart
2797 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2798 tag->buf = temp;
2799 tag->bufEnd = temp + bufSize;
2800 rawNameBuf = temp + nameLen;
2801 }
2802 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2803 tag->rawName = rawNameBuf;
2804 tag = tag->parent;
2805 }
2806 return XML_TRUE;
2807 }
2808
2809 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2810 contentProcessor(XML_Parser parser, const char *start, const char *end,
2811 const char **endPtr) {
2812 enum XML_Error result = doContent(
2813 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
2814 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2815 XML_ACCOUNT_DIRECT);
2816 if (result == XML_ERROR_NONE) {
2817 if (! storeRawNames(parser))
2818 return XML_ERROR_NO_MEMORY;
2819 }
2820 return result;
2821 }
2822
2823 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2824 externalEntityInitProcessor(XML_Parser parser, const char *start,
2825 const char *end, const char **endPtr) {
2826 enum XML_Error result = initializeEncoding(parser);
2827 if (result != XML_ERROR_NONE)
2828 return result;
2829 parser->m_processor = externalEntityInitProcessor2;
2830 return externalEntityInitProcessor2(parser, start, end, endPtr);
2831 }
2832
2833 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2834 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2835 const char *end, const char **endPtr) {
2836 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2837 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2838 switch (tok) {
2839 case XML_TOK_BOM:
2840 #if XML_GE == 1
2841 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2842 XML_ACCOUNT_DIRECT)) {
2843 accountingOnAbort(parser);
2844 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845 }
2846 #endif /* XML_GE == 1 */
2847
2848 /* If we are at the end of the buffer, this would cause the next stage,
2849 i.e. externalEntityInitProcessor3, to pass control directly to
2850 doContent (by detecting XML_TOK_NONE) without processing any xml text
2851 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2852 */
2853 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2854 *endPtr = next;
2855 return XML_ERROR_NONE;
2856 }
2857 start = next;
2858 break;
2859 case XML_TOK_PARTIAL:
2860 if (! parser->m_parsingStatus.finalBuffer) {
2861 *endPtr = start;
2862 return XML_ERROR_NONE;
2863 }
2864 parser->m_eventPtr = start;
2865 return XML_ERROR_UNCLOSED_TOKEN;
2866 case XML_TOK_PARTIAL_CHAR:
2867 if (! parser->m_parsingStatus.finalBuffer) {
2868 *endPtr = start;
2869 return XML_ERROR_NONE;
2870 }
2871 parser->m_eventPtr = start;
2872 return XML_ERROR_PARTIAL_CHAR;
2873 }
2874 parser->m_processor = externalEntityInitProcessor3;
2875 return externalEntityInitProcessor3(parser, start, end, endPtr);
2876 }
2877
2878 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2879 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2880 const char *end, const char **endPtr) {
2881 int tok;
2882 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2883 parser->m_eventPtr = start;
2884 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2885 /* Note: These bytes are accounted later in:
2886 - processXmlDecl
2887 - externalEntityContentProcessor
2888 */
2889 parser->m_eventEndPtr = next;
2890
2891 switch (tok) {
2892 case XML_TOK_XML_DECL: {
2893 enum XML_Error result;
2894 result = processXmlDecl(parser, 1, start, next);
2895 if (result != XML_ERROR_NONE)
2896 return result;
2897 switch (parser->m_parsingStatus.parsing) {
2898 case XML_SUSPENDED:
2899 *endPtr = next;
2900 return XML_ERROR_NONE;
2901 case XML_FINISHED:
2902 return XML_ERROR_ABORTED;
2903 case XML_PARSING:
2904 if (parser->m_reenter) {
2905 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
2906 }
2907 /* Fall through */
2908 default:
2909 start = next;
2910 }
2911 } break;
2912 case XML_TOK_PARTIAL:
2913 if (! parser->m_parsingStatus.finalBuffer) {
2914 *endPtr = start;
2915 return XML_ERROR_NONE;
2916 }
2917 return XML_ERROR_UNCLOSED_TOKEN;
2918 case XML_TOK_PARTIAL_CHAR:
2919 if (! parser->m_parsingStatus.finalBuffer) {
2920 *endPtr = start;
2921 return XML_ERROR_NONE;
2922 }
2923 return XML_ERROR_PARTIAL_CHAR;
2924 }
2925 parser->m_processor = externalEntityContentProcessor;
2926 parser->m_tagLevel = 1;
2927 return externalEntityContentProcessor(parser, start, end, endPtr);
2928 }
2929
2930 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2931 externalEntityContentProcessor(XML_Parser parser, const char *start,
2932 const char *end, const char **endPtr) {
2933 enum XML_Error result
2934 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2935 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2936 XML_ACCOUNT_ENTITY_EXPANSION);
2937 if (result == XML_ERROR_NONE) {
2938 if (! storeRawNames(parser))
2939 return XML_ERROR_NO_MEMORY;
2940 }
2941 return result;
2942 }
2943
/* Tokenizes the content in [s, end) with XmlContentTok() and dispatches on
   each token: reports character data, tags, references, CDATA sections,
   processing instructions and comments to the registered handlers, and
   maintains the stack of open tags.

   Parameters:
   - startTagLevel: tag level the content started at.  With 0, reaching the
     end of the input is reported as XML_ERROR_NO_ELEMENTS; with a positive
     value the tag level must be back at startTagLevel when the input ends,
     otherwise XML_ERROR_ASYNC_ENTITY is returned.
   - enc: encoding of the input.  When it differs from parser->m_encoding,
     event positions are tracked in parser->m_openInternalEntities rather
     than in the parser itself.
   - nextPtr: receives the position at which parsing stopped on the
     XML_ERROR_NONE returns made directly by this function.
   - haveMore: when true, an incomplete trailing token is left unconsumed
     (*nextPtr = s, XML_ERROR_NONE) instead of being reported as an error.
   - account: accounting category passed on to accountingDiffTolerated(),
     storeAtts() and doCdataSection().

   Returns XML_ERROR_NONE when parsing stopped without error (end of
   usable input, suspension, or a requested re-entry), otherwise the
   error code.  Once the tag level drops to 0 the remaining input is
   handed to epilogProcessor(), or epilogProcessor is only installed as
   the processor if the parser is suspended or must be re-entered.
*/
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  /* Select where the start/end position of the current event is kept. */
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#if XML_GE == 1
    /* For a trailing "]" or CR token, account nothing while more input
       may follow, and everything up to `end` otherwise. */
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_TRAILING_CR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      *eventEndPP = end;
      /* The character data handler is given a single line feed (0xA). */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0)
        return XML_ERROR_NO_ELEMENTS;
      if (parser->m_tagLevel != startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      if (startTagLevel > 0) {
        if (parser->m_tagLevel != startTagLevel)
          return XML_ERROR_ASYNC_ENTITY;
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_NO_ELEMENTS;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      /* Non-zero when the reference names a predefined entity; the value
         is then the character to report. */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (parser->m_characterDataHandler)
          parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      /* The name is stored in the DTD pool only for the lookup and is
         discarded again right after it. */
      name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&dtd->pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal,
         otherwise call the skipped entity or default handler.
      */
      if (! dtd->hasParamEntityRefs || dtd->standalone) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      if (entity->open)
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      if (entity->notation)
        return XML_ERROR_BINARY_ENTITY_REF;
      if (entity->textPtr) {
        /* The entity has replacement text: expand it, or report it as
           skipped when expansion of internal entities is turned off. */
        enum XML_Error result;
        if (! parser->m_defaultExpandInternalEntities) {
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
                                           0);
          else if (parser->m_defaultHandler)
            reportDefault(parser, enc, s, next);
          break;
        }
        result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
        if (result != XML_ERROR_NONE)
          return result;
      } else if (parser->m_externalEntityRefHandler) {
        const XML_Char *context;
        /* The entity is flagged as open only while the context string
           is being built. */
        entity->open = XML_TRUE;
        context = getContext(parser);
        entity->open = XML_FALSE;
        if (! context)
          return XML_ERROR_NO_MEMORY;
        if (! parser->m_externalEntityRefHandler(
                parser->m_externalEntityRefHandlerArg, context, entity->base,
                entity->systemId, entity->publicId))
          return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        poolDiscard(&parser->m_tempPool);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    }
    case XML_TOK_START_TAG_NO_ATTS:
      /* fall through */
    case XML_TOK_START_TAG_WITH_ATTS: {
      TAG *tag;
      enum XML_Error result;
      XML_Char *toPtr;
      /* Reuse a TAG from the free list if there is one, otherwise
         allocate a new TAG together with its initial name buffer. */
      if (parser->m_freeTagList) {
        tag = parser->m_freeTagList;
        parser->m_freeTagList = parser->m_freeTagList->parent;
      } else {
        tag = (TAG *)MALLOC(parser, sizeof(TAG));
        if (! tag)
          return XML_ERROR_NO_MEMORY;
        tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
        if (! tag->buf) {
          FREE(parser, tag);
          return XML_ERROR_NO_MEMORY;
        }
        tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
      }
      /* Push the tag onto the stack of open tags. */
      tag->bindings = NULL;
      tag->parent = parser->m_tagStack;
      parser->m_tagStack = tag;
      tag->name.localPart = NULL;
      tag->name.prefix = NULL;
      tag->rawName = s + enc->minBytesPerChar;
      tag->rawNameLength = XmlNameLength(enc, tag->rawName);
      ++parser->m_tagLevel;
      {
        /* Convert the raw name into tag->buf, doubling the buffer until
           the whole name fits; the end passed to XmlConvert leaves room
           for the terminator written below. */
        const char *rawNameEnd = tag->rawName + tag->rawNameLength;
        const char *fromPtr = tag->rawName;
        toPtr = (XML_Char *)tag->buf;
        for (;;) {
          int bufSize;
          int convLen;
          const enum XML_Convert_Result convert_res
              = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
                           (ICHAR *)tag->bufEnd - 1);
          convLen = (int)(toPtr - (XML_Char *)tag->buf);
          if ((fromPtr >= rawNameEnd)
              || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
            tag->name.strLen = convLen;
            break;
          }
          bufSize = (int)(tag->bufEnd - tag->buf) << 1;
          {
            char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
            if (temp == NULL)
              return XML_ERROR_NO_MEMORY;
            tag->buf = temp;
            tag->bufEnd = temp + bufSize;
            toPtr = (XML_Char *)temp + convLen;
          }
        }
      }
      tag->name.str = (XML_Char *)tag->buf;
      *toPtr = XML_T('\0');
      result
          = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
      if (result)
        return result;
      if (parser->m_startElementHandler)
        parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
                                      (const XML_Char **)parser->m_atts);
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      break;
    }
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
      /* An empty element is not pushed onto the tag stack; its name
         lives in the temporary pool until both handlers were called. */
      const char *rawName = s + enc->minBytesPerChar;
      enum XML_Error result;
      BINDING *bindings = NULL;
      XML_Bool noElmHandlers = XML_TRUE;
      TAG_NAME name;
      name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
                                 rawName + XmlNameLength(enc, rawName));
      if (! name.str)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
      result = storeAtts(parser, enc, s, &name, &bindings,
                         XML_ACCOUNT_NONE /* token spans whole start tag */);
      if (result != XML_ERROR_NONE) {
        freeBindings(parser, bindings);
        return result;
      }
      poolFinish(&parser->m_tempPool);
      if (parser->m_startElementHandler) {
        parser->m_startElementHandler(parser->m_handlerArg, name.str,
                                      (const XML_Char **)parser->m_atts);
        noElmHandlers = XML_FALSE;
      }
      if (parser->m_endElementHandler) {
        /* If a start handler was called too, the end event is reported
           at the end of the token. */
        if (parser->m_startElementHandler)
          *eventPP = *eventEndPP;
        parser->m_endElementHandler(parser->m_handlerArg, name.str);
        noElmHandlers = XML_FALSE;
      }
      if (noElmHandlers && parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      freeBindings(parser, bindings);
    }
      /* At tag level 0 the rest of the input belongs to the epilog. */
      if ((parser->m_tagLevel == 0)
          && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
        if (parser->m_parsingStatus.parsing == XML_SUSPENDED
            || (parser->m_parsingStatus.parsing == XML_PARSING
                && parser->m_reenter))
          parser->m_processor = epilogProcessor;
        else
          return epilogProcessor(parser, next, end, nextPtr);
      }
      break;
    case XML_TOK_END_TAG:
      if (parser->m_tagLevel == startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      else {
        int len;
        const char *rawName;
        TAG *tag = parser->m_tagStack;
        /* The raw name of the end tag must match that of the innermost
           open tag byte for byte. */
        rawName = s + enc->minBytesPerChar * 2;
        len = XmlNameLength(enc, rawName);
        if (len != tag->rawNameLength
            || memcmp(tag->rawName, rawName, len) != 0) {
          *eventPP = rawName;
          return XML_ERROR_TAG_MISMATCH;
        }
        /* Pop the tag and put it on the free list for reuse. */
        parser->m_tagStack = tag->parent;
        tag->parent = parser->m_freeTagList;
        parser->m_freeTagList = tag;
        --parser->m_tagLevel;
        if (parser->m_endElementHandler) {
          const XML_Char *localPart;
          const XML_Char *prefix;
          XML_Char *uri;
          localPart = tag->name.localPart;
          if (parser->m_ns && localPart) {
            /* localPart and prefix may have been overwritten in
               tag->name.str, since this points to the binding->uri
               buffer which gets reused; so we have to add them again
            */
            uri = (XML_Char *)tag->name.str + tag->name.uriLen;
            /* don't need to check for space - already done in storeAtts() */
            while (*localPart)
              *uri++ = *localPart++;
            prefix = tag->name.prefix;
            if (parser->m_ns_triplets && prefix) {
              *uri++ = parser->m_namespaceSeparator;
              while (*prefix)
                *uri++ = *prefix++;
            }
            *uri = XML_T('\0');
          }
          parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
        } else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        /* Release the tag's bindings: report the end of each one, move it
           to the free binding list and restore the prefix's previous
           binding. */
        while (tag->bindings) {
          BINDING *b = tag->bindings;
          if (parser->m_endNamespaceDeclHandler)
            parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
                                              b->prefix->name);
          tag->bindings = tag->bindings->nextTagBinding;
          b->nextTagBinding = parser->m_freeBindingList;
          parser->m_freeBindingList = b;
          b->prefix->binding = b->prevPrefixBinding;
        }
        /* At tag level 0 the rest of the input belongs to the epilog. */
        if ((parser->m_tagLevel == 0)
            && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
          if (parser->m_parsingStatus.parsing == XML_SUSPENDED
              || (parser->m_parsingStatus.parsing == XML_PARSING
                  && parser->m_reenter))
            parser->m_processor = epilogProcessor;
          else
            return epilogProcessor(parser, next, end, nextPtr);
        }
      }
      break;
    case XML_TOK_CHAR_REF: {
      int n = XmlCharRefNumber(enc, s);
      if (n < 0)
        return XML_ERROR_BAD_CHAR_REF;
      if (parser->m_characterDataHandler) {
        /* XmlEncode() fills buf and its result is passed on as the
           length argument. */
        XML_Char buf[XML_ENCODE_MAX];
        parser->m_characterDataHandler(parser->m_handlerArg, buf,
                                       XmlEncode(n, (ICHAR *)buf));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_XML_DECL:
      return XML_ERROR_MISPLACED_XML_PI;
    case XML_TOK_DATA_NEWLINE:
      /* The character data handler is given a single line feed (0xA). */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_CDATA_SECT_OPEN: {
      enum XML_Error result;
      if (parser->m_startCdataSectionHandler)
        parser->m_startCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
         changing only the character data.  You set up a defaultHandler
         and a characterDataHandler.  The defaultHandler simply copies
         characters through.  The characterDataHandler does the
         transformation and writes the characters out escaping them as
         necessary.  This case will fail to work if we leave out the
         following two lines (because & and < inside CDATA sections will
         be incorrectly escaped).

         However, now we have a start/endCdataSectionHandler, so it seems
         easier to let the user deal with this.
      */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      result
          = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doCdataSection() left `next` NULL: continue with
           cdataSectionProcessor on the next call. */
        parser->m_processor = cdataSectionProcessor;
        return result;
      }
    } break;
    case XML_TOK_TRAILING_RSQB:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* No more input will follow: report everything up to `end` as
         character data. */
      if (parser->m_characterDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
          XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
          parser->m_characterDataHandler(
              parser->m_handlerArg, parser->m_dataBuf,
              (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
        } else
          parser->m_characterDataHandler(
              parser->m_handlerArg, (const XML_Char *)s,
              (int)((const XML_Char *)end - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0) {
        *eventPP = end;
        return XML_ERROR_NO_ELEMENTS;
      }
      if (parser->m_tagLevel != startTagLevel) {
        *eventPP = end;
        return XML_ERROR_ASYNC_ENTITY;
      }
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert into m_dataBuf and call the handler once per
             conversion step, advancing the event position each time,
             until the conversion reports completed or incomplete input. */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = s;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    default:
      /* All of the tokens produced by XmlContentTok() have their own
       * explicit cases, so this default is not strictly necessary.
       * However it is a useful safety net, so we retain the code and
       * simply exclude it from the coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
      /* LCOV_EXCL_STOP */
    }
    /* The token has been handled; check whether the parsing status now
       asks to stop (suspended, finished, or re-entry requested) before
       moving on to the next token. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *eventPP = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      *eventPP = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        *nextPtr = next;
        return XML_ERROR_NONE;
      }
      /* Fall through */
    default:;
      *eventPP = s = next;
    }
  }
  /* not reached */
}
3425
3426 /* This function does not call free() on the allocated memory, merely
3427 * moving it to the parser's m_freeBindingList where it can be freed or
3428 * reused as appropriate.
3429 */
3430 static void
freeBindings(XML_Parser parser,BINDING * bindings)3431 freeBindings(XML_Parser parser, BINDING *bindings) {
3432 while (bindings) {
3433 BINDING *b = bindings;
3434
3435 /* m_startNamespaceDeclHandler will have been called for this
3436 * binding in addBindings(), so call the end handler now.
3437 */
3438 if (parser->m_endNamespaceDeclHandler)
3439 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3440
3441 bindings = bindings->nextTagBinding;
3442 b->nextTagBinding = parser->m_freeBindingList;
3443 parser->m_freeBindingList = b;
3444 b->prefix->binding = b->prevPrefixBinding;
3445 }
3446 }
3447
/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts / m_attInfo / m_nsAtts arrays may be
                  grown here and whose m_tempPool receives the attribute
                  values and expanded names
   - enc:         encoding handed to the tokenizer helpers for attStr
   - attStr:      start of the text handed to XmlGetAttributes()
   - tagNamePtr:  in: str is the element name used for the element type
                  lookup; out (only when parser->m_ns is set and a binding
                  applies): str, localPart, uriLen, prefix and prefixLen
                  describe the expanded element name
   - bindingsPtr: head of the BINDING list that addBinding() prepends to
                  for every namespace declaration found on this tag
   - account:     forwarded unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success.  Errors produced directly here are
   XML_ERROR_NO_MEMORY, XML_ERROR_DUPLICATE_ATTRIBUTE and
   XML_ERROR_UNBOUND_PREFIX; errors from storeAttributeValue() and
   addBinding() are passed through.

   Bookkeeping convention used throughout: the XML_Char stored just before
   an attribute name, i.e. name[-1], serves as a per-tag flag:
     0 = not seen on this tag, 1 = seen, 2 = seen and prefixed (its name
     still has to be expanded).  The flags set here are cleared again
     before a successful return.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;         /* next free slot in appAtts (name/value pairs) */
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0; /* number of prefixed, non-xmlns attributes collected */
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    /* Not known yet: copy the name into the DTD pool and create an entry
       for it. */
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* Grow m_atts (and m_attInfo) when specified plus defaulted attributes
     do not fit into the current capacity. */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
                                parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      /* m_attsSize must keep describing the memory actually held */
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
                                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* The first tokenizer call was limited to oldAttsSize entries; run it
       again now that all n attributes fit. */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* The application's name/value pointer list is written into the very
     same storage as m_atts. */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* Positions are computed relative to m_parseEndByteIndex, using the
       distance of each pointer from m_parseEndPtr. */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
     */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1; /* mark as seen on this tag */
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* Give the name slot back: the declaration itself is not kept in
           the application's attribute list. */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  /* NOTE(review): when the ID attribute's flag is set but no appAtts entry
     matches its name, m_idAttIndex is left at whatever value it had. */
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* only attributes not seen on the tag and having a default value */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0; /* terminate the list */

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
                               nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* A slot counts as occupied only while its version equals the value
       chosen here, so slots filled by earlier calls read as empty. */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* Build the expanded name in m_tempPool: first the bound URI ... */
        for (j = 0; j < b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* ... skip past the prefix and its colon ... */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        /* ... then the local part. */
        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* occupied by a different name: probe the next slot */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          /* overwrite the terminator appended above with the separator */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* stop early once every prefixed name has been expanded; the
           loop below clears the flags of whatever follows */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* namespace declarations are not in appAtts, so clear their flags via
     the binding list */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE; /* no prefix and no default namespace bound */
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    for (; binding->prefix->name[prefixLen++];)
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* The expanded name is assembled in the binding's own URI buffer, right
     behind the URI; grow that buffer when it is too small. */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* repoint every open tag whose name still refers to the old buffer */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
3926
3927 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3928 is_rfc3986_uri_char(XML_Char candidate) {
3929 // For the RFC 3986 ANBF grammar see
3930 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3931
3932 switch (candidate) {
3933 // From rule "ALPHA" (uppercase half)
3934 case 'A':
3935 case 'B':
3936 case 'C':
3937 case 'D':
3938 case 'E':
3939 case 'F':
3940 case 'G':
3941 case 'H':
3942 case 'I':
3943 case 'J':
3944 case 'K':
3945 case 'L':
3946 case 'M':
3947 case 'N':
3948 case 'O':
3949 case 'P':
3950 case 'Q':
3951 case 'R':
3952 case 'S':
3953 case 'T':
3954 case 'U':
3955 case 'V':
3956 case 'W':
3957 case 'X':
3958 case 'Y':
3959 case 'Z':
3960
3961 // From rule "ALPHA" (lowercase half)
3962 case 'a':
3963 case 'b':
3964 case 'c':
3965 case 'd':
3966 case 'e':
3967 case 'f':
3968 case 'g':
3969 case 'h':
3970 case 'i':
3971 case 'j':
3972 case 'k':
3973 case 'l':
3974 case 'm':
3975 case 'n':
3976 case 'o':
3977 case 'p':
3978 case 'q':
3979 case 'r':
3980 case 's':
3981 case 't':
3982 case 'u':
3983 case 'v':
3984 case 'w':
3985 case 'x':
3986 case 'y':
3987 case 'z':
3988
3989 // From rule "DIGIT"
3990 case '0':
3991 case '1':
3992 case '2':
3993 case '3':
3994 case '4':
3995 case '5':
3996 case '6':
3997 case '7':
3998 case '8':
3999 case '9':
4000
4001 // From rule "pct-encoded"
4002 case '%':
4003
4004 // From rule "unreserved"
4005 case '-':
4006 case '.':
4007 case '_':
4008 case '~':
4009
4010 // From rule "gen-delims"
4011 case ':':
4012 case '/':
4013 case '?':
4014 case '#':
4015 case '[':
4016 case ']':
4017 case '@':
4018
4019 // From rule "sub-delims"
4020 case '!':
4021 case '$':
4022 case '&':
4023 case '\'':
4024 case '(':
4025 case ')':
4026 case '*':
4027 case '+':
4028 case ',':
4029 case ';':
4030 case '=':
4031 return XML_TRUE;
4032
4033 default:
4034 return XML_FALSE;
4035 }
4036 }
4037
4038 /* addBinding() overwrites the value of prefix->binding without checking.
4039 Therefore one must keep track of the old value outside of addBinding().
4040 */
4041 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)4042 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4043 const XML_Char *uri, BINDING **bindingsPtr) {
4044 // "http://www.w3.org/XML/1998/namespace"
4045 static const XML_Char xmlNamespace[]
4046 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4047 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4048 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4049 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4050 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4051 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4052 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4053 ASCII_e, '\0'};
4054 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4055 // "http://www.w3.org/2000/xmlns/"
4056 static const XML_Char xmlnsNamespace[]
4057 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4058 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4059 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4060 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4061 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4062 static const int xmlnsLen
4063 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4064
4065 XML_Bool mustBeXML = XML_FALSE;
4066 XML_Bool isXML = XML_TRUE;
4067 XML_Bool isXMLNS = XML_TRUE;
4068
4069 BINDING *b;
4070 int len;
4071
4072 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4073 if (*uri == XML_T('\0') && prefix->name)
4074 return XML_ERROR_UNDECLARING_PREFIX;
4075
4076 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4077 && prefix->name[1] == XML_T(ASCII_m)
4078 && prefix->name[2] == XML_T(ASCII_l)) {
4079 /* Not allowed to bind xmlns */
4080 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4081 && prefix->name[5] == XML_T('\0'))
4082 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4083
4084 if (prefix->name[3] == XML_T('\0'))
4085 mustBeXML = XML_TRUE;
4086 }
4087
4088 for (len = 0; uri[len]; len++) {
4089 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4090 isXML = XML_FALSE;
4091
4092 if (! mustBeXML && isXMLNS
4093 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4094 isXMLNS = XML_FALSE;
4095
4096 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4097 // today (and is not REQUIRED to do so with regard to the XML 1.0
4098 // namespaces specification) we have to at least make sure, that
4099 // the application on top of Expat (that is likely splitting expanded
4100 // element names ("qualified names") of form
4101 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4102 // in its element handler code) cannot be confused by an attacker
4103 // putting additional namespace separator characters into namespace
4104 // declarations. That would be ambiguous and not to be expected.
4105 //
4106 // While the HTML API docs of function XML_ParserCreateNS have been
4107 // advising against use of a namespace separator character that can
4108 // appear in a URI for >20 years now, some widespread applications
4109 // are using URI characters (':' (colon) in particular) for a
4110 // namespace separator, in practice. To keep these applications
4111 // functional, we only reject namespaces URIs containing the
4112 // application-chosen namespace separator if the chosen separator
4113 // is a non-URI character with regard to RFC 3986.
4114 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4115 && ! is_rfc3986_uri_char(uri[len])) {
4116 return XML_ERROR_SYNTAX;
4117 }
4118 }
4119 isXML = isXML && len == xmlLen;
4120 isXMLNS = isXMLNS && len == xmlnsLen;
4121
4122 if (mustBeXML != isXML)
4123 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4124 : XML_ERROR_RESERVED_NAMESPACE_URI;
4125
4126 if (isXMLNS)
4127 return XML_ERROR_RESERVED_NAMESPACE_URI;
4128
4129 if (parser->m_namespaceSeparator)
4130 len++;
4131 if (parser->m_freeBindingList) {
4132 b = parser->m_freeBindingList;
4133 if (len > b->uriAlloc) {
4134 /* Detect and prevent integer overflow */
4135 if (len > INT_MAX - EXPAND_SPARE) {
4136 return XML_ERROR_NO_MEMORY;
4137 }
4138
4139 /* Detect and prevent integer overflow.
4140 * The preprocessor guard addresses the "always false" warning
4141 * from -Wtype-limits on platforms where
4142 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4143 #if UINT_MAX >= SIZE_MAX
4144 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4145 return XML_ERROR_NO_MEMORY;
4146 }
4147 #endif
4148
4149 XML_Char *temp = (XML_Char *)REALLOC(
4150 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4151 if (temp == NULL)
4152 return XML_ERROR_NO_MEMORY;
4153 b->uri = temp;
4154 b->uriAlloc = len + EXPAND_SPARE;
4155 }
4156 parser->m_freeBindingList = b->nextTagBinding;
4157 } else {
4158 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4159 if (! b)
4160 return XML_ERROR_NO_MEMORY;
4161
4162 /* Detect and prevent integer overflow */
4163 if (len > INT_MAX - EXPAND_SPARE) {
4164 return XML_ERROR_NO_MEMORY;
4165 }
4166 /* Detect and prevent integer overflow.
4167 * The preprocessor guard addresses the "always false" warning
4168 * from -Wtype-limits on platforms where
4169 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4170 #if UINT_MAX >= SIZE_MAX
4171 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4172 return XML_ERROR_NO_MEMORY;
4173 }
4174 #endif
4175
4176 b->uri
4177 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4178 if (! b->uri) {
4179 FREE(parser, b);
4180 return XML_ERROR_NO_MEMORY;
4181 }
4182 b->uriAlloc = len + EXPAND_SPARE;
4183 }
4184 b->uriLen = len;
4185 memcpy(b->uri, uri, len * sizeof(XML_Char));
4186 if (parser->m_namespaceSeparator)
4187 b->uri[len - 1] = parser->m_namespaceSeparator;
4188 b->prefix = prefix;
4189 b->attId = attId;
4190 b->prevPrefixBinding = prefix->binding;
4191 /* NULL binding when default namespace undeclared */
4192 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4193 prefix->binding = NULL;
4194 else
4195 prefix->binding = b;
4196 b->nextTagBinding = *bindingsPtr;
4197 *bindingsPtr = b;
4198 /* if attId == NULL then we are not starting a namespace scope */
4199 if (attId && parser->m_startNamespaceDeclHandler)
4200 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4201 prefix->binding ? uri : 0);
4202 return XML_ERROR_NONE;
4203 }
4204
4205 /* The idea here is to avoid using stack for each CDATA section when
4206 the whole file is parsed with one call.
4207 */
4208 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4209 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4210 const char **endPtr) {
4211 enum XML_Error result = doCdataSection(
4212 parser, parser->m_encoding, &start, end, endPtr,
4213 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4214 if (result != XML_ERROR_NONE)
4215 return result;
4216 if (start) {
4217 if (parser->m_parentParser) { /* we are parsing an external entity */
4218 parser->m_processor = externalEntityContentProcessor;
4219 return externalEntityContentProcessor(parser, start, end, endPtr);
4220 } else {
4221 parser->m_processor = contentProcessor;
4222 return contentProcessor(parser, start, end, endPtr);
4223 }
4224 }
4225 return result;
4226 }
4227
/* Tokenizes the inside of a CDATA section starting at *startPtr and
   reports its content to the registered handlers.

   startPtr gets set to non-null if the section is closed, and to null if
   the section is not yet closed.

   Parameters:
   - parser:   parser whose handlers, data buffer and parsing status are
               used
   - enc:      encoding of the input; when it is parser->m_encoding the
               parser's own event pointers are updated, otherwise those of
               parser->m_openInternalEntities
   - startPtr: in: where to start; out: position after the closing token,
               or NULL (see above)
   - end:      end of the available input
   - nextPtr:  out: where parsing should resume; written on the returns
               that report XML_ERROR_NONE
   - haveMore: when true, running out of input mid-token is not an error;
               *nextPtr is set to the start of the incomplete token
   - account:  passed to accountingDiffTolerated() when XML_GE == 1,
               unused otherwise

   Returns XML_ERROR_NONE when the section was closed, parsing was
   suspended, or more input is needed; XML_ERROR_ABORTED when the parsing
   status is XML_FINISHED after a token was handled; otherwise the error
   matching the token (invalid token, partial character, unclosed CDATA
   section, amplification limit breach, unexpected state).
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  /* pick the pair of event pointers that belongs to this input */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  *startPtr = NULL; /* "not closed" until the closing token is seen */

  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      /* end of the section: notify, hand back the position after it */
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      *startPtr = next;
      *nextPtr = next;
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* a newline token is reported as the single character 0xA */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert through m_dataBuf, one buffer-full per handler call,
             until XmlConvert() reports that the input is used up. */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            /* XmlConvert() advanced s; the next chunk starts there */
            *eventPP = s;
          }
        } else
          /* no conversion needed: hand the input over in place */
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        /* resume at the start of the incomplete token */
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* The parsing status is examined after every token that did not
       return above. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *eventPP = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      *eventPP = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
      }
      /* Fall through */
    default:;
      /* move on to the next token */
      *eventPP = s = next;
    }
  }
  /* not reached */
}
4356
4357 #ifdef XML_DTD
4358
4359 /* The idea here is to avoid using stack for each IGNORE section when
4360 the whole file is parsed with one call.
4361 */
4362 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4363 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4364 const char **endPtr) {
4365 enum XML_Error result
4366 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4367 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4368 if (result != XML_ERROR_NONE)
4369 return result;
4370 if (start) {
4371 parser->m_processor = prologProcessor;
4372 return prologProcessor(parser, start, end, endPtr);
4373 }
4374 return result;
4375 }
4376
4377 /* startPtr gets set to non-null is the section is closed, and to null
4378 if the section is not yet closed.
4379 */
4380 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4381 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4382 const char *end, const char **nextPtr, XML_Bool haveMore) {
4383 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4384 int tok;
4385 const char *s = *startPtr;
4386 const char **eventPP;
4387 const char **eventEndPP;
4388 if (enc == parser->m_encoding) {
4389 eventPP = &parser->m_eventPtr;
4390 *eventPP = s;
4391 eventEndPP = &parser->m_eventEndPtr;
4392 } else {
4393 /* It's not entirely clear, but it seems the following two lines
4394 * of code cannot be executed. The only occasions on which 'enc'
4395 * is not 'encoding' are when this function is called
4396 * from the internal entity processing, and IGNORE sections are an
4397 * error in internal entities.
4398 *
4399 * Since it really isn't clear that this is true, we keep the code
4400 * and just remove it from our coverage tests.
4401 *
4402 * LCOV_EXCL_START
4403 */
4404 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4405 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4406 /* LCOV_EXCL_STOP */
4407 }
4408 *eventPP = s;
4409 *startPtr = NULL;
4410 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4411 # if XML_GE == 1
4412 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4413 XML_ACCOUNT_DIRECT)) {
4414 accountingOnAbort(parser);
4415 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4416 }
4417 # endif
4418 *eventEndPP = next;
4419 switch (tok) {
4420 case XML_TOK_IGNORE_SECT:
4421 if (parser->m_defaultHandler)
4422 reportDefault(parser, enc, s, next);
4423 *startPtr = next;
4424 *nextPtr = next;
4425 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4426 return XML_ERROR_ABORTED;
4427 else
4428 return XML_ERROR_NONE;
4429 case XML_TOK_INVALID:
4430 *eventPP = next;
4431 return XML_ERROR_INVALID_TOKEN;
4432 case XML_TOK_PARTIAL_CHAR:
4433 if (haveMore) {
4434 *nextPtr = s;
4435 return XML_ERROR_NONE;
4436 }
4437 return XML_ERROR_PARTIAL_CHAR;
4438 case XML_TOK_PARTIAL:
4439 case XML_TOK_NONE:
4440 if (haveMore) {
4441 *nextPtr = s;
4442 return XML_ERROR_NONE;
4443 }
4444 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4445 default:
4446 /* All of the tokens that XmlIgnoreSectionTok() returns have
4447 * explicit cases to handle them, so this default case is never
4448 * executed. We keep it as a safety net anyway, and remove it
4449 * from our test coverage statistics.
4450 *
4451 * LCOV_EXCL_START
4452 */
4453 *eventPP = next;
4454 return XML_ERROR_UNEXPECTED_STATE;
4455 /* LCOV_EXCL_STOP */
4456 }
4457 /* not reached */
4458 }
4459
4460 #endif /* XML_DTD */
4461
4462 static enum XML_Error
initializeEncoding(XML_Parser parser)4463 initializeEncoding(XML_Parser parser) {
4464 const char *s;
4465 #ifdef XML_UNICODE
4466 char encodingBuf[128];
4467 /* See comments about `protocolEncodingName` in parserInit() */
4468 if (! parser->m_protocolEncodingName)
4469 s = NULL;
4470 else {
4471 int i;
4472 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4473 if (i == sizeof(encodingBuf) - 1
4474 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4475 encodingBuf[0] = '\0';
4476 break;
4477 }
4478 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4479 }
4480 encodingBuf[i] = '\0';
4481 s = encodingBuf;
4482 }
4483 #else
4484 s = parser->m_protocolEncodingName;
4485 #endif
4486 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4487 &parser->m_initEncoding, &parser->m_encoding, s))
4488 return XML_ERROR_NONE;
4489 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4490 }
4491
4492 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4493 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4494 const char *next) {
4495 const char *encodingName = NULL;
4496 const XML_Char *storedEncName = NULL;
4497 const ENCODING *newEncoding = NULL;
4498 const char *version = NULL;
4499 const char *versionend = NULL;
4500 const XML_Char *storedversion = NULL;
4501 int standalone = -1;
4502
4503 #if XML_GE == 1
4504 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4505 XML_ACCOUNT_DIRECT)) {
4506 accountingOnAbort(parser);
4507 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4508 }
4509 #endif
4510
4511 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4512 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4513 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4514 if (isGeneralTextEntity)
4515 return XML_ERROR_TEXT_DECL;
4516 else
4517 return XML_ERROR_XML_DECL;
4518 }
4519 if (! isGeneralTextEntity && standalone == 1) {
4520 parser->m_dtd->standalone = XML_TRUE;
4521 #ifdef XML_DTD
4522 if (parser->m_paramEntityParsing
4523 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4524 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4525 #endif /* XML_DTD */
4526 }
4527 if (parser->m_xmlDeclHandler) {
4528 if (encodingName != NULL) {
4529 storedEncName = poolStoreString(
4530 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4531 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4532 if (! storedEncName)
4533 return XML_ERROR_NO_MEMORY;
4534 poolFinish(&parser->m_temp2Pool);
4535 }
4536 if (version) {
4537 storedversion
4538 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4539 versionend - parser->m_encoding->minBytesPerChar);
4540 if (! storedversion)
4541 return XML_ERROR_NO_MEMORY;
4542 }
4543 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4544 standalone);
4545 } else if (parser->m_defaultHandler)
4546 reportDefault(parser, parser->m_encoding, s, next);
4547 if (parser->m_protocolEncodingName == NULL) {
4548 if (newEncoding) {
4549 /* Check that the specified encoding does not conflict with what
4550 * the parser has already deduced. Do we have the same number
4551 * of bytes in the smallest representation of a character? If
4552 * this is UTF-16, is it the same endianness?
4553 */
4554 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4555 || (newEncoding->minBytesPerChar == 2
4556 && newEncoding != parser->m_encoding)) {
4557 parser->m_eventPtr = encodingName;
4558 return XML_ERROR_INCORRECT_ENCODING;
4559 }
4560 parser->m_encoding = newEncoding;
4561 } else if (encodingName) {
4562 enum XML_Error result;
4563 if (! storedEncName) {
4564 storedEncName = poolStoreString(
4565 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4566 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4567 if (! storedEncName)
4568 return XML_ERROR_NO_MEMORY;
4569 }
4570 result = handleUnknownEncoding(parser, storedEncName);
4571 poolClear(&parser->m_temp2Pool);
4572 if (result == XML_ERROR_UNKNOWN_ENCODING)
4573 parser->m_eventPtr = encodingName;
4574 return result;
4575 }
4576 }
4577
4578 if (storedEncName || storedversion)
4579 poolClear(&parser->m_temp2Pool);
4580
4581 return XML_ERROR_NONE;
4582 }
4583
4584 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4585 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4586 if (parser->m_unknownEncodingHandler) {
4587 XML_Encoding info;
4588 int i;
4589 for (i = 0; i < 256; i++)
4590 info.map[i] = -1;
4591 info.convert = NULL;
4592 info.data = NULL;
4593 info.release = NULL;
4594 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4595 encodingName, &info)) {
4596 ENCODING *enc;
4597 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4598 if (! parser->m_unknownEncodingMem) {
4599 if (info.release)
4600 info.release(info.data);
4601 return XML_ERROR_NO_MEMORY;
4602 }
4603 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4604 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4605 if (enc) {
4606 parser->m_unknownEncodingData = info.data;
4607 parser->m_unknownEncodingRelease = info.release;
4608 parser->m_encoding = enc;
4609 return XML_ERROR_NONE;
4610 }
4611 }
4612 if (info.release != NULL)
4613 info.release(info.data);
4614 }
4615 return XML_ERROR_UNKNOWN_ENCODING;
4616 }
4617
4618 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4619 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4620 const char **nextPtr) {
4621 enum XML_Error result = initializeEncoding(parser);
4622 if (result != XML_ERROR_NONE)
4623 return result;
4624 parser->m_processor = prologProcessor;
4625 return prologProcessor(parser, s, end, nextPtr);
4626 }
4627
4628 #ifdef XML_DTD
4629
4630 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4631 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4632 const char **nextPtr) {
4633 enum XML_Error result = initializeEncoding(parser);
4634 if (result != XML_ERROR_NONE)
4635 return result;
4636
4637 /* we know now that XML_Parse(Buffer) has been called,
4638 so we consider the external parameter entity read */
4639 parser->m_dtd->paramEntityRead = XML_TRUE;
4640
4641 if (parser->m_prologState.inEntityValue) {
4642 parser->m_processor = entityValueInitProcessor;
4643 return entityValueInitProcessor(parser, s, end, nextPtr);
4644 } else {
4645 parser->m_processor = externalParEntProcessor;
4646 return externalParEntProcessor(parser, s, end, nextPtr);
4647 }
4648 }
4649
4650 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4651 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4652 const char **nextPtr) {
4653 int tok;
4654 const char *start = s;
4655 const char *next = start;
4656 parser->m_eventPtr = start;
4657
4658 for (;;) {
4659 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4660 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4661 - storeEntityValue
4662 - processXmlDecl
4663 */
4664 parser->m_eventEndPtr = next;
4665 if (tok <= 0) {
4666 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4667 *nextPtr = s;
4668 return XML_ERROR_NONE;
4669 }
4670 switch (tok) {
4671 case XML_TOK_INVALID:
4672 return XML_ERROR_INVALID_TOKEN;
4673 case XML_TOK_PARTIAL:
4674 return XML_ERROR_UNCLOSED_TOKEN;
4675 case XML_TOK_PARTIAL_CHAR:
4676 return XML_ERROR_PARTIAL_CHAR;
4677 case XML_TOK_NONE: /* start == end */
4678 default:
4679 break;
4680 }
4681 /* found end of entity value - can store it now */
4682 return storeEntityValue(parser, parser->m_encoding, s, end,
4683 XML_ACCOUNT_DIRECT, NULL);
4684 } else if (tok == XML_TOK_XML_DECL) {
4685 enum XML_Error result;
4686 result = processXmlDecl(parser, 0, start, next);
4687 if (result != XML_ERROR_NONE)
4688 return result;
4689 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4690 * that to happen, a parameter entity parsing handler must have attempted
4691 * to suspend the parser, which fails and raises an error. The parser can
4692 * be aborted, but can't be suspended.
4693 */
4694 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4695 return XML_ERROR_ABORTED;
4696 *nextPtr = next;
4697 /* stop scanning for text declaration - we found one */
4698 parser->m_processor = entityValueProcessor;
4699 return entityValueProcessor(parser, next, end, nextPtr);
4700 }
4701 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4702 must move s and nextPtr forward to consume the BOM.
4703
4704 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4705 would leave the BOM in the buffer and return. On the next call to this
4706 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4707 is not valid to have multiple BOMs.
4708 */
4709 else if (tok == XML_TOK_BOM) {
4710 # if XML_GE == 1
4711 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4712 XML_ACCOUNT_DIRECT)) {
4713 accountingOnAbort(parser);
4714 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4715 }
4716 # endif
4717
4718 *nextPtr = next;
4719 s = next;
4720 }
4721 /* If we get this token, we have the start of what might be a
4722 normal tag, but not a declaration (i.e. it doesn't begin with
4723 "<!"). In a DTD context, that isn't legal.
4724 */
4725 else if (tok == XML_TOK_INSTANCE_START) {
4726 *nextPtr = next;
4727 return XML_ERROR_SYNTAX;
4728 }
4729 start = next;
4730 parser->m_eventPtr = start;
4731 }
4732 }
4733
4734 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4735 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4736 const char **nextPtr) {
4737 const char *next = s;
4738 int tok;
4739
4740 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4741 if (tok <= 0) {
4742 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4743 *nextPtr = s;
4744 return XML_ERROR_NONE;
4745 }
4746 switch (tok) {
4747 case XML_TOK_INVALID:
4748 return XML_ERROR_INVALID_TOKEN;
4749 case XML_TOK_PARTIAL:
4750 return XML_ERROR_UNCLOSED_TOKEN;
4751 case XML_TOK_PARTIAL_CHAR:
4752 return XML_ERROR_PARTIAL_CHAR;
4753 case XML_TOK_NONE: /* start == end */
4754 default:
4755 break;
4756 }
4757 }
4758 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4759 However, when parsing an external subset, doProlog will not accept a BOM
4760 as valid, and report a syntax error, so we have to skip the BOM, and
4761 account for the BOM bytes.
4762 */
4763 else if (tok == XML_TOK_BOM) {
4764 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4765 XML_ACCOUNT_DIRECT)) {
4766 accountingOnAbort(parser);
4767 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4768 }
4769
4770 s = next;
4771 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4772 }
4773
4774 parser->m_processor = prologProcessor;
4775 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4776 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4777 XML_ACCOUNT_DIRECT);
4778 }
4779
4780 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4781 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4782 const char **nextPtr) {
4783 const char *start = s;
4784 const char *next = s;
4785 const ENCODING *enc = parser->m_encoding;
4786 int tok;
4787
4788 for (;;) {
4789 tok = XmlPrologTok(enc, start, end, &next);
4790 /* Note: These bytes are accounted later in:
4791 - storeEntityValue
4792 */
4793 if (tok <= 0) {
4794 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4795 *nextPtr = s;
4796 return XML_ERROR_NONE;
4797 }
4798 switch (tok) {
4799 case XML_TOK_INVALID:
4800 return XML_ERROR_INVALID_TOKEN;
4801 case XML_TOK_PARTIAL:
4802 return XML_ERROR_UNCLOSED_TOKEN;
4803 case XML_TOK_PARTIAL_CHAR:
4804 return XML_ERROR_PARTIAL_CHAR;
4805 case XML_TOK_NONE: /* start == end */
4806 default:
4807 break;
4808 }
4809 /* found end of entity value - can store it now */
4810 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
4811 }
4812 start = next;
4813 }
4814 }
4815
4816 #endif /* XML_DTD */
4817
4818 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4819 prologProcessor(XML_Parser parser, const char *s, const char *end,
4820 const char **nextPtr) {
4821 const char *next = s;
4822 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4823 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4824 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4825 XML_ACCOUNT_DIRECT);
4826 }
4827
4828 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4829 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4830 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4831 XML_Bool allowClosingDoctype, enum XML_Account account) {
4832 #ifdef XML_DTD
4833 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4834 #endif /* XML_DTD */
4835 static const XML_Char atypeCDATA[]
4836 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4837 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4838 static const XML_Char atypeIDREF[]
4839 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4840 static const XML_Char atypeIDREFS[]
4841 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4842 static const XML_Char atypeENTITY[]
4843 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4844 static const XML_Char atypeENTITIES[]
4845 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4846 ASCII_I, ASCII_E, ASCII_S, '\0'};
4847 static const XML_Char atypeNMTOKEN[]
4848 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4849 static const XML_Char atypeNMTOKENS[]
4850 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4851 ASCII_E, ASCII_N, ASCII_S, '\0'};
4852 static const XML_Char notationPrefix[]
4853 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4854 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4855 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4856 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4857
4858 #ifndef XML_DTD
4859 UNUSED_P(account);
4860 #endif
4861
4862 /* save one level of indirection */
4863 DTD *const dtd = parser->m_dtd;
4864
4865 const char **eventPP;
4866 const char **eventEndPP;
4867 enum XML_Content_Quant quant;
4868
4869 if (enc == parser->m_encoding) {
4870 eventPP = &parser->m_eventPtr;
4871 eventEndPP = &parser->m_eventEndPtr;
4872 } else {
4873 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4874 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4875 }
4876
4877 for (;;) {
4878 int role;
4879 XML_Bool handleDefault = XML_TRUE;
4880 *eventPP = s;
4881 *eventEndPP = next;
4882 if (tok <= 0) {
4883 if (haveMore && tok != XML_TOK_INVALID) {
4884 *nextPtr = s;
4885 return XML_ERROR_NONE;
4886 }
4887 switch (tok) {
4888 case XML_TOK_INVALID:
4889 *eventPP = next;
4890 return XML_ERROR_INVALID_TOKEN;
4891 case XML_TOK_PARTIAL:
4892 return XML_ERROR_UNCLOSED_TOKEN;
4893 case XML_TOK_PARTIAL_CHAR:
4894 return XML_ERROR_PARTIAL_CHAR;
4895 case -XML_TOK_PROLOG_S:
4896 tok = -tok;
4897 break;
4898 case XML_TOK_NONE:
4899 #ifdef XML_DTD
4900 /* for internal PE NOT referenced between declarations */
4901 if (enc != parser->m_encoding
4902 && ! parser->m_openInternalEntities->betweenDecl) {
4903 *nextPtr = s;
4904 return XML_ERROR_NONE;
4905 }
4906 /* WFC: PE Between Declarations - must check that PE contains
4907 complete markup, not only for external PEs, but also for
4908 internal PEs if the reference occurs between declarations.
4909 */
4910 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4911 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4912 == XML_ROLE_ERROR)
4913 return XML_ERROR_INCOMPLETE_PE;
4914 *nextPtr = s;
4915 return XML_ERROR_NONE;
4916 }
4917 #endif /* XML_DTD */
4918 return XML_ERROR_NO_ELEMENTS;
4919 default:
4920 tok = -tok;
4921 next = end;
4922 break;
4923 }
4924 }
4925 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4926 #if XML_GE == 1
4927 switch (role) {
4928 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4929 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4930 # ifdef XML_DTD
4931 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4932 # endif
4933 break;
4934 default:
4935 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4936 accountingOnAbort(parser);
4937 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4938 }
4939 }
4940 #endif
4941 switch (role) {
4942 case XML_ROLE_XML_DECL: {
4943 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4944 if (result != XML_ERROR_NONE)
4945 return result;
4946 enc = parser->m_encoding;
4947 handleDefault = XML_FALSE;
4948 } break;
4949 case XML_ROLE_DOCTYPE_NAME:
4950 if (parser->m_startDoctypeDeclHandler) {
4951 parser->m_doctypeName
4952 = poolStoreString(&parser->m_tempPool, enc, s, next);
4953 if (! parser->m_doctypeName)
4954 return XML_ERROR_NO_MEMORY;
4955 poolFinish(&parser->m_tempPool);
4956 parser->m_doctypePubid = NULL;
4957 handleDefault = XML_FALSE;
4958 }
4959 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4960 break;
4961 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4962 if (parser->m_startDoctypeDeclHandler) {
4963 parser->m_startDoctypeDeclHandler(
4964 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4965 parser->m_doctypePubid, 1);
4966 parser->m_doctypeName = NULL;
4967 poolClear(&parser->m_tempPool);
4968 handleDefault = XML_FALSE;
4969 }
4970 break;
4971 #ifdef XML_DTD
4972 case XML_ROLE_TEXT_DECL: {
4973 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4974 if (result != XML_ERROR_NONE)
4975 return result;
4976 enc = parser->m_encoding;
4977 handleDefault = XML_FALSE;
4978 } break;
4979 #endif /* XML_DTD */
4980 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4981 #ifdef XML_DTD
4982 parser->m_useForeignDTD = XML_FALSE;
4983 parser->m_declEntity = (ENTITY *)lookup(
4984 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4985 if (! parser->m_declEntity)
4986 return XML_ERROR_NO_MEMORY;
4987 #endif /* XML_DTD */
4988 dtd->hasParamEntityRefs = XML_TRUE;
4989 if (parser->m_startDoctypeDeclHandler) {
4990 XML_Char *pubId;
4991 if (! XmlIsPublicId(enc, s, next, eventPP))
4992 return XML_ERROR_PUBLICID;
4993 pubId = poolStoreString(&parser->m_tempPool, enc,
4994 s + enc->minBytesPerChar,
4995 next - enc->minBytesPerChar);
4996 if (! pubId)
4997 return XML_ERROR_NO_MEMORY;
4998 normalizePublicId(pubId);
4999 poolFinish(&parser->m_tempPool);
5000 parser->m_doctypePubid = pubId;
5001 handleDefault = XML_FALSE;
5002 goto alreadyChecked;
5003 }
5004 /* fall through */
5005 case XML_ROLE_ENTITY_PUBLIC_ID:
5006 if (! XmlIsPublicId(enc, s, next, eventPP))
5007 return XML_ERROR_PUBLICID;
5008 alreadyChecked:
5009 if (dtd->keepProcessing && parser->m_declEntity) {
5010 XML_Char *tem
5011 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5012 next - enc->minBytesPerChar);
5013 if (! tem)
5014 return XML_ERROR_NO_MEMORY;
5015 normalizePublicId(tem);
5016 parser->m_declEntity->publicId = tem;
5017 poolFinish(&dtd->pool);
5018 /* Don't suppress the default handler if we fell through from
5019 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5020 */
5021 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5022 handleDefault = XML_FALSE;
5023 }
5024 break;
5025 case XML_ROLE_DOCTYPE_CLOSE:
5026 if (allowClosingDoctype != XML_TRUE) {
5027 /* Must not close doctype from within expanded parameter entities */
5028 return XML_ERROR_INVALID_TOKEN;
5029 }
5030
5031 if (parser->m_doctypeName) {
5032 parser->m_startDoctypeDeclHandler(
5033 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5034 parser->m_doctypePubid, 0);
5035 poolClear(&parser->m_tempPool);
5036 handleDefault = XML_FALSE;
5037 }
5038 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5039 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5040 was not set, indicating an external subset
5041 */
5042 #ifdef XML_DTD
5043 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5044 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5045 dtd->hasParamEntityRefs = XML_TRUE;
5046 if (parser->m_paramEntityParsing
5047 && parser->m_externalEntityRefHandler) {
5048 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5049 externalSubsetName, sizeof(ENTITY));
5050 if (! entity) {
5051 /* The external subset name "#" will have already been
5052 * inserted into the hash table at the start of the
5053 * external entity parsing, so no allocation will happen
5054 * and lookup() cannot fail.
5055 */
5056 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5057 }
5058 if (parser->m_useForeignDTD)
5059 entity->base = parser->m_curBase;
5060 dtd->paramEntityRead = XML_FALSE;
5061 if (! parser->m_externalEntityRefHandler(
5062 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5063 entity->systemId, entity->publicId))
5064 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5065 if (dtd->paramEntityRead) {
5066 if (! dtd->standalone && parser->m_notStandaloneHandler
5067 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5068 return XML_ERROR_NOT_STANDALONE;
5069 }
5070 /* if we didn't read the foreign DTD then this means that there
5071 is no external subset and we must reset dtd->hasParamEntityRefs
5072 */
5073 else if (! parser->m_doctypeSysid)
5074 dtd->hasParamEntityRefs = hadParamEntityRefs;
5075 /* end of DTD - no need to update dtd->keepProcessing */
5076 }
5077 parser->m_useForeignDTD = XML_FALSE;
5078 }
5079 #endif /* XML_DTD */
5080 if (parser->m_endDoctypeDeclHandler) {
5081 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5082 handleDefault = XML_FALSE;
5083 }
5084 break;
5085 case XML_ROLE_INSTANCE_START:
5086 #ifdef XML_DTD
5087 /* if there is no DOCTYPE declaration then now is the
5088 last chance to read the foreign DTD
5089 */
5090 if (parser->m_useForeignDTD) {
5091 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5092 dtd->hasParamEntityRefs = XML_TRUE;
5093 if (parser->m_paramEntityParsing
5094 && parser->m_externalEntityRefHandler) {
5095 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5096 externalSubsetName, sizeof(ENTITY));
5097 if (! entity)
5098 return XML_ERROR_NO_MEMORY;
5099 entity->base = parser->m_curBase;
5100 dtd->paramEntityRead = XML_FALSE;
5101 if (! parser->m_externalEntityRefHandler(
5102 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5103 entity->systemId, entity->publicId))
5104 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5105 if (dtd->paramEntityRead) {
5106 if (! dtd->standalone && parser->m_notStandaloneHandler
5107 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5108 return XML_ERROR_NOT_STANDALONE;
5109 }
5110 /* if we didn't read the foreign DTD then this means that there
5111 is no external subset and we must reset dtd->hasParamEntityRefs
5112 */
5113 else
5114 dtd->hasParamEntityRefs = hadParamEntityRefs;
5115 /* end of DTD - no need to update dtd->keepProcessing */
5116 }
5117 }
5118 #endif /* XML_DTD */
5119 parser->m_processor = contentProcessor;
5120 return contentProcessor(parser, s, end, nextPtr);
5121 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5122 parser->m_declElementType = getElementType(parser, enc, s, next);
5123 if (! parser->m_declElementType)
5124 return XML_ERROR_NO_MEMORY;
5125 goto checkAttListDeclHandler;
5126 case XML_ROLE_ATTRIBUTE_NAME:
5127 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5128 if (! parser->m_declAttributeId)
5129 return XML_ERROR_NO_MEMORY;
5130 parser->m_declAttributeIsCdata = XML_FALSE;
5131 parser->m_declAttributeType = NULL;
5132 parser->m_declAttributeIsId = XML_FALSE;
5133 goto checkAttListDeclHandler;
5134 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5135 parser->m_declAttributeIsCdata = XML_TRUE;
5136 parser->m_declAttributeType = atypeCDATA;
5137 goto checkAttListDeclHandler;
5138 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5139 parser->m_declAttributeIsId = XML_TRUE;
5140 parser->m_declAttributeType = atypeID;
5141 goto checkAttListDeclHandler;
5142 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5143 parser->m_declAttributeType = atypeIDREF;
5144 goto checkAttListDeclHandler;
5145 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5146 parser->m_declAttributeType = atypeIDREFS;
5147 goto checkAttListDeclHandler;
5148 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5149 parser->m_declAttributeType = atypeENTITY;
5150 goto checkAttListDeclHandler;
5151 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5152 parser->m_declAttributeType = atypeENTITIES;
5153 goto checkAttListDeclHandler;
5154 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5155 parser->m_declAttributeType = atypeNMTOKEN;
5156 goto checkAttListDeclHandler;
5157 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5158 parser->m_declAttributeType = atypeNMTOKENS;
5159 checkAttListDeclHandler:
5160 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5161 handleDefault = XML_FALSE;
5162 break;
5163 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5164 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5165 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5166 const XML_Char *prefix;
5167 if (parser->m_declAttributeType) {
5168 prefix = enumValueSep;
5169 } else {
5170 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5171 : enumValueStart);
5172 }
5173 if (! poolAppendString(&parser->m_tempPool, prefix))
5174 return XML_ERROR_NO_MEMORY;
5175 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5176 return XML_ERROR_NO_MEMORY;
5177 parser->m_declAttributeType = parser->m_tempPool.start;
5178 handleDefault = XML_FALSE;
5179 }
5180 break;
5181 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5182 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5183 if (dtd->keepProcessing) {
5184 if (! defineAttribute(parser->m_declElementType,
5185 parser->m_declAttributeId,
5186 parser->m_declAttributeIsCdata,
5187 parser->m_declAttributeIsId, 0, parser))
5188 return XML_ERROR_NO_MEMORY;
5189 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5190 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5191 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5192 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5193 /* Enumerated or Notation type */
5194 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5195 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5196 return XML_ERROR_NO_MEMORY;
5197 parser->m_declAttributeType = parser->m_tempPool.start;
5198 poolFinish(&parser->m_tempPool);
5199 }
5200 *eventEndPP = s;
5201 parser->m_attlistDeclHandler(
5202 parser->m_handlerArg, parser->m_declElementType->name,
5203 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5204 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5205 handleDefault = XML_FALSE;
5206 }
5207 }
5208 poolClear(&parser->m_tempPool);
5209 break;
5210 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5211 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5212 if (dtd->keepProcessing) {
5213 const XML_Char *attVal;
5214 enum XML_Error result = storeAttributeValue(
5215 parser, enc, parser->m_declAttributeIsCdata,
5216 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5217 XML_ACCOUNT_NONE);
5218 if (result)
5219 return result;
5220 attVal = poolStart(&dtd->pool);
5221 poolFinish(&dtd->pool);
5222 /* ID attributes aren't allowed to have a default */
5223 if (! defineAttribute(
5224 parser->m_declElementType, parser->m_declAttributeId,
5225 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5226 return XML_ERROR_NO_MEMORY;
5227 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5228 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5229 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5230 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5231 /* Enumerated or Notation type */
5232 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5233 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5234 return XML_ERROR_NO_MEMORY;
5235 parser->m_declAttributeType = parser->m_tempPool.start;
5236 poolFinish(&parser->m_tempPool);
5237 }
5238 *eventEndPP = s;
5239 parser->m_attlistDeclHandler(
5240 parser->m_handlerArg, parser->m_declElementType->name,
5241 parser->m_declAttributeId->name, parser->m_declAttributeType,
5242 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5243 poolClear(&parser->m_tempPool);
5244 handleDefault = XML_FALSE;
5245 }
5246 }
5247 break;
5248 case XML_ROLE_ENTITY_VALUE:
5249 if (dtd->keepProcessing) {
5250 #if XML_GE == 1
5251 // This will store the given replacement text in
5252 // parser->m_declEntity->textPtr.
5253 enum XML_Error result = callStoreEntityValue(
5254 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5255 XML_ACCOUNT_NONE);
5256 if (parser->m_declEntity) {
5257 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5258 parser->m_declEntity->textLen
5259 = (int)(poolLength(&dtd->entityValuePool));
5260 poolFinish(&dtd->entityValuePool);
5261 if (parser->m_entityDeclHandler) {
5262 *eventEndPP = s;
5263 parser->m_entityDeclHandler(
5264 parser->m_handlerArg, parser->m_declEntity->name,
5265 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5266 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5267 handleDefault = XML_FALSE;
5268 }
5269 } else
5270 poolDiscard(&dtd->entityValuePool);
5271 if (result != XML_ERROR_NONE)
5272 return result;
5273 #else
5274 // This will store "&entity123;" in parser->m_declEntity->textPtr
5275 // to end up as "&entity123;" in the handler.
5276 if (parser->m_declEntity != NULL) {
5277 const enum XML_Error result
5278 = storeSelfEntityValue(parser, parser->m_declEntity);
5279 if (result != XML_ERROR_NONE)
5280 return result;
5281
5282 if (parser->m_entityDeclHandler) {
5283 *eventEndPP = s;
5284 parser->m_entityDeclHandler(
5285 parser->m_handlerArg, parser->m_declEntity->name,
5286 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5287 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5288 handleDefault = XML_FALSE;
5289 }
5290 }
5291 #endif
5292 }
5293 break;
5294 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5295 #ifdef XML_DTD
5296 parser->m_useForeignDTD = XML_FALSE;
5297 #endif /* XML_DTD */
5298 dtd->hasParamEntityRefs = XML_TRUE;
5299 if (parser->m_startDoctypeDeclHandler) {
5300 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5301 s + enc->minBytesPerChar,
5302 next - enc->minBytesPerChar);
5303 if (parser->m_doctypeSysid == NULL)
5304 return XML_ERROR_NO_MEMORY;
5305 poolFinish(&parser->m_tempPool);
5306 handleDefault = XML_FALSE;
5307 }
5308 #ifdef XML_DTD
5309 else
5310 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5311 for the case where no parser->m_startDoctypeDeclHandler is set */
5312 parser->m_doctypeSysid = externalSubsetName;
5313 #endif /* XML_DTD */
5314 if (! dtd->standalone
5315 #ifdef XML_DTD
5316 && ! parser->m_paramEntityParsing
5317 #endif /* XML_DTD */
5318 && parser->m_notStandaloneHandler
5319 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5320 return XML_ERROR_NOT_STANDALONE;
5321 #ifndef XML_DTD
5322 break;
5323 #else /* XML_DTD */
5324 if (! parser->m_declEntity) {
5325 parser->m_declEntity = (ENTITY *)lookup(
5326 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5327 if (! parser->m_declEntity)
5328 return XML_ERROR_NO_MEMORY;
5329 parser->m_declEntity->publicId = NULL;
5330 }
5331 #endif /* XML_DTD */
5332 /* fall through */
5333 case XML_ROLE_ENTITY_SYSTEM_ID:
5334 if (dtd->keepProcessing && parser->m_declEntity) {
5335 parser->m_declEntity->systemId
5336 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5337 next - enc->minBytesPerChar);
5338 if (! parser->m_declEntity->systemId)
5339 return XML_ERROR_NO_MEMORY;
5340 parser->m_declEntity->base = parser->m_curBase;
5341 poolFinish(&dtd->pool);
5342 /* Don't suppress the default handler if we fell through from
5343 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5344 */
5345 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5346 handleDefault = XML_FALSE;
5347 }
5348 break;
5349 case XML_ROLE_ENTITY_COMPLETE:
5350 #if XML_GE == 0
5351 // This will store "&entity123;" in entity->textPtr
5352 // to end up as "&entity123;" in the handler.
5353 if (parser->m_declEntity != NULL) {
5354 const enum XML_Error result
5355 = storeSelfEntityValue(parser, parser->m_declEntity);
5356 if (result != XML_ERROR_NONE)
5357 return result;
5358 }
5359 #endif
5360 if (dtd->keepProcessing && parser->m_declEntity
5361 && parser->m_entityDeclHandler) {
5362 *eventEndPP = s;
5363 parser->m_entityDeclHandler(
5364 parser->m_handlerArg, parser->m_declEntity->name,
5365 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5366 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5367 handleDefault = XML_FALSE;
5368 }
5369 break;
5370 case XML_ROLE_ENTITY_NOTATION_NAME:
5371 if (dtd->keepProcessing && parser->m_declEntity) {
5372 parser->m_declEntity->notation
5373 = poolStoreString(&dtd->pool, enc, s, next);
5374 if (! parser->m_declEntity->notation)
5375 return XML_ERROR_NO_MEMORY;
5376 poolFinish(&dtd->pool);
5377 if (parser->m_unparsedEntityDeclHandler) {
5378 *eventEndPP = s;
5379 parser->m_unparsedEntityDeclHandler(
5380 parser->m_handlerArg, parser->m_declEntity->name,
5381 parser->m_declEntity->base, parser->m_declEntity->systemId,
5382 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5383 handleDefault = XML_FALSE;
5384 } else if (parser->m_entityDeclHandler) {
5385 *eventEndPP = s;
5386 parser->m_entityDeclHandler(
5387 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5388 parser->m_declEntity->base, parser->m_declEntity->systemId,
5389 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5390 handleDefault = XML_FALSE;
5391 }
5392 }
5393 break;
5394 case XML_ROLE_GENERAL_ENTITY_NAME: {
5395 if (XmlPredefinedEntityName(enc, s, next)) {
5396 parser->m_declEntity = NULL;
5397 break;
5398 }
5399 if (dtd->keepProcessing) {
5400 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5401 if (! name)
5402 return XML_ERROR_NO_MEMORY;
5403 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5404 name, sizeof(ENTITY));
5405 if (! parser->m_declEntity)
5406 return XML_ERROR_NO_MEMORY;
5407 if (parser->m_declEntity->name != name) {
5408 poolDiscard(&dtd->pool);
5409 parser->m_declEntity = NULL;
5410 } else {
5411 poolFinish(&dtd->pool);
5412 parser->m_declEntity->publicId = NULL;
5413 parser->m_declEntity->is_param = XML_FALSE;
5414 /* if we have a parent parser or are reading an internal parameter
5415 entity, then the entity declaration is not considered "internal"
5416 */
5417 parser->m_declEntity->is_internal
5418 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5419 if (parser->m_entityDeclHandler)
5420 handleDefault = XML_FALSE;
5421 }
5422 } else {
5423 poolDiscard(&dtd->pool);
5424 parser->m_declEntity = NULL;
5425 }
5426 } break;
5427 case XML_ROLE_PARAM_ENTITY_NAME:
5428 #ifdef XML_DTD
5429 if (dtd->keepProcessing) {
5430 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5431 if (! name)
5432 return XML_ERROR_NO_MEMORY;
5433 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5434 name, sizeof(ENTITY));
5435 if (! parser->m_declEntity)
5436 return XML_ERROR_NO_MEMORY;
5437 if (parser->m_declEntity->name != name) {
5438 poolDiscard(&dtd->pool);
5439 parser->m_declEntity = NULL;
5440 } else {
5441 poolFinish(&dtd->pool);
5442 parser->m_declEntity->publicId = NULL;
5443 parser->m_declEntity->is_param = XML_TRUE;
5444 /* if we have a parent parser or are reading an internal parameter
5445 entity, then the entity declaration is not considered "internal"
5446 */
5447 parser->m_declEntity->is_internal
5448 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5449 if (parser->m_entityDeclHandler)
5450 handleDefault = XML_FALSE;
5451 }
5452 } else {
5453 poolDiscard(&dtd->pool);
5454 parser->m_declEntity = NULL;
5455 }
5456 #else /* not XML_DTD */
5457 parser->m_declEntity = NULL;
5458 #endif /* XML_DTD */
5459 break;
5460 case XML_ROLE_NOTATION_NAME:
5461 parser->m_declNotationPublicId = NULL;
5462 parser->m_declNotationName = NULL;
5463 if (parser->m_notationDeclHandler) {
5464 parser->m_declNotationName
5465 = poolStoreString(&parser->m_tempPool, enc, s, next);
5466 if (! parser->m_declNotationName)
5467 return XML_ERROR_NO_MEMORY;
5468 poolFinish(&parser->m_tempPool);
5469 handleDefault = XML_FALSE;
5470 }
5471 break;
5472 case XML_ROLE_NOTATION_PUBLIC_ID:
5473 if (! XmlIsPublicId(enc, s, next, eventPP))
5474 return XML_ERROR_PUBLICID;
5475 if (parser
5476 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5477 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5478 s + enc->minBytesPerChar,
5479 next - enc->minBytesPerChar);
5480 if (! tem)
5481 return XML_ERROR_NO_MEMORY;
5482 normalizePublicId(tem);
5483 parser->m_declNotationPublicId = tem;
5484 poolFinish(&parser->m_tempPool);
5485 handleDefault = XML_FALSE;
5486 }
5487 break;
5488 case XML_ROLE_NOTATION_SYSTEM_ID:
5489 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5490 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5491 s + enc->minBytesPerChar,
5492 next - enc->minBytesPerChar);
5493 if (! systemId)
5494 return XML_ERROR_NO_MEMORY;
5495 *eventEndPP = s;
5496 parser->m_notationDeclHandler(
5497 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5498 systemId, parser->m_declNotationPublicId);
5499 handleDefault = XML_FALSE;
5500 }
5501 poolClear(&parser->m_tempPool);
5502 break;
5503 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5504 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5505 *eventEndPP = s;
5506 parser->m_notationDeclHandler(
5507 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5508 0, parser->m_declNotationPublicId);
5509 handleDefault = XML_FALSE;
5510 }
5511 poolClear(&parser->m_tempPool);
5512 break;
5513 case XML_ROLE_ERROR:
5514 switch (tok) {
5515 case XML_TOK_PARAM_ENTITY_REF:
5516 /* PE references in internal subset are
5517 not allowed within declarations. */
5518 return XML_ERROR_PARAM_ENTITY_REF;
5519 case XML_TOK_XML_DECL:
5520 return XML_ERROR_MISPLACED_XML_PI;
5521 default:
5522 return XML_ERROR_SYNTAX;
5523 }
5524 #ifdef XML_DTD
5525 case XML_ROLE_IGNORE_SECT: {
5526 enum XML_Error result;
5527 if (parser->m_defaultHandler)
5528 reportDefault(parser, enc, s, next);
5529 handleDefault = XML_FALSE;
5530 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5531 if (result != XML_ERROR_NONE)
5532 return result;
5533 else if (! next) {
5534 parser->m_processor = ignoreSectionProcessor;
5535 return result;
5536 }
5537 } break;
5538 #endif /* XML_DTD */
5539 case XML_ROLE_GROUP_OPEN:
5540 if (parser->m_prologState.level >= parser->m_groupSize) {
5541 if (parser->m_groupSize) {
5542 {
5543 /* Detect and prevent integer overflow */
5544 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5545 return XML_ERROR_NO_MEMORY;
5546 }
5547
5548 char *const new_connector = (char *)REALLOC(
5549 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5550 if (new_connector == NULL) {
5551 parser->m_groupSize /= 2;
5552 return XML_ERROR_NO_MEMORY;
5553 }
5554 parser->m_groupConnector = new_connector;
5555 }
5556
5557 if (dtd->scaffIndex) {
5558 /* Detect and prevent integer overflow.
5559 * The preprocessor guard addresses the "always false" warning
5560 * from -Wtype-limits on platforms where
5561 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5562 #if UINT_MAX >= SIZE_MAX
5563 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5564 return XML_ERROR_NO_MEMORY;
5565 }
5566 #endif
5567
5568 int *const new_scaff_index = (int *)REALLOC(
5569 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5570 if (new_scaff_index == NULL)
5571 return XML_ERROR_NO_MEMORY;
5572 dtd->scaffIndex = new_scaff_index;
5573 }
5574 } else {
5575 parser->m_groupConnector
5576 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5577 if (! parser->m_groupConnector) {
5578 parser->m_groupSize = 0;
5579 return XML_ERROR_NO_MEMORY;
5580 }
5581 }
5582 }
5583 parser->m_groupConnector[parser->m_prologState.level] = 0;
5584 if (dtd->in_eldecl) {
5585 int myindex = nextScaffoldPart(parser);
5586 if (myindex < 0)
5587 return XML_ERROR_NO_MEMORY;
5588 assert(dtd->scaffIndex != NULL);
5589 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5590 dtd->scaffLevel++;
5591 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5592 if (parser->m_elementDeclHandler)
5593 handleDefault = XML_FALSE;
5594 }
5595 break;
5596 case XML_ROLE_GROUP_SEQUENCE:
5597 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5598 return XML_ERROR_SYNTAX;
5599 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5600 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5601 handleDefault = XML_FALSE;
5602 break;
5603 case XML_ROLE_GROUP_CHOICE:
5604 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5605 return XML_ERROR_SYNTAX;
5606 if (dtd->in_eldecl
5607 && ! parser->m_groupConnector[parser->m_prologState.level]
5608 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5609 != XML_CTYPE_MIXED)) {
5610 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5611 = XML_CTYPE_CHOICE;
5612 if (parser->m_elementDeclHandler)
5613 handleDefault = XML_FALSE;
5614 }
5615 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5616 break;
5617 case XML_ROLE_PARAM_ENTITY_REF:
5618 #ifdef XML_DTD
5619 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5620 dtd->hasParamEntityRefs = XML_TRUE;
5621 if (! parser->m_paramEntityParsing)
5622 dtd->keepProcessing = dtd->standalone;
5623 else {
5624 const XML_Char *name;
5625 ENTITY *entity;
5626 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5627 next - enc->minBytesPerChar);
5628 if (! name)
5629 return XML_ERROR_NO_MEMORY;
5630 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5631 poolDiscard(&dtd->pool);
5632 /* first, determine if a check for an existing declaration is needed;
5633 if yes, check that the entity exists, and that it is internal,
5634 otherwise call the skipped entity handler
5635 */
5636 if (parser->m_prologState.documentEntity
5637 && (dtd->standalone ? ! parser->m_openInternalEntities
5638 : ! dtd->hasParamEntityRefs)) {
5639 if (! entity)
5640 return XML_ERROR_UNDEFINED_ENTITY;
5641 else if (! entity->is_internal) {
5642 /* It's hard to exhaustively search the code to be sure,
5643 * but there doesn't seem to be a way of executing the
5644 * following line. There are two cases:
5645 *
5646 * If 'standalone' is false, the DTD must have no
5647 * parameter entities or we wouldn't have passed the outer
5648 * 'if' statement. That means the only entity in the hash
5649 * table is the external subset name "#" which cannot be
5650 * given as a parameter entity name in XML syntax, so the
5651 * lookup must have returned NULL and we don't even reach
5652 * the test for an internal entity.
5653 *
5654 * If 'standalone' is true, it does not seem to be
5655 * possible to create entities taking this code path that
5656 * are not internal entities, so fail the test above.
5657 *
5658 * Because this analysis is very uncertain, the code is
5659 * being left in place and merely removed from the
5660 * coverage test statistics.
5661 */
5662 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5663 }
5664 } else if (! entity) {
5665 dtd->keepProcessing = dtd->standalone;
5666 /* cannot report skipped entities in declarations */
5667 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5668 && parser->m_skippedEntityHandler) {
5669 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5670 handleDefault = XML_FALSE;
5671 }
5672 break;
5673 }
5674 if (entity->open)
5675 return XML_ERROR_RECURSIVE_ENTITY_REF;
5676 if (entity->textPtr) {
5677 enum XML_Error result;
5678 XML_Bool betweenDecl
5679 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5680 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
5681 if (result != XML_ERROR_NONE)
5682 return result;
5683 handleDefault = XML_FALSE;
5684 break;
5685 }
5686 if (parser->m_externalEntityRefHandler) {
5687 dtd->paramEntityRead = XML_FALSE;
5688 entity->open = XML_TRUE;
5689 entityTrackingOnOpen(parser, entity, __LINE__);
5690 if (! parser->m_externalEntityRefHandler(
5691 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5692 entity->systemId, entity->publicId)) {
5693 entityTrackingOnClose(parser, entity, __LINE__);
5694 entity->open = XML_FALSE;
5695 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5696 }
5697 entityTrackingOnClose(parser, entity, __LINE__);
5698 entity->open = XML_FALSE;
5699 handleDefault = XML_FALSE;
5700 if (! dtd->paramEntityRead) {
5701 dtd->keepProcessing = dtd->standalone;
5702 break;
5703 }
5704 } else {
5705 dtd->keepProcessing = dtd->standalone;
5706 break;
5707 }
5708 }
5709 #endif /* XML_DTD */
5710 if (! dtd->standalone && parser->m_notStandaloneHandler
5711 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5712 return XML_ERROR_NOT_STANDALONE;
5713 break;
5714
5715 /* Element declaration stuff */
5716
5717 case XML_ROLE_ELEMENT_NAME:
5718 if (parser->m_elementDeclHandler) {
5719 parser->m_declElementType = getElementType(parser, enc, s, next);
5720 if (! parser->m_declElementType)
5721 return XML_ERROR_NO_MEMORY;
5722 dtd->scaffLevel = 0;
5723 dtd->scaffCount = 0;
5724 dtd->in_eldecl = XML_TRUE;
5725 handleDefault = XML_FALSE;
5726 }
5727 break;
5728
5729 case XML_ROLE_CONTENT_ANY:
5730 case XML_ROLE_CONTENT_EMPTY:
5731 if (dtd->in_eldecl) {
5732 if (parser->m_elementDeclHandler) {
5733 XML_Content *content
5734 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5735 if (! content)
5736 return XML_ERROR_NO_MEMORY;
5737 content->quant = XML_CQUANT_NONE;
5738 content->name = NULL;
5739 content->numchildren = 0;
5740 content->children = NULL;
5741 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5742 : XML_CTYPE_EMPTY);
5743 *eventEndPP = s;
5744 parser->m_elementDeclHandler(
5745 parser->m_handlerArg, parser->m_declElementType->name, content);
5746 handleDefault = XML_FALSE;
5747 }
5748 dtd->in_eldecl = XML_FALSE;
5749 }
5750 break;
5751
5752 case XML_ROLE_CONTENT_PCDATA:
5753 if (dtd->in_eldecl) {
5754 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5755 = XML_CTYPE_MIXED;
5756 if (parser->m_elementDeclHandler)
5757 handleDefault = XML_FALSE;
5758 }
5759 break;
5760
5761 case XML_ROLE_CONTENT_ELEMENT:
5762 quant = XML_CQUANT_NONE;
5763 goto elementContent;
5764 case XML_ROLE_CONTENT_ELEMENT_OPT:
5765 quant = XML_CQUANT_OPT;
5766 goto elementContent;
5767 case XML_ROLE_CONTENT_ELEMENT_REP:
5768 quant = XML_CQUANT_REP;
5769 goto elementContent;
5770 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5771 quant = XML_CQUANT_PLUS;
5772 elementContent:
5773 if (dtd->in_eldecl) {
5774 ELEMENT_TYPE *el;
5775 const XML_Char *name;
5776 size_t nameLen;
5777 const char *nxt
5778 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5779 int myindex = nextScaffoldPart(parser);
5780 if (myindex < 0)
5781 return XML_ERROR_NO_MEMORY;
5782 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5783 dtd->scaffold[myindex].quant = quant;
5784 el = getElementType(parser, enc, s, nxt);
5785 if (! el)
5786 return XML_ERROR_NO_MEMORY;
5787 name = el->name;
5788 dtd->scaffold[myindex].name = name;
5789 nameLen = 0;
5790 for (; name[nameLen++];)
5791 ;
5792
5793 /* Detect and prevent integer overflow */
5794 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5795 return XML_ERROR_NO_MEMORY;
5796 }
5797
5798 dtd->contentStringLen += (unsigned)nameLen;
5799 if (parser->m_elementDeclHandler)
5800 handleDefault = XML_FALSE;
5801 }
5802 break;
5803
5804 case XML_ROLE_GROUP_CLOSE:
5805 quant = XML_CQUANT_NONE;
5806 goto closeGroup;
5807 case XML_ROLE_GROUP_CLOSE_OPT:
5808 quant = XML_CQUANT_OPT;
5809 goto closeGroup;
5810 case XML_ROLE_GROUP_CLOSE_REP:
5811 quant = XML_CQUANT_REP;
5812 goto closeGroup;
5813 case XML_ROLE_GROUP_CLOSE_PLUS:
5814 quant = XML_CQUANT_PLUS;
5815 closeGroup:
5816 if (dtd->in_eldecl) {
5817 if (parser->m_elementDeclHandler)
5818 handleDefault = XML_FALSE;
5819 dtd->scaffLevel--;
5820 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5821 if (dtd->scaffLevel == 0) {
5822 if (! handleDefault) {
5823 XML_Content *model = build_model(parser);
5824 if (! model)
5825 return XML_ERROR_NO_MEMORY;
5826 *eventEndPP = s;
5827 parser->m_elementDeclHandler(
5828 parser->m_handlerArg, parser->m_declElementType->name, model);
5829 }
5830 dtd->in_eldecl = XML_FALSE;
5831 dtd->contentStringLen = 0;
5832 }
5833 }
5834 break;
5835 /* End element declaration stuff */
5836
5837 case XML_ROLE_PI:
5838 if (! reportProcessingInstruction(parser, enc, s, next))
5839 return XML_ERROR_NO_MEMORY;
5840 handleDefault = XML_FALSE;
5841 break;
5842 case XML_ROLE_COMMENT:
5843 if (! reportComment(parser, enc, s, next))
5844 return XML_ERROR_NO_MEMORY;
5845 handleDefault = XML_FALSE;
5846 break;
5847 case XML_ROLE_NONE:
5848 switch (tok) {
5849 case XML_TOK_BOM:
5850 handleDefault = XML_FALSE;
5851 break;
5852 }
5853 break;
5854 case XML_ROLE_DOCTYPE_NONE:
5855 if (parser->m_startDoctypeDeclHandler)
5856 handleDefault = XML_FALSE;
5857 break;
5858 case XML_ROLE_ENTITY_NONE:
5859 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5860 handleDefault = XML_FALSE;
5861 break;
5862 case XML_ROLE_NOTATION_NONE:
5863 if (parser->m_notationDeclHandler)
5864 handleDefault = XML_FALSE;
5865 break;
5866 case XML_ROLE_ATTLIST_NONE:
5867 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5868 handleDefault = XML_FALSE;
5869 break;
5870 case XML_ROLE_ELEMENT_NONE:
5871 if (parser->m_elementDeclHandler)
5872 handleDefault = XML_FALSE;
5873 break;
5874 } /* end of big switch */
5875
5876 if (handleDefault && parser->m_defaultHandler)
5877 reportDefault(parser, enc, s, next);
5878
5879 switch (parser->m_parsingStatus.parsing) {
5880 case XML_SUSPENDED:
5881 *nextPtr = next;
5882 return XML_ERROR_NONE;
5883 case XML_FINISHED:
5884 return XML_ERROR_ABORTED;
5885 case XML_PARSING:
5886 if (parser->m_reenter) {
5887 *nextPtr = next;
5888 return XML_ERROR_NONE;
5889 }
5890 /* Fall through */
5891 default:
5892 s = next;
5893 tok = XmlPrologTok(enc, s, end, &next);
5894 }
5895 }
5896 /* not reached */
5897 }
5898
5899 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5900 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5901 const char **nextPtr) {
5902 parser->m_processor = epilogProcessor;
5903 parser->m_eventPtr = s;
5904 for (;;) {
5905 const char *next = NULL;
5906 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5907 #if XML_GE == 1
5908 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5909 XML_ACCOUNT_DIRECT)) {
5910 accountingOnAbort(parser);
5911 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5912 }
5913 #endif
5914 parser->m_eventEndPtr = next;
5915 switch (tok) {
5916 /* report partial linebreak - it might be the last token */
5917 case -XML_TOK_PROLOG_S:
5918 if (parser->m_defaultHandler) {
5919 reportDefault(parser, parser->m_encoding, s, next);
5920 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5921 return XML_ERROR_ABORTED;
5922 }
5923 *nextPtr = next;
5924 return XML_ERROR_NONE;
5925 case XML_TOK_NONE:
5926 *nextPtr = s;
5927 return XML_ERROR_NONE;
5928 case XML_TOK_PROLOG_S:
5929 if (parser->m_defaultHandler)
5930 reportDefault(parser, parser->m_encoding, s, next);
5931 break;
5932 case XML_TOK_PI:
5933 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5934 return XML_ERROR_NO_MEMORY;
5935 break;
5936 case XML_TOK_COMMENT:
5937 if (! reportComment(parser, parser->m_encoding, s, next))
5938 return XML_ERROR_NO_MEMORY;
5939 break;
5940 case XML_TOK_INVALID:
5941 parser->m_eventPtr = next;
5942 return XML_ERROR_INVALID_TOKEN;
5943 case XML_TOK_PARTIAL:
5944 if (! parser->m_parsingStatus.finalBuffer) {
5945 *nextPtr = s;
5946 return XML_ERROR_NONE;
5947 }
5948 return XML_ERROR_UNCLOSED_TOKEN;
5949 case XML_TOK_PARTIAL_CHAR:
5950 if (! parser->m_parsingStatus.finalBuffer) {
5951 *nextPtr = s;
5952 return XML_ERROR_NONE;
5953 }
5954 return XML_ERROR_PARTIAL_CHAR;
5955 default:
5956 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5957 }
5958 switch (parser->m_parsingStatus.parsing) {
5959 case XML_SUSPENDED:
5960 parser->m_eventPtr = next;
5961 *nextPtr = next;
5962 return XML_ERROR_NONE;
5963 case XML_FINISHED:
5964 parser->m_eventPtr = next;
5965 return XML_ERROR_ABORTED;
5966 case XML_PARSING:
5967 if (parser->m_reenter) {
5968 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
5969 }
5970 /* Fall through */
5971 default:;
5972 parser->m_eventPtr = s = next;
5973 }
5974 }
5975 }
5976
5977 static enum XML_Error
processEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl,enum EntityType type)5978 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
5979 enum EntityType type) {
5980 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
5981 switch (type) {
5982 case ENTITY_INTERNAL:
5983 parser->m_processor = internalEntityProcessor;
5984 openEntityList = &parser->m_openInternalEntities;
5985 freeEntityList = &parser->m_freeInternalEntities;
5986 break;
5987 case ENTITY_ATTRIBUTE:
5988 openEntityList = &parser->m_openAttributeEntities;
5989 freeEntityList = &parser->m_freeAttributeEntities;
5990 break;
5991 case ENTITY_VALUE:
5992 openEntityList = &parser->m_openValueEntities;
5993 freeEntityList = &parser->m_freeValueEntities;
5994 break;
5995 /* default case serves merely as a safety net in case of a
5996 * wrong entityType. Therefore we exclude the following lines
5997 * from the test coverage.
5998 *
5999 * LCOV_EXCL_START
6000 */
6001 default:
6002 // Should not reach here
6003 assert(0);
6004 /* LCOV_EXCL_STOP */
6005 }
6006
6007 if (*freeEntityList) {
6008 openEntity = *freeEntityList;
6009 *freeEntityList = openEntity->next;
6010 } else {
6011 openEntity
6012 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6013 if (! openEntity)
6014 return XML_ERROR_NO_MEMORY;
6015 }
6016 entity->open = XML_TRUE;
6017 entity->hasMore = XML_TRUE;
6018 #if XML_GE == 1
6019 entityTrackingOnOpen(parser, entity, __LINE__);
6020 #endif
6021 entity->processed = 0;
6022 openEntity->next = *openEntityList;
6023 *openEntityList = openEntity;
6024 openEntity->entity = entity;
6025 openEntity->type = type;
6026 openEntity->startTagLevel = parser->m_tagLevel;
6027 openEntity->betweenDecl = betweenDecl;
6028 openEntity->internalEventPtr = NULL;
6029 openEntity->internalEventEndPtr = NULL;
6030
6031 // Only internal entities make use of the reenter flag
6032 // therefore no need to set it for other entity types
6033 if (type == ENTITY_INTERNAL) {
6034 triggerReenter(parser);
6035 }
6036 return XML_ERROR_NONE;
6037 }
6038
6039 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)6040 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6041 const char **nextPtr) {
6042 UNUSED_P(s);
6043 UNUSED_P(end);
6044 UNUSED_P(nextPtr);
6045 ENTITY *entity;
6046 const char *textStart, *textEnd;
6047 const char *next;
6048 enum XML_Error result;
6049 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6050 if (! openEntity)
6051 return XML_ERROR_UNEXPECTED_STATE;
6052
6053 entity = openEntity->entity;
6054
6055 // This will return early
6056 if (entity->hasMore) {
6057 textStart = ((const char *)entity->textPtr) + entity->processed;
6058 textEnd = (const char *)(entity->textPtr + entity->textLen);
6059 /* Set a safe default value in case 'next' does not get set */
6060 next = textStart;
6061
6062 if (entity->is_param) {
6063 int tok
6064 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6065 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6066 tok, next, &next, XML_FALSE, XML_FALSE,
6067 XML_ACCOUNT_ENTITY_EXPANSION);
6068 } else {
6069 result = doContent(parser, openEntity->startTagLevel,
6070 parser->m_internalEncoding, textStart, textEnd, &next,
6071 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6072 }
6073
6074 if (result != XML_ERROR_NONE)
6075 return result;
6076 // Check if entity is complete, if not, mark down how much of it is
6077 // processed
6078 if (textEnd != next
6079 && (parser->m_parsingStatus.parsing == XML_SUSPENDED
6080 || (parser->m_parsingStatus.parsing == XML_PARSING
6081 && parser->m_reenter))) {
6082 entity->processed = (int)(next - (const char *)entity->textPtr);
6083 return result;
6084 }
6085
6086 // Entity is complete. We cannot close it here since we need to first
6087 // process its possible inner entities (which are added to the
6088 // m_openInternalEntities during doProlog or doContent calls above)
6089 entity->hasMore = XML_FALSE;
6090 triggerReenter(parser);
6091 return result;
6092 } // End of entity processing, "if" block will return here
6093
6094 // Remove fully processed openEntity from open entity list.
6095 #if XML_GE == 1
6096 entityTrackingOnClose(parser, entity, __LINE__);
6097 #endif
6098 // openEntity is m_openInternalEntities' head, as we set it at the start of
6099 // this function and we skipped doProlog and doContent calls with hasMore set
6100 // to false. This means we can directly remove the head of
6101 // m_openInternalEntities
6102 assert(parser->m_openInternalEntities == openEntity);
6103 entity->open = XML_FALSE;
6104 parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6105
6106 /* put openEntity back in list of free instances */
6107 openEntity->next = parser->m_freeInternalEntities;
6108 parser->m_freeInternalEntities = openEntity;
6109
6110 if (parser->m_openInternalEntities == NULL) {
6111 parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6112 }
6113 triggerReenter(parser);
6114 return XML_ERROR_NONE;
6115 }
6116
6117 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)6118 errorProcessor(XML_Parser parser, const char *s, const char *end,
6119 const char **nextPtr) {
6120 UNUSED_P(s);
6121 UNUSED_P(end);
6122 UNUSED_P(nextPtr);
6123 return parser->m_errorCode;
6124 }
6125
6126 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)6127 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6128 const char *ptr, const char *end, STRING_POOL *pool,
6129 enum XML_Account account) {
6130 const char *next = ptr;
6131 enum XML_Error result = XML_ERROR_NONE;
6132
6133 while (1) {
6134 if (! parser->m_openAttributeEntities) {
6135 result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6136 account, &next);
6137 } else {
6138 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6139 if (! openEntity)
6140 return XML_ERROR_UNEXPECTED_STATE;
6141
6142 ENTITY *const entity = openEntity->entity;
6143 const char *const textStart
6144 = ((const char *)entity->textPtr) + entity->processed;
6145 const char *const textEnd
6146 = (const char *)(entity->textPtr + entity->textLen);
6147 /* Set a safe default value in case 'next' does not get set */
6148 const char *nextInEntity = textStart;
6149 if (entity->hasMore) {
6150 result = appendAttributeValue(
6151 parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6152 pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6153 if (result != XML_ERROR_NONE)
6154 break;
6155 // Check if entity is complete, if not, mark down how much of it is
6156 // processed. A XML_SUSPENDED check here is not required as
6157 // appendAttributeValue will never suspend the parser.
6158 if (textEnd != nextInEntity) {
6159 entity->processed
6160 = (int)(nextInEntity - (const char *)entity->textPtr);
6161 continue;
6162 }
6163
6164 // Entity is complete. We cannot close it here since we need to first
6165 // process its possible inner entities (which are added to the
6166 // m_openAttributeEntities during appendAttributeValue)
6167 entity->hasMore = XML_FALSE;
6168 continue;
6169 } // End of entity processing, "if" block skips the rest
6170
6171 // Remove fully processed openEntity from open entity list.
6172 #if XML_GE == 1
6173 entityTrackingOnClose(parser, entity, __LINE__);
6174 #endif
6175 // openEntity is m_openAttributeEntities' head, since we set it at the
6176 // start of this function and because we skipped appendAttributeValue call
6177 // with hasMore set to false. This means we can directly remove the head
6178 // of m_openAttributeEntities
6179 assert(parser->m_openAttributeEntities == openEntity);
6180 entity->open = XML_FALSE;
6181 parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6182
6183 /* put openEntity back in list of free instances */
6184 openEntity->next = parser->m_freeAttributeEntities;
6185 parser->m_freeAttributeEntities = openEntity;
6186 }
6187
6188 // Break if an error occurred or there is nothing left to process
6189 if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6190 break;
6191 }
6192 }
6193
6194 if (result)
6195 return result;
6196 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6197 poolChop(pool);
6198 if (! poolAppendChar(pool, XML_T('\0')))
6199 return XML_ERROR_NO_MEMORY;
6200 return XML_ERROR_NONE;
6201 }
6202
6203 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account,const char ** nextPtr)6204 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6205 const char *ptr, const char *end, STRING_POOL *pool,
6206 enum XML_Account account, const char **nextPtr) {
6207 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6208 #ifndef XML_DTD
6209 UNUSED_P(account);
6210 #endif
6211
6212 for (;;) {
6213 const char *next
6214 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6215 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6216 #if XML_GE == 1
6217 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6218 accountingOnAbort(parser);
6219 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6220 }
6221 #endif
6222 switch (tok) {
6223 case XML_TOK_NONE:
6224 if (nextPtr) {
6225 *nextPtr = next;
6226 }
6227 return XML_ERROR_NONE;
6228 case XML_TOK_INVALID:
6229 if (enc == parser->m_encoding)
6230 parser->m_eventPtr = next;
6231 return XML_ERROR_INVALID_TOKEN;
6232 case XML_TOK_PARTIAL:
6233 if (enc == parser->m_encoding)
6234 parser->m_eventPtr = ptr;
6235 return XML_ERROR_INVALID_TOKEN;
6236 case XML_TOK_CHAR_REF: {
6237 XML_Char buf[XML_ENCODE_MAX];
6238 int i;
6239 int n = XmlCharRefNumber(enc, ptr);
6240 if (n < 0) {
6241 if (enc == parser->m_encoding)
6242 parser->m_eventPtr = ptr;
6243 return XML_ERROR_BAD_CHAR_REF;
6244 }
6245 if (! isCdata && n == 0x20 /* space */
6246 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6247 break;
6248 n = XmlEncode(n, (ICHAR *)buf);
6249 /* The XmlEncode() functions can never return 0 here. That
6250 * error return happens if the code point passed in is either
6251 * negative or greater than or equal to 0x110000. The
6252 * XmlCharRefNumber() functions will all return a number
6253 * strictly less than 0x110000 or a negative value if an error
6254 * occurred. The negative value is intercepted above, so
6255 * XmlEncode() is never passed a value it might return an
6256 * error for.
6257 */
6258 for (i = 0; i < n; i++) {
6259 if (! poolAppendChar(pool, buf[i]))
6260 return XML_ERROR_NO_MEMORY;
6261 }
6262 } break;
6263 case XML_TOK_DATA_CHARS:
6264 if (! poolAppend(pool, enc, ptr, next))
6265 return XML_ERROR_NO_MEMORY;
6266 break;
6267 case XML_TOK_TRAILING_CR:
6268 next = ptr + enc->minBytesPerChar;
6269 /* fall through */
6270 case XML_TOK_ATTRIBUTE_VALUE_S:
6271 case XML_TOK_DATA_NEWLINE:
6272 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6273 break;
6274 if (! poolAppendChar(pool, 0x20))
6275 return XML_ERROR_NO_MEMORY;
6276 break;
6277 case XML_TOK_ENTITY_REF: {
6278 const XML_Char *name;
6279 ENTITY *entity;
6280 char checkEntityDecl;
6281 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6282 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6283 if (ch) {
6284 #if XML_GE == 1
6285 /* NOTE: We are replacing 4-6 characters original input for 1 character
6286 * so there is no amplification and hence recording without
6287 * protection. */
6288 accountingDiffTolerated(parser, tok, (char *)&ch,
6289 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6290 XML_ACCOUNT_ENTITY_EXPANSION);
6291 #endif /* XML_GE == 1 */
6292 if (! poolAppendChar(pool, ch))
6293 return XML_ERROR_NO_MEMORY;
6294 break;
6295 }
6296 name = poolStoreString(&parser->m_temp2Pool, enc,
6297 ptr + enc->minBytesPerChar,
6298 next - enc->minBytesPerChar);
6299 if (! name)
6300 return XML_ERROR_NO_MEMORY;
6301 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6302 poolDiscard(&parser->m_temp2Pool);
6303 /* First, determine if a check for an existing declaration is needed;
6304 if yes, check that the entity exists, and that it is internal.
6305 */
6306 if (pool == &dtd->pool) /* are we called from prolog? */
6307 checkEntityDecl =
6308 #ifdef XML_DTD
6309 parser->m_prologState.documentEntity &&
6310 #endif /* XML_DTD */
6311 (dtd->standalone ? ! parser->m_openInternalEntities
6312 : ! dtd->hasParamEntityRefs);
6313 else /* if (pool == &parser->m_tempPool): we are called from content */
6314 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6315 if (checkEntityDecl) {
6316 if (! entity)
6317 return XML_ERROR_UNDEFINED_ENTITY;
6318 else if (! entity->is_internal)
6319 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6320 } else if (! entity) {
6321 /* Cannot report skipped entity here - see comments on
6322 parser->m_skippedEntityHandler.
6323 if (parser->m_skippedEntityHandler)
6324 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6325 */
6326 /* Cannot call the default handler because this would be
6327 out of sync with the call to the startElementHandler.
6328 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6329 reportDefault(parser, enc, ptr, next);
6330 */
6331 break;
6332 }
6333 if (entity->open) {
6334 if (enc == parser->m_encoding) {
6335 /* It does not appear that this line can be executed.
6336 *
6337 * The "if (entity->open)" check catches recursive entity
6338 * definitions. In order to be called with an open
6339 * entity, it must have gone through this code before and
6340 * been through the recursive call to
6341 * appendAttributeValue() some lines below. That call
6342 * sets the local encoding ("enc") to the parser's
6343 * internal encoding (internal_utf8 or internal_utf16),
6344 * which can never be the same as the principle encoding.
6345 * It doesn't appear there is another code path that gets
6346 * here with entity->open being TRUE.
6347 *
6348 * Since it is not certain that this logic is watertight,
6349 * we keep the line and merely exclude it from coverage
6350 * tests.
6351 */
6352 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6353 }
6354 return XML_ERROR_RECURSIVE_ENTITY_REF;
6355 }
6356 if (entity->notation) {
6357 if (enc == parser->m_encoding)
6358 parser->m_eventPtr = ptr;
6359 return XML_ERROR_BINARY_ENTITY_REF;
6360 }
6361 if (! entity->textPtr) {
6362 if (enc == parser->m_encoding)
6363 parser->m_eventPtr = ptr;
6364 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6365 } else {
6366 enum XML_Error result;
6367 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6368 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6369 *nextPtr = next;
6370 }
6371 return result;
6372 }
6373 } break;
6374 default:
6375 /* The only token returned by XmlAttributeValueTok() that does
6376 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6377 * Getting that would require an entity name to contain an
6378 * incomplete XML character (e.g. \xE2\x82); however previous
6379 * tokenisers will have already recognised and rejected such
6380 * names before XmlAttributeValueTok() gets a look-in. This
6381 * default case should be retained as a safety net, but the code
6382 * excluded from coverage tests.
6383 *
6384 * LCOV_EXCL_START
6385 */
6386 if (enc == parser->m_encoding)
6387 parser->m_eventPtr = ptr;
6388 return XML_ERROR_UNEXPECTED_STATE;
6389 /* LCOV_EXCL_STOP */
6390 }
6391 ptr = next;
6392 }
6393 /* not reached */
6394 }
6395
6396 #if XML_GE == 1
6397 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account,const char ** nextPtr)6398 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6399 const char *entityTextPtr, const char *entityTextEnd,
6400 enum XML_Account account, const char **nextPtr) {
6401 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6402 STRING_POOL *pool = &(dtd->entityValuePool);
6403 enum XML_Error result = XML_ERROR_NONE;
6404 # ifdef XML_DTD
6405 int oldInEntityValue = parser->m_prologState.inEntityValue;
6406 parser->m_prologState.inEntityValue = 1;
6407 # else
6408 UNUSED_P(account);
6409 # endif /* XML_DTD */
6410 /* never return Null for the value argument in EntityDeclHandler,
6411 since this would indicate an external entity; therefore we
6412 have to make sure that entityValuePool.start is not null */
6413 if (! pool->blocks) {
6414 if (! poolGrow(pool))
6415 return XML_ERROR_NO_MEMORY;
6416 }
6417
6418 const char *next;
6419 for (;;) {
6420 next
6421 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6422 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6423
6424 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6425 account)) {
6426 accountingOnAbort(parser);
6427 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6428 goto endEntityValue;
6429 }
6430
6431 switch (tok) {
6432 case XML_TOK_PARAM_ENTITY_REF:
6433 # ifdef XML_DTD
6434 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6435 const XML_Char *name;
6436 ENTITY *entity;
6437 name = poolStoreString(&parser->m_tempPool, enc,
6438 entityTextPtr + enc->minBytesPerChar,
6439 next - enc->minBytesPerChar);
6440 if (! name) {
6441 result = XML_ERROR_NO_MEMORY;
6442 goto endEntityValue;
6443 }
6444 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6445 poolDiscard(&parser->m_tempPool);
6446 if (! entity) {
6447 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6448 /* cannot report skipped entity here - see comments on
6449 parser->m_skippedEntityHandler
6450 if (parser->m_skippedEntityHandler)
6451 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6452 */
6453 dtd->keepProcessing = dtd->standalone;
6454 goto endEntityValue;
6455 }
6456 if (entity->open || (entity == parser->m_declEntity)) {
6457 if (enc == parser->m_encoding)
6458 parser->m_eventPtr = entityTextPtr;
6459 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6460 goto endEntityValue;
6461 }
6462 if (entity->systemId) {
6463 if (parser->m_externalEntityRefHandler) {
6464 dtd->paramEntityRead = XML_FALSE;
6465 entity->open = XML_TRUE;
6466 entityTrackingOnOpen(parser, entity, __LINE__);
6467 if (! parser->m_externalEntityRefHandler(
6468 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6469 entity->systemId, entity->publicId)) {
6470 entityTrackingOnClose(parser, entity, __LINE__);
6471 entity->open = XML_FALSE;
6472 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6473 goto endEntityValue;
6474 }
6475 entityTrackingOnClose(parser, entity, __LINE__);
6476 entity->open = XML_FALSE;
6477 if (! dtd->paramEntityRead)
6478 dtd->keepProcessing = dtd->standalone;
6479 } else
6480 dtd->keepProcessing = dtd->standalone;
6481 } else {
6482 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6483 goto endEntityValue;
6484 }
6485 break;
6486 }
6487 # endif /* XML_DTD */
6488 /* In the internal subset, PE references are not legal
6489 within markup declarations, e.g entity values in this case. */
6490 parser->m_eventPtr = entityTextPtr;
6491 result = XML_ERROR_PARAM_ENTITY_REF;
6492 goto endEntityValue;
6493 case XML_TOK_NONE:
6494 result = XML_ERROR_NONE;
6495 goto endEntityValue;
6496 case XML_TOK_ENTITY_REF:
6497 case XML_TOK_DATA_CHARS:
6498 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6499 result = XML_ERROR_NO_MEMORY;
6500 goto endEntityValue;
6501 }
6502 break;
6503 case XML_TOK_TRAILING_CR:
6504 next = entityTextPtr + enc->minBytesPerChar;
6505 /* fall through */
6506 case XML_TOK_DATA_NEWLINE:
6507 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6508 result = XML_ERROR_NO_MEMORY;
6509 goto endEntityValue;
6510 }
6511 *(pool->ptr)++ = 0xA;
6512 break;
6513 case XML_TOK_CHAR_REF: {
6514 XML_Char buf[XML_ENCODE_MAX];
6515 int i;
6516 int n = XmlCharRefNumber(enc, entityTextPtr);
6517 if (n < 0) {
6518 if (enc == parser->m_encoding)
6519 parser->m_eventPtr = entityTextPtr;
6520 result = XML_ERROR_BAD_CHAR_REF;
6521 goto endEntityValue;
6522 }
6523 n = XmlEncode(n, (ICHAR *)buf);
6524 /* The XmlEncode() functions can never return 0 here. That
6525 * error return happens if the code point passed in is either
6526 * negative or greater than or equal to 0x110000. The
6527 * XmlCharRefNumber() functions will all return a number
6528 * strictly less than 0x110000 or a negative value if an error
6529 * occurred. The negative value is intercepted above, so
6530 * XmlEncode() is never passed a value it might return an
6531 * error for.
6532 */
6533 for (i = 0; i < n; i++) {
6534 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6535 result = XML_ERROR_NO_MEMORY;
6536 goto endEntityValue;
6537 }
6538 *(pool->ptr)++ = buf[i];
6539 }
6540 } break;
6541 case XML_TOK_PARTIAL:
6542 if (enc == parser->m_encoding)
6543 parser->m_eventPtr = entityTextPtr;
6544 result = XML_ERROR_INVALID_TOKEN;
6545 goto endEntityValue;
6546 case XML_TOK_INVALID:
6547 if (enc == parser->m_encoding)
6548 parser->m_eventPtr = next;
6549 result = XML_ERROR_INVALID_TOKEN;
6550 goto endEntityValue;
6551 default:
6552 /* This default case should be unnecessary -- all the tokens
6553 * that XmlEntityValueTok() can return have their own explicit
6554 * cases -- but should be retained for safety. We do however
6555 * exclude it from the coverage statistics.
6556 *
6557 * LCOV_EXCL_START
6558 */
6559 if (enc == parser->m_encoding)
6560 parser->m_eventPtr = entityTextPtr;
6561 result = XML_ERROR_UNEXPECTED_STATE;
6562 goto endEntityValue;
6563 /* LCOV_EXCL_STOP */
6564 }
6565 entityTextPtr = next;
6566 }
6567 endEntityValue:
6568 # ifdef XML_DTD
6569 parser->m_prologState.inEntityValue = oldInEntityValue;
6570 # endif /* XML_DTD */
6571 // If 'nextPtr' is given, it should be updated during the processing
6572 if (nextPtr != NULL) {
6573 *nextPtr = next;
6574 }
6575 return result;
6576 }
6577
6578 static enum XML_Error
callStoreEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6579 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6580 const char *entityTextPtr, const char *entityTextEnd,
6581 enum XML_Account account) {
6582 const char *next = entityTextPtr;
6583 enum XML_Error result = XML_ERROR_NONE;
6584 while (1) {
6585 if (! parser->m_openValueEntities) {
6586 result
6587 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6588 } else {
6589 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6590 if (! openEntity)
6591 return XML_ERROR_UNEXPECTED_STATE;
6592
6593 ENTITY *const entity = openEntity->entity;
6594 const char *const textStart
6595 = ((const char *)entity->textPtr) + entity->processed;
6596 const char *const textEnd
6597 = (const char *)(entity->textPtr + entity->textLen);
6598 /* Set a safe default value in case 'next' does not get set */
6599 const char *nextInEntity = textStart;
6600 if (entity->hasMore) {
6601 result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6602 textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6603 &nextInEntity);
6604 if (result != XML_ERROR_NONE)
6605 break;
6606 // Check if entity is complete, if not, mark down how much of it is
6607 // processed. A XML_SUSPENDED check here is not required as
6608 // appendAttributeValue will never suspend the parser.
6609 if (textEnd != nextInEntity) {
6610 entity->processed
6611 = (int)(nextInEntity - (const char *)entity->textPtr);
6612 continue;
6613 }
6614
6615 // Entity is complete. We cannot close it here since we need to first
6616 // process its possible inner entities (which are added to the
6617 // m_openValueEntities during storeEntityValue)
6618 entity->hasMore = XML_FALSE;
6619 continue;
6620 } // End of entity processing, "if" block skips the rest
6621
6622 // Remove fully processed openEntity from open entity list.
6623 # if XML_GE == 1
6624 entityTrackingOnClose(parser, entity, __LINE__);
6625 # endif
6626 // openEntity is m_openValueEntities' head, since we set it at the
6627 // start of this function and because we skipped storeEntityValue call
6628 // with hasMore set to false. This means we can directly remove the head
6629 // of m_openValueEntities
6630 assert(parser->m_openValueEntities == openEntity);
6631 entity->open = XML_FALSE;
6632 parser->m_openValueEntities = parser->m_openValueEntities->next;
6633
6634 /* put openEntity back in list of free instances */
6635 openEntity->next = parser->m_freeValueEntities;
6636 parser->m_freeValueEntities = openEntity;
6637 }
6638
6639 // Break if an error occurred or there is nothing left to process
6640 if (result
6641 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
6642 break;
6643 }
6644 }
6645
6646 return result;
6647 }
6648
6649 #else /* XML_GE == 0 */
6650
6651 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6652 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6653 // This will store "&entity123;" in entity->textPtr
6654 // to end up as "&entity123;" in the handler.
6655 const char *const entity_start = "&";
6656 const char *const entity_end = ";";
6657
6658 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6659 if (! poolAppendString(pool, entity_start)
6660 || ! poolAppendString(pool, entity->name)
6661 || ! poolAppendString(pool, entity_end)) {
6662 poolDiscard(pool);
6663 return XML_ERROR_NO_MEMORY;
6664 }
6665
6666 entity->textPtr = poolStart(pool);
6667 entity->textLen = (int)(poolLength(pool));
6668 poolFinish(pool);
6669
6670 return XML_ERROR_NONE;
6671 }
6672
6673 #endif /* XML_GE == 0 */
6674
6675 static void FASTCALL
normalizeLines(XML_Char * s)6676 normalizeLines(XML_Char *s) {
6677 XML_Char *p;
6678 for (;; s++) {
6679 if (*s == XML_T('\0'))
6680 return;
6681 if (*s == 0xD)
6682 break;
6683 }
6684 p = s;
6685 do {
6686 if (*s == 0xD) {
6687 *p++ = 0xA;
6688 if (*++s == 0xA)
6689 s++;
6690 } else
6691 *p++ = *s++;
6692 } while (*s);
6693 *p = XML_T('\0');
6694 }
6695
6696 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6697 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6698 const char *start, const char *end) {
6699 const XML_Char *target;
6700 XML_Char *data;
6701 const char *tem;
6702 if (! parser->m_processingInstructionHandler) {
6703 if (parser->m_defaultHandler)
6704 reportDefault(parser, enc, start, end);
6705 return 1;
6706 }
6707 start += enc->minBytesPerChar * 2;
6708 tem = start + XmlNameLength(enc, start);
6709 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6710 if (! target)
6711 return 0;
6712 poolFinish(&parser->m_tempPool);
6713 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6714 end - enc->minBytesPerChar * 2);
6715 if (! data)
6716 return 0;
6717 normalizeLines(data);
6718 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6719 poolClear(&parser->m_tempPool);
6720 return 1;
6721 }
6722
6723 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6724 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6725 const char *end) {
6726 XML_Char *data;
6727 if (! parser->m_commentHandler) {
6728 if (parser->m_defaultHandler)
6729 reportDefault(parser, enc, start, end);
6730 return 1;
6731 }
6732 data = poolStoreString(&parser->m_tempPool, enc,
6733 start + enc->minBytesPerChar * 4,
6734 end - enc->minBytesPerChar * 3);
6735 if (! data)
6736 return 0;
6737 normalizeLines(data);
6738 parser->m_commentHandler(parser->m_handlerArg, data);
6739 poolClear(&parser->m_tempPool);
6740 return 1;
6741 }
6742
6743 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6744 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6745 const char *end) {
6746 if (MUST_CONVERT(enc, s)) {
6747 enum XML_Convert_Result convert_res;
6748 const char **eventPP;
6749 const char **eventEndPP;
6750 if (enc == parser->m_encoding) {
6751 eventPP = &parser->m_eventPtr;
6752 eventEndPP = &parser->m_eventEndPtr;
6753 } else {
6754 /* To get here, two things must be true; the parser must be
6755 * using a character encoding that is not the same as the
6756 * encoding passed in, and the encoding passed in must need
6757 * conversion to the internal format (UTF-8 unless XML_UNICODE
6758 * is defined). The only occasions on which the encoding passed
6759 * in is not the same as the parser's encoding are when it is
6760 * the internal encoding (e.g. a previously defined parameter
6761 * entity, already converted to internal format). This by
6762 * definition doesn't need conversion, so the whole branch never
6763 * gets executed.
6764 *
6765 * For safety's sake we don't delete these lines and merely
6766 * exclude them from coverage statistics.
6767 *
6768 * LCOV_EXCL_START
6769 */
6770 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6771 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6772 /* LCOV_EXCL_STOP */
6773 }
6774 do {
6775 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6776 convert_res
6777 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6778 *eventEndPP = s;
6779 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6780 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6781 *eventPP = s;
6782 } while ((convert_res != XML_CONVERT_COMPLETED)
6783 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6784 } else
6785 parser->m_defaultHandler(
6786 parser->m_handlerArg, (const XML_Char *)s,
6787 (int)((const XML_Char *)end - (const XML_Char *)s));
6788 }
6789
6790 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6791 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6792 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6793 DEFAULT_ATTRIBUTE *att;
6794 if (value || isId) {
6795 /* The handling of default attributes gets messed up if we have
6796 a default which duplicates a non-default. */
6797 int i;
6798 for (i = 0; i < type->nDefaultAtts; i++)
6799 if (attId == type->defaultAtts[i].id)
6800 return 1;
6801 if (isId && ! type->idAtt && ! attId->xmlns)
6802 type->idAtt = attId;
6803 }
6804 if (type->nDefaultAtts == type->allocDefaultAtts) {
6805 if (type->allocDefaultAtts == 0) {
6806 type->allocDefaultAtts = 8;
6807 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6808 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6809 if (! type->defaultAtts) {
6810 type->allocDefaultAtts = 0;
6811 return 0;
6812 }
6813 } else {
6814 DEFAULT_ATTRIBUTE *temp;
6815
6816 /* Detect and prevent integer overflow */
6817 if (type->allocDefaultAtts > INT_MAX / 2) {
6818 return 0;
6819 }
6820
6821 int count = type->allocDefaultAtts * 2;
6822
6823 /* Detect and prevent integer overflow.
6824 * The preprocessor guard addresses the "always false" warning
6825 * from -Wtype-limits on platforms where
6826 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6827 #if UINT_MAX >= SIZE_MAX
6828 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6829 return 0;
6830 }
6831 #endif
6832
6833 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6834 (count * sizeof(DEFAULT_ATTRIBUTE)));
6835 if (temp == NULL)
6836 return 0;
6837 type->allocDefaultAtts = count;
6838 type->defaultAtts = temp;
6839 }
6840 }
6841 att = type->defaultAtts + type->nDefaultAtts;
6842 att->id = attId;
6843 att->value = value;
6844 att->isCdata = isCdata;
6845 if (! isCdata)
6846 attId->maybeTokenized = XML_TRUE;
6847 type->nDefaultAtts += 1;
6848 return 1;
6849 }
6850
6851 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6852 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6853 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6854 const XML_Char *name;
6855 for (name = elementType->name; *name; name++) {
6856 if (*name == XML_T(ASCII_COLON)) {
6857 PREFIX *prefix;
6858 const XML_Char *s;
6859 for (s = elementType->name; s != name; s++) {
6860 if (! poolAppendChar(&dtd->pool, *s))
6861 return 0;
6862 }
6863 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6864 return 0;
6865 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6866 sizeof(PREFIX));
6867 if (! prefix)
6868 return 0;
6869 if (prefix->name == poolStart(&dtd->pool))
6870 poolFinish(&dtd->pool);
6871 else
6872 poolDiscard(&dtd->pool);
6873 elementType->prefix = prefix;
6874 break;
6875 }
6876 }
6877 return 1;
6878 }
6879
6880 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6881 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6882 const char *end) {
6883 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6884 ATTRIBUTE_ID *id;
6885 const XML_Char *name;
6886 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6887 return NULL;
6888 name = poolStoreString(&dtd->pool, enc, start, end);
6889 if (! name)
6890 return NULL;
6891 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6892 ++name;
6893 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6894 sizeof(ATTRIBUTE_ID));
6895 if (! id)
6896 return NULL;
6897 if (id->name != name)
6898 poolDiscard(&dtd->pool);
6899 else {
6900 poolFinish(&dtd->pool);
6901 if (! parser->m_ns)
6902 ;
6903 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6904 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6905 && name[4] == XML_T(ASCII_s)
6906 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6907 if (name[5] == XML_T('\0'))
6908 id->prefix = &dtd->defaultPrefix;
6909 else
6910 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6911 sizeof(PREFIX));
6912 id->xmlns = XML_TRUE;
6913 } else {
6914 int i;
6915 for (i = 0; name[i]; i++) {
6916 /* attributes without prefix are *not* in the default namespace */
6917 if (name[i] == XML_T(ASCII_COLON)) {
6918 int j;
6919 for (j = 0; j < i; j++) {
6920 if (! poolAppendChar(&dtd->pool, name[j]))
6921 return NULL;
6922 }
6923 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6924 return NULL;
6925 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6926 poolStart(&dtd->pool), sizeof(PREFIX));
6927 if (! id->prefix)
6928 return NULL;
6929 if (id->prefix->name == poolStart(&dtd->pool))
6930 poolFinish(&dtd->pool);
6931 else
6932 poolDiscard(&dtd->pool);
6933 break;
6934 }
6935 }
6936 }
6937 }
6938 return id;
6939 }
6940
6941 #define CONTEXT_SEP XML_T(ASCII_FF)
6942
6943 static const XML_Char *
getContext(XML_Parser parser)6944 getContext(XML_Parser parser) {
6945 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6946 HASH_TABLE_ITER iter;
6947 XML_Bool needSep = XML_FALSE;
6948
6949 if (dtd->defaultPrefix.binding) {
6950 int i;
6951 int len;
6952 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6953 return NULL;
6954 len = dtd->defaultPrefix.binding->uriLen;
6955 if (parser->m_namespaceSeparator)
6956 len--;
6957 for (i = 0; i < len; i++) {
6958 if (! poolAppendChar(&parser->m_tempPool,
6959 dtd->defaultPrefix.binding->uri[i])) {
6960 /* Because of memory caching, I don't believe this line can be
6961 * executed.
6962 *
6963 * This is part of a loop copying the default prefix binding
6964 * URI into the parser's temporary string pool. Previously,
6965 * that URI was copied into the same string pool, with a
6966 * terminating NUL character, as part of setContext(). When
6967 * the pool was cleared, that leaves a block definitely big
6968 * enough to hold the URI on the free block list of the pool.
6969 * The URI copy in getContext() therefore cannot run out of
6970 * memory.
6971 *
6972 * If the pool is used between the setContext() and
6973 * getContext() calls, the worst it can do is leave a bigger
6974 * block on the front of the free list. Given that this is
6975 * all somewhat inobvious and program logic can be changed, we
6976 * don't delete the line but we do exclude it from the test
6977 * coverage statistics.
6978 */
6979 return NULL; /* LCOV_EXCL_LINE */
6980 }
6981 }
6982 needSep = XML_TRUE;
6983 }
6984
6985 hashTableIterInit(&iter, &(dtd->prefixes));
6986 for (;;) {
6987 int i;
6988 int len;
6989 const XML_Char *s;
6990 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6991 if (! prefix)
6992 break;
6993 if (! prefix->binding) {
6994 /* This test appears to be (justifiable) paranoia. There does
6995 * not seem to be a way of injecting a prefix without a binding
6996 * that doesn't get errored long before this function is called.
6997 * The test should remain for safety's sake, so we instead
6998 * exclude the following line from the coverage statistics.
6999 */
7000 continue; /* LCOV_EXCL_LINE */
7001 }
7002 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7003 return NULL;
7004 for (s = prefix->name; *s; s++)
7005 if (! poolAppendChar(&parser->m_tempPool, *s))
7006 return NULL;
7007 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7008 return NULL;
7009 len = prefix->binding->uriLen;
7010 if (parser->m_namespaceSeparator)
7011 len--;
7012 for (i = 0; i < len; i++)
7013 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7014 return NULL;
7015 needSep = XML_TRUE;
7016 }
7017
7018 hashTableIterInit(&iter, &(dtd->generalEntities));
7019 for (;;) {
7020 const XML_Char *s;
7021 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7022 if (! e)
7023 break;
7024 if (! e->open)
7025 continue;
7026 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7027 return NULL;
7028 for (s = e->name; *s; s++)
7029 if (! poolAppendChar(&parser->m_tempPool, *s))
7030 return 0;
7031 needSep = XML_TRUE;
7032 }
7033
7034 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7035 return NULL;
7036 return parser->m_tempPool.start;
7037 }
7038
7039 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)7040 setContext(XML_Parser parser, const XML_Char *context) {
7041 if (context == NULL) {
7042 return XML_FALSE;
7043 }
7044
7045 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7046 const XML_Char *s = context;
7047
7048 while (*context != XML_T('\0')) {
7049 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7050 ENTITY *e;
7051 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7052 return XML_FALSE;
7053 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7054 poolStart(&parser->m_tempPool), 0);
7055 if (e)
7056 e->open = XML_TRUE;
7057 if (*s != XML_T('\0'))
7058 s++;
7059 context = s;
7060 poolDiscard(&parser->m_tempPool);
7061 } else if (*s == XML_T(ASCII_EQUALS)) {
7062 PREFIX *prefix;
7063 if (poolLength(&parser->m_tempPool) == 0)
7064 prefix = &dtd->defaultPrefix;
7065 else {
7066 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7067 return XML_FALSE;
7068 prefix
7069 = (PREFIX *)lookup(parser, &dtd->prefixes,
7070 poolStart(&parser->m_tempPool), sizeof(PREFIX));
7071 if (! prefix)
7072 return XML_FALSE;
7073 if (prefix->name == poolStart(&parser->m_tempPool)) {
7074 prefix->name = poolCopyString(&dtd->pool, prefix->name);
7075 if (! prefix->name)
7076 return XML_FALSE;
7077 }
7078 poolDiscard(&parser->m_tempPool);
7079 }
7080 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7081 context++)
7082 if (! poolAppendChar(&parser->m_tempPool, *context))
7083 return XML_FALSE;
7084 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7085 return XML_FALSE;
7086 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7087 &parser->m_inheritedBindings)
7088 != XML_ERROR_NONE)
7089 return XML_FALSE;
7090 poolDiscard(&parser->m_tempPool);
7091 if (*context != XML_T('\0'))
7092 ++context;
7093 s = context;
7094 } else {
7095 if (! poolAppendChar(&parser->m_tempPool, *s))
7096 return XML_FALSE;
7097 s++;
7098 }
7099 }
7100 return XML_TRUE;
7101 }
7102
7103 static void FASTCALL
normalizePublicId(XML_Char * publicId)7104 normalizePublicId(XML_Char *publicId) {
7105 XML_Char *p = publicId;
7106 XML_Char *s;
7107 for (s = publicId; *s; s++) {
7108 switch (*s) {
7109 case 0x20:
7110 case 0xD:
7111 case 0xA:
7112 if (p != publicId && p[-1] != 0x20)
7113 *p++ = 0x20;
7114 break;
7115 default:
7116 *p++ = *s;
7117 }
7118 }
7119 if (p != publicId && p[-1] == 0x20)
7120 --p;
7121 *p = XML_T('\0');
7122 }
7123
7124 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)7125 dtdCreate(const XML_Memory_Handling_Suite *ms) {
7126 DTD *p = ms->malloc_fcn(sizeof(DTD));
7127 if (p == NULL)
7128 return p;
7129 poolInit(&(p->pool), ms);
7130 poolInit(&(p->entityValuePool), ms);
7131 hashTableInit(&(p->generalEntities), ms);
7132 hashTableInit(&(p->elementTypes), ms);
7133 hashTableInit(&(p->attributeIds), ms);
7134 hashTableInit(&(p->prefixes), ms);
7135 #ifdef XML_DTD
7136 p->paramEntityRead = XML_FALSE;
7137 hashTableInit(&(p->paramEntities), ms);
7138 #endif /* XML_DTD */
7139 p->defaultPrefix.name = NULL;
7140 p->defaultPrefix.binding = NULL;
7141
7142 p->in_eldecl = XML_FALSE;
7143 p->scaffIndex = NULL;
7144 p->scaffold = NULL;
7145 p->scaffLevel = 0;
7146 p->scaffSize = 0;
7147 p->scaffCount = 0;
7148 p->contentStringLen = 0;
7149
7150 p->keepProcessing = XML_TRUE;
7151 p->hasParamEntityRefs = XML_FALSE;
7152 p->standalone = XML_FALSE;
7153 return p;
7154 }
7155
7156 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)7157 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
7158 HASH_TABLE_ITER iter;
7159 hashTableIterInit(&iter, &(p->elementTypes));
7160 for (;;) {
7161 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7162 if (! e)
7163 break;
7164 if (e->allocDefaultAtts != 0)
7165 ms->free_fcn(e->defaultAtts);
7166 }
7167 hashTableClear(&(p->generalEntities));
7168 #ifdef XML_DTD
7169 p->paramEntityRead = XML_FALSE;
7170 hashTableClear(&(p->paramEntities));
7171 #endif /* XML_DTD */
7172 hashTableClear(&(p->elementTypes));
7173 hashTableClear(&(p->attributeIds));
7174 hashTableClear(&(p->prefixes));
7175 poolClear(&(p->pool));
7176 poolClear(&(p->entityValuePool));
7177 p->defaultPrefix.name = NULL;
7178 p->defaultPrefix.binding = NULL;
7179
7180 p->in_eldecl = XML_FALSE;
7181
7182 ms->free_fcn(p->scaffIndex);
7183 p->scaffIndex = NULL;
7184 ms->free_fcn(p->scaffold);
7185 p->scaffold = NULL;
7186
7187 p->scaffLevel = 0;
7188 p->scaffSize = 0;
7189 p->scaffCount = 0;
7190 p->contentStringLen = 0;
7191
7192 p->keepProcessing = XML_TRUE;
7193 p->hasParamEntityRefs = XML_FALSE;
7194 p->standalone = XML_FALSE;
7195 }
7196
7197 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)7198 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
7199 HASH_TABLE_ITER iter;
7200 hashTableIterInit(&iter, &(p->elementTypes));
7201 for (;;) {
7202 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7203 if (! e)
7204 break;
7205 if (e->allocDefaultAtts != 0)
7206 ms->free_fcn(e->defaultAtts);
7207 }
7208 hashTableDestroy(&(p->generalEntities));
7209 #ifdef XML_DTD
7210 hashTableDestroy(&(p->paramEntities));
7211 #endif /* XML_DTD */
7212 hashTableDestroy(&(p->elementTypes));
7213 hashTableDestroy(&(p->attributeIds));
7214 hashTableDestroy(&(p->prefixes));
7215 poolDestroy(&(p->pool));
7216 poolDestroy(&(p->entityValuePool));
7217 if (isDocEntity) {
7218 ms->free_fcn(p->scaffIndex);
7219 ms->free_fcn(p->scaffold);
7220 }
7221 ms->free_fcn(p);
7222 }
7223
7224 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7225 The new DTD has already been initialized.
7226 */
7227 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)7228 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7229 const XML_Memory_Handling_Suite *ms) {
7230 HASH_TABLE_ITER iter;
7231
7232 /* Copy the prefix table. */
7233
7234 hashTableIterInit(&iter, &(oldDtd->prefixes));
7235 for (;;) {
7236 const XML_Char *name;
7237 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7238 if (! oldP)
7239 break;
7240 name = poolCopyString(&(newDtd->pool), oldP->name);
7241 if (! name)
7242 return 0;
7243 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7244 return 0;
7245 }
7246
7247 hashTableIterInit(&iter, &(oldDtd->attributeIds));
7248
7249 /* Copy the attribute id table. */
7250
7251 for (;;) {
7252 ATTRIBUTE_ID *newA;
7253 const XML_Char *name;
7254 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7255
7256 if (! oldA)
7257 break;
7258 /* Remember to allocate the scratch byte before the name. */
7259 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7260 return 0;
7261 name = poolCopyString(&(newDtd->pool), oldA->name);
7262 if (! name)
7263 return 0;
7264 ++name;
7265 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7266 sizeof(ATTRIBUTE_ID));
7267 if (! newA)
7268 return 0;
7269 newA->maybeTokenized = oldA->maybeTokenized;
7270 if (oldA->prefix) {
7271 newA->xmlns = oldA->xmlns;
7272 if (oldA->prefix == &oldDtd->defaultPrefix)
7273 newA->prefix = &newDtd->defaultPrefix;
7274 else
7275 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7276 oldA->prefix->name, 0);
7277 }
7278 }
7279
7280 /* Copy the element type table. */
7281
7282 hashTableIterInit(&iter, &(oldDtd->elementTypes));
7283
7284 for (;;) {
7285 int i;
7286 ELEMENT_TYPE *newE;
7287 const XML_Char *name;
7288 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7289 if (! oldE)
7290 break;
7291 name = poolCopyString(&(newDtd->pool), oldE->name);
7292 if (! name)
7293 return 0;
7294 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7295 sizeof(ELEMENT_TYPE));
7296 if (! newE)
7297 return 0;
7298 if (oldE->nDefaultAtts) {
7299 /* Detect and prevent integer overflow.
7300 * The preprocessor guard addresses the "always false" warning
7301 * from -Wtype-limits on platforms where
7302 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7303 #if UINT_MAX >= SIZE_MAX
7304 if ((size_t)oldE->nDefaultAtts
7305 > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7306 return 0;
7307 }
7308 #endif
7309 newE->defaultAtts
7310 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7311 if (! newE->defaultAtts) {
7312 return 0;
7313 }
7314 }
7315 if (oldE->idAtt)
7316 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7317 oldE->idAtt->name, 0);
7318 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7319 if (oldE->prefix)
7320 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7321 oldE->prefix->name, 0);
7322 for (i = 0; i < newE->nDefaultAtts; i++) {
7323 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7324 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7325 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7326 if (oldE->defaultAtts[i].value) {
7327 newE->defaultAtts[i].value
7328 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7329 if (! newE->defaultAtts[i].value)
7330 return 0;
7331 } else
7332 newE->defaultAtts[i].value = NULL;
7333 }
7334 }
7335
7336 /* Copy the entity tables. */
7337 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7338 &(oldDtd->generalEntities)))
7339 return 0;
7340
7341 #ifdef XML_DTD
7342 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7343 &(oldDtd->paramEntities)))
7344 return 0;
7345 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7346 #endif /* XML_DTD */
7347
7348 newDtd->keepProcessing = oldDtd->keepProcessing;
7349 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7350 newDtd->standalone = oldDtd->standalone;
7351
7352 /* Don't want deep copying for scaffolding */
7353 newDtd->in_eldecl = oldDtd->in_eldecl;
7354 newDtd->scaffold = oldDtd->scaffold;
7355 newDtd->contentStringLen = oldDtd->contentStringLen;
7356 newDtd->scaffSize = oldDtd->scaffSize;
7357 newDtd->scaffLevel = oldDtd->scaffLevel;
7358 newDtd->scaffIndex = oldDtd->scaffIndex;
7359
7360 return 1;
7361 } /* End dtdCopy */
7362
7363 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7364 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7365 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7366 HASH_TABLE_ITER iter;
7367 const XML_Char *cachedOldBase = NULL;
7368 const XML_Char *cachedNewBase = NULL;
7369
7370 hashTableIterInit(&iter, oldTable);
7371
7372 for (;;) {
7373 ENTITY *newE;
7374 const XML_Char *name;
7375 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7376 if (! oldE)
7377 break;
7378 name = poolCopyString(newPool, oldE->name);
7379 if (! name)
7380 return 0;
7381 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7382 if (! newE)
7383 return 0;
7384 if (oldE->systemId) {
7385 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7386 if (! tem)
7387 return 0;
7388 newE->systemId = tem;
7389 if (oldE->base) {
7390 if (oldE->base == cachedOldBase)
7391 newE->base = cachedNewBase;
7392 else {
7393 cachedOldBase = oldE->base;
7394 tem = poolCopyString(newPool, cachedOldBase);
7395 if (! tem)
7396 return 0;
7397 cachedNewBase = newE->base = tem;
7398 }
7399 }
7400 if (oldE->publicId) {
7401 tem = poolCopyString(newPool, oldE->publicId);
7402 if (! tem)
7403 return 0;
7404 newE->publicId = tem;
7405 }
7406 } else {
7407 const XML_Char *tem
7408 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7409 if (! tem)
7410 return 0;
7411 newE->textPtr = tem;
7412 newE->textLen = oldE->textLen;
7413 }
7414 if (oldE->notation) {
7415 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7416 if (! tem)
7417 return 0;
7418 newE->notation = tem;
7419 }
7420 newE->is_param = oldE->is_param;
7421 newE->is_internal = oldE->is_internal;
7422 }
7423 return 1;
7424 }
7425
/* Base-2 logarithm of the slot count a hash table receives on its first
 * insertion: lookup() sets table->power to this value and table->size to
 * (size_t)1 << INIT_POWER, i.e. 64 slots. */
#define INIT_POWER 6
7427
7428 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7429 keyeq(KEY s1, KEY s2) {
7430 for (; *s1 == *s2; s1++, s2++)
7431 if (*s1 == 0)
7432 return XML_TRUE;
7433 return XML_FALSE;
7434 }
7435
7436 static size_t
keylen(KEY s)7437 keylen(KEY s) {
7438 size_t len = 0;
7439 for (; *s; s++, len++)
7440 ;
7441 return len;
7442 }
7443
7444 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7445 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7446 key->k[0] = 0;
7447 key->k[1] = get_hash_secret_salt(parser);
7448 }
7449
7450 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7451 hash(XML_Parser parser, KEY s) {
7452 struct siphash state;
7453 struct sipkey key;
7454 (void)sip24_valid;
7455 copy_salt_to_sipkey(parser, &key);
7456 sip24_init(&state, &key);
7457 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7458 return (unsigned long)sip24_final(&state);
7459 }
7460
/* Find the entry called `name` in `table`, optionally creating it.
 *
 * parser      is passed on to hash() to compute hash values.
 * table       is the hash table to search; its slot array is allocated on
 *             the first insertion.
 * name        is the zero-terminated key.  A newly created entry stores this
 *             pointer as-is; the string is not copied.
 * createSize  is the number of bytes to allocate and zero-fill for a new
 *             entry when the key is absent; pass 0 for a pure lookup.
 *
 * Returns the existing or newly created entry.  Returns NULL when the key is
 * absent and createSize is 0, when an allocation fails, or when the table
 * would have to grow beyond what the size checks below allow.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* No slot array yet: nothing can be found, so either give up or
     * allocate the initial array. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Keep the table marked as having no slot array. */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    /* size is a power of 2, so size - 1 works as an index mask. */
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0;
    i = h & mask;
    /* Probe until the key or an empty slot is found.  The stride comes from
     * PROBE_STEP and is only computed once a collision actually happens;
     * the index moves downwards and wraps around at 0. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      /* Double the table and re-insert every existing entry. */
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          /* Hashes are not stored, so recompute each one for the new mask. */
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* The slot found above belonged to the old array; search again for an
       * empty slot in the new one. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* i now indexes an empty slot: create the zero-filled entry there. */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7549
7550 static void FASTCALL
hashTableClear(HASH_TABLE * table)7551 hashTableClear(HASH_TABLE *table) {
7552 size_t i;
7553 for (i = 0; i < table->size; i++) {
7554 table->mem->free_fcn(table->v[i]);
7555 table->v[i] = NULL;
7556 }
7557 table->used = 0;
7558 }
7559
7560 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7561 hashTableDestroy(HASH_TABLE *table) {
7562 size_t i;
7563 for (i = 0; i < table->size; i++)
7564 table->mem->free_fcn(table->v[i]);
7565 table->mem->free_fcn(table->v);
7566 }
7567
7568 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7569 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7570 p->power = 0;
7571 p->size = 0;
7572 p->used = 0;
7573 p->v = NULL;
7574 p->mem = ms;
7575 }
7576
7577 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7578 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7579 iter->p = table->v;
7580 iter->end = iter->p ? iter->p + table->size : NULL;
7581 }
7582
7583 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7584 hashTableIterNext(HASH_TABLE_ITER *iter) {
7585 while (iter->p != iter->end) {
7586 NAMED *tem = *(iter->p)++;
7587 if (tem)
7588 return tem;
7589 }
7590 return NULL;
7591 }
7592
7593 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7594 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7595 pool->blocks = NULL;
7596 pool->freeBlocks = NULL;
7597 pool->start = NULL;
7598 pool->ptr = NULL;
7599 pool->end = NULL;
7600 pool->mem = ms;
7601 }
7602
7603 static void FASTCALL
poolClear(STRING_POOL * pool)7604 poolClear(STRING_POOL *pool) {
7605 if (! pool->freeBlocks)
7606 pool->freeBlocks = pool->blocks;
7607 else {
7608 BLOCK *p = pool->blocks;
7609 while (p) {
7610 BLOCK *tem = p->next;
7611 p->next = pool->freeBlocks;
7612 pool->freeBlocks = p;
7613 p = tem;
7614 }
7615 }
7616 pool->blocks = NULL;
7617 pool->start = NULL;
7618 pool->ptr = NULL;
7619 pool->end = NULL;
7620 }
7621
7622 static void FASTCALL
poolDestroy(STRING_POOL * pool)7623 poolDestroy(STRING_POOL *pool) {
7624 BLOCK *p = pool->blocks;
7625 while (p) {
7626 BLOCK *tem = p->next;
7627 pool->mem->free_fcn(p);
7628 p = tem;
7629 }
7630 p = pool->freeBlocks;
7631 while (p) {
7632 BLOCK *tem = p->next;
7633 pool->mem->free_fcn(p);
7634 p = tem;
7635 }
7636 }
7637
7638 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7639 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7640 const char *end) {
7641 if (! pool->ptr && ! poolGrow(pool))
7642 return NULL;
7643 for (;;) {
7644 const enum XML_Convert_Result convert_res = XmlConvert(
7645 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7646 if ((convert_res == XML_CONVERT_COMPLETED)
7647 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7648 break;
7649 if (! poolGrow(pool))
7650 return NULL;
7651 }
7652 return pool->start;
7653 }
7654
7655 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7656 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7657 do {
7658 if (! poolAppendChar(pool, *s))
7659 return NULL;
7660 } while (*s++);
7661 s = pool->start;
7662 poolFinish(pool);
7663 return s;
7664 }
7665
7666 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7667 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7668 if (! pool->ptr && ! poolGrow(pool)) {
7669 /* The following line is unreachable given the current usage of
7670 * poolCopyStringN(). Currently it is called from exactly one
7671 * place to copy the text of a simple general entity. By that
7672 * point, the name of the entity is already stored in the pool, so
7673 * pool->ptr cannot be NULL.
7674 *
7675 * If poolCopyStringN() is used elsewhere as it well might be,
7676 * this line may well become executable again. Regardless, this
7677 * sort of check shouldn't be removed lightly, so we just exclude
7678 * it from the coverage statistics.
7679 */
7680 return NULL; /* LCOV_EXCL_LINE */
7681 }
7682 for (; n > 0; --n, s++) {
7683 if (! poolAppendChar(pool, *s))
7684 return NULL;
7685 }
7686 s = pool->start;
7687 poolFinish(pool);
7688 return s;
7689 }
7690
7691 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7692 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7693 while (*s) {
7694 if (! poolAppendChar(pool, *s))
7695 return NULL;
7696 s++;
7697 }
7698 return pool->start;
7699 }
7700
7701 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7702 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7703 const char *end) {
7704 if (! poolAppend(pool, enc, ptr, end))
7705 return NULL;
7706 if (pool->ptr == pool->end && ! poolGrow(pool))
7707 return NULL;
7708 *(pool->ptr)++ = 0;
7709 return pool->start;
7710 }
7711
7712 static size_t
poolBytesToAllocateFor(int blockSize)7713 poolBytesToAllocateFor(int blockSize) {
7714 /* Unprotected math would be:
7715 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7716 **
7717 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7718 ** For a + b * c we check b * c in isolation first, so that addition of a
7719 ** on top has no chance of making us accept a small non-negative number
7720 */
7721 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7722
7723 if (blockSize <= 0)
7724 return 0;
7725
7726 if (blockSize > (int)(INT_MAX / stretch))
7727 return 0;
7728
7729 {
7730 const int stretchedBlockSize = blockSize * (int)stretch;
7731 const int bytesToAllocate
7732 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7733 if (bytesToAllocate < 0)
7734 return 0;
7735
7736 return (size_t)bytesToAllocate;
7737 }
7738 }
7739
/* Make room for more characters in the string under construction.
 *
 * Three strategies are tried in order:
 *   1. Reuse the first block on the free list, either as the very first
 *      block of an empty pool or, when it is larger than the current
 *      window, as the new home of the string under construction.
 *   2. When the string under construction starts at the beginning of the
 *      newest block, enlarge that block in place with realloc_fcn to twice
 *      its character capacity.
 *   3. Otherwise allocate a fresh block (at least INIT_BLOCK_SIZE
 *      characters, else twice the current window) and copy the string under
 *      construction into it.
 *
 * Returns XML_TRUE on success.  Returns XML_FALSE when a size computation
 * would overflow or an allocation fails; the pool is left unchanged then.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* No window at all yet: take the first free block as the only
       * in-use block. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is larger than the current window: move it to
       * the front of the in-use list and relocate the string into it. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* Re-base ptr before start is overwritten. */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The string under construction is the only content of the newest
     * block, so that block can be enlarged in place. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than INT_MAX/2
       * bytes have already been allocated.  This isn't readily
       * testable, since it is unlikely that an average machine will
       * have that much memory, so we exclude it from the coverage
       * statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    /* The result goes into a temporary so that pool->blocks stays valid
     * when the reallocation fails. */
    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* The newest block also holds other data (or there is no block yet):
     * allocate a separate block for the string under construction. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry over what has been written of the string so far. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    /* Re-base ptr before start is overwritten. */
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7843
7844 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7845 nextScaffoldPart(XML_Parser parser) {
7846 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7847 CONTENT_SCAFFOLD *me;
7848 int next;
7849
7850 if (! dtd->scaffIndex) {
7851 /* Detect and prevent integer overflow.
7852 * The preprocessor guard addresses the "always false" warning
7853 * from -Wtype-limits on platforms where
7854 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7855 #if UINT_MAX >= SIZE_MAX
7856 if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
7857 return -1;
7858 }
7859 #endif
7860 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7861 if (! dtd->scaffIndex)
7862 return -1;
7863 dtd->scaffIndex[0] = 0;
7864 }
7865
7866 if (dtd->scaffCount >= dtd->scaffSize) {
7867 CONTENT_SCAFFOLD *temp;
7868 if (dtd->scaffold) {
7869 /* Detect and prevent integer overflow */
7870 if (dtd->scaffSize > UINT_MAX / 2u) {
7871 return -1;
7872 }
7873 /* Detect and prevent integer overflow.
7874 * The preprocessor guard addresses the "always false" warning
7875 * from -Wtype-limits on platforms where
7876 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7877 #if UINT_MAX >= SIZE_MAX
7878 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7879 return -1;
7880 }
7881 #endif
7882
7883 temp = (CONTENT_SCAFFOLD *)REALLOC(
7884 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7885 if (temp == NULL)
7886 return -1;
7887 dtd->scaffSize *= 2;
7888 } else {
7889 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7890 * sizeof(CONTENT_SCAFFOLD));
7891 if (temp == NULL)
7892 return -1;
7893 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7894 }
7895 dtd->scaffold = temp;
7896 }
7897 next = dtd->scaffCount++;
7898 me = &dtd->scaffold[next];
7899 if (dtd->scaffLevel) {
7900 CONTENT_SCAFFOLD *parent
7901 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7902 if (parent->lastchild) {
7903 dtd->scaffold[parent->lastchild].nextsib = next;
7904 }
7905 if (! parent->childcnt)
7906 parent->firstchild = next;
7907 parent->lastchild = next;
7908 parent->childcnt++;
7909 }
7910 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7911 return next;
7912 }
7913
/* Build the XML_Content tree for the element declaration whose scaffold is
 * currently held in parser->m_dtd.
 *
 * Returns a single allocation obtained with MALLOC(parser, ...) that holds
 * dtd->scaffCount XML_Content nodes followed by the name strings, or NULL
 * when the size computation would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of the node bytes and the string bytes must not overflow
   * either. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion by input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving the recursion.
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one
   *   does the work, the other creates "jobs" for its colleague to do, and
   *   leads the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" for the other one to pick up once it reaches that place in
   *     the array: leader "jobDest" stores the source node array index
   *     (relative to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to
   *   do when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been
   *   processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The strings are stored directly behind the node array. */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree  */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
       * area; it has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      /* The children will occupy the next free slots of the array. */
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
8040
8041 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)8042 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8043 const char *end) {
8044 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8045 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8046 ELEMENT_TYPE *ret;
8047
8048 if (! name)
8049 return NULL;
8050 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8051 sizeof(ELEMENT_TYPE));
8052 if (! ret)
8053 return NULL;
8054 if (ret->name != name)
8055 poolDiscard(&dtd->pool);
8056 else {
8057 poolFinish(&dtd->pool);
8058 if (! setElementTypePrefix(parser, ret))
8059 return NULL;
8060 }
8061 return ret;
8062 }
8063
8064 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)8065 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
8066 size_t charsRequired = 0;
8067 XML_Char *result;
8068
8069 /* First determine how long the string is */
8070 while (s[charsRequired] != 0) {
8071 charsRequired++;
8072 }
8073 /* Include the terminator */
8074 charsRequired++;
8075
8076 /* Now allocate space for the copy */
8077 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
8078 if (result == NULL)
8079 return NULL;
8080 /* Copy the original into place */
8081 memcpy(result, s, charsRequired * sizeof(XML_Char));
8082 return result;
8083 }
8084
8085 #if XML_GE == 1
8086
8087 static float
accountingGetCurrentAmplification(XML_Parser rootParser)8088 accountingGetCurrentAmplification(XML_Parser rootParser) {
8089 // 1.........1.........12 => 22
8090 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8091 const XmlBigCount countBytesOutput
8092 = rootParser->m_accounting.countBytesDirect
8093 + rootParser->m_accounting.countBytesIndirect;
8094 const float amplificationFactor
8095 = rootParser->m_accounting.countBytesDirect
8096 ? (countBytesOutput
8097 / (float)(rootParser->m_accounting.countBytesDirect))
8098 : ((lenOfShortestInclude
8099 + rootParser->m_accounting.countBytesIndirect)
8100 / (float)lenOfShortestInclude);
8101 assert(! rootParser->m_parentParser);
8102 return amplificationFactor;
8103 }
8104
8105 static void
accountingReportStats(XML_Parser originParser,const char * epilog)8106 accountingReportStats(XML_Parser originParser, const char *epilog) {
8107 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8108 assert(! rootParser->m_parentParser);
8109
8110 if (rootParser->m_accounting.debugLevel == 0u) {
8111 return;
8112 }
8113
8114 const float amplificationFactor
8115 = accountingGetCurrentAmplification(rootParser);
8116 fprintf(stderr,
8117 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8118 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8119 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8120 rootParser->m_accounting.countBytesIndirect,
8121 (double)amplificationFactor, epilog);
8122 }
8123
8124 static void
accountingOnAbort(XML_Parser originParser)8125 accountingOnAbort(XML_Parser originParser) {
8126 accountingReportStats(originParser, " ABORTING\n");
8127 }
8128
8129 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)8130 accountingReportDiff(XML_Parser rootParser,
8131 unsigned int levelsAwayFromRootParser, const char *before,
8132 const char *after, ptrdiff_t bytesMore, int source_line,
8133 enum XML_Account account) {
8134 assert(! rootParser->m_parentParser);
8135
8136 fprintf(stderr,
8137 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8138 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8139 levelsAwayFromRootParser, source_line, 10, "");
8140
8141 const char ellipis[] = "[..]";
8142 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8143 const unsigned int contextLength = 10;
8144
8145 /* Note: Performance is of no concern here */
8146 const char *walker = before;
8147 if ((rootParser->m_accounting.debugLevel >= 3u)
8148 || (after - before)
8149 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8150 for (; walker < after; walker++) {
8151 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8152 }
8153 } else {
8154 for (; walker < before + contextLength; walker++) {
8155 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8156 }
8157 fprintf(stderr, ellipis);
8158 walker = after - contextLength;
8159 for (; walker < after; walker++) {
8160 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8161 }
8162 }
8163 fprintf(stderr, "\"\n");
8164 }
8165
8166 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)8167 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8168 const char *after, int source_line,
8169 enum XML_Account account) {
8170 /* Note: We need to check the token type *first* to be sure that
8171 * we can even access variable <after>, safely.
8172 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8173 switch (tok) {
8174 case XML_TOK_INVALID:
8175 case XML_TOK_PARTIAL:
8176 case XML_TOK_PARTIAL_CHAR:
8177 case XML_TOK_NONE:
8178 return XML_TRUE;
8179 }
8180
8181 if (account == XML_ACCOUNT_NONE)
8182 return XML_TRUE; /* because these bytes have been accounted for, already */
8183
8184 unsigned int levelsAwayFromRootParser;
8185 const XML_Parser rootParser
8186 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8187 assert(! rootParser->m_parentParser);
8188
8189 const int isDirect
8190 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8191 const ptrdiff_t bytesMore = after - before;
8192
8193 XmlBigCount *const additionTarget
8194 = isDirect ? &rootParser->m_accounting.countBytesDirect
8195 : &rootParser->m_accounting.countBytesIndirect;
8196
8197 /* Detect and avoid integer overflow */
8198 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8199 return XML_FALSE;
8200 *additionTarget += bytesMore;
8201
8202 const XmlBigCount countBytesOutput
8203 = rootParser->m_accounting.countBytesDirect
8204 + rootParser->m_accounting.countBytesIndirect;
8205 const float amplificationFactor
8206 = accountingGetCurrentAmplification(rootParser);
8207 const XML_Bool tolerated
8208 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8209 || (amplificationFactor
8210 <= rootParser->m_accounting.maximumAmplificationFactor);
8211
8212 if (rootParser->m_accounting.debugLevel >= 2u) {
8213 accountingReportStats(rootParser, "");
8214 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8215 bytesMore, source_line, account);
8216 }
8217
8218 return tolerated;
8219 }
8220
8221 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)8222 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8223 if (! parser)
8224 return 0;
8225 return parser->m_accounting.countBytesDirect;
8226 }
8227
8228 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)8229 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8230 if (! parser)
8231 return 0;
8232 return parser->m_accounting.countBytesIndirect;
8233 }
8234
8235 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)8236 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8237 const char *action, int sourceLine) {
8238 assert(! rootParser->m_parentParser);
8239 if (rootParser->m_entity_stats.debugLevel == 0u)
8240 return;
8241
8242 # if defined(XML_UNICODE)
8243 const char *const entityName = "[..]";
8244 # else
8245 const char *const entityName = entity->name;
8246 # endif
8247
8248 fprintf(
8249 stderr,
8250 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8251 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8252 rootParser->m_entity_stats.currentDepth,
8253 rootParser->m_entity_stats.maximumDepthSeen,
8254 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8255 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8256 sourceLine);
8257 }
8258
8259 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)8260 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8261 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8262 assert(! rootParser->m_parentParser);
8263
8264 rootParser->m_entity_stats.countEverOpened++;
8265 rootParser->m_entity_stats.currentDepth++;
8266 if (rootParser->m_entity_stats.currentDepth
8267 > rootParser->m_entity_stats.maximumDepthSeen) {
8268 rootParser->m_entity_stats.maximumDepthSeen++;
8269 }
8270
8271 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8272 }
8273
8274 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)8275 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8276 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8277 assert(! rootParser->m_parentParser);
8278
8279 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8280 rootParser->m_entity_stats.currentDepth--;
8281 }
8282
8283 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8284 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8285 XML_Parser rootParser = parser;
8286 unsigned int stepsTakenUpwards = 0;
8287 while (rootParser->m_parentParser) {
8288 rootParser = rootParser->m_parentParser;
8289 stepsTakenUpwards++;
8290 }
8291 assert(! rootParser->m_parentParser);
8292 if (outLevelDiff != NULL) {
8293 *outLevelDiff = stepsTakenUpwards;
8294 }
8295 return rootParser;
8296 }
8297
/* Map byte value c to a static, zero-terminated string made of printable
   ASCII characters only.

   - Printable ASCII (32..126) maps to itself, except that the double
     quote becomes \" and the backslash becomes \\ .
   - 0, 9, 10 and 13 map to \0, \t, \n and \r respectively.
   - Every other value maps to \x followed by its upper-case hexadecimal
     value without zero padding (e.g. \x1, \xB, \x7F, \xFF).

   The returned string is a literal; callers must not modify or free it. */
const char *
unsignedCharToPrintable(unsigned char c) {
  static const char *const printable[] = {
      /* 0x00 */
      "\\0", "\\x1", "\\x2", "\\x3", "\\x4", "\\x5", "\\x6", "\\x7",
      "\\x8", "\\t", "\\n", "\\xB", "\\xC", "\\r", "\\xE", "\\xF",
      /* 0x10 */
      "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */
      " ", "!", "\\\"", "#", "$", "%", "&", "'",
      "(", ")", "*", "+", ",", "-", ".", "/",
      /* 0x30 */
      "0", "1", "2", "3", "4", "5", "6", "7",
      "8", "9", ":", ";", "<", "=", ">", "?",
      /* 0x40 */
      "@", "A", "B", "C", "D", "E", "F", "G",
      "H", "I", "J", "K", "L", "M", "N", "O",
      /* 0x50 */
      "P", "Q", "R", "S", "T", "U", "V", "W",
      "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
      /* 0x60 */
      "`", "a", "b", "c", "d", "e", "f", "g",
      "h", "i", "j", "k", "l", "m", "n", "o",
      /* 0x70 */
      "p", "q", "r", "s", "t", "u", "v", "w",
      "x", "y", "z", "{", "|", "}", "~", "\\x7F",
      /* 0x80 */
      "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */
      "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */
      "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */
      "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */
      "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */
      "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */
      "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */
      "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int entryCount
      = (unsigned int)(sizeof(printable) / sizeof(printable[0]));
  const unsigned int index = c;

  // LCOV_EXCL_START
  if (index >= entryCount) {
    assert(0); /* never gets here */
    return "dead code";
  }
  // LCOV_EXCL_STOP

  return printable[index];
}
8821
8822 #endif /* XML_GE == 1 */
8823
/* Read a debug level from environment variable variableName.

   Returns the variable's value parsed by strtoul() in base 10, or
   defaultDebugLevel if the variable is not set, is empty, is followed by
   trailing characters, or makes strtoul() set errno.

   Note that errno is reset to 0 whenever the variable is set, whether
   parsing succeeds or not. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *parseEnd = NULL;
  errno = 0;
  const unsigned long parsedLevel = strtoul(text, &parseEnd, 10);

  /* Accept only input that was consumed completely and without error */
  if ((errno == 0) && (parseEnd != text) && (parseEnd[0] == '\0')) {
    return parsedLevel;
  }

  errno = 0;
  return defaultDebugLevel;
}
8842