1 /* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind@bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
42 Copyright (c) 2024 Berkay Eren Ürün <berkay.ueruen@siemens.com>
43 Licensed under the MIT license:
44
45 Permission is hereby granted, free of charge, to any person obtaining
46 a copy of this software and associated documentation files (the
47 "Software"), to deal in the Software without restriction, including
48 without limitation the rights to use, copy, modify, merge, publish,
49 distribute, sublicense, and/or sell copies of the Software, and to permit
50 persons to whom the Software is furnished to do so, subject to the
51 following conditions:
52
53 The above copyright notice and this permission notice shall be included
54 in all copies or substantial portions of the Software.
55
56 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
57 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
58 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
59 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
60 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
61 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
62 USE OR OTHER DEALINGS IN THE SOFTWARE.
63 */
64
65 #define XML_BUILDING_EXPAT 1
66
67 #include "expat_config.h"
68
69 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
70 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
71 #endif
72
73 #if defined(XML_DTD) && XML_GE == 0
74 # error Either undefine XML_DTD or define XML_GE to 1.
75 #endif
76
77 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
78 || (XML_CONTEXT_BYTES + 0 < 0)
79 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
80 #endif
81
82 #if defined(HAVE_SYSCALL_GETRANDOM)
83 # if ! defined(_GNU_SOURCE)
84 # define _GNU_SOURCE 1 /* syscall prototype */
85 # endif
86 #endif
87
88 #ifdef _WIN32
89 /* force stdlib to define rand_s() */
90 # if ! defined(_CRT_RAND_S)
91 # define _CRT_RAND_S
92 # endif
93 #endif
94
95 #include <stdbool.h>
96 #include <stddef.h>
97 #include <string.h> /* memset(), memcpy() */
98 #include <assert.h>
99 #include <limits.h> /* UINT_MAX */
100 #include <stdio.h> /* fprintf */
101 #include <stdlib.h> /* getenv, rand_s */
102 #include <stdint.h> /* uintptr_t */
103 #include <math.h> /* isnan */
104
105 #ifdef _WIN32
106 # define getpid GetCurrentProcessId
107 #else
108 # include <sys/time.h> /* gettimeofday() */
109 # include <sys/types.h> /* getpid() */
110 # include <unistd.h> /* getpid() */
111 # include <fcntl.h> /* O_RDONLY */
112 # include <errno.h>
113 #endif
114
115 #ifdef _WIN32
116 # include "winconfig.h"
117 #endif
118
119 #include "ascii.h"
120 #include "expat.h"
121 #include "siphash.h"
122
123 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
124 # if defined(HAVE_GETRANDOM)
125 # include <sys/random.h> /* getrandom */
126 # else
127 # include <unistd.h> /* syscall */
128 # include <sys/syscall.h> /* SYS_getrandom */
129 # endif
130 # if ! defined(GRND_NONBLOCK)
131 # define GRND_NONBLOCK 0x0001
132 # endif /* defined(GRND_NONBLOCK) */
133 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
134
135 #if defined(HAVE_LIBBSD) \
136 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
137 # include <bsd/stdlib.h>
138 #endif
139
140 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
141 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
142 #endif
143
144 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
145 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
146 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
147 && ! defined(XML_POOR_ENTROPY)
148 # error You do not have support for any sources of high quality entropy \
149 enabled. For end user security, that is probably not what you want. \
150 \
151 Your options include: \
152 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
153 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
154 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
155 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
156 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
157 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
158 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
159 * Windows >=Vista (rand_s): _WIN32. \
160 \
161 If insist on not using any of these, bypass this error by defining \
162 XML_POOR_ENTROPY; you have been warned. \
163 \
164 If you have reasons to patch this detection code away or need changes \
165 to the build system, please open a bug. Thank you!
166 #endif
167
168 #ifdef XML_UNICODE
169 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
170 # define XmlConvert XmlUtf16Convert
171 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
172 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
173 # define XmlEncode XmlUtf16Encode
174 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
175 typedef unsigned short ICHAR;
176 #else
177 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
178 # define XmlConvert XmlUtf8Convert
179 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
180 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
181 # define XmlEncode XmlUtf8Encode
182 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
183 typedef char ICHAR;
184 #endif
185
186 #ifndef XML_NS
187
188 # define XmlInitEncodingNS XmlInitEncoding
189 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
190 # undef XmlGetInternalEncodingNS
191 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
192 # define XmlParseXmlDeclNS XmlParseXmlDecl
193
194 #endif
195
196 #ifdef XML_UNICODE
197
198 # ifdef XML_UNICODE_WCHAR_T
199 # define XML_T(x) (const wchar_t) x
200 # define XML_L(x) L##x
201 # else
202 # define XML_T(x) (const unsigned short)x
203 # define XML_L(x) x
204 # endif
205
206 #else
207
208 # define XML_T(x) x
209 # define XML_L(x) x
210
211 #endif
212
213 /* Round up n to be a multiple of sz, where sz is a power of 2. */
214 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
215
216 /* Do safe (NULL-aware) pointer arithmetic */
217 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
218
219 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
220
221 #include "internal.h"
222 #include "xmltok.h"
223 #include "xmlrole.h"
224
225 typedef const XML_Char *KEY;
226
227 typedef struct {
228 KEY name;
229 } NAMED;
230
/* Open-addressing hash table holding pointers to NAMED entries.
   The probing notes further down in this file state that the table size is
   a power of 2 and that the slot index is computed as hash & (size - 1). */
231 typedef struct {
232 NAMED **v; /* slot array; presumably `size` slots -- TODO confirm */
233 unsigned char power; /* presumably log2(size) -- verify against lookup() */
234 size_t size; /* number of slots (a power of 2 per the probing notes) */
235 size_t used; /* presumably the count of occupied slots -- TODO confirm */
236 const XML_Memory_Handling_Suite *mem; /* memory suite associated with the table */
237 } HASH_TABLE;
238
239 static size_t keylen(KEY s);
240
241 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
242
243 /* For probing (after a collision) we need a step size relatively prime
244 to the hash table size, which is a power of 2. We use double-hashing,
245 since we can calculate a second hash value cheaply by taking those bits
246 of the first hash value that were discarded (masked out) when the table
247 index was calculated: index = hash & mask, where mask = table->size - 1.
248 We limit the maximum step size to table->size / 4 (mask >> 2) and make
249 it odd, since odd numbers are always relatively prime to a power of 2.
250 */
251 #define SECOND_HASH(hash, mask, power) \
252 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
253 #define PROBE_STEP(hash, mask, power) \
254 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
255
256 typedef struct {
257 NAMED **p;
258 NAMED **end;
259 } HASH_TABLE_ITER;
260
261 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
262 #define INIT_DATA_BUF_SIZE 1024
263 #define INIT_ATTS_SIZE 16
264 #define INIT_ATTS_VERSION 0xFFFFFFFF
265 #define INIT_BLOCK_SIZE 1024
266 #define INIT_BUFFER_SIZE 1024
267
268 #define EXPAND_SPARE 24
269
270 typedef struct binding {
271 struct prefix *prefix;
272 struct binding *nextTagBinding;
273 struct binding *prevPrefixBinding;
274 const struct attribute_id *attId;
275 XML_Char *uri;
276 int uriLen;
277 int uriAlloc;
278 } BINDING;
279
280 typedef struct prefix {
281 const XML_Char *name;
282 BINDING *binding;
283 } PREFIX;
284
285 typedef struct {
286 const XML_Char *str;
287 const XML_Char *localPart;
288 const XML_Char *prefix;
289 int strLen;
290 int uriLen;
291 int prefixLen;
292 } TAG_NAME;
293
294 /* TAG represents an open element.
295 The name of the element is stored in both the document and API
296 encodings. The memory buffer 'buf' is a separately-allocated
297 memory area which stores the name. During the XML_Parse()/
298 XML_ParseBuffer() when the element is open, the memory for the 'raw'
299 version of the name (in the document encoding) is shared with the
300 document buffer. If the element is open across calls to
301 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
302 contain the 'raw' name as well.
303
304 A parser reuses these structures, maintaining a list of allocated
305 TAG objects in a free list.
306 */
307 typedef struct tag {
308 struct tag *parent; /* parent of this element */
309 const char *rawName; /* tagName in the original encoding */
310 int rawNameLength;
311 TAG_NAME name; /* tagName in the API encoding */
312 char *buf; /* buffer for name components */
313 char *bufEnd; /* end of the buffer */
314 BINDING *bindings;
315 } TAG;
316
317 typedef struct {
318 const XML_Char *name;
319 const XML_Char *textPtr;
320 int textLen; /* length in XML_Chars */
321 int processed; /* # of processed bytes - when suspended */
322 const XML_Char *systemId;
323 const XML_Char *base;
324 const XML_Char *publicId;
325 const XML_Char *notation;
326 XML_Bool open;
327 XML_Bool is_param;
328 XML_Bool is_internal; /* true if declared in internal subset outside PE */
329 } ENTITY;
330
331 typedef struct {
332 enum XML_Content_Type type;
333 enum XML_Content_Quant quant;
334 const XML_Char *name;
335 int firstchild;
336 int lastchild;
337 int childcnt;
338 int nextsib;
339 } CONTENT_SCAFFOLD;
340
341 #define INIT_SCAFFOLD_ELEMENTS 32
342
/* One node in a singly linked list of character-storage blocks; STRING_POOL
   keeps two such lists (blocks and freeBlocks). */
343 typedef struct block {
344 struct block *next; /* next block in the list */
345 int size; /* presumably the capacity of s[] in XML_Chars -- TODO confirm */
346 XML_Char s[1]; /* start of the character storage; NOTE(review): declared with
                      one element, presumably over-allocated by the creator --
                      confirm against poolGrow() */
347 } BLOCK;
348
/* Pool in which strings are built up incrementally.
   The pool* macros in this file operate on start/ptr/end: the string under
   construction spans [start, ptr), poolFinish() moves start up to ptr,
   poolDiscard() moves ptr back to start, and poolAppendChar() calls
   poolGrow() once ptr has reached end. */
349 typedef struct {
350 BLOCK *blocks; /* presumably blocks currently holding strings -- TODO confirm */
351 BLOCK *freeBlocks; /* presumably blocks kept for reuse -- TODO confirm */
352 const XML_Char *end; /* limit of the writable area; ptr == end triggers poolGrow() */
353 XML_Char *ptr; /* next position to write */
354 XML_Char *start; /* first character of the string under construction */
355 const XML_Memory_Handling_Suite *mem; /* memory suite associated with the pool */
356 } STRING_POOL;
357
358 /* The XML_Char before the name is used to determine whether
359 an attribute has been specified. */
360 typedef struct attribute_id {
361 XML_Char *name;
362 PREFIX *prefix;
363 XML_Bool maybeTokenized;
364 XML_Bool xmlns;
365 } ATTRIBUTE_ID;
366
367 typedef struct {
368 const ATTRIBUTE_ID *id;
369 XML_Bool isCdata;
370 const XML_Char *value;
371 } DEFAULT_ATTRIBUTE;
372
373 typedef struct {
374 unsigned long version;
375 unsigned long hash;
376 const XML_Char *uriName;
377 } NS_ATT;
378
379 typedef struct {
380 const XML_Char *name;
381 PREFIX *prefix;
382 const ATTRIBUTE_ID *idAtt;
383 int nDefaultAtts;
384 int allocDefaultAtts;
385 DEFAULT_ATTRIBUTE *defaultAtts;
386 } ELEMENT_TYPE;
387
388 typedef struct {
389 HASH_TABLE generalEntities;
390 HASH_TABLE elementTypes;
391 HASH_TABLE attributeIds;
392 HASH_TABLE prefixes;
393 STRING_POOL pool;
394 STRING_POOL entityValuePool;
395 /* false once a parameter entity reference has been skipped */
396 XML_Bool keepProcessing;
397 /* true once an internal or external PE reference has been encountered;
398 this includes the reference to an external subset */
399 XML_Bool hasParamEntityRefs;
400 XML_Bool standalone;
401 #ifdef XML_DTD
402 /* indicates if external PE has been read */
403 XML_Bool paramEntityRead;
404 HASH_TABLE paramEntities;
405 #endif /* XML_DTD */
406 PREFIX defaultPrefix;
407 /* === scaffolding for building content model === */
408 XML_Bool in_eldecl;
409 CONTENT_SCAFFOLD *scaffold;
410 unsigned contentStringLen;
411 unsigned scaffSize;
412 unsigned scaffCount;
413 int scaffLevel;
414 int *scaffIndex;
415 } DTD;
416
417 typedef struct open_internal_entity {
418 const char *internalEventPtr;
419 const char *internalEventEndPtr;
420 struct open_internal_entity *next;
421 ENTITY *entity;
422 int startTagLevel;
423 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
424 } OPEN_INTERNAL_ENTITY;
425
426 enum XML_Account {
427 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
428 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
429 expansion */
430 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
431 };
432
433 #if XML_GE == 1
434 typedef unsigned long long XmlBigCount;
435 typedef struct accounting {
436 XmlBigCount countBytesDirect;
437 XmlBigCount countBytesIndirect;
438 unsigned long debugLevel;
439 float maximumAmplificationFactor; // >=1.0
440 unsigned long long activationThresholdBytes;
441 } ACCOUNTING;
442
443 typedef struct entity_stats {
444 unsigned int countEverOpened;
445 unsigned int currentDepth;
446 unsigned int maximumDepthSeen;
447 unsigned long debugLevel;
448 } ENTITY_STATS;
449 #endif /* XML_GE == 1 */
450
451 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
452 const char *end, const char **endPtr);
453
454 static Processor prologProcessor;
455 static Processor prologInitProcessor;
456 static Processor contentProcessor;
457 static Processor cdataSectionProcessor;
458 #ifdef XML_DTD
459 static Processor ignoreSectionProcessor;
460 static Processor externalParEntProcessor;
461 static Processor externalParEntInitProcessor;
462 static Processor entityValueProcessor;
463 static Processor entityValueInitProcessor;
464 #endif /* XML_DTD */
465 static Processor epilogProcessor;
466 static Processor errorProcessor;
467 static Processor externalEntityInitProcessor;
468 static Processor externalEntityInitProcessor2;
469 static Processor externalEntityInitProcessor3;
470 static Processor externalEntityContentProcessor;
471 static Processor internalEntityProcessor;
472
473 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
474 const XML_Char *encodingName);
475 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
476 const char *s, const char *next);
477 static enum XML_Error initializeEncoding(XML_Parser parser);
478 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
479 const char *s, const char *end, int tok,
480 const char *next, const char **nextPtr,
481 XML_Bool haveMore, XML_Bool allowClosingDoctype,
482 enum XML_Account account);
483 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
484 XML_Bool betweenDecl);
485 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
486 const ENCODING *enc, const char *start,
487 const char *end, const char **endPtr,
488 XML_Bool haveMore, enum XML_Account account);
489 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
490 const char **startPtr, const char *end,
491 const char **nextPtr, XML_Bool haveMore,
492 enum XML_Account account);
493 #ifdef XML_DTD
494 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
495 const char **startPtr, const char *end,
496 const char **nextPtr, XML_Bool haveMore);
497 #endif /* XML_DTD */
498
499 static void freeBindings(XML_Parser parser, BINDING *bindings);
500 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
501 const char *attStr, TAG_NAME *tagNamePtr,
502 BINDING **bindingsPtr,
503 enum XML_Account account);
504 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
505 const ATTRIBUTE_ID *attId, const XML_Char *uri,
506 BINDING **bindingsPtr);
507 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
508 XML_Bool isCdata, XML_Bool isId,
509 const XML_Char *value, XML_Parser parser);
510 static enum XML_Error storeAttributeValue(XML_Parser parser,
511 const ENCODING *enc, XML_Bool isCdata,
512 const char *ptr, const char *end,
513 STRING_POOL *pool,
514 enum XML_Account account);
515 static enum XML_Error appendAttributeValue(XML_Parser parser,
516 const ENCODING *enc,
517 XML_Bool isCdata, const char *ptr,
518 const char *end, STRING_POOL *pool,
519 enum XML_Account account);
520 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
521 const char *start, const char *end);
522 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
523 #if XML_GE == 1
524 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
525 const char *start, const char *end,
526 enum XML_Account account);
527 #else
528 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
529 #endif
530 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
531 const char *start, const char *end);
532 static int reportComment(XML_Parser parser, const ENCODING *enc,
533 const char *start, const char *end);
534 static void reportDefault(XML_Parser parser, const ENCODING *enc,
535 const char *start, const char *end);
536
537 static const XML_Char *getContext(XML_Parser parser);
538 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
539
540 static void FASTCALL normalizePublicId(XML_Char *s);
541
542 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
543 /* do not call if m_parentParser != NULL */
544 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
545 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
546 const XML_Memory_Handling_Suite *ms);
547 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
548 const XML_Memory_Handling_Suite *ms);
549 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
550 STRING_POOL *newPool, const HASH_TABLE *oldTable);
551 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
552 size_t createSize);
553 static void FASTCALL hashTableInit(HASH_TABLE *table,
554 const XML_Memory_Handling_Suite *ms);
555 static void FASTCALL hashTableClear(HASH_TABLE *table);
556 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
557 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
558 const HASH_TABLE *table);
559 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
560
561 static void FASTCALL poolInit(STRING_POOL *pool,
562 const XML_Memory_Handling_Suite *ms);
563 static void FASTCALL poolClear(STRING_POOL *pool);
564 static void FASTCALL poolDestroy(STRING_POOL *pool);
565 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
566 const char *ptr, const char *end);
567 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
568 const char *ptr, const char *end);
569 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
570 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
571 const XML_Char *s);
572 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
573 int n);
574 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
575 const XML_Char *s);
576
577 static int FASTCALL nextScaffoldPart(XML_Parser parser);
578 static XML_Content *build_model(XML_Parser parser);
579 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
580 const char *ptr, const char *end);
581
582 static XML_Char *copyString(const XML_Char *s,
583 const XML_Memory_Handling_Suite *memsuite);
584
585 static unsigned long generate_hash_secret_salt(XML_Parser parser);
586 static XML_Bool startParsing(XML_Parser parser);
587
588 static XML_Parser parserCreate(const XML_Char *encodingName,
589 const XML_Memory_Handling_Suite *memsuite,
590 const XML_Char *nameSep, DTD *dtd);
591
592 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
593
594 #if XML_GE == 1
595 static float accountingGetCurrentAmplification(XML_Parser rootParser);
596 static void accountingReportStats(XML_Parser originParser, const char *epilog);
597 static void accountingOnAbort(XML_Parser originParser);
598 static void accountingReportDiff(XML_Parser rootParser,
599 unsigned int levelsAwayFromRootParser,
600 const char *before, const char *after,
601 ptrdiff_t bytesMore, int source_line,
602 enum XML_Account account);
603 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
604 const char *before, const char *after,
605 int source_line,
606 enum XML_Account account);
607
608 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
609 const char *action, int sourceLine);
610 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
611 int sourceLine);
612 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
613 int sourceLine);
614
615 static XML_Parser getRootParserOf(XML_Parser parser,
616 unsigned int *outLevelDiff);
617 #endif /* XML_GE == 1 */
618
619 static unsigned long getDebugLevel(const char *variableName,
620 unsigned long defaultDebugLevel);
621
/* Accessors for the string currently under construction in a STRING_POOL.
   They are macros and may evaluate `pool` more than once, so the argument
   must be free of side effects.  Every use of the argument is parenthesised
   so that expressions such as `&localPool` or `pools + i` expand correctly. */

/* First character of the string under construction. */
#define poolStart(pool) ((pool)->start)
/* Number of XML_Chars appended to the string under construction so far. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Remove the most recently appended character (no emptiness check). */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended character (no emptiness check). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Throw away the string under construction by rewinding ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Close the string under construction: the next string begins at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append character c.  When the writable area is exhausted (ptr == end),
   poolGrow() is called first; evaluates to 0 if that fails, 1 otherwise. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
632
633 #if ! defined(XML_TESTING)
634 const
635 #endif
636 XML_Bool g_reparseDeferralEnabledDefault
637 = XML_TRUE; // write ONLY in runtests.c
638 #if defined(XML_TESTING)
639 unsigned int g_bytesScanned = 0; // used for testing only
640 #endif
641
642 struct XML_ParserStruct {
643 /* The first member must be m_userData so that the XML_GetUserData
644 macro works. */
645 void *m_userData;
646 void *m_handlerArg;
647
648 // How the four parse buffer pointers below relate in time and space:
649 //
650 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
651 // | | | |
652 // <--parsed-->| | |
653 // <---parsing--->| |
654 // <--unoccupied-->|
655 // <---------total-malloced/realloced-------->|
656
657 char *m_buffer; // malloc/realloc base pointer of parse buffer
658 const XML_Memory_Handling_Suite m_mem;
659 const char *m_bufferPtr; // first character to be parsed
660 char *m_bufferEnd; // past last character to be parsed
661 const char *m_bufferLim; // allocated end of m_buffer
662
663 XML_Index m_parseEndByteIndex;
664 const char *m_parseEndPtr;
665 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
666 XML_Bool m_reparseDeferralEnabled;
667 int m_lastBufferRequestSize;
668 XML_Char *m_dataBuf;
669 XML_Char *m_dataBufEnd;
670 XML_StartElementHandler m_startElementHandler;
671 XML_EndElementHandler m_endElementHandler;
672 XML_CharacterDataHandler m_characterDataHandler;
673 XML_ProcessingInstructionHandler m_processingInstructionHandler;
674 XML_CommentHandler m_commentHandler;
675 XML_StartCdataSectionHandler m_startCdataSectionHandler;
676 XML_EndCdataSectionHandler m_endCdataSectionHandler;
677 XML_DefaultHandler m_defaultHandler;
678 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
679 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
680 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
681 XML_NotationDeclHandler m_notationDeclHandler;
682 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
683 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
684 XML_NotStandaloneHandler m_notStandaloneHandler;
685 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
686 XML_Parser m_externalEntityRefHandlerArg;
687 XML_SkippedEntityHandler m_skippedEntityHandler;
688 XML_UnknownEncodingHandler m_unknownEncodingHandler;
689 XML_ElementDeclHandler m_elementDeclHandler;
690 XML_AttlistDeclHandler m_attlistDeclHandler;
691 XML_EntityDeclHandler m_entityDeclHandler;
692 XML_XmlDeclHandler m_xmlDeclHandler;
693 const ENCODING *m_encoding;
694 INIT_ENCODING m_initEncoding;
695 const ENCODING *m_internalEncoding;
696 const XML_Char *m_protocolEncodingName;
697 XML_Bool m_ns;
698 XML_Bool m_ns_triplets;
699 void *m_unknownEncodingMem;
700 void *m_unknownEncodingData;
701 void *m_unknownEncodingHandlerData;
702 void(XMLCALL *m_unknownEncodingRelease)(void *);
703 PROLOG_STATE m_prologState;
704 Processor *m_processor;
705 enum XML_Error m_errorCode;
706 const char *m_eventPtr;
707 const char *m_eventEndPtr;
708 const char *m_positionPtr;
709 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
710 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
711 XML_Bool m_defaultExpandInternalEntities;
712 int m_tagLevel;
713 ENTITY *m_declEntity;
714 const XML_Char *m_doctypeName;
715 const XML_Char *m_doctypeSysid;
716 const XML_Char *m_doctypePubid;
717 const XML_Char *m_declAttributeType;
718 const XML_Char *m_declNotationName;
719 const XML_Char *m_declNotationPublicId;
720 ELEMENT_TYPE *m_declElementType;
721 ATTRIBUTE_ID *m_declAttributeId;
722 XML_Bool m_declAttributeIsCdata;
723 XML_Bool m_declAttributeIsId;
724 DTD *m_dtd;
725 const XML_Char *m_curBase;
726 TAG *m_tagStack;
727 TAG *m_freeTagList;
728 BINDING *m_inheritedBindings;
729 BINDING *m_freeBindingList;
730 int m_attsSize;
731 int m_nSpecifiedAtts;
732 int m_idAttIndex;
733 ATTRIBUTE *m_atts;
734 NS_ATT *m_nsAtts;
735 unsigned long m_nsAttsVersion;
736 unsigned char m_nsAttsPower;
737 #ifdef XML_ATTR_INFO
738 XML_AttrInfo *m_attInfo;
739 #endif
740 POSITION m_position;
741 STRING_POOL m_tempPool;
742 STRING_POOL m_temp2Pool;
743 char *m_groupConnector;
744 unsigned int m_groupSize;
745 XML_Char m_namespaceSeparator;
746 XML_Parser m_parentParser;
747 XML_ParsingStatus m_parsingStatus;
748 #ifdef XML_DTD
749 XML_Bool m_isParamEntity;
750 XML_Bool m_useForeignDTD;
751 enum XML_ParamEntityParsing m_paramEntityParsing;
752 #endif
753 unsigned long m_hash_secret_salt;
754 #if XML_GE == 1
755 ACCOUNTING m_accounting;
756 ENTITY_STATS m_entity_stats;
757 #endif
758 };
759
/* Allocation helpers that route through the memory handling suite stored in
   the parser (its m_mem member).  `parser` is parenthesised so that any
   pointer expression (e.g. `&localParser`) may be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
763
764 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)765 XML_ParserCreate(const XML_Char *encodingName) {
766 return XML_ParserCreate_MM(encodingName, NULL, NULL);
767 }
768
769 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)770 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
771 XML_Char tmp[2] = {nsSep, 0};
772 return XML_ParserCreate_MM(encodingName, NULL, tmp);
773 }
774
775 // "xml=http://www.w3.org/XML/1998/namespace"
776 static const XML_Char implicitContext[]
777 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
778 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
779 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
780 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
781 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
782 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
783 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
784 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
785 '\0'};
786
787 /* To avoid warnings about unused functions: */
788 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
789
790 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
791
/* Obtain entropy on Linux 3.17+.

   Fills `target` with `count` random bytes using getrandom() (or the raw
   SYS_getrandom syscall when HAVE_GETRANDOM is not defined), always passing
   GRND_NONBLOCK.

   Returns 1 once all `count` bytes have been written, 0 otherwise.

   The retry decision is taken from the return value of each call:
     - a positive return is progress; keep going until `count` is reached
       (a short read is not an error),
     - a negative return with errno == EINTR is retried,
     - anything else (other errors, or a 0-byte result) is a failure.
   errno is only consulted right after a failing call, so a stale value left
   behind by unrelated code can neither abort nor prolong the loop. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  do {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough for both the ssize_t of getrandom() and the
       long of syscall(). */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      return 0; /* hard error or no progress */
    }
  } while (bytesWrittenTotal < count);

  return 1;
}
819
820 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
821
822 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
823
/* Extract entropy from /dev/urandom.

   Opens /dev/urandom read-only and reads until `count` bytes have been stored
   at `target`.  The descriptor is closed on every path after a successful
   open().

   Returns 1 once all `count` bytes have been written, 0 otherwise (open()
   failure, read error other than EINTR, or a 0-byte read).

   The retry decision is taken from the return value of read(): short reads
   are continued, a negative return with errno == EINTR is retried, and
   anything else ends the loop.  errno is only consulted right after a failing
   read(), so a stale EINTR left in errno cannot make the loop spin. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }
    if (bytesWrittenMore < 0 && errno == EINTR) {
      continue; /* interrupted before any data arrived: retry */
    }
    break; /* end of file or hard error */
  }

  close(fd);
  return success;
}
851
852 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
853
854 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
855
856 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
857
/* Fill `target` with `count` bytes taken from arc4random().
   Each call yields one 32-bit value, which is written out least significant
   byte first; the final value is only partially consumed when `count` is not
   a multiple of four.  Does nothing when `count` is 0. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t bytesWrittenTotal = 0;

  while (bytesWrittenTotal < count) {
    const uint32_t random32 = arc4random();
    size_t byteIndex;

    for (byteIndex = 0;
         (byteIndex < sizeof(random32)) && (bytesWrittenTotal < count);
         byteIndex++, bytesWrittenTotal++) {
      out[bytesWrittenTotal] = (uint8_t)(random32 >> (byteIndex * 8));
    }
  }
}
873
874 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
875
876 #ifdef _WIN32
877
/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
   as it didn't declare it in its header prior to version 5.3.0 of its
   runtime package (mingwrt, containing stdlib.h).  The upstream fix
   was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
#  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
      && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
__declspec(dllimport) int rand_s(unsigned int *);
#  endif

/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills the count bytes at target, one rand_s() word at a time (bytes are
 * stored lowest-order first, surplus bytes of the last word are dropped).
 * Returns 1 on success, or 0 as soon as a rand_s() call reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    const size_t remaining = count - written;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (k = 0; k < chunk; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += chunk;
  }
  return 1; /* success */
}
910
911 #endif /* _WIN32 */
912
913 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
914
/* Low quality, time based entropy used by the fallback salt generation.
 *
 * Windows:   XOR of the high and low parts of the current FILETIME.
 * Elsewhere: the microseconds field of gettimeofday().
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The result is verified in debug builds only; the cast keeps builds
     with NDEBUG (where assert() expands to nothing) free of an
     unused-variable warning. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
937
938 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
939
/* Pass-through helper for reporting where the hash salt came from.
 *
 * Always returns entropy unchanged.  When
 * getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, the value is
 * also printed to stderr, tagged with label.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  /* two hex digits per byte, zero padded */
  const int hexDigits = (int)sizeof(entropy) * 2;

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, (unsigned long)sizeof(entropy));
  return entropy;
}
948
949 static unsigned long
generate_hash_secret_salt(XML_Parser parser)950 generate_hash_secret_salt(XML_Parser parser) {
951 unsigned long entropy;
952 (void)parser;
953
954 /* "Failproof" high quality providers: */
955 #if defined(HAVE_ARC4RANDOM_BUF)
956 arc4random_buf(&entropy, sizeof(entropy));
957 return ENTROPY_DEBUG("arc4random_buf", entropy);
958 #elif defined(HAVE_ARC4RANDOM)
959 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
960 return ENTROPY_DEBUG("arc4random", entropy);
961 #else
962 /* Try high quality providers first .. */
963 # ifdef _WIN32
964 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
965 return ENTROPY_DEBUG("rand_s", entropy);
966 }
967 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
968 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
969 return ENTROPY_DEBUG("getrandom", entropy);
970 }
971 # endif
972 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
973 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
974 return ENTROPY_DEBUG("/dev/urandom", entropy);
975 }
976 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
977 /* .. and self-made low quality for backup: */
978
979 /* Process ID is 0 bits entropy if attacker has local access */
980 entropy = gather_time_entropy() ^ getpid();
981
982 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
983 if (sizeof(unsigned long) == 4) {
984 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
985 } else {
986 return ENTROPY_DEBUG("fallback(8)",
987 entropy * (unsigned long)2305843009213693951ULL);
988 }
989 #endif
990 }
991
992 static unsigned long
get_hash_secret_salt(XML_Parser parser)993 get_hash_secret_salt(XML_Parser parser) {
994 if (parser->m_parentParser != NULL)
995 return get_hash_secret_salt(parser->m_parentParser);
996 return parser->m_hash_secret_salt;
997 }
998
999 static enum XML_Error
callProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)1000 callProcessor(XML_Parser parser, const char *start, const char *end,
1001 const char **endPtr) {
1002 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1003
1004 if (parser->m_reparseDeferralEnabled
1005 && ! parser->m_parsingStatus.finalBuffer) {
1006 // Heuristic: don't try to parse a partial token again until the amount of
1007 // available data has increased significantly.
1008 const size_t had_before = parser->m_partialTokenBytesBefore;
1009 // ...but *do* try anyway if we're close to causing a reallocation.
1010 size_t available_buffer
1011 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1012 #if XML_CONTEXT_BYTES > 0
1013 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1014 #endif
1015 available_buffer
1016 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1017 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1018 const bool enough
1019 = (have_now >= 2 * had_before)
1020 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1021
1022 if (! enough) {
1023 *endPtr = start; // callers may expect this to be set
1024 return XML_ERROR_NONE;
1025 }
1026 }
1027 #if defined(XML_TESTING)
1028 g_bytesScanned += (unsigned)have_now;
1029 #endif
1030 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
1031 if (ret == XML_ERROR_NONE) {
1032 // if we consumed nothing, remember what we had on this parse attempt.
1033 if (*endPtr == start) {
1034 parser->m_partialTokenBytesBefore = have_now;
1035 } else {
1036 parser->m_partialTokenBytesBefore = 0;
1037 }
1038 }
1039 return ret;
1040 }
1041
1042 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)1043 startParsing(XML_Parser parser) {
1044 /* hash functions must be initialized before setContext() is called */
1045 if (parser->m_hash_secret_salt == 0)
1046 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1047 if (parser->m_ns) {
1048 /* implicit context only set for root parser, since child
1049 parsers (i.e. external entity parsers) will inherit it
1050 */
1051 return setContext(parser, implicitContext);
1052 }
1053 return XML_TRUE;
1054 }
1055
1056 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)1057 XML_ParserCreate_MM(const XML_Char *encodingName,
1058 const XML_Memory_Handling_Suite *memsuite,
1059 const XML_Char *nameSep) {
1060 return parserCreate(encodingName, memsuite, nameSep, NULL);
1061 }
1062
1063 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)1064 parserCreate(const XML_Char *encodingName,
1065 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1066 DTD *dtd) {
1067 XML_Parser parser;
1068
1069 if (memsuite) {
1070 XML_Memory_Handling_Suite *mtemp;
1071 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1072 if (parser != NULL) {
1073 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1074 mtemp->malloc_fcn = memsuite->malloc_fcn;
1075 mtemp->realloc_fcn = memsuite->realloc_fcn;
1076 mtemp->free_fcn = memsuite->free_fcn;
1077 }
1078 } else {
1079 XML_Memory_Handling_Suite *mtemp;
1080 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1081 if (parser != NULL) {
1082 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1083 mtemp->malloc_fcn = malloc;
1084 mtemp->realloc_fcn = realloc;
1085 mtemp->free_fcn = free;
1086 }
1087 }
1088
1089 if (! parser)
1090 return parser;
1091
1092 parser->m_buffer = NULL;
1093 parser->m_bufferLim = NULL;
1094
1095 parser->m_attsSize = INIT_ATTS_SIZE;
1096 parser->m_atts
1097 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1098 if (parser->m_atts == NULL) {
1099 FREE(parser, parser);
1100 return NULL;
1101 }
1102 #ifdef XML_ATTR_INFO
1103 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1104 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1105 if (parser->m_attInfo == NULL) {
1106 FREE(parser, parser->m_atts);
1107 FREE(parser, parser);
1108 return NULL;
1109 }
1110 #endif
1111 parser->m_dataBuf
1112 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1113 if (parser->m_dataBuf == NULL) {
1114 FREE(parser, parser->m_atts);
1115 #ifdef XML_ATTR_INFO
1116 FREE(parser, parser->m_attInfo);
1117 #endif
1118 FREE(parser, parser);
1119 return NULL;
1120 }
1121 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1122
1123 if (dtd)
1124 parser->m_dtd = dtd;
1125 else {
1126 parser->m_dtd = dtdCreate(&parser->m_mem);
1127 if (parser->m_dtd == NULL) {
1128 FREE(parser, parser->m_dataBuf);
1129 FREE(parser, parser->m_atts);
1130 #ifdef XML_ATTR_INFO
1131 FREE(parser, parser->m_attInfo);
1132 #endif
1133 FREE(parser, parser);
1134 return NULL;
1135 }
1136 }
1137
1138 parser->m_freeBindingList = NULL;
1139 parser->m_freeTagList = NULL;
1140 parser->m_freeInternalEntities = NULL;
1141
1142 parser->m_groupSize = 0;
1143 parser->m_groupConnector = NULL;
1144
1145 parser->m_unknownEncodingHandler = NULL;
1146 parser->m_unknownEncodingHandlerData = NULL;
1147
1148 parser->m_namespaceSeparator = ASCII_EXCL;
1149 parser->m_ns = XML_FALSE;
1150 parser->m_ns_triplets = XML_FALSE;
1151
1152 parser->m_nsAtts = NULL;
1153 parser->m_nsAttsVersion = 0;
1154 parser->m_nsAttsPower = 0;
1155
1156 parser->m_protocolEncodingName = NULL;
1157
1158 poolInit(&parser->m_tempPool, &(parser->m_mem));
1159 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1160 parserInit(parser, encodingName);
1161
1162 if (encodingName && ! parser->m_protocolEncodingName) {
1163 if (dtd) {
1164 // We need to stop the upcoming call to XML_ParserFree from happily
1165 // destroying parser->m_dtd because the DTD is shared with the parent
1166 // parser and the only guard that keeps XML_ParserFree from destroying
1167 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1168 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1169 parser->m_dtd = NULL;
1170 }
1171 XML_ParserFree(parser);
1172 return NULL;
1173 }
1174
1175 if (nameSep) {
1176 parser->m_ns = XML_TRUE;
1177 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1178 parser->m_namespaceSeparator = *nameSep;
1179 } else {
1180 parser->m_internalEncoding = XmlGetInternalEncoding();
1181 }
1182
1183 return parser;
1184 }
1185
1186 static void
parserInit(XML_Parser parser,const XML_Char * encodingName)1187 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1188 parser->m_processor = prologInitProcessor;
1189 XmlPrologStateInit(&parser->m_prologState);
1190 if (encodingName != NULL) {
1191 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1192 }
1193 parser->m_curBase = NULL;
1194 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1195 parser->m_userData = NULL;
1196 parser->m_handlerArg = NULL;
1197 parser->m_startElementHandler = NULL;
1198 parser->m_endElementHandler = NULL;
1199 parser->m_characterDataHandler = NULL;
1200 parser->m_processingInstructionHandler = NULL;
1201 parser->m_commentHandler = NULL;
1202 parser->m_startCdataSectionHandler = NULL;
1203 parser->m_endCdataSectionHandler = NULL;
1204 parser->m_defaultHandler = NULL;
1205 parser->m_startDoctypeDeclHandler = NULL;
1206 parser->m_endDoctypeDeclHandler = NULL;
1207 parser->m_unparsedEntityDeclHandler = NULL;
1208 parser->m_notationDeclHandler = NULL;
1209 parser->m_startNamespaceDeclHandler = NULL;
1210 parser->m_endNamespaceDeclHandler = NULL;
1211 parser->m_notStandaloneHandler = NULL;
1212 parser->m_externalEntityRefHandler = NULL;
1213 parser->m_externalEntityRefHandlerArg = parser;
1214 parser->m_skippedEntityHandler = NULL;
1215 parser->m_elementDeclHandler = NULL;
1216 parser->m_attlistDeclHandler = NULL;
1217 parser->m_entityDeclHandler = NULL;
1218 parser->m_xmlDeclHandler = NULL;
1219 parser->m_bufferPtr = parser->m_buffer;
1220 parser->m_bufferEnd = parser->m_buffer;
1221 parser->m_parseEndByteIndex = 0;
1222 parser->m_parseEndPtr = NULL;
1223 parser->m_partialTokenBytesBefore = 0;
1224 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1225 parser->m_lastBufferRequestSize = 0;
1226 parser->m_declElementType = NULL;
1227 parser->m_declAttributeId = NULL;
1228 parser->m_declEntity = NULL;
1229 parser->m_doctypeName = NULL;
1230 parser->m_doctypeSysid = NULL;
1231 parser->m_doctypePubid = NULL;
1232 parser->m_declAttributeType = NULL;
1233 parser->m_declNotationName = NULL;
1234 parser->m_declNotationPublicId = NULL;
1235 parser->m_declAttributeIsCdata = XML_FALSE;
1236 parser->m_declAttributeIsId = XML_FALSE;
1237 memset(&parser->m_position, 0, sizeof(POSITION));
1238 parser->m_errorCode = XML_ERROR_NONE;
1239 parser->m_eventPtr = NULL;
1240 parser->m_eventEndPtr = NULL;
1241 parser->m_positionPtr = NULL;
1242 parser->m_openInternalEntities = NULL;
1243 parser->m_defaultExpandInternalEntities = XML_TRUE;
1244 parser->m_tagLevel = 0;
1245 parser->m_tagStack = NULL;
1246 parser->m_inheritedBindings = NULL;
1247 parser->m_nSpecifiedAtts = 0;
1248 parser->m_unknownEncodingMem = NULL;
1249 parser->m_unknownEncodingRelease = NULL;
1250 parser->m_unknownEncodingData = NULL;
1251 parser->m_parentParser = NULL;
1252 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1253 #ifdef XML_DTD
1254 parser->m_isParamEntity = XML_FALSE;
1255 parser->m_useForeignDTD = XML_FALSE;
1256 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1257 #endif
1258 parser->m_hash_secret_salt = 0;
1259
1260 #if XML_GE == 1
1261 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1262 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1263 parser->m_accounting.maximumAmplificationFactor
1264 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1265 parser->m_accounting.activationThresholdBytes
1266 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1267
1268 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1269 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1270 #endif
1271 }
1272
1273 /* moves list of bindings to m_freeBindingList */
1274 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1275 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1276 while (bindings) {
1277 BINDING *b = bindings;
1278 bindings = bindings->nextTagBinding;
1279 b->nextTagBinding = parser->m_freeBindingList;
1280 parser->m_freeBindingList = b;
1281 }
1282 }
1283
1284 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1285 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1286 TAG *tStk;
1287 OPEN_INTERNAL_ENTITY *openEntityList;
1288
1289 if (parser == NULL)
1290 return XML_FALSE;
1291
1292 if (parser->m_parentParser)
1293 return XML_FALSE;
1294 /* move m_tagStack to m_freeTagList */
1295 tStk = parser->m_tagStack;
1296 while (tStk) {
1297 TAG *tag = tStk;
1298 tStk = tStk->parent;
1299 tag->parent = parser->m_freeTagList;
1300 moveToFreeBindingList(parser, tag->bindings);
1301 tag->bindings = NULL;
1302 parser->m_freeTagList = tag;
1303 }
1304 /* move m_openInternalEntities to m_freeInternalEntities */
1305 openEntityList = parser->m_openInternalEntities;
1306 while (openEntityList) {
1307 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1308 openEntityList = openEntity->next;
1309 openEntity->next = parser->m_freeInternalEntities;
1310 parser->m_freeInternalEntities = openEntity;
1311 }
1312 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1313 FREE(parser, parser->m_unknownEncodingMem);
1314 if (parser->m_unknownEncodingRelease)
1315 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1316 poolClear(&parser->m_tempPool);
1317 poolClear(&parser->m_temp2Pool);
1318 FREE(parser, (void *)parser->m_protocolEncodingName);
1319 parser->m_protocolEncodingName = NULL;
1320 parserInit(parser, encodingName);
1321 dtdReset(parser->m_dtd, &parser->m_mem);
1322 return XML_TRUE;
1323 }
1324
1325 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1326 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1327 if (parser == NULL)
1328 return XML_STATUS_ERROR;
1329 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1330 XXX There's no way for the caller to determine which of the
1331 XXX possible error cases caused the XML_STATUS_ERROR return.
1332 */
1333 if (parser->m_parsingStatus.parsing == XML_PARSING
1334 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1335 return XML_STATUS_ERROR;
1336
1337 /* Get rid of any previous encoding name */
1338 FREE(parser, (void *)parser->m_protocolEncodingName);
1339
1340 if (encodingName == NULL)
1341 /* No new encoding name */
1342 parser->m_protocolEncodingName = NULL;
1343 else {
1344 /* Copy the new encoding name into allocated memory */
1345 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1346 if (! parser->m_protocolEncodingName)
1347 return XML_STATUS_ERROR;
1348 }
1349 return XML_STATUS_OK;
1350 }
1351
1352 XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser,const XML_Char * context,const XML_Char * encodingName)1353 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1354 const XML_Char *encodingName) {
1355 XML_Parser parser = oldParser;
1356 DTD *newDtd = NULL;
1357 DTD *oldDtd;
1358 XML_StartElementHandler oldStartElementHandler;
1359 XML_EndElementHandler oldEndElementHandler;
1360 XML_CharacterDataHandler oldCharacterDataHandler;
1361 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1362 XML_CommentHandler oldCommentHandler;
1363 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1364 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1365 XML_DefaultHandler oldDefaultHandler;
1366 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1367 XML_NotationDeclHandler oldNotationDeclHandler;
1368 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1369 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1370 XML_NotStandaloneHandler oldNotStandaloneHandler;
1371 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1372 XML_SkippedEntityHandler oldSkippedEntityHandler;
1373 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1374 XML_ElementDeclHandler oldElementDeclHandler;
1375 XML_AttlistDeclHandler oldAttlistDeclHandler;
1376 XML_EntityDeclHandler oldEntityDeclHandler;
1377 XML_XmlDeclHandler oldXmlDeclHandler;
1378 ELEMENT_TYPE *oldDeclElementType;
1379
1380 void *oldUserData;
1381 void *oldHandlerArg;
1382 XML_Bool oldDefaultExpandInternalEntities;
1383 XML_Parser oldExternalEntityRefHandlerArg;
1384 #ifdef XML_DTD
1385 enum XML_ParamEntityParsing oldParamEntityParsing;
1386 int oldInEntityValue;
1387 #endif
1388 XML_Bool oldns_triplets;
1389 /* Note that the new parser shares the same hash secret as the old
1390 parser, so that dtdCopy and copyEntityTable can lookup values
1391 from hash tables associated with either parser without us having
1392 to worry which hash secrets each table has.
1393 */
1394 unsigned long oldhash_secret_salt;
1395 XML_Bool oldReparseDeferralEnabled;
1396
1397 /* Validate the oldParser parameter before we pull everything out of it */
1398 if (oldParser == NULL)
1399 return NULL;
1400
1401 /* Stash the original parser contents on the stack */
1402 oldDtd = parser->m_dtd;
1403 oldStartElementHandler = parser->m_startElementHandler;
1404 oldEndElementHandler = parser->m_endElementHandler;
1405 oldCharacterDataHandler = parser->m_characterDataHandler;
1406 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1407 oldCommentHandler = parser->m_commentHandler;
1408 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1409 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1410 oldDefaultHandler = parser->m_defaultHandler;
1411 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1412 oldNotationDeclHandler = parser->m_notationDeclHandler;
1413 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1414 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1415 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1416 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1417 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1418 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1419 oldElementDeclHandler = parser->m_elementDeclHandler;
1420 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1421 oldEntityDeclHandler = parser->m_entityDeclHandler;
1422 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1423 oldDeclElementType = parser->m_declElementType;
1424
1425 oldUserData = parser->m_userData;
1426 oldHandlerArg = parser->m_handlerArg;
1427 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1428 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1429 #ifdef XML_DTD
1430 oldParamEntityParsing = parser->m_paramEntityParsing;
1431 oldInEntityValue = parser->m_prologState.inEntityValue;
1432 #endif
1433 oldns_triplets = parser->m_ns_triplets;
1434 /* Note that the new parser shares the same hash secret as the old
1435 parser, so that dtdCopy and copyEntityTable can lookup values
1436 from hash tables associated with either parser without us having
1437 to worry which hash secrets each table has.
1438 */
1439 oldhash_secret_salt = parser->m_hash_secret_salt;
1440 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1441
1442 #ifdef XML_DTD
1443 if (! context)
1444 newDtd = oldDtd;
1445 #endif /* XML_DTD */
1446
1447 /* Note that the magical uses of the pre-processor to make field
1448 access look more like C++ require that `parser' be overwritten
1449 here. This makes this function more painful to follow than it
1450 would be otherwise.
1451 */
1452 if (parser->m_ns) {
1453 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1454 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1455 } else {
1456 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1457 }
1458
1459 if (! parser)
1460 return NULL;
1461
1462 parser->m_startElementHandler = oldStartElementHandler;
1463 parser->m_endElementHandler = oldEndElementHandler;
1464 parser->m_characterDataHandler = oldCharacterDataHandler;
1465 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1466 parser->m_commentHandler = oldCommentHandler;
1467 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1468 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1469 parser->m_defaultHandler = oldDefaultHandler;
1470 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1471 parser->m_notationDeclHandler = oldNotationDeclHandler;
1472 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1473 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1474 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1475 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1476 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1477 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1478 parser->m_elementDeclHandler = oldElementDeclHandler;
1479 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1480 parser->m_entityDeclHandler = oldEntityDeclHandler;
1481 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1482 parser->m_declElementType = oldDeclElementType;
1483 parser->m_userData = oldUserData;
1484 if (oldUserData == oldHandlerArg)
1485 parser->m_handlerArg = parser->m_userData;
1486 else
1487 parser->m_handlerArg = parser;
1488 if (oldExternalEntityRefHandlerArg != oldParser)
1489 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1490 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1491 parser->m_ns_triplets = oldns_triplets;
1492 parser->m_hash_secret_salt = oldhash_secret_salt;
1493 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1494 parser->m_parentParser = oldParser;
1495 #ifdef XML_DTD
1496 parser->m_paramEntityParsing = oldParamEntityParsing;
1497 parser->m_prologState.inEntityValue = oldInEntityValue;
1498 if (context) {
1499 #endif /* XML_DTD */
1500 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1501 || ! setContext(parser, context)) {
1502 XML_ParserFree(parser);
1503 return NULL;
1504 }
1505 parser->m_processor = externalEntityInitProcessor;
1506 #ifdef XML_DTD
1507 } else {
1508 /* The DTD instance referenced by parser->m_dtd is shared between the
1509 document's root parser and external PE parsers, therefore one does not
1510 need to call setContext. In addition, one also *must* not call
1511 setContext, because this would overwrite existing prefix->binding
1512 pointers in parser->m_dtd with ones that get destroyed with the external
1513 PE parser. This would leave those prefixes with dangling pointers.
1514 */
1515 parser->m_isParamEntity = XML_TRUE;
1516 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1517 parser->m_processor = externalParEntInitProcessor;
1518 }
1519 #endif /* XML_DTD */
1520 return parser;
1521 }
1522
1523 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1524 destroyBindings(BINDING *bindings, XML_Parser parser) {
1525 for (;;) {
1526 BINDING *b = bindings;
1527 if (! b)
1528 break;
1529 bindings = b->nextTagBinding;
1530 FREE(parser, b->uri);
1531 FREE(parser, b);
1532 }
1533 }
1534
1535 void XMLCALL
XML_ParserFree(XML_Parser parser)1536 XML_ParserFree(XML_Parser parser) {
1537 TAG *tagList;
1538 OPEN_INTERNAL_ENTITY *entityList;
1539 if (parser == NULL)
1540 return;
1541 /* free m_tagStack and m_freeTagList */
1542 tagList = parser->m_tagStack;
1543 for (;;) {
1544 TAG *p;
1545 if (tagList == NULL) {
1546 if (parser->m_freeTagList == NULL)
1547 break;
1548 tagList = parser->m_freeTagList;
1549 parser->m_freeTagList = NULL;
1550 }
1551 p = tagList;
1552 tagList = tagList->parent;
1553 FREE(parser, p->buf);
1554 destroyBindings(p->bindings, parser);
1555 FREE(parser, p);
1556 }
1557 /* free m_openInternalEntities and m_freeInternalEntities */
1558 entityList = parser->m_openInternalEntities;
1559 for (;;) {
1560 OPEN_INTERNAL_ENTITY *openEntity;
1561 if (entityList == NULL) {
1562 if (parser->m_freeInternalEntities == NULL)
1563 break;
1564 entityList = parser->m_freeInternalEntities;
1565 parser->m_freeInternalEntities = NULL;
1566 }
1567 openEntity = entityList;
1568 entityList = entityList->next;
1569 FREE(parser, openEntity);
1570 }
1571
1572 destroyBindings(parser->m_freeBindingList, parser);
1573 destroyBindings(parser->m_inheritedBindings, parser);
1574 poolDestroy(&parser->m_tempPool);
1575 poolDestroy(&parser->m_temp2Pool);
1576 FREE(parser, (void *)parser->m_protocolEncodingName);
1577 #ifdef XML_DTD
1578 /* external parameter entity parsers share the DTD structure
1579 parser->m_dtd with the root parser, so we must not destroy it
1580 */
1581 if (! parser->m_isParamEntity && parser->m_dtd)
1582 #else
1583 if (parser->m_dtd)
1584 #endif /* XML_DTD */
1585 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1586 &parser->m_mem);
1587 FREE(parser, (void *)parser->m_atts);
1588 #ifdef XML_ATTR_INFO
1589 FREE(parser, (void *)parser->m_attInfo);
1590 #endif
1591 FREE(parser, parser->m_groupConnector);
1592 FREE(parser, parser->m_buffer);
1593 FREE(parser, parser->m_dataBuf);
1594 FREE(parser, parser->m_nsAtts);
1595 FREE(parser, parser->m_unknownEncodingMem);
1596 if (parser->m_unknownEncodingRelease)
1597 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1598 FREE(parser, parser);
1599 }
1600
1601 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1602 XML_UseParserAsHandlerArg(XML_Parser parser) {
1603 if (parser != NULL)
1604 parser->m_handlerArg = parser;
1605 }
1606
1607 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1608 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1609 if (parser == NULL)
1610 return XML_ERROR_INVALID_ARGUMENT;
1611 #ifdef XML_DTD
1612 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1613 if (parser->m_parsingStatus.parsing == XML_PARSING
1614 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1615 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1616 parser->m_useForeignDTD = useDTD;
1617 return XML_ERROR_NONE;
1618 #else
1619 UNUSED_P(useDTD);
1620 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1621 #endif
1622 }
1623
1624 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1625 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1626 if (parser == NULL)
1627 return;
1628 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1629 if (parser->m_parsingStatus.parsing == XML_PARSING
1630 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1631 return;
1632 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1633 }
1634
1635 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1636 XML_SetUserData(XML_Parser parser, void *p) {
1637 if (parser == NULL)
1638 return;
1639 if (parser->m_handlerArg == parser->m_userData)
1640 parser->m_handlerArg = parser->m_userData = p;
1641 else
1642 parser->m_userData = p;
1643 }
1644
1645 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1646 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1647 if (parser == NULL)
1648 return XML_STATUS_ERROR;
1649 if (p) {
1650 p = poolCopyString(&parser->m_dtd->pool, p);
1651 if (! p)
1652 return XML_STATUS_ERROR;
1653 parser->m_curBase = p;
1654 } else
1655 parser->m_curBase = NULL;
1656 return XML_STATUS_OK;
1657 }
1658
1659 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1660 XML_GetBase(XML_Parser parser) {
1661 if (parser == NULL)
1662 return NULL;
1663 return parser->m_curBase;
1664 }
1665
1666 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1667 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1668 if (parser == NULL)
1669 return -1;
1670 return parser->m_nSpecifiedAtts;
1671 }
1672
1673 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1674 XML_GetIdAttributeIndex(XML_Parser parser) {
1675 if (parser == NULL)
1676 return -1;
1677 return parser->m_idAttIndex;
1678 }
1679
1680 #ifdef XML_ATTR_INFO
1681 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1682 XML_GetAttributeInfo(XML_Parser parser) {
1683 if (parser == NULL)
1684 return NULL;
1685 return parser->m_attInfo;
1686 }
1687 #endif
1688
1689 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1690 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1691 XML_EndElementHandler end) {
1692 if (parser == NULL)
1693 return;
1694 parser->m_startElementHandler = start;
1695 parser->m_endElementHandler = end;
1696 }
1697
1698 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1699 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1700 if (parser != NULL)
1701 parser->m_startElementHandler = start;
1702 }
1703
1704 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1705 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1706 if (parser != NULL)
1707 parser->m_endElementHandler = end;
1708 }
1709
1710 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1711 XML_SetCharacterDataHandler(XML_Parser parser,
1712 XML_CharacterDataHandler handler) {
1713 if (parser != NULL)
1714 parser->m_characterDataHandler = handler;
1715 }
1716
1717 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1718 XML_SetProcessingInstructionHandler(XML_Parser parser,
1719 XML_ProcessingInstructionHandler handler) {
1720 if (parser != NULL)
1721 parser->m_processingInstructionHandler = handler;
1722 }
1723
1724 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1725 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1726 if (parser != NULL)
1727 parser->m_commentHandler = handler;
1728 }
1729
1730 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1731 XML_SetCdataSectionHandler(XML_Parser parser,
1732 XML_StartCdataSectionHandler start,
1733 XML_EndCdataSectionHandler end) {
1734 if (parser == NULL)
1735 return;
1736 parser->m_startCdataSectionHandler = start;
1737 parser->m_endCdataSectionHandler = end;
1738 }
1739
1740 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1741 XML_SetStartCdataSectionHandler(XML_Parser parser,
1742 XML_StartCdataSectionHandler start) {
1743 if (parser != NULL)
1744 parser->m_startCdataSectionHandler = start;
1745 }
1746
1747 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1748 XML_SetEndCdataSectionHandler(XML_Parser parser,
1749 XML_EndCdataSectionHandler end) {
1750 if (parser != NULL)
1751 parser->m_endCdataSectionHandler = end;
1752 }
1753
1754 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1755 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1756 if (parser == NULL)
1757 return;
1758 parser->m_defaultHandler = handler;
1759 parser->m_defaultExpandInternalEntities = XML_FALSE;
1760 }
1761
1762 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1763 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1764 if (parser == NULL)
1765 return;
1766 parser->m_defaultHandler = handler;
1767 parser->m_defaultExpandInternalEntities = XML_TRUE;
1768 }
1769
1770 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1771 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1772 XML_EndDoctypeDeclHandler end) {
1773 if (parser == NULL)
1774 return;
1775 parser->m_startDoctypeDeclHandler = start;
1776 parser->m_endDoctypeDeclHandler = end;
1777 }
1778
1779 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1780 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1781 XML_StartDoctypeDeclHandler start) {
1782 if (parser != NULL)
1783 parser->m_startDoctypeDeclHandler = start;
1784 }
1785
1786 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1787 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1788 if (parser != NULL)
1789 parser->m_endDoctypeDeclHandler = end;
1790 }
1791
1792 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1793 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1794 XML_UnparsedEntityDeclHandler handler) {
1795 if (parser != NULL)
1796 parser->m_unparsedEntityDeclHandler = handler;
1797 }
1798
1799 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1800 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1801 if (parser != NULL)
1802 parser->m_notationDeclHandler = handler;
1803 }
1804
1805 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1806 XML_SetNamespaceDeclHandler(XML_Parser parser,
1807 XML_StartNamespaceDeclHandler start,
1808 XML_EndNamespaceDeclHandler end) {
1809 if (parser == NULL)
1810 return;
1811 parser->m_startNamespaceDeclHandler = start;
1812 parser->m_endNamespaceDeclHandler = end;
1813 }
1814
1815 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1816 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1817 XML_StartNamespaceDeclHandler start) {
1818 if (parser != NULL)
1819 parser->m_startNamespaceDeclHandler = start;
1820 }
1821
1822 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1823 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1824 XML_EndNamespaceDeclHandler end) {
1825 if (parser != NULL)
1826 parser->m_endNamespaceDeclHandler = end;
1827 }
1828
1829 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1830 XML_SetNotStandaloneHandler(XML_Parser parser,
1831 XML_NotStandaloneHandler handler) {
1832 if (parser != NULL)
1833 parser->m_notStandaloneHandler = handler;
1834 }
1835
1836 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1837 XML_SetExternalEntityRefHandler(XML_Parser parser,
1838 XML_ExternalEntityRefHandler handler) {
1839 if (parser != NULL)
1840 parser->m_externalEntityRefHandler = handler;
1841 }
1842
1843 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1844 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1845 if (parser == NULL)
1846 return;
1847 if (arg)
1848 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1849 else
1850 parser->m_externalEntityRefHandlerArg = parser;
1851 }
1852
1853 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1854 XML_SetSkippedEntityHandler(XML_Parser parser,
1855 XML_SkippedEntityHandler handler) {
1856 if (parser != NULL)
1857 parser->m_skippedEntityHandler = handler;
1858 }
1859
1860 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1861 XML_SetUnknownEncodingHandler(XML_Parser parser,
1862 XML_UnknownEncodingHandler handler, void *data) {
1863 if (parser == NULL)
1864 return;
1865 parser->m_unknownEncodingHandler = handler;
1866 parser->m_unknownEncodingHandlerData = data;
1867 }
1868
1869 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1870 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1871 if (parser != NULL)
1872 parser->m_elementDeclHandler = eldecl;
1873 }
1874
1875 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1876 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1877 if (parser != NULL)
1878 parser->m_attlistDeclHandler = attdecl;
1879 }
1880
1881 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1882 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1883 if (parser != NULL)
1884 parser->m_entityDeclHandler = handler;
1885 }
1886
1887 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1888 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1889 if (parser != NULL)
1890 parser->m_xmlDeclHandler = handler;
1891 }
1892
1893 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1894 XML_SetParamEntityParsing(XML_Parser parser,
1895 enum XML_ParamEntityParsing peParsing) {
1896 if (parser == NULL)
1897 return 0;
1898 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1899 if (parser->m_parsingStatus.parsing == XML_PARSING
1900 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1901 return 0;
1902 #ifdef XML_DTD
1903 parser->m_paramEntityParsing = peParsing;
1904 return 1;
1905 #else
1906 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1907 #endif
1908 }
1909
1910 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1911 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1912 if (parser == NULL)
1913 return 0;
1914 if (parser->m_parentParser)
1915 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1916 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1917 if (parser->m_parsingStatus.parsing == XML_PARSING
1918 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1919 return 0;
1920 parser->m_hash_secret_salt = hash_salt;
1921 return 1;
1922 }
1923
1924 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1925 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1926 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1927 if (parser != NULL)
1928 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1929 return XML_STATUS_ERROR;
1930 }
1931 switch (parser->m_parsingStatus.parsing) {
1932 case XML_SUSPENDED:
1933 parser->m_errorCode = XML_ERROR_SUSPENDED;
1934 return XML_STATUS_ERROR;
1935 case XML_FINISHED:
1936 parser->m_errorCode = XML_ERROR_FINISHED;
1937 return XML_STATUS_ERROR;
1938 case XML_INITIALIZED:
1939 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1940 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1941 return XML_STATUS_ERROR;
1942 }
1943 /* fall through */
1944 default:
1945 parser->m_parsingStatus.parsing = XML_PARSING;
1946 }
1947
1948 #if XML_CONTEXT_BYTES == 0
1949 if (parser->m_bufferPtr == parser->m_bufferEnd) {
1950 const char *end;
1951 int nLeftOver;
1952 enum XML_Status result;
1953 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1954 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1955 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1956 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1957 parser->m_processor = errorProcessor;
1958 return XML_STATUS_ERROR;
1959 }
1960 // though this isn't a buffer request, we assume that `len` is the app's
1961 // preferred buffer fill size, and therefore save it here.
1962 parser->m_lastBufferRequestSize = len;
1963 parser->m_parseEndByteIndex += len;
1964 parser->m_positionPtr = s;
1965 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1966
1967 parser->m_errorCode
1968 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
1969
1970 if (parser->m_errorCode != XML_ERROR_NONE) {
1971 parser->m_eventEndPtr = parser->m_eventPtr;
1972 parser->m_processor = errorProcessor;
1973 return XML_STATUS_ERROR;
1974 } else {
1975 switch (parser->m_parsingStatus.parsing) {
1976 case XML_SUSPENDED:
1977 result = XML_STATUS_SUSPENDED;
1978 break;
1979 case XML_INITIALIZED:
1980 case XML_PARSING:
1981 if (isFinal) {
1982 parser->m_parsingStatus.parsing = XML_FINISHED;
1983 return XML_STATUS_OK;
1984 }
1985 /* fall through */
1986 default:
1987 result = XML_STATUS_OK;
1988 }
1989 }
1990
1991 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1992 &parser->m_position);
1993 nLeftOver = s + len - end;
1994 if (nLeftOver) {
1995 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
1996 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
1997 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
1998 parser->m_parsingStatus.parsing = XML_PARSING;
1999 void *const temp = XML_GetBuffer(parser, nLeftOver);
2000 parser->m_parsingStatus.parsing = originalStatus;
2001 // GetBuffer may have overwritten this, but we want to remember what the
2002 // app requested, not how many bytes were left over after parsing.
2003 parser->m_lastBufferRequestSize = len;
2004 if (temp == NULL) {
2005 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2006 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2007 parser->m_processor = errorProcessor;
2008 return XML_STATUS_ERROR;
2009 }
2010 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2011 // don't have any data to preserve, and can copy straight into the start
2012 // of the buffer rather than the GetBuffer return pointer (which may be
2013 // pointing further into the allocated buffer).
2014 memcpy(parser->m_buffer, end, nLeftOver);
2015 }
2016 parser->m_bufferPtr = parser->m_buffer;
2017 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2018 parser->m_positionPtr = parser->m_bufferPtr;
2019 parser->m_parseEndPtr = parser->m_bufferEnd;
2020 parser->m_eventPtr = parser->m_bufferPtr;
2021 parser->m_eventEndPtr = parser->m_bufferPtr;
2022 return result;
2023 }
2024 #endif /* XML_CONTEXT_BYTES == 0 */
2025 void *buff = XML_GetBuffer(parser, len);
2026 if (buff == NULL)
2027 return XML_STATUS_ERROR;
2028 if (len > 0) {
2029 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2030 memcpy(buff, s, len);
2031 }
2032 return XML_ParseBuffer(parser, len, isFinal);
2033 }
2034
2035 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)2036 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2037 const char *start;
2038 enum XML_Status result = XML_STATUS_OK;
2039
2040 if (parser == NULL)
2041 return XML_STATUS_ERROR;
2042
2043 if (len < 0) {
2044 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2045 return XML_STATUS_ERROR;
2046 }
2047
2048 switch (parser->m_parsingStatus.parsing) {
2049 case XML_SUSPENDED:
2050 parser->m_errorCode = XML_ERROR_SUSPENDED;
2051 return XML_STATUS_ERROR;
2052 case XML_FINISHED:
2053 parser->m_errorCode = XML_ERROR_FINISHED;
2054 return XML_STATUS_ERROR;
2055 case XML_INITIALIZED:
2056 /* Has someone called XML_GetBuffer successfully before? */
2057 if (! parser->m_bufferPtr) {
2058 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2059 return XML_STATUS_ERROR;
2060 }
2061
2062 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2063 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2064 return XML_STATUS_ERROR;
2065 }
2066 /* fall through */
2067 default:
2068 parser->m_parsingStatus.parsing = XML_PARSING;
2069 }
2070
2071 start = parser->m_bufferPtr;
2072 parser->m_positionPtr = start;
2073 parser->m_bufferEnd += len;
2074 parser->m_parseEndPtr = parser->m_bufferEnd;
2075 parser->m_parseEndByteIndex += len;
2076 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2077
2078 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2079 &parser->m_bufferPtr);
2080
2081 if (parser->m_errorCode != XML_ERROR_NONE) {
2082 parser->m_eventEndPtr = parser->m_eventPtr;
2083 parser->m_processor = errorProcessor;
2084 return XML_STATUS_ERROR;
2085 } else {
2086 switch (parser->m_parsingStatus.parsing) {
2087 case XML_SUSPENDED:
2088 result = XML_STATUS_SUSPENDED;
2089 break;
2090 case XML_INITIALIZED:
2091 case XML_PARSING:
2092 if (isFinal) {
2093 parser->m_parsingStatus.parsing = XML_FINISHED;
2094 return result;
2095 }
2096 default:; /* should not happen */
2097 }
2098 }
2099
2100 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2101 parser->m_bufferPtr, &parser->m_position);
2102 parser->m_positionPtr = parser->m_bufferPtr;
2103 return result;
2104 }
2105
2106 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2107 XML_GetBuffer(XML_Parser parser, int len) {
2108 if (parser == NULL)
2109 return NULL;
2110 if (len < 0) {
2111 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2112 return NULL;
2113 }
2114 switch (parser->m_parsingStatus.parsing) {
2115 case XML_SUSPENDED:
2116 parser->m_errorCode = XML_ERROR_SUSPENDED;
2117 return NULL;
2118 case XML_FINISHED:
2119 parser->m_errorCode = XML_ERROR_FINISHED;
2120 return NULL;
2121 default:;
2122 }
2123
2124 // whether or not the request succeeds, `len` seems to be the app's preferred
2125 // buffer fill size; remember it.
2126 parser->m_lastBufferRequestSize = len;
2127 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2128 || parser->m_buffer == NULL) {
2129 #if XML_CONTEXT_BYTES > 0
2130 int keep;
2131 #endif /* XML_CONTEXT_BYTES > 0 */
2132 /* Do not invoke signed arithmetic overflow: */
2133 int neededSize = (int)((unsigned)len
2134 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2135 parser->m_bufferEnd, parser->m_bufferPtr));
2136 if (neededSize < 0) {
2137 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2138 return NULL;
2139 }
2140 #if XML_CONTEXT_BYTES > 0
2141 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2142 if (keep > XML_CONTEXT_BYTES)
2143 keep = XML_CONTEXT_BYTES;
2144 /* Detect and prevent integer overflow */
2145 if (keep > INT_MAX - neededSize) {
2146 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2147 return NULL;
2148 }
2149 neededSize += keep;
2150 #endif /* XML_CONTEXT_BYTES > 0 */
2151 if (parser->m_buffer && parser->m_bufferPtr
2152 && neededSize
2153 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2154 #if XML_CONTEXT_BYTES > 0
2155 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2156 int offset
2157 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2158 - keep;
2159 /* The buffer pointers cannot be NULL here; we have at least some bytes
2160 * in the buffer */
2161 memmove(parser->m_buffer, &parser->m_buffer[offset],
2162 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2163 parser->m_bufferEnd -= offset;
2164 parser->m_bufferPtr -= offset;
2165 }
2166 #else
2167 memmove(parser->m_buffer, parser->m_bufferPtr,
2168 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2169 parser->m_bufferEnd
2170 = parser->m_buffer
2171 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2172 parser->m_bufferPtr = parser->m_buffer;
2173 #endif /* XML_CONTEXT_BYTES > 0 */
2174 } else {
2175 char *newBuf;
2176 int bufferSize
2177 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2178 if (bufferSize == 0)
2179 bufferSize = INIT_BUFFER_SIZE;
2180 do {
2181 /* Do not invoke signed arithmetic overflow: */
2182 bufferSize = (int)(2U * (unsigned)bufferSize);
2183 } while (bufferSize < neededSize && bufferSize > 0);
2184 if (bufferSize <= 0) {
2185 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2186 return NULL;
2187 }
2188 newBuf = (char *)MALLOC(parser, bufferSize);
2189 if (newBuf == 0) {
2190 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2191 return NULL;
2192 }
2193 parser->m_bufferLim = newBuf + bufferSize;
2194 #if XML_CONTEXT_BYTES > 0
2195 if (parser->m_bufferPtr) {
2196 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2197 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2198 + keep);
2199 FREE(parser, parser->m_buffer);
2200 parser->m_buffer = newBuf;
2201 parser->m_bufferEnd
2202 = parser->m_buffer
2203 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2204 + keep;
2205 parser->m_bufferPtr = parser->m_buffer + keep;
2206 } else {
2207 /* This must be a brand new buffer with no data in it yet */
2208 parser->m_bufferEnd = newBuf;
2209 parser->m_bufferPtr = parser->m_buffer = newBuf;
2210 }
2211 #else
2212 if (parser->m_bufferPtr) {
2213 memcpy(newBuf, parser->m_bufferPtr,
2214 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2215 FREE(parser, parser->m_buffer);
2216 parser->m_bufferEnd
2217 = newBuf
2218 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2219 } else {
2220 /* This must be a brand new buffer with no data in it yet */
2221 parser->m_bufferEnd = newBuf;
2222 }
2223 parser->m_bufferPtr = parser->m_buffer = newBuf;
2224 #endif /* XML_CONTEXT_BYTES > 0 */
2225 }
2226 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2227 parser->m_positionPtr = NULL;
2228 }
2229 return parser->m_bufferEnd;
2230 }
2231
2232 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2233 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2234 if (parser == NULL)
2235 return XML_STATUS_ERROR;
2236 switch (parser->m_parsingStatus.parsing) {
2237 case XML_SUSPENDED:
2238 if (resumable) {
2239 parser->m_errorCode = XML_ERROR_SUSPENDED;
2240 return XML_STATUS_ERROR;
2241 }
2242 parser->m_parsingStatus.parsing = XML_FINISHED;
2243 break;
2244 case XML_FINISHED:
2245 parser->m_errorCode = XML_ERROR_FINISHED;
2246 return XML_STATUS_ERROR;
2247 default:
2248 if (resumable) {
2249 #ifdef XML_DTD
2250 if (parser->m_isParamEntity) {
2251 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2252 return XML_STATUS_ERROR;
2253 }
2254 #endif
2255 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2256 } else
2257 parser->m_parsingStatus.parsing = XML_FINISHED;
2258 }
2259 return XML_STATUS_OK;
2260 }
2261
2262 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2263 XML_ResumeParser(XML_Parser parser) {
2264 enum XML_Status result = XML_STATUS_OK;
2265
2266 if (parser == NULL)
2267 return XML_STATUS_ERROR;
2268 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2269 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2270 return XML_STATUS_ERROR;
2271 }
2272 parser->m_parsingStatus.parsing = XML_PARSING;
2273
2274 parser->m_errorCode = callProcessor(
2275 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2276
2277 if (parser->m_errorCode != XML_ERROR_NONE) {
2278 parser->m_eventEndPtr = parser->m_eventPtr;
2279 parser->m_processor = errorProcessor;
2280 return XML_STATUS_ERROR;
2281 } else {
2282 switch (parser->m_parsingStatus.parsing) {
2283 case XML_SUSPENDED:
2284 result = XML_STATUS_SUSPENDED;
2285 break;
2286 case XML_INITIALIZED:
2287 case XML_PARSING:
2288 if (parser->m_parsingStatus.finalBuffer) {
2289 parser->m_parsingStatus.parsing = XML_FINISHED;
2290 return result;
2291 }
2292 default:;
2293 }
2294 }
2295
2296 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2297 parser->m_bufferPtr, &parser->m_position);
2298 parser->m_positionPtr = parser->m_bufferPtr;
2299 return result;
2300 }
2301
2302 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2303 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2304 if (parser == NULL)
2305 return;
2306 assert(status != NULL);
2307 *status = parser->m_parsingStatus;
2308 }
2309
2310 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2311 XML_GetErrorCode(XML_Parser parser) {
2312 if (parser == NULL)
2313 return XML_ERROR_INVALID_ARGUMENT;
2314 return parser->m_errorCode;
2315 }
2316
2317 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2318 XML_GetCurrentByteIndex(XML_Parser parser) {
2319 if (parser == NULL)
2320 return -1;
2321 if (parser->m_eventPtr)
2322 return (XML_Index)(parser->m_parseEndByteIndex
2323 - (parser->m_parseEndPtr - parser->m_eventPtr));
2324 return -1;
2325 }
2326
2327 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2328 XML_GetCurrentByteCount(XML_Parser parser) {
2329 if (parser == NULL)
2330 return 0;
2331 if (parser->m_eventEndPtr && parser->m_eventPtr)
2332 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2333 return 0;
2334 }
2335
2336 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2337 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2338 #if XML_CONTEXT_BYTES > 0
2339 if (parser == NULL)
2340 return NULL;
2341 if (parser->m_eventPtr && parser->m_buffer) {
2342 if (offset != NULL)
2343 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2344 if (size != NULL)
2345 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2346 return parser->m_buffer;
2347 }
2348 #else
2349 (void)parser;
2350 (void)offset;
2351 (void)size;
2352 #endif /* XML_CONTEXT_BYTES > 0 */
2353 return (const char *)0;
2354 }
2355
2356 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2357 XML_GetCurrentLineNumber(XML_Parser parser) {
2358 if (parser == NULL)
2359 return 0;
2360 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2361 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2362 parser->m_eventPtr, &parser->m_position);
2363 parser->m_positionPtr = parser->m_eventPtr;
2364 }
2365 return parser->m_position.lineNumber + 1;
2366 }
2367
2368 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2369 XML_GetCurrentColumnNumber(XML_Parser parser) {
2370 if (parser == NULL)
2371 return 0;
2372 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2373 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2374 parser->m_eventPtr, &parser->m_position);
2375 parser->m_positionPtr = parser->m_eventPtr;
2376 }
2377 return parser->m_position.columnNumber;
2378 }
2379
2380 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2381 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2382 if (parser != NULL)
2383 FREE(parser, model);
2384 }
2385
2386 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2387 XML_MemMalloc(XML_Parser parser, size_t size) {
2388 if (parser == NULL)
2389 return NULL;
2390 return MALLOC(parser, size);
2391 }
2392
2393 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2394 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2395 if (parser == NULL)
2396 return NULL;
2397 return REALLOC(parser, ptr, size);
2398 }
2399
2400 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2401 XML_MemFree(XML_Parser parser, void *ptr) {
2402 if (parser != NULL)
2403 FREE(parser, ptr);
2404 }
2405
2406 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2407 XML_DefaultCurrent(XML_Parser parser) {
2408 if (parser == NULL)
2409 return;
2410 if (parser->m_defaultHandler) {
2411 if (parser->m_openInternalEntities)
2412 reportDefault(parser, parser->m_internalEncoding,
2413 parser->m_openInternalEntities->internalEventPtr,
2414 parser->m_openInternalEntities->internalEventEndPtr);
2415 else
2416 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2417 parser->m_eventEndPtr);
2418 }
2419 }
2420
2421 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2422 XML_ErrorString(enum XML_Error code) {
2423 switch (code) {
2424 case XML_ERROR_NONE:
2425 return NULL;
2426 case XML_ERROR_NO_MEMORY:
2427 return XML_L("out of memory");
2428 case XML_ERROR_SYNTAX:
2429 return XML_L("syntax error");
2430 case XML_ERROR_NO_ELEMENTS:
2431 return XML_L("no element found");
2432 case XML_ERROR_INVALID_TOKEN:
2433 return XML_L("not well-formed (invalid token)");
2434 case XML_ERROR_UNCLOSED_TOKEN:
2435 return XML_L("unclosed token");
2436 case XML_ERROR_PARTIAL_CHAR:
2437 return XML_L("partial character");
2438 case XML_ERROR_TAG_MISMATCH:
2439 return XML_L("mismatched tag");
2440 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2441 return XML_L("duplicate attribute");
2442 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2443 return XML_L("junk after document element");
2444 case XML_ERROR_PARAM_ENTITY_REF:
2445 return XML_L("illegal parameter entity reference");
2446 case XML_ERROR_UNDEFINED_ENTITY:
2447 return XML_L("undefined entity");
2448 case XML_ERROR_RECURSIVE_ENTITY_REF:
2449 return XML_L("recursive entity reference");
2450 case XML_ERROR_ASYNC_ENTITY:
2451 return XML_L("asynchronous entity");
2452 case XML_ERROR_BAD_CHAR_REF:
2453 return XML_L("reference to invalid character number");
2454 case XML_ERROR_BINARY_ENTITY_REF:
2455 return XML_L("reference to binary entity");
2456 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2457 return XML_L("reference to external entity in attribute");
2458 case XML_ERROR_MISPLACED_XML_PI:
2459 return XML_L("XML or text declaration not at start of entity");
2460 case XML_ERROR_UNKNOWN_ENCODING:
2461 return XML_L("unknown encoding");
2462 case XML_ERROR_INCORRECT_ENCODING:
2463 return XML_L("encoding specified in XML declaration is incorrect");
2464 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2465 return XML_L("unclosed CDATA section");
2466 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2467 return XML_L("error in processing external entity reference");
2468 case XML_ERROR_NOT_STANDALONE:
2469 return XML_L("document is not standalone");
2470 case XML_ERROR_UNEXPECTED_STATE:
2471 return XML_L("unexpected parser state - please send a bug report");
2472 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2473 return XML_L("entity declared in parameter entity");
2474 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2475 return XML_L("requested feature requires XML_DTD support in Expat");
2476 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2477 return XML_L("cannot change setting once parsing has begun");
2478 /* Added in 1.95.7. */
2479 case XML_ERROR_UNBOUND_PREFIX:
2480 return XML_L("unbound prefix");
2481 /* Added in 1.95.8. */
2482 case XML_ERROR_UNDECLARING_PREFIX:
2483 return XML_L("must not undeclare prefix");
2484 case XML_ERROR_INCOMPLETE_PE:
2485 return XML_L("incomplete markup in parameter entity");
2486 case XML_ERROR_XML_DECL:
2487 return XML_L("XML declaration not well-formed");
2488 case XML_ERROR_TEXT_DECL:
2489 return XML_L("text declaration not well-formed");
2490 case XML_ERROR_PUBLICID:
2491 return XML_L("illegal character(s) in public id");
2492 case XML_ERROR_SUSPENDED:
2493 return XML_L("parser suspended");
2494 case XML_ERROR_NOT_SUSPENDED:
2495 return XML_L("parser not suspended");
2496 case XML_ERROR_ABORTED:
2497 return XML_L("parsing aborted");
2498 case XML_ERROR_FINISHED:
2499 return XML_L("parsing finished");
2500 case XML_ERROR_SUSPEND_PE:
2501 return XML_L("cannot suspend in external parameter entity");
2502 /* Added in 2.0.0. */
2503 case XML_ERROR_RESERVED_PREFIX_XML:
2504 return XML_L(
2505 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2506 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2507 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2508 case XML_ERROR_RESERVED_NAMESPACE_URI:
2509 return XML_L(
2510 "prefix must not be bound to one of the reserved namespace names");
2511 /* Added in 2.2.5. */
2512 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2513 return XML_L("invalid argument");
2514 /* Added in 2.3.0. */
2515 case XML_ERROR_NO_BUFFER:
2516 return XML_L(
2517 "a successful prior call to function XML_GetBuffer is required");
2518 /* Added in 2.4.0. */
2519 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2520 return XML_L(
2521 "limit on input amplification factor (from DTD and entities) breached");
2522 }
2523 return NULL;
2524 }
2525
2526 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2527 XML_ExpatVersion(void) {
2528 /* V1 is used to string-ize the version number. However, it would
2529 string-ize the actual version macro *names* unless we get them
2530 substituted before being passed to V1. CPP is defined to expand
2531 a macro, then rescan for more expansions. Thus, we use V2 to expand
2532 the version macros, then CPP will expand the resulting V1() macro
2533 with the correct numerals. */
2534 /* ### I'm assuming cpp is portable in this respect... */
2535
2536 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2537 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2538
2539 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2540
2541 #undef V1
2542 #undef V2
2543 }
2544
2545 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2546 XML_ExpatVersionInfo(void) {
2547 XML_Expat_Version version;
2548
2549 version.major = XML_MAJOR_VERSION;
2550 version.minor = XML_MINOR_VERSION;
2551 version.micro = XML_MICRO_VERSION;
2552
2553 return version;
2554 }
2555
2556 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2557 XML_GetFeatureList(void) {
2558 static const XML_Feature features[] = {
2559 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2560 sizeof(XML_Char)},
2561 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2562 sizeof(XML_LChar)},
2563 #ifdef XML_UNICODE
2564 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2565 #endif
2566 #ifdef XML_UNICODE_WCHAR_T
2567 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2568 #endif
2569 #ifdef XML_DTD
2570 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2571 #endif
2572 #if XML_CONTEXT_BYTES > 0
2573 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2574 XML_CONTEXT_BYTES},
2575 #endif
2576 #ifdef XML_MIN_SIZE
2577 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2578 #endif
2579 #ifdef XML_NS
2580 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2581 #endif
2582 #ifdef XML_LARGE_SIZE
2583 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2584 #endif
2585 #ifdef XML_ATTR_INFO
2586 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2587 #endif
2588 #if XML_GE == 1
2589 /* Added in Expat 2.4.0 for XML_DTD defined and
2590 * added in Expat 2.6.0 for XML_GE == 1. */
2591 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2592 XML_L("XML_BLAP_MAX_AMP"),
2593 (long int)
2594 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2595 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2596 XML_L("XML_BLAP_ACT_THRES"),
2597 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2598 /* Added in Expat 2.6.0. */
2599 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2600 #endif
2601 {XML_FEATURE_END, NULL, 0}};
2602
2603 return features;
2604 }
2605
2606 #if XML_GE == 1
2607 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2608 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2609 XML_Parser parser, float maximumAmplificationFactor) {
2610 if ((parser == NULL) || (parser->m_parentParser != NULL)
2611 || isnan(maximumAmplificationFactor)
2612 || (maximumAmplificationFactor < 1.0f)) {
2613 return XML_FALSE;
2614 }
2615 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2616 return XML_TRUE;
2617 }
2618
2619 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2620 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2621 XML_Parser parser, unsigned long long activationThresholdBytes) {
2622 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2623 return XML_FALSE;
2624 }
2625 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2626 return XML_TRUE;
2627 }
2628 #endif /* XML_GE == 1 */
2629
2630 XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser,XML_Bool enabled)2631 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2632 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2633 parser->m_reparseDeferralEnabled = enabled;
2634 return XML_TRUE;
2635 }
2636 return XML_FALSE;
2637 }
2638
2639 /* Initially tag->rawName always points into the parse buffer;
2640 for those TAG instances opened while the current parse buffer was
2641 processed, and not yet closed, we need to store tag->rawName in a more
2642 permanent location, since the parse buffer is about to be discarded.
2643 */
2644 static XML_Bool
storeRawNames(XML_Parser parser)2645 storeRawNames(XML_Parser parser) {
2646 TAG *tag = parser->m_tagStack;
2647 while (tag) {
2648 int bufSize;
2649 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2650 size_t rawNameLen;
2651 char *rawNameBuf = tag->buf + nameLen;
2652 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2653 at the first entry that has already been copied; everything
2654 below it in the stack is already been accounted for in a
2655 previous call to this function.
2656 */
2657 if (tag->rawName == rawNameBuf)
2658 break;
2659 /* For reuse purposes we need to ensure that the
2660 size of tag->buf is a multiple of sizeof(XML_Char).
2661 */
2662 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2663 /* Detect and prevent integer overflow. */
2664 if (rawNameLen > (size_t)INT_MAX - nameLen)
2665 return XML_FALSE;
2666 bufSize = nameLen + (int)rawNameLen;
2667 if (bufSize > tag->bufEnd - tag->buf) {
2668 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2669 if (temp == NULL)
2670 return XML_FALSE;
2671 /* if tag->name.str points to tag->buf (only when namespace
2672 processing is off) then we have to update it
2673 */
2674 if (tag->name.str == (XML_Char *)tag->buf)
2675 tag->name.str = (XML_Char *)temp;
2676 /* if tag->name.localPart is set (when namespace processing is on)
2677 then update it as well, since it will always point into tag->buf
2678 */
2679 if (tag->name.localPart)
2680 tag->name.localPart
2681 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2682 tag->buf = temp;
2683 tag->bufEnd = temp + bufSize;
2684 rawNameBuf = temp + nameLen;
2685 }
2686 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2687 tag->rawName = rawNameBuf;
2688 tag = tag->parent;
2689 }
2690 return XML_TRUE;
2691 }
2692
2693 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2694 contentProcessor(XML_Parser parser, const char *start, const char *end,
2695 const char **endPtr) {
2696 enum XML_Error result = doContent(
2697 parser, 0, parser->m_encoding, start, end, endPtr,
2698 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2699 if (result == XML_ERROR_NONE) {
2700 if (! storeRawNames(parser))
2701 return XML_ERROR_NO_MEMORY;
2702 }
2703 return result;
2704 }
2705
2706 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2707 externalEntityInitProcessor(XML_Parser parser, const char *start,
2708 const char *end, const char **endPtr) {
2709 enum XML_Error result = initializeEncoding(parser);
2710 if (result != XML_ERROR_NONE)
2711 return result;
2712 parser->m_processor = externalEntityInitProcessor2;
2713 return externalEntityInitProcessor2(parser, start, end, endPtr);
2714 }
2715
2716 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2717 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2718 const char *end, const char **endPtr) {
2719 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2720 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2721 switch (tok) {
2722 case XML_TOK_BOM:
2723 #if XML_GE == 1
2724 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2725 XML_ACCOUNT_DIRECT)) {
2726 accountingOnAbort(parser);
2727 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2728 }
2729 #endif /* XML_GE == 1 */
2730
2731 /* If we are at the end of the buffer, this would cause the next stage,
2732 i.e. externalEntityInitProcessor3, to pass control directly to
2733 doContent (by detecting XML_TOK_NONE) without processing any xml text
2734 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2735 */
2736 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2737 *endPtr = next;
2738 return XML_ERROR_NONE;
2739 }
2740 start = next;
2741 break;
2742 case XML_TOK_PARTIAL:
2743 if (! parser->m_parsingStatus.finalBuffer) {
2744 *endPtr = start;
2745 return XML_ERROR_NONE;
2746 }
2747 parser->m_eventPtr = start;
2748 return XML_ERROR_UNCLOSED_TOKEN;
2749 case XML_TOK_PARTIAL_CHAR:
2750 if (! parser->m_parsingStatus.finalBuffer) {
2751 *endPtr = start;
2752 return XML_ERROR_NONE;
2753 }
2754 parser->m_eventPtr = start;
2755 return XML_ERROR_PARTIAL_CHAR;
2756 }
2757 parser->m_processor = externalEntityInitProcessor3;
2758 return externalEntityInitProcessor3(parser, start, end, endPtr);
2759 }
2760
2761 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2762 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2763 const char *end, const char **endPtr) {
2764 int tok;
2765 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2766 parser->m_eventPtr = start;
2767 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2768 /* Note: These bytes are accounted later in:
2769 - processXmlDecl
2770 - externalEntityContentProcessor
2771 */
2772 parser->m_eventEndPtr = next;
2773
2774 switch (tok) {
2775 case XML_TOK_XML_DECL: {
2776 enum XML_Error result;
2777 result = processXmlDecl(parser, 1, start, next);
2778 if (result != XML_ERROR_NONE)
2779 return result;
2780 switch (parser->m_parsingStatus.parsing) {
2781 case XML_SUSPENDED:
2782 *endPtr = next;
2783 return XML_ERROR_NONE;
2784 case XML_FINISHED:
2785 return XML_ERROR_ABORTED;
2786 default:
2787 start = next;
2788 }
2789 } break;
2790 case XML_TOK_PARTIAL:
2791 if (! parser->m_parsingStatus.finalBuffer) {
2792 *endPtr = start;
2793 return XML_ERROR_NONE;
2794 }
2795 return XML_ERROR_UNCLOSED_TOKEN;
2796 case XML_TOK_PARTIAL_CHAR:
2797 if (! parser->m_parsingStatus.finalBuffer) {
2798 *endPtr = start;
2799 return XML_ERROR_NONE;
2800 }
2801 return XML_ERROR_PARTIAL_CHAR;
2802 }
2803 parser->m_processor = externalEntityContentProcessor;
2804 parser->m_tagLevel = 1;
2805 return externalEntityContentProcessor(parser, start, end, endPtr);
2806 }
2807
2808 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2809 externalEntityContentProcessor(XML_Parser parser, const char *start,
2810 const char *end, const char **endPtr) {
2811 enum XML_Error result
2812 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2813 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2814 XML_ACCOUNT_ENTITY_EXPANSION);
2815 if (result == XML_ERROR_NONE) {
2816 if (! storeRawNames(parser))
2817 return XML_ERROR_NO_MEMORY;
2818 }
2819 return result;
2820 }
2821
2822 static enum XML_Error
doContent(XML_Parser parser,int startTagLevel,const ENCODING * enc,const char * s,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)2823 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2824 const char *s, const char *end, const char **nextPtr,
2825 XML_Bool haveMore, enum XML_Account account) {
2826 /* save one level of indirection */
2827 DTD *const dtd = parser->m_dtd;
2828
2829 const char **eventPP;
2830 const char **eventEndPP;
2831 if (enc == parser->m_encoding) {
2832 eventPP = &parser->m_eventPtr;
2833 eventEndPP = &parser->m_eventEndPtr;
2834 } else {
2835 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2836 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2837 }
2838 *eventPP = s;
2839
2840 for (;;) {
2841 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2842 int tok = XmlContentTok(enc, s, end, &next);
2843 #if XML_GE == 1
2844 const char *accountAfter
2845 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2846 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2847 : next;
2848 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2849 account)) {
2850 accountingOnAbort(parser);
2851 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2852 }
2853 #endif
2854 *eventEndPP = next;
2855 switch (tok) {
2856 case XML_TOK_TRAILING_CR:
2857 if (haveMore) {
2858 *nextPtr = s;
2859 return XML_ERROR_NONE;
2860 }
2861 *eventEndPP = end;
2862 if (parser->m_characterDataHandler) {
2863 XML_Char c = 0xA;
2864 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2865 } else if (parser->m_defaultHandler)
2866 reportDefault(parser, enc, s, end);
2867 /* We are at the end of the final buffer, should we check for
2868 XML_SUSPENDED, XML_FINISHED?
2869 */
2870 if (startTagLevel == 0)
2871 return XML_ERROR_NO_ELEMENTS;
2872 if (parser->m_tagLevel != startTagLevel)
2873 return XML_ERROR_ASYNC_ENTITY;
2874 *nextPtr = end;
2875 return XML_ERROR_NONE;
2876 case XML_TOK_NONE:
2877 if (haveMore) {
2878 *nextPtr = s;
2879 return XML_ERROR_NONE;
2880 }
2881 if (startTagLevel > 0) {
2882 if (parser->m_tagLevel != startTagLevel)
2883 return XML_ERROR_ASYNC_ENTITY;
2884 *nextPtr = s;
2885 return XML_ERROR_NONE;
2886 }
2887 return XML_ERROR_NO_ELEMENTS;
2888 case XML_TOK_INVALID:
2889 *eventPP = next;
2890 return XML_ERROR_INVALID_TOKEN;
2891 case XML_TOK_PARTIAL:
2892 if (haveMore) {
2893 *nextPtr = s;
2894 return XML_ERROR_NONE;
2895 }
2896 return XML_ERROR_UNCLOSED_TOKEN;
2897 case XML_TOK_PARTIAL_CHAR:
2898 if (haveMore) {
2899 *nextPtr = s;
2900 return XML_ERROR_NONE;
2901 }
2902 return XML_ERROR_PARTIAL_CHAR;
2903 case XML_TOK_ENTITY_REF: {
2904 const XML_Char *name;
2905 ENTITY *entity;
2906 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2907 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2908 if (ch) {
2909 #if XML_GE == 1
2910 /* NOTE: We are replacing 4-6 characters original input for 1 character
2911 * so there is no amplification and hence recording without
2912 * protection. */
2913 accountingDiffTolerated(parser, tok, (char *)&ch,
2914 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2915 XML_ACCOUNT_ENTITY_EXPANSION);
2916 #endif /* XML_GE == 1 */
2917 if (parser->m_characterDataHandler)
2918 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2919 else if (parser->m_defaultHandler)
2920 reportDefault(parser, enc, s, next);
2921 break;
2922 }
2923 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2924 next - enc->minBytesPerChar);
2925 if (! name)
2926 return XML_ERROR_NO_MEMORY;
2927 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2928 poolDiscard(&dtd->pool);
2929 /* First, determine if a check for an existing declaration is needed;
2930 if yes, check that the entity exists, and that it is internal,
2931 otherwise call the skipped entity or default handler.
2932 */
2933 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2934 if (! entity)
2935 return XML_ERROR_UNDEFINED_ENTITY;
2936 else if (! entity->is_internal)
2937 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2938 } else if (! entity) {
2939 if (parser->m_skippedEntityHandler)
2940 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2941 else if (parser->m_defaultHandler)
2942 reportDefault(parser, enc, s, next);
2943 break;
2944 }
2945 if (entity->open)
2946 return XML_ERROR_RECURSIVE_ENTITY_REF;
2947 if (entity->notation)
2948 return XML_ERROR_BINARY_ENTITY_REF;
2949 if (entity->textPtr) {
2950 enum XML_Error result;
2951 if (! parser->m_defaultExpandInternalEntities) {
2952 if (parser->m_skippedEntityHandler)
2953 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2954 0);
2955 else if (parser->m_defaultHandler)
2956 reportDefault(parser, enc, s, next);
2957 break;
2958 }
2959 result = processInternalEntity(parser, entity, XML_FALSE);
2960 if (result != XML_ERROR_NONE)
2961 return result;
2962 } else if (parser->m_externalEntityRefHandler) {
2963 const XML_Char *context;
2964 entity->open = XML_TRUE;
2965 context = getContext(parser);
2966 entity->open = XML_FALSE;
2967 if (! context)
2968 return XML_ERROR_NO_MEMORY;
2969 if (! parser->m_externalEntityRefHandler(
2970 parser->m_externalEntityRefHandlerArg, context, entity->base,
2971 entity->systemId, entity->publicId))
2972 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2973 poolDiscard(&parser->m_tempPool);
2974 } else if (parser->m_defaultHandler)
2975 reportDefault(parser, enc, s, next);
2976 break;
2977 }
2978 case XML_TOK_START_TAG_NO_ATTS:
2979 /* fall through */
2980 case XML_TOK_START_TAG_WITH_ATTS: {
2981 TAG *tag;
2982 enum XML_Error result;
2983 XML_Char *toPtr;
2984 if (parser->m_freeTagList) {
2985 tag = parser->m_freeTagList;
2986 parser->m_freeTagList = parser->m_freeTagList->parent;
2987 } else {
2988 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2989 if (! tag)
2990 return XML_ERROR_NO_MEMORY;
2991 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2992 if (! tag->buf) {
2993 FREE(parser, tag);
2994 return XML_ERROR_NO_MEMORY;
2995 }
2996 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2997 }
2998 tag->bindings = NULL;
2999 tag->parent = parser->m_tagStack;
3000 parser->m_tagStack = tag;
3001 tag->name.localPart = NULL;
3002 tag->name.prefix = NULL;
3003 tag->rawName = s + enc->minBytesPerChar;
3004 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3005 ++parser->m_tagLevel;
3006 {
3007 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3008 const char *fromPtr = tag->rawName;
3009 toPtr = (XML_Char *)tag->buf;
3010 for (;;) {
3011 int bufSize;
3012 int convLen;
3013 const enum XML_Convert_Result convert_res
3014 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3015 (ICHAR *)tag->bufEnd - 1);
3016 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3017 if ((fromPtr >= rawNameEnd)
3018 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3019 tag->name.strLen = convLen;
3020 break;
3021 }
3022 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3023 {
3024 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3025 if (temp == NULL)
3026 return XML_ERROR_NO_MEMORY;
3027 tag->buf = temp;
3028 tag->bufEnd = temp + bufSize;
3029 toPtr = (XML_Char *)temp + convLen;
3030 }
3031 }
3032 }
3033 tag->name.str = (XML_Char *)tag->buf;
3034 *toPtr = XML_T('\0');
3035 result
3036 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3037 if (result)
3038 return result;
3039 if (parser->m_startElementHandler)
3040 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3041 (const XML_Char **)parser->m_atts);
3042 else if (parser->m_defaultHandler)
3043 reportDefault(parser, enc, s, next);
3044 poolClear(&parser->m_tempPool);
3045 break;
3046 }
3047 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3048 /* fall through */
3049 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3050 const char *rawName = s + enc->minBytesPerChar;
3051 enum XML_Error result;
3052 BINDING *bindings = NULL;
3053 XML_Bool noElmHandlers = XML_TRUE;
3054 TAG_NAME name;
3055 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3056 rawName + XmlNameLength(enc, rawName));
3057 if (! name.str)
3058 return XML_ERROR_NO_MEMORY;
3059 poolFinish(&parser->m_tempPool);
3060 result = storeAtts(parser, enc, s, &name, &bindings,
3061 XML_ACCOUNT_NONE /* token spans whole start tag */);
3062 if (result != XML_ERROR_NONE) {
3063 freeBindings(parser, bindings);
3064 return result;
3065 }
3066 poolFinish(&parser->m_tempPool);
3067 if (parser->m_startElementHandler) {
3068 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3069 (const XML_Char **)parser->m_atts);
3070 noElmHandlers = XML_FALSE;
3071 }
3072 if (parser->m_endElementHandler) {
3073 if (parser->m_startElementHandler)
3074 *eventPP = *eventEndPP;
3075 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3076 noElmHandlers = XML_FALSE;
3077 }
3078 if (noElmHandlers && parser->m_defaultHandler)
3079 reportDefault(parser, enc, s, next);
3080 poolClear(&parser->m_tempPool);
3081 freeBindings(parser, bindings);
3082 }
3083 if ((parser->m_tagLevel == 0)
3084 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3085 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3086 parser->m_processor = epilogProcessor;
3087 else
3088 return epilogProcessor(parser, next, end, nextPtr);
3089 }
3090 break;
3091 case XML_TOK_END_TAG:
3092 if (parser->m_tagLevel == startTagLevel)
3093 return XML_ERROR_ASYNC_ENTITY;
3094 else {
3095 int len;
3096 const char *rawName;
3097 TAG *tag = parser->m_tagStack;
3098 rawName = s + enc->minBytesPerChar * 2;
3099 len = XmlNameLength(enc, rawName);
3100 if (len != tag->rawNameLength
3101 || memcmp(tag->rawName, rawName, len) != 0) {
3102 *eventPP = rawName;
3103 return XML_ERROR_TAG_MISMATCH;
3104 }
3105 parser->m_tagStack = tag->parent;
3106 tag->parent = parser->m_freeTagList;
3107 parser->m_freeTagList = tag;
3108 --parser->m_tagLevel;
3109 if (parser->m_endElementHandler) {
3110 const XML_Char *localPart;
3111 const XML_Char *prefix;
3112 XML_Char *uri;
3113 localPart = tag->name.localPart;
3114 if (parser->m_ns && localPart) {
3115 /* localPart and prefix may have been overwritten in
3116 tag->name.str, since this points to the binding->uri
3117 buffer which gets reused; so we have to add them again
3118 */
3119 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3120 /* don't need to check for space - already done in storeAtts() */
3121 while (*localPart)
3122 *uri++ = *localPart++;
3123 prefix = tag->name.prefix;
3124 if (parser->m_ns_triplets && prefix) {
3125 *uri++ = parser->m_namespaceSeparator;
3126 while (*prefix)
3127 *uri++ = *prefix++;
3128 }
3129 *uri = XML_T('\0');
3130 }
3131 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3132 } else if (parser->m_defaultHandler)
3133 reportDefault(parser, enc, s, next);
3134 while (tag->bindings) {
3135 BINDING *b = tag->bindings;
3136 if (parser->m_endNamespaceDeclHandler)
3137 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3138 b->prefix->name);
3139 tag->bindings = tag->bindings->nextTagBinding;
3140 b->nextTagBinding = parser->m_freeBindingList;
3141 parser->m_freeBindingList = b;
3142 b->prefix->binding = b->prevPrefixBinding;
3143 }
3144 if ((parser->m_tagLevel == 0)
3145 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3146 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3147 parser->m_processor = epilogProcessor;
3148 else
3149 return epilogProcessor(parser, next, end, nextPtr);
3150 }
3151 }
3152 break;
3153 case XML_TOK_CHAR_REF: {
3154 int n = XmlCharRefNumber(enc, s);
3155 if (n < 0)
3156 return XML_ERROR_BAD_CHAR_REF;
3157 if (parser->m_characterDataHandler) {
3158 XML_Char buf[XML_ENCODE_MAX];
3159 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3160 XmlEncode(n, (ICHAR *)buf));
3161 } else if (parser->m_defaultHandler)
3162 reportDefault(parser, enc, s, next);
3163 } break;
3164 case XML_TOK_XML_DECL:
3165 return XML_ERROR_MISPLACED_XML_PI;
3166 case XML_TOK_DATA_NEWLINE:
3167 if (parser->m_characterDataHandler) {
3168 XML_Char c = 0xA;
3169 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3170 } else if (parser->m_defaultHandler)
3171 reportDefault(parser, enc, s, next);
3172 break;
3173 case XML_TOK_CDATA_SECT_OPEN: {
3174 enum XML_Error result;
3175 if (parser->m_startCdataSectionHandler)
3176 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3177 /* BEGIN disabled code */
3178 /* Suppose you doing a transformation on a document that involves
3179 changing only the character data. You set up a defaultHandler
3180 and a characterDataHandler. The defaultHandler simply copies
3181 characters through. The characterDataHandler does the
3182 transformation and writes the characters out escaping them as
3183 necessary. This case will fail to work if we leave out the
3184 following two lines (because & and < inside CDATA sections will
3185 be incorrectly escaped).
3186
3187 However, now we have a start/endCdataSectionHandler, so it seems
3188 easier to let the user deal with this.
3189 */
3190 else if ((0) && parser->m_characterDataHandler)
3191 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3192 0);
3193 /* END disabled code */
3194 else if (parser->m_defaultHandler)
3195 reportDefault(parser, enc, s, next);
3196 result
3197 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3198 if (result != XML_ERROR_NONE)
3199 return result;
3200 else if (! next) {
3201 parser->m_processor = cdataSectionProcessor;
3202 return result;
3203 }
3204 } break;
3205 case XML_TOK_TRAILING_RSQB:
3206 if (haveMore) {
3207 *nextPtr = s;
3208 return XML_ERROR_NONE;
3209 }
3210 if (parser->m_characterDataHandler) {
3211 if (MUST_CONVERT(enc, s)) {
3212 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3213 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3214 parser->m_characterDataHandler(
3215 parser->m_handlerArg, parser->m_dataBuf,
3216 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3217 } else
3218 parser->m_characterDataHandler(
3219 parser->m_handlerArg, (const XML_Char *)s,
3220 (int)((const XML_Char *)end - (const XML_Char *)s));
3221 } else if (parser->m_defaultHandler)
3222 reportDefault(parser, enc, s, end);
3223 /* We are at the end of the final buffer, should we check for
3224 XML_SUSPENDED, XML_FINISHED?
3225 */
3226 if (startTagLevel == 0) {
3227 *eventPP = end;
3228 return XML_ERROR_NO_ELEMENTS;
3229 }
3230 if (parser->m_tagLevel != startTagLevel) {
3231 *eventPP = end;
3232 return XML_ERROR_ASYNC_ENTITY;
3233 }
3234 *nextPtr = end;
3235 return XML_ERROR_NONE;
3236 case XML_TOK_DATA_CHARS: {
3237 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3238 if (charDataHandler) {
3239 if (MUST_CONVERT(enc, s)) {
3240 for (;;) {
3241 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3242 const enum XML_Convert_Result convert_res = XmlConvert(
3243 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3244 *eventEndPP = s;
3245 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3246 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3247 if ((convert_res == XML_CONVERT_COMPLETED)
3248 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3249 break;
3250 *eventPP = s;
3251 }
3252 } else
3253 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3254 (int)((const XML_Char *)next - (const XML_Char *)s));
3255 } else if (parser->m_defaultHandler)
3256 reportDefault(parser, enc, s, next);
3257 } break;
3258 case XML_TOK_PI:
3259 if (! reportProcessingInstruction(parser, enc, s, next))
3260 return XML_ERROR_NO_MEMORY;
3261 break;
3262 case XML_TOK_COMMENT:
3263 if (! reportComment(parser, enc, s, next))
3264 return XML_ERROR_NO_MEMORY;
3265 break;
3266 default:
3267 /* All of the tokens produced by XmlContentTok() have their own
3268 * explicit cases, so this default is not strictly necessary.
3269 * However it is a useful safety net, so we retain the code and
3270 * simply exclude it from the coverage tests.
3271 *
3272 * LCOV_EXCL_START
3273 */
3274 if (parser->m_defaultHandler)
3275 reportDefault(parser, enc, s, next);
3276 break;
3277 /* LCOV_EXCL_STOP */
3278 }
3279 *eventPP = s = next;
3280 switch (parser->m_parsingStatus.parsing) {
3281 case XML_SUSPENDED:
3282 *nextPtr = next;
3283 return XML_ERROR_NONE;
3284 case XML_FINISHED:
3285 return XML_ERROR_ABORTED;
3286 default:;
3287 }
3288 }
3289 /* not reached */
3290 }
3291
3292 /* This function does not call free() on the allocated memory, merely
3293 * moving it to the parser's m_freeBindingList where it can be freed or
3294 * reused as appropriate.
3295 */
3296 static void
freeBindings(XML_Parser parser,BINDING * bindings)3297 freeBindings(XML_Parser parser, BINDING *bindings) {
3298 while (bindings) {
3299 BINDING *b = bindings;
3300
3301 /* m_startNamespaceDeclHandler will have been called for this
3302 * binding in addBindings(), so call the end handler now.
3303 */
3304 if (parser->m_endNamespaceDeclHandler)
3305 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3306
3307 bindings = bindings->nextTagBinding;
3308 b->nextTagBinding = parser->m_freeBindingList;
3309 parser->m_freeBindingList = b;
3310 b->prefix->binding = b->prevPrefixBinding;
3311 }
3312 }
3313
3314 /* Precondition: all arguments must be non-NULL;
3315 Purpose:
3316 - normalize attributes
3317 - check attributes for well-formedness
3318 - generate namespace aware attribute names (URI, prefix)
3319 - build list of attributes for startElementHandler
3320 - default attributes
3321 - process namespace declarations (check and report them)
3322 - generate namespace aware element name (URI, prefix)
3323 */
3324 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3325 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3326 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3327 enum XML_Account account) {
3328 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3329 ELEMENT_TYPE *elementType;
3330 int nDefaultAtts;
3331 const XML_Char **appAtts; /* the attribute list for the application */
3332 int attIndex = 0;
3333 int prefixLen;
3334 int i;
3335 int n;
3336 XML_Char *uri;
3337 int nPrefixes = 0;
3338 BINDING *binding;
3339 const XML_Char *localPart;
3340
3341 /* lookup the element type name */
3342 elementType
3343 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3344 if (! elementType) {
3345 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3346 if (! name)
3347 return XML_ERROR_NO_MEMORY;
3348 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3349 sizeof(ELEMENT_TYPE));
3350 if (! elementType)
3351 return XML_ERROR_NO_MEMORY;
3352 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3353 return XML_ERROR_NO_MEMORY;
3354 }
3355 nDefaultAtts = elementType->nDefaultAtts;
3356
3357 /* get the attributes from the tokenizer */
3358 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3359
3360 /* Detect and prevent integer overflow */
3361 if (n > INT_MAX - nDefaultAtts) {
3362 return XML_ERROR_NO_MEMORY;
3363 }
3364
3365 if (n + nDefaultAtts > parser->m_attsSize) {
3366 int oldAttsSize = parser->m_attsSize;
3367 ATTRIBUTE *temp;
3368 #ifdef XML_ATTR_INFO
3369 XML_AttrInfo *temp2;
3370 #endif
3371
3372 /* Detect and prevent integer overflow */
3373 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3374 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3375 return XML_ERROR_NO_MEMORY;
3376 }
3377
3378 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3379
3380 /* Detect and prevent integer overflow.
3381 * The preprocessor guard addresses the "always false" warning
3382 * from -Wtype-limits on platforms where
3383 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3384 #if UINT_MAX >= SIZE_MAX
3385 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3386 parser->m_attsSize = oldAttsSize;
3387 return XML_ERROR_NO_MEMORY;
3388 }
3389 #endif
3390
3391 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3392 parser->m_attsSize * sizeof(ATTRIBUTE));
3393 if (temp == NULL) {
3394 parser->m_attsSize = oldAttsSize;
3395 return XML_ERROR_NO_MEMORY;
3396 }
3397 parser->m_atts = temp;
3398 #ifdef XML_ATTR_INFO
3399 /* Detect and prevent integer overflow.
3400 * The preprocessor guard addresses the "always false" warning
3401 * from -Wtype-limits on platforms where
3402 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3403 # if UINT_MAX >= SIZE_MAX
3404 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3405 parser->m_attsSize = oldAttsSize;
3406 return XML_ERROR_NO_MEMORY;
3407 }
3408 # endif
3409
3410 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3411 parser->m_attsSize * sizeof(XML_AttrInfo));
3412 if (temp2 == NULL) {
3413 parser->m_attsSize = oldAttsSize;
3414 return XML_ERROR_NO_MEMORY;
3415 }
3416 parser->m_attInfo = temp2;
3417 #endif
3418 if (n > oldAttsSize)
3419 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3420 }
3421
3422 appAtts = (const XML_Char **)parser->m_atts;
3423 for (i = 0; i < n; i++) {
3424 ATTRIBUTE *currAtt = &parser->m_atts[i];
3425 #ifdef XML_ATTR_INFO
3426 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3427 #endif
3428 /* add the name and value to the attribute list */
3429 ATTRIBUTE_ID *attId
3430 = getAttributeId(parser, enc, currAtt->name,
3431 currAtt->name + XmlNameLength(enc, currAtt->name));
3432 if (! attId)
3433 return XML_ERROR_NO_MEMORY;
3434 #ifdef XML_ATTR_INFO
3435 currAttInfo->nameStart
3436 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3437 currAttInfo->nameEnd
3438 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3439 currAttInfo->valueStart = parser->m_parseEndByteIndex
3440 - (parser->m_parseEndPtr - currAtt->valuePtr);
3441 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3442 - (parser->m_parseEndPtr - currAtt->valueEnd);
3443 #endif
3444 /* Detect duplicate attributes by their QNames. This does not work when
3445 namespace processing is turned on and different prefixes for the same
3446 namespace are used. For this case we have a check further down.
3447 */
3448 if ((attId->name)[-1]) {
3449 if (enc == parser->m_encoding)
3450 parser->m_eventPtr = parser->m_atts[i].name;
3451 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3452 }
3453 (attId->name)[-1] = 1;
3454 appAtts[attIndex++] = attId->name;
3455 if (! parser->m_atts[i].normalized) {
3456 enum XML_Error result;
3457 XML_Bool isCdata = XML_TRUE;
3458
3459 /* figure out whether declared as other than CDATA */
3460 if (attId->maybeTokenized) {
3461 int j;
3462 for (j = 0; j < nDefaultAtts; j++) {
3463 if (attId == elementType->defaultAtts[j].id) {
3464 isCdata = elementType->defaultAtts[j].isCdata;
3465 break;
3466 }
3467 }
3468 }
3469
3470 /* normalize the attribute value */
3471 result = storeAttributeValue(
3472 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3473 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3474 if (result)
3475 return result;
3476 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3477 poolFinish(&parser->m_tempPool);
3478 } else {
3479 /* the value did not need normalizing */
3480 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3481 parser->m_atts[i].valuePtr,
3482 parser->m_atts[i].valueEnd);
3483 if (appAtts[attIndex] == 0)
3484 return XML_ERROR_NO_MEMORY;
3485 poolFinish(&parser->m_tempPool);
3486 }
3487 /* handle prefixed attribute names */
3488 if (attId->prefix) {
3489 if (attId->xmlns) {
3490 /* deal with namespace declarations here */
3491 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3492 appAtts[attIndex], bindingsPtr);
3493 if (result)
3494 return result;
3495 --attIndex;
3496 } else {
3497 /* deal with other prefixed names later */
3498 attIndex++;
3499 nPrefixes++;
3500 (attId->name)[-1] = 2;
3501 }
3502 } else
3503 attIndex++;
3504 }
3505
3506 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3507 parser->m_nSpecifiedAtts = attIndex;
3508 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3509 for (i = 0; i < attIndex; i += 2)
3510 if (appAtts[i] == elementType->idAtt->name) {
3511 parser->m_idAttIndex = i;
3512 break;
3513 }
3514 } else
3515 parser->m_idAttIndex = -1;
3516
3517 /* do attribute defaulting */
3518 for (i = 0; i < nDefaultAtts; i++) {
3519 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3520 if (! (da->id->name)[-1] && da->value) {
3521 if (da->id->prefix) {
3522 if (da->id->xmlns) {
3523 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3524 da->value, bindingsPtr);
3525 if (result)
3526 return result;
3527 } else {
3528 (da->id->name)[-1] = 2;
3529 nPrefixes++;
3530 appAtts[attIndex++] = da->id->name;
3531 appAtts[attIndex++] = da->value;
3532 }
3533 } else {
3534 (da->id->name)[-1] = 1;
3535 appAtts[attIndex++] = da->id->name;
3536 appAtts[attIndex++] = da->value;
3537 }
3538 }
3539 }
3540 appAtts[attIndex] = 0;
3541
3542 /* expand prefixed attribute names, check for duplicates,
3543 and clear flags that say whether attributes were specified */
3544 i = 0;
3545 if (nPrefixes) {
3546 int j; /* hash table index */
3547 unsigned long version = parser->m_nsAttsVersion;
3548
3549 /* Detect and prevent invalid shift */
3550 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3551 return XML_ERROR_NO_MEMORY;
3552 }
3553
3554 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3555 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3556 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3557 if ((nPrefixes << 1)
3558 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3559 NS_ATT *temp;
3560 /* hash table size must also be a power of 2 and >= 8 */
3561 while (nPrefixes >> parser->m_nsAttsPower++)
3562 ;
3563 if (parser->m_nsAttsPower < 3)
3564 parser->m_nsAttsPower = 3;
3565
3566 /* Detect and prevent invalid shift */
3567 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3568 /* Restore actual size of memory in m_nsAtts */
3569 parser->m_nsAttsPower = oldNsAttsPower;
3570 return XML_ERROR_NO_MEMORY;
3571 }
3572
3573 nsAttsSize = 1u << parser->m_nsAttsPower;
3574
3575 /* Detect and prevent integer overflow.
3576 * The preprocessor guard addresses the "always false" warning
3577 * from -Wtype-limits on platforms where
3578 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3579 #if UINT_MAX >= SIZE_MAX
3580 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3581 /* Restore actual size of memory in m_nsAtts */
3582 parser->m_nsAttsPower = oldNsAttsPower;
3583 return XML_ERROR_NO_MEMORY;
3584 }
3585 #endif
3586
3587 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3588 nsAttsSize * sizeof(NS_ATT));
3589 if (! temp) {
3590 /* Restore actual size of memory in m_nsAtts */
3591 parser->m_nsAttsPower = oldNsAttsPower;
3592 return XML_ERROR_NO_MEMORY;
3593 }
3594 parser->m_nsAtts = temp;
3595 version = 0; /* force re-initialization of m_nsAtts hash table */
3596 }
3597 /* using a version flag saves us from initializing m_nsAtts every time */
3598 if (! version) { /* initialize version flags when version wraps around */
3599 version = INIT_ATTS_VERSION;
3600 for (j = nsAttsSize; j != 0;)
3601 parser->m_nsAtts[--j].version = version;
3602 }
3603 parser->m_nsAttsVersion = --version;
3604
3605 /* expand prefixed names and check for duplicates */
3606 for (; i < attIndex; i += 2) {
3607 const XML_Char *s = appAtts[i];
3608 if (s[-1] == 2) { /* prefixed */
3609 ATTRIBUTE_ID *id;
3610 const BINDING *b;
3611 unsigned long uriHash;
3612 struct siphash sip_state;
3613 struct sipkey sip_key;
3614
3615 copy_salt_to_sipkey(parser, &sip_key);
3616 sip24_init(&sip_state, &sip_key);
3617
3618 ((XML_Char *)s)[-1] = 0; /* clear flag */
3619 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3620 if (! id || ! id->prefix) {
3621 /* This code is walking through the appAtts array, dealing
3622 * with (in this case) a prefixed attribute name. To be in
3623 * the array, the attribute must have already been bound, so
3624 * has to have passed through the hash table lookup once
3625 * already. That implies that an entry for it already
3626 * exists, so the lookup above will return a pointer to
3627 * already allocated memory. There is no opportunity for
3628 * the allocator to fail, so the condition above cannot be
3629 * fulfilled.
3630 *
3631 * Since it is difficult to be certain that the above
3632 * analysis is complete, we retain the test and merely
3633 * remove the code from coverage tests.
3634 */
3635 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3636 }
3637 b = id->prefix->binding;
3638 if (! b)
3639 return XML_ERROR_UNBOUND_PREFIX;
3640
3641 for (j = 0; j < b->uriLen; j++) {
3642 const XML_Char c = b->uri[j];
3643 if (! poolAppendChar(&parser->m_tempPool, c))
3644 return XML_ERROR_NO_MEMORY;
3645 }
3646
3647 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3648
3649 while (*s++ != XML_T(ASCII_COLON))
3650 ;
3651
3652 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3653
3654 do { /* copies null terminator */
3655 if (! poolAppendChar(&parser->m_tempPool, *s))
3656 return XML_ERROR_NO_MEMORY;
3657 } while (*s++);
3658
3659 uriHash = (unsigned long)sip24_final(&sip_state);
3660
3661 { /* Check hash table for duplicate of expanded name (uriName).
3662 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3663 */
3664 unsigned char step = 0;
3665 unsigned long mask = nsAttsSize - 1;
3666 j = uriHash & mask; /* index into hash table */
3667 while (parser->m_nsAtts[j].version == version) {
3668 /* for speed we compare stored hash values first */
3669 if (uriHash == parser->m_nsAtts[j].hash) {
3670 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3671 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3672 /* s1 is null terminated, but not s2 */
3673 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3674 ;
3675 if (*s1 == 0)
3676 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3677 }
3678 if (! step)
3679 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3680 j < step ? (j += nsAttsSize - step) : (j -= step);
3681 }
3682 }
3683
3684 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3685 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3686 s = b->prefix->name;
3687 do {
3688 if (! poolAppendChar(&parser->m_tempPool, *s))
3689 return XML_ERROR_NO_MEMORY;
3690 } while (*s++);
3691 }
3692
3693 /* store expanded name in attribute list */
3694 s = poolStart(&parser->m_tempPool);
3695 poolFinish(&parser->m_tempPool);
3696 appAtts[i] = s;
3697
3698 /* fill empty slot with new version, uriName and hash value */
3699 parser->m_nsAtts[j].version = version;
3700 parser->m_nsAtts[j].hash = uriHash;
3701 parser->m_nsAtts[j].uriName = s;
3702
3703 if (! --nPrefixes) {
3704 i += 2;
3705 break;
3706 }
3707 } else /* not prefixed */
3708 ((XML_Char *)s)[-1] = 0; /* clear flag */
3709 }
3710 }
3711 /* clear flags for the remaining attributes */
3712 for (; i < attIndex; i += 2)
3713 ((XML_Char *)(appAtts[i]))[-1] = 0;
3714 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3715 binding->attId->name[-1] = 0;
3716
3717 if (! parser->m_ns)
3718 return XML_ERROR_NONE;
3719
3720 /* expand the element type name */
3721 if (elementType->prefix) {
3722 binding = elementType->prefix->binding;
3723 if (! binding)
3724 return XML_ERROR_UNBOUND_PREFIX;
3725 localPart = tagNamePtr->str;
3726 while (*localPart++ != XML_T(ASCII_COLON))
3727 ;
3728 } else if (dtd->defaultPrefix.binding) {
3729 binding = dtd->defaultPrefix.binding;
3730 localPart = tagNamePtr->str;
3731 } else
3732 return XML_ERROR_NONE;
3733 prefixLen = 0;
3734 if (parser->m_ns_triplets && binding->prefix->name) {
3735 for (; binding->prefix->name[prefixLen++];)
3736 ; /* prefixLen includes null terminator */
3737 }
3738 tagNamePtr->localPart = localPart;
3739 tagNamePtr->uriLen = binding->uriLen;
3740 tagNamePtr->prefix = binding->prefix->name;
3741 tagNamePtr->prefixLen = prefixLen;
3742 for (i = 0; localPart[i++];)
3743 ; /* i includes null terminator */
3744
3745 /* Detect and prevent integer overflow */
3746 if (binding->uriLen > INT_MAX - prefixLen
3747 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3748 return XML_ERROR_NO_MEMORY;
3749 }
3750
3751 n = i + binding->uriLen + prefixLen;
3752 if (n > binding->uriAlloc) {
3753 TAG *p;
3754
3755 /* Detect and prevent integer overflow */
3756 if (n > INT_MAX - EXPAND_SPARE) {
3757 return XML_ERROR_NO_MEMORY;
3758 }
3759 /* Detect and prevent integer overflow.
3760 * The preprocessor guard addresses the "always false" warning
3761 * from -Wtype-limits on platforms where
3762 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3763 #if UINT_MAX >= SIZE_MAX
3764 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3765 return XML_ERROR_NO_MEMORY;
3766 }
3767 #endif
3768
3769 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3770 if (! uri)
3771 return XML_ERROR_NO_MEMORY;
3772 binding->uriAlloc = n + EXPAND_SPARE;
3773 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3774 for (p = parser->m_tagStack; p; p = p->parent)
3775 if (p->name.str == binding->uri)
3776 p->name.str = uri;
3777 FREE(parser, binding->uri);
3778 binding->uri = uri;
3779 }
3780 /* if m_namespaceSeparator != '\0' then uri includes it already */
3781 uri = binding->uri + binding->uriLen;
3782 memcpy(uri, localPart, i * sizeof(XML_Char));
3783 /* we always have a namespace separator between localPart and prefix */
3784 if (prefixLen) {
3785 uri += i - 1;
3786 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3787 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3788 }
3789 tagNamePtr->str = binding->uri;
3790 return XML_ERROR_NONE;
3791 }
3792
3793 static XML_Bool
is_rfc3986_uri_char(XML_Char candidate)3794 is_rfc3986_uri_char(XML_Char candidate) {
3795 // For the RFC 3986 ANBF grammar see
3796 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3797
3798 switch (candidate) {
3799 // From rule "ALPHA" (uppercase half)
3800 case 'A':
3801 case 'B':
3802 case 'C':
3803 case 'D':
3804 case 'E':
3805 case 'F':
3806 case 'G':
3807 case 'H':
3808 case 'I':
3809 case 'J':
3810 case 'K':
3811 case 'L':
3812 case 'M':
3813 case 'N':
3814 case 'O':
3815 case 'P':
3816 case 'Q':
3817 case 'R':
3818 case 'S':
3819 case 'T':
3820 case 'U':
3821 case 'V':
3822 case 'W':
3823 case 'X':
3824 case 'Y':
3825 case 'Z':
3826
3827 // From rule "ALPHA" (lowercase half)
3828 case 'a':
3829 case 'b':
3830 case 'c':
3831 case 'd':
3832 case 'e':
3833 case 'f':
3834 case 'g':
3835 case 'h':
3836 case 'i':
3837 case 'j':
3838 case 'k':
3839 case 'l':
3840 case 'm':
3841 case 'n':
3842 case 'o':
3843 case 'p':
3844 case 'q':
3845 case 'r':
3846 case 's':
3847 case 't':
3848 case 'u':
3849 case 'v':
3850 case 'w':
3851 case 'x':
3852 case 'y':
3853 case 'z':
3854
3855 // From rule "DIGIT"
3856 case '0':
3857 case '1':
3858 case '2':
3859 case '3':
3860 case '4':
3861 case '5':
3862 case '6':
3863 case '7':
3864 case '8':
3865 case '9':
3866
3867 // From rule "pct-encoded"
3868 case '%':
3869
3870 // From rule "unreserved"
3871 case '-':
3872 case '.':
3873 case '_':
3874 case '~':
3875
3876 // From rule "gen-delims"
3877 case ':':
3878 case '/':
3879 case '?':
3880 case '#':
3881 case '[':
3882 case ']':
3883 case '@':
3884
3885 // From rule "sub-delims"
3886 case '!':
3887 case '$':
3888 case '&':
3889 case '\'':
3890 case '(':
3891 case ')':
3892 case '*':
3893 case '+':
3894 case ',':
3895 case ';':
3896 case '=':
3897 return XML_TRUE;
3898
3899 default:
3900 return XML_FALSE;
3901 }
3902 }
3903
3904 /* addBinding() overwrites the value of prefix->binding without checking.
3905 Therefore one must keep track of the old value outside of addBinding().
3906 */
3907 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3908 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3909 const XML_Char *uri, BINDING **bindingsPtr) {
3910 // "http://www.w3.org/XML/1998/namespace"
3911 static const XML_Char xmlNamespace[]
3912 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3913 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3914 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3915 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3916 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3917 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3918 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3919 ASCII_e, '\0'};
3920 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3921 // "http://www.w3.org/2000/xmlns/"
3922 static const XML_Char xmlnsNamespace[]
3923 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3924 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3925 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3926 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3927 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3928 static const int xmlnsLen
3929 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3930
3931 XML_Bool mustBeXML = XML_FALSE;
3932 XML_Bool isXML = XML_TRUE;
3933 XML_Bool isXMLNS = XML_TRUE;
3934
3935 BINDING *b;
3936 int len;
3937
3938 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3939 if (*uri == XML_T('\0') && prefix->name)
3940 return XML_ERROR_UNDECLARING_PREFIX;
3941
3942 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3943 && prefix->name[1] == XML_T(ASCII_m)
3944 && prefix->name[2] == XML_T(ASCII_l)) {
3945 /* Not allowed to bind xmlns */
3946 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3947 && prefix->name[5] == XML_T('\0'))
3948 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3949
3950 if (prefix->name[3] == XML_T('\0'))
3951 mustBeXML = XML_TRUE;
3952 }
3953
3954 for (len = 0; uri[len]; len++) {
3955 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3956 isXML = XML_FALSE;
3957
3958 if (! mustBeXML && isXMLNS
3959 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3960 isXMLNS = XML_FALSE;
3961
3962 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3963 // today (and is not REQUIRED to do so with regard to the XML 1.0
3964 // namespaces specification) we have to at least make sure, that
3965 // the application on top of Expat (that is likely splitting expanded
3966 // element names ("qualified names") of form
3967 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3968 // in its element handler code) cannot be confused by an attacker
3969 // putting additional namespace separator characters into namespace
3970 // declarations. That would be ambiguous and not to be expected.
3971 //
3972 // While the HTML API docs of function XML_ParserCreateNS have been
3973 // advising against use of a namespace separator character that can
3974 // appear in a URI for >20 years now, some widespread applications
3975 // are using URI characters (':' (colon) in particular) for a
3976 // namespace separator, in practice. To keep these applications
3977 // functional, we only reject namespaces URIs containing the
3978 // application-chosen namespace separator if the chosen separator
3979 // is a non-URI character with regard to RFC 3986.
3980 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3981 && ! is_rfc3986_uri_char(uri[len])) {
3982 return XML_ERROR_SYNTAX;
3983 }
3984 }
3985 isXML = isXML && len == xmlLen;
3986 isXMLNS = isXMLNS && len == xmlnsLen;
3987
3988 if (mustBeXML != isXML)
3989 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3990 : XML_ERROR_RESERVED_NAMESPACE_URI;
3991
3992 if (isXMLNS)
3993 return XML_ERROR_RESERVED_NAMESPACE_URI;
3994
3995 if (parser->m_namespaceSeparator)
3996 len++;
3997 if (parser->m_freeBindingList) {
3998 b = parser->m_freeBindingList;
3999 if (len > b->uriAlloc) {
4000 /* Detect and prevent integer overflow */
4001 if (len > INT_MAX - EXPAND_SPARE) {
4002 return XML_ERROR_NO_MEMORY;
4003 }
4004
4005 /* Detect and prevent integer overflow.
4006 * The preprocessor guard addresses the "always false" warning
4007 * from -Wtype-limits on platforms where
4008 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4009 #if UINT_MAX >= SIZE_MAX
4010 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4011 return XML_ERROR_NO_MEMORY;
4012 }
4013 #endif
4014
4015 XML_Char *temp = (XML_Char *)REALLOC(
4016 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4017 if (temp == NULL)
4018 return XML_ERROR_NO_MEMORY;
4019 b->uri = temp;
4020 b->uriAlloc = len + EXPAND_SPARE;
4021 }
4022 parser->m_freeBindingList = b->nextTagBinding;
4023 } else {
4024 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4025 if (! b)
4026 return XML_ERROR_NO_MEMORY;
4027
4028 /* Detect and prevent integer overflow */
4029 if (len > INT_MAX - EXPAND_SPARE) {
4030 return XML_ERROR_NO_MEMORY;
4031 }
4032 /* Detect and prevent integer overflow.
4033 * The preprocessor guard addresses the "always false" warning
4034 * from -Wtype-limits on platforms where
4035 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4036 #if UINT_MAX >= SIZE_MAX
4037 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4038 return XML_ERROR_NO_MEMORY;
4039 }
4040 #endif
4041
4042 b->uri
4043 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4044 if (! b->uri) {
4045 FREE(parser, b);
4046 return XML_ERROR_NO_MEMORY;
4047 }
4048 b->uriAlloc = len + EXPAND_SPARE;
4049 }
4050 b->uriLen = len;
4051 memcpy(b->uri, uri, len * sizeof(XML_Char));
4052 if (parser->m_namespaceSeparator)
4053 b->uri[len - 1] = parser->m_namespaceSeparator;
4054 b->prefix = prefix;
4055 b->attId = attId;
4056 b->prevPrefixBinding = prefix->binding;
4057 /* NULL binding when default namespace undeclared */
4058 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4059 prefix->binding = NULL;
4060 else
4061 prefix->binding = b;
4062 b->nextTagBinding = *bindingsPtr;
4063 *bindingsPtr = b;
4064 /* if attId == NULL then we are not starting a namespace scope */
4065 if (attId && parser->m_startNamespaceDeclHandler)
4066 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4067 prefix->binding ? uri : 0);
4068 return XML_ERROR_NONE;
4069 }
4070
4071 /* The idea here is to avoid using stack for each CDATA section when
4072 the whole file is parsed with one call.
4073 */
4074 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4075 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4076 const char **endPtr) {
4077 enum XML_Error result = doCdataSection(
4078 parser, parser->m_encoding, &start, end, endPtr,
4079 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4080 if (result != XML_ERROR_NONE)
4081 return result;
4082 if (start) {
4083 if (parser->m_parentParser) { /* we are parsing an external entity */
4084 parser->m_processor = externalEntityContentProcessor;
4085 return externalEntityContentProcessor(parser, start, end, endPtr);
4086 } else {
4087 parser->m_processor = contentProcessor;
4088 return contentProcessor(parser, start, end, endPtr);
4089 }
4090 }
4091 return result;
4092 }
4093
4094 /* startPtr gets set to non-null if the section is closed, and to null if
4095 the section is not yet closed.
4096 */
4097 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)4098 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4099 const char *end, const char **nextPtr, XML_Bool haveMore,
4100 enum XML_Account account) {
4101 const char *s = *startPtr;
4102 const char **eventPP;
4103 const char **eventEndPP;
4104 if (enc == parser->m_encoding) {
4105 eventPP = &parser->m_eventPtr;
4106 *eventPP = s;
4107 eventEndPP = &parser->m_eventEndPtr;
4108 } else {
4109 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4110 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4111 }
4112 *eventPP = s;
4113 *startPtr = NULL;
4114
4115 for (;;) {
4116 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4117 int tok = XmlCdataSectionTok(enc, s, end, &next);
4118 #if XML_GE == 1
4119 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4120 accountingOnAbort(parser);
4121 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4122 }
4123 #else
4124 UNUSED_P(account);
4125 #endif
4126 *eventEndPP = next;
4127 switch (tok) {
4128 case XML_TOK_CDATA_SECT_CLOSE:
4129 if (parser->m_endCdataSectionHandler)
4130 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4131 /* BEGIN disabled code */
4132 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4133 else if ((0) && parser->m_characterDataHandler)
4134 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4135 0);
4136 /* END disabled code */
4137 else if (parser->m_defaultHandler)
4138 reportDefault(parser, enc, s, next);
4139 *startPtr = next;
4140 *nextPtr = next;
4141 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4142 return XML_ERROR_ABORTED;
4143 else
4144 return XML_ERROR_NONE;
4145 case XML_TOK_DATA_NEWLINE:
4146 if (parser->m_characterDataHandler) {
4147 XML_Char c = 0xA;
4148 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4149 } else if (parser->m_defaultHandler)
4150 reportDefault(parser, enc, s, next);
4151 break;
4152 case XML_TOK_DATA_CHARS: {
4153 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4154 if (charDataHandler) {
4155 if (MUST_CONVERT(enc, s)) {
4156 for (;;) {
4157 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4158 const enum XML_Convert_Result convert_res = XmlConvert(
4159 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4160 *eventEndPP = next;
4161 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4162 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4163 if ((convert_res == XML_CONVERT_COMPLETED)
4164 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4165 break;
4166 *eventPP = s;
4167 }
4168 } else
4169 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4170 (int)((const XML_Char *)next - (const XML_Char *)s));
4171 } else if (parser->m_defaultHandler)
4172 reportDefault(parser, enc, s, next);
4173 } break;
4174 case XML_TOK_INVALID:
4175 *eventPP = next;
4176 return XML_ERROR_INVALID_TOKEN;
4177 case XML_TOK_PARTIAL_CHAR:
4178 if (haveMore) {
4179 *nextPtr = s;
4180 return XML_ERROR_NONE;
4181 }
4182 return XML_ERROR_PARTIAL_CHAR;
4183 case XML_TOK_PARTIAL:
4184 case XML_TOK_NONE:
4185 if (haveMore) {
4186 *nextPtr = s;
4187 return XML_ERROR_NONE;
4188 }
4189 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4190 default:
4191 /* Every token returned by XmlCdataSectionTok() has its own
4192 * explicit case, so this default case will never be executed.
4193 * We retain it as a safety net and exclude it from the coverage
4194 * statistics.
4195 *
4196 * LCOV_EXCL_START
4197 */
4198 *eventPP = next;
4199 return XML_ERROR_UNEXPECTED_STATE;
4200 /* LCOV_EXCL_STOP */
4201 }
4202
4203 *eventPP = s = next;
4204 switch (parser->m_parsingStatus.parsing) {
4205 case XML_SUSPENDED:
4206 *nextPtr = next;
4207 return XML_ERROR_NONE;
4208 case XML_FINISHED:
4209 return XML_ERROR_ABORTED;
4210 default:;
4211 }
4212 }
4213 /* not reached */
4214 }
4215
4216 #ifdef XML_DTD
4217
4218 /* The idea here is to avoid using stack for each IGNORE section when
4219 the whole file is parsed with one call.
4220 */
4221 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4222 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4223 const char **endPtr) {
4224 enum XML_Error result
4225 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4226 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4227 if (result != XML_ERROR_NONE)
4228 return result;
4229 if (start) {
4230 parser->m_processor = prologProcessor;
4231 return prologProcessor(parser, start, end, endPtr);
4232 }
4233 return result;
4234 }
4235
4236 /* startPtr gets set to non-null is the section is closed, and to null
4237 if the section is not yet closed.
4238 */
4239 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4240 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4241 const char *end, const char **nextPtr, XML_Bool haveMore) {
4242 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4243 int tok;
4244 const char *s = *startPtr;
4245 const char **eventPP;
4246 const char **eventEndPP;
4247 if (enc == parser->m_encoding) {
4248 eventPP = &parser->m_eventPtr;
4249 *eventPP = s;
4250 eventEndPP = &parser->m_eventEndPtr;
4251 } else {
4252 /* It's not entirely clear, but it seems the following two lines
4253 * of code cannot be executed. The only occasions on which 'enc'
4254 * is not 'encoding' are when this function is called
4255 * from the internal entity processing, and IGNORE sections are an
4256 * error in internal entities.
4257 *
4258 * Since it really isn't clear that this is true, we keep the code
4259 * and just remove it from our coverage tests.
4260 *
4261 * LCOV_EXCL_START
4262 */
4263 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4264 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4265 /* LCOV_EXCL_STOP */
4266 }
4267 *eventPP = s;
4268 *startPtr = NULL;
4269 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4270 # if XML_GE == 1
4271 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4272 XML_ACCOUNT_DIRECT)) {
4273 accountingOnAbort(parser);
4274 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4275 }
4276 # endif
4277 *eventEndPP = next;
4278 switch (tok) {
4279 case XML_TOK_IGNORE_SECT:
4280 if (parser->m_defaultHandler)
4281 reportDefault(parser, enc, s, next);
4282 *startPtr = next;
4283 *nextPtr = next;
4284 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4285 return XML_ERROR_ABORTED;
4286 else
4287 return XML_ERROR_NONE;
4288 case XML_TOK_INVALID:
4289 *eventPP = next;
4290 return XML_ERROR_INVALID_TOKEN;
4291 case XML_TOK_PARTIAL_CHAR:
4292 if (haveMore) {
4293 *nextPtr = s;
4294 return XML_ERROR_NONE;
4295 }
4296 return XML_ERROR_PARTIAL_CHAR;
4297 case XML_TOK_PARTIAL:
4298 case XML_TOK_NONE:
4299 if (haveMore) {
4300 *nextPtr = s;
4301 return XML_ERROR_NONE;
4302 }
4303 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4304 default:
4305 /* All of the tokens that XmlIgnoreSectionTok() returns have
4306 * explicit cases to handle them, so this default case is never
4307 * executed. We keep it as a safety net anyway, and remove it
4308 * from our test coverage statistics.
4309 *
4310 * LCOV_EXCL_START
4311 */
4312 *eventPP = next;
4313 return XML_ERROR_UNEXPECTED_STATE;
4314 /* LCOV_EXCL_STOP */
4315 }
4316 /* not reached */
4317 }
4318
4319 #endif /* XML_DTD */
4320
4321 static enum XML_Error
initializeEncoding(XML_Parser parser)4322 initializeEncoding(XML_Parser parser) {
4323 const char *s;
4324 #ifdef XML_UNICODE
4325 char encodingBuf[128];
4326 /* See comments about `protocolEncodingName` in parserInit() */
4327 if (! parser->m_protocolEncodingName)
4328 s = NULL;
4329 else {
4330 int i;
4331 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4332 if (i == sizeof(encodingBuf) - 1
4333 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4334 encodingBuf[0] = '\0';
4335 break;
4336 }
4337 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4338 }
4339 encodingBuf[i] = '\0';
4340 s = encodingBuf;
4341 }
4342 #else
4343 s = parser->m_protocolEncodingName;
4344 #endif
4345 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4346 &parser->m_initEncoding, &parser->m_encoding, s))
4347 return XML_ERROR_NONE;
4348 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4349 }
4350
4351 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4352 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4353 const char *next) {
4354 const char *encodingName = NULL;
4355 const XML_Char *storedEncName = NULL;
4356 const ENCODING *newEncoding = NULL;
4357 const char *version = NULL;
4358 const char *versionend = NULL;
4359 const XML_Char *storedversion = NULL;
4360 int standalone = -1;
4361
4362 #if XML_GE == 1
4363 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4364 XML_ACCOUNT_DIRECT)) {
4365 accountingOnAbort(parser);
4366 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4367 }
4368 #endif
4369
4370 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4371 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4372 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4373 if (isGeneralTextEntity)
4374 return XML_ERROR_TEXT_DECL;
4375 else
4376 return XML_ERROR_XML_DECL;
4377 }
4378 if (! isGeneralTextEntity && standalone == 1) {
4379 parser->m_dtd->standalone = XML_TRUE;
4380 #ifdef XML_DTD
4381 if (parser->m_paramEntityParsing
4382 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4383 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4384 #endif /* XML_DTD */
4385 }
4386 if (parser->m_xmlDeclHandler) {
4387 if (encodingName != NULL) {
4388 storedEncName = poolStoreString(
4389 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4390 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4391 if (! storedEncName)
4392 return XML_ERROR_NO_MEMORY;
4393 poolFinish(&parser->m_temp2Pool);
4394 }
4395 if (version) {
4396 storedversion
4397 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4398 versionend - parser->m_encoding->minBytesPerChar);
4399 if (! storedversion)
4400 return XML_ERROR_NO_MEMORY;
4401 }
4402 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4403 standalone);
4404 } else if (parser->m_defaultHandler)
4405 reportDefault(parser, parser->m_encoding, s, next);
4406 if (parser->m_protocolEncodingName == NULL) {
4407 if (newEncoding) {
4408 /* Check that the specified encoding does not conflict with what
4409 * the parser has already deduced. Do we have the same number
4410 * of bytes in the smallest representation of a character? If
4411 * this is UTF-16, is it the same endianness?
4412 */
4413 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4414 || (newEncoding->minBytesPerChar == 2
4415 && newEncoding != parser->m_encoding)) {
4416 parser->m_eventPtr = encodingName;
4417 return XML_ERROR_INCORRECT_ENCODING;
4418 }
4419 parser->m_encoding = newEncoding;
4420 } else if (encodingName) {
4421 enum XML_Error result;
4422 if (! storedEncName) {
4423 storedEncName = poolStoreString(
4424 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4425 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4426 if (! storedEncName)
4427 return XML_ERROR_NO_MEMORY;
4428 }
4429 result = handleUnknownEncoding(parser, storedEncName);
4430 poolClear(&parser->m_temp2Pool);
4431 if (result == XML_ERROR_UNKNOWN_ENCODING)
4432 parser->m_eventPtr = encodingName;
4433 return result;
4434 }
4435 }
4436
4437 if (storedEncName || storedversion)
4438 poolClear(&parser->m_temp2Pool);
4439
4440 return XML_ERROR_NONE;
4441 }
4442
4443 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4444 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4445 if (parser->m_unknownEncodingHandler) {
4446 XML_Encoding info;
4447 int i;
4448 for (i = 0; i < 256; i++)
4449 info.map[i] = -1;
4450 info.convert = NULL;
4451 info.data = NULL;
4452 info.release = NULL;
4453 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4454 encodingName, &info)) {
4455 ENCODING *enc;
4456 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4457 if (! parser->m_unknownEncodingMem) {
4458 if (info.release)
4459 info.release(info.data);
4460 return XML_ERROR_NO_MEMORY;
4461 }
4462 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4463 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4464 if (enc) {
4465 parser->m_unknownEncodingData = info.data;
4466 parser->m_unknownEncodingRelease = info.release;
4467 parser->m_encoding = enc;
4468 return XML_ERROR_NONE;
4469 }
4470 }
4471 if (info.release != NULL)
4472 info.release(info.data);
4473 }
4474 return XML_ERROR_UNKNOWN_ENCODING;
4475 }
4476
4477 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4478 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4479 const char **nextPtr) {
4480 enum XML_Error result = initializeEncoding(parser);
4481 if (result != XML_ERROR_NONE)
4482 return result;
4483 parser->m_processor = prologProcessor;
4484 return prologProcessor(parser, s, end, nextPtr);
4485 }
4486
4487 #ifdef XML_DTD
4488
4489 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4490 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4491 const char **nextPtr) {
4492 enum XML_Error result = initializeEncoding(parser);
4493 if (result != XML_ERROR_NONE)
4494 return result;
4495
4496 /* we know now that XML_Parse(Buffer) has been called,
4497 so we consider the external parameter entity read */
4498 parser->m_dtd->paramEntityRead = XML_TRUE;
4499
4500 if (parser->m_prologState.inEntityValue) {
4501 parser->m_processor = entityValueInitProcessor;
4502 return entityValueInitProcessor(parser, s, end, nextPtr);
4503 } else {
4504 parser->m_processor = externalParEntProcessor;
4505 return externalParEntProcessor(parser, s, end, nextPtr);
4506 }
4507 }
4508
4509 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4510 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4511 const char **nextPtr) {
4512 int tok;
4513 const char *start = s;
4514 const char *next = start;
4515 parser->m_eventPtr = start;
4516
4517 for (;;) {
4518 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4519 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4520 - storeEntityValue
4521 - processXmlDecl
4522 */
4523 parser->m_eventEndPtr = next;
4524 if (tok <= 0) {
4525 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4526 *nextPtr = s;
4527 return XML_ERROR_NONE;
4528 }
4529 switch (tok) {
4530 case XML_TOK_INVALID:
4531 return XML_ERROR_INVALID_TOKEN;
4532 case XML_TOK_PARTIAL:
4533 return XML_ERROR_UNCLOSED_TOKEN;
4534 case XML_TOK_PARTIAL_CHAR:
4535 return XML_ERROR_PARTIAL_CHAR;
4536 case XML_TOK_NONE: /* start == end */
4537 default:
4538 break;
4539 }
4540 /* found end of entity value - can store it now */
4541 return storeEntityValue(parser, parser->m_encoding, s, end,
4542 XML_ACCOUNT_DIRECT);
4543 } else if (tok == XML_TOK_XML_DECL) {
4544 enum XML_Error result;
4545 result = processXmlDecl(parser, 0, start, next);
4546 if (result != XML_ERROR_NONE)
4547 return result;
4548 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4549 * that to happen, a parameter entity parsing handler must have attempted
4550 * to suspend the parser, which fails and raises an error. The parser can
4551 * be aborted, but can't be suspended.
4552 */
4553 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4554 return XML_ERROR_ABORTED;
4555 *nextPtr = next;
4556 /* stop scanning for text declaration - we found one */
4557 parser->m_processor = entityValueProcessor;
4558 return entityValueProcessor(parser, next, end, nextPtr);
4559 }
4560 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4561 must move s and nextPtr forward to consume the BOM.
4562
4563 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4564 would leave the BOM in the buffer and return. On the next call to this
4565 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4566 is not valid to have multiple BOMs.
4567 */
4568 else if (tok == XML_TOK_BOM) {
4569 # if XML_GE == 1
4570 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4571 XML_ACCOUNT_DIRECT)) {
4572 accountingOnAbort(parser);
4573 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4574 }
4575 # endif
4576
4577 *nextPtr = next;
4578 s = next;
4579 }
4580 /* If we get this token, we have the start of what might be a
4581 normal tag, but not a declaration (i.e. it doesn't begin with
4582 "<!"). In a DTD context, that isn't legal.
4583 */
4584 else if (tok == XML_TOK_INSTANCE_START) {
4585 *nextPtr = next;
4586 return XML_ERROR_SYNTAX;
4587 }
4588 start = next;
4589 parser->m_eventPtr = start;
4590 }
4591 }
4592
4593 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4594 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4595 const char **nextPtr) {
4596 const char *next = s;
4597 int tok;
4598
4599 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4600 if (tok <= 0) {
4601 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4602 *nextPtr = s;
4603 return XML_ERROR_NONE;
4604 }
4605 switch (tok) {
4606 case XML_TOK_INVALID:
4607 return XML_ERROR_INVALID_TOKEN;
4608 case XML_TOK_PARTIAL:
4609 return XML_ERROR_UNCLOSED_TOKEN;
4610 case XML_TOK_PARTIAL_CHAR:
4611 return XML_ERROR_PARTIAL_CHAR;
4612 case XML_TOK_NONE: /* start == end */
4613 default:
4614 break;
4615 }
4616 }
4617 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4618 However, when parsing an external subset, doProlog will not accept a BOM
4619 as valid, and report a syntax error, so we have to skip the BOM, and
4620 account for the BOM bytes.
4621 */
4622 else if (tok == XML_TOK_BOM) {
4623 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4624 XML_ACCOUNT_DIRECT)) {
4625 accountingOnAbort(parser);
4626 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4627 }
4628
4629 s = next;
4630 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4631 }
4632
4633 parser->m_processor = prologProcessor;
4634 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4635 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4636 XML_ACCOUNT_DIRECT);
4637 }
4638
4639 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4640 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4641 const char **nextPtr) {
4642 const char *start = s;
4643 const char *next = s;
4644 const ENCODING *enc = parser->m_encoding;
4645 int tok;
4646
4647 for (;;) {
4648 tok = XmlPrologTok(enc, start, end, &next);
4649 /* Note: These bytes are accounted later in:
4650 - storeEntityValue
4651 */
4652 if (tok <= 0) {
4653 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4654 *nextPtr = s;
4655 return XML_ERROR_NONE;
4656 }
4657 switch (tok) {
4658 case XML_TOK_INVALID:
4659 return XML_ERROR_INVALID_TOKEN;
4660 case XML_TOK_PARTIAL:
4661 return XML_ERROR_UNCLOSED_TOKEN;
4662 case XML_TOK_PARTIAL_CHAR:
4663 return XML_ERROR_PARTIAL_CHAR;
4664 case XML_TOK_NONE: /* start == end */
4665 default:
4666 break;
4667 }
4668 /* found end of entity value - can store it now */
4669 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4670 }
4671 start = next;
4672 }
4673 }
4674
4675 #endif /* XML_DTD */
4676
4677 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4678 prologProcessor(XML_Parser parser, const char *s, const char *end,
4679 const char **nextPtr) {
4680 const char *next = s;
4681 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4682 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4683 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4684 XML_ACCOUNT_DIRECT);
4685 }
4686
4687 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4688 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4689 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4690 XML_Bool allowClosingDoctype, enum XML_Account account) {
4691 #ifdef XML_DTD
4692 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4693 #endif /* XML_DTD */
4694 static const XML_Char atypeCDATA[]
4695 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4696 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4697 static const XML_Char atypeIDREF[]
4698 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4699 static const XML_Char atypeIDREFS[]
4700 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4701 static const XML_Char atypeENTITY[]
4702 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4703 static const XML_Char atypeENTITIES[]
4704 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4705 ASCII_I, ASCII_E, ASCII_S, '\0'};
4706 static const XML_Char atypeNMTOKEN[]
4707 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4708 static const XML_Char atypeNMTOKENS[]
4709 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4710 ASCII_E, ASCII_N, ASCII_S, '\0'};
4711 static const XML_Char notationPrefix[]
4712 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4713 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4714 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4715 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4716
4717 #ifndef XML_DTD
4718 UNUSED_P(account);
4719 #endif
4720
4721 /* save one level of indirection */
4722 DTD *const dtd = parser->m_dtd;
4723
4724 const char **eventPP;
4725 const char **eventEndPP;
4726 enum XML_Content_Quant quant;
4727
4728 if (enc == parser->m_encoding) {
4729 eventPP = &parser->m_eventPtr;
4730 eventEndPP = &parser->m_eventEndPtr;
4731 } else {
4732 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4733 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4734 }
4735
4736 for (;;) {
4737 int role;
4738 XML_Bool handleDefault = XML_TRUE;
4739 *eventPP = s;
4740 *eventEndPP = next;
4741 if (tok <= 0) {
4742 if (haveMore && tok != XML_TOK_INVALID) {
4743 *nextPtr = s;
4744 return XML_ERROR_NONE;
4745 }
4746 switch (tok) {
4747 case XML_TOK_INVALID:
4748 *eventPP = next;
4749 return XML_ERROR_INVALID_TOKEN;
4750 case XML_TOK_PARTIAL:
4751 return XML_ERROR_UNCLOSED_TOKEN;
4752 case XML_TOK_PARTIAL_CHAR:
4753 return XML_ERROR_PARTIAL_CHAR;
4754 case -XML_TOK_PROLOG_S:
4755 tok = -tok;
4756 break;
4757 case XML_TOK_NONE:
4758 #ifdef XML_DTD
4759 /* for internal PE NOT referenced between declarations */
4760 if (enc != parser->m_encoding
4761 && ! parser->m_openInternalEntities->betweenDecl) {
4762 *nextPtr = s;
4763 return XML_ERROR_NONE;
4764 }
4765 /* WFC: PE Between Declarations - must check that PE contains
4766 complete markup, not only for external PEs, but also for
4767 internal PEs if the reference occurs between declarations.
4768 */
4769 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4770 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4771 == XML_ROLE_ERROR)
4772 return XML_ERROR_INCOMPLETE_PE;
4773 *nextPtr = s;
4774 return XML_ERROR_NONE;
4775 }
4776 #endif /* XML_DTD */
4777 return XML_ERROR_NO_ELEMENTS;
4778 default:
4779 tok = -tok;
4780 next = end;
4781 break;
4782 }
4783 }
4784 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4785 #if XML_GE == 1
4786 switch (role) {
4787 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4788 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4789 # ifdef XML_DTD
4790 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4791 # endif
4792 break;
4793 default:
4794 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4795 accountingOnAbort(parser);
4796 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4797 }
4798 }
4799 #endif
4800 switch (role) {
4801 case XML_ROLE_XML_DECL: {
4802 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4803 if (result != XML_ERROR_NONE)
4804 return result;
4805 enc = parser->m_encoding;
4806 handleDefault = XML_FALSE;
4807 } break;
4808 case XML_ROLE_DOCTYPE_NAME:
4809 if (parser->m_startDoctypeDeclHandler) {
4810 parser->m_doctypeName
4811 = poolStoreString(&parser->m_tempPool, enc, s, next);
4812 if (! parser->m_doctypeName)
4813 return XML_ERROR_NO_MEMORY;
4814 poolFinish(&parser->m_tempPool);
4815 parser->m_doctypePubid = NULL;
4816 handleDefault = XML_FALSE;
4817 }
4818 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4819 break;
4820 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4821 if (parser->m_startDoctypeDeclHandler) {
4822 parser->m_startDoctypeDeclHandler(
4823 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4824 parser->m_doctypePubid, 1);
4825 parser->m_doctypeName = NULL;
4826 poolClear(&parser->m_tempPool);
4827 handleDefault = XML_FALSE;
4828 }
4829 break;
4830 #ifdef XML_DTD
4831 case XML_ROLE_TEXT_DECL: {
4832 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4833 if (result != XML_ERROR_NONE)
4834 return result;
4835 enc = parser->m_encoding;
4836 handleDefault = XML_FALSE;
4837 } break;
4838 #endif /* XML_DTD */
4839 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4840 #ifdef XML_DTD
4841 parser->m_useForeignDTD = XML_FALSE;
4842 parser->m_declEntity = (ENTITY *)lookup(
4843 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4844 if (! parser->m_declEntity)
4845 return XML_ERROR_NO_MEMORY;
4846 #endif /* XML_DTD */
4847 dtd->hasParamEntityRefs = XML_TRUE;
4848 if (parser->m_startDoctypeDeclHandler) {
4849 XML_Char *pubId;
4850 if (! XmlIsPublicId(enc, s, next, eventPP))
4851 return XML_ERROR_PUBLICID;
4852 pubId = poolStoreString(&parser->m_tempPool, enc,
4853 s + enc->minBytesPerChar,
4854 next - enc->minBytesPerChar);
4855 if (! pubId)
4856 return XML_ERROR_NO_MEMORY;
4857 normalizePublicId(pubId);
4858 poolFinish(&parser->m_tempPool);
4859 parser->m_doctypePubid = pubId;
4860 handleDefault = XML_FALSE;
4861 goto alreadyChecked;
4862 }
4863 /* fall through */
4864 case XML_ROLE_ENTITY_PUBLIC_ID:
4865 if (! XmlIsPublicId(enc, s, next, eventPP))
4866 return XML_ERROR_PUBLICID;
4867 alreadyChecked:
4868 if (dtd->keepProcessing && parser->m_declEntity) {
4869 XML_Char *tem
4870 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4871 next - enc->minBytesPerChar);
4872 if (! tem)
4873 return XML_ERROR_NO_MEMORY;
4874 normalizePublicId(tem);
4875 parser->m_declEntity->publicId = tem;
4876 poolFinish(&dtd->pool);
4877 /* Don't suppress the default handler if we fell through from
4878 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4879 */
4880 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4881 handleDefault = XML_FALSE;
4882 }
4883 break;
4884 case XML_ROLE_DOCTYPE_CLOSE:
4885 if (allowClosingDoctype != XML_TRUE) {
4886 /* Must not close doctype from within expanded parameter entities */
4887 return XML_ERROR_INVALID_TOKEN;
4888 }
4889
4890 if (parser->m_doctypeName) {
4891 parser->m_startDoctypeDeclHandler(
4892 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4893 parser->m_doctypePubid, 0);
4894 poolClear(&parser->m_tempPool);
4895 handleDefault = XML_FALSE;
4896 }
4897 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4898 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4899 was not set, indicating an external subset
4900 */
4901 #ifdef XML_DTD
4902 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4903 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4904 dtd->hasParamEntityRefs = XML_TRUE;
4905 if (parser->m_paramEntityParsing
4906 && parser->m_externalEntityRefHandler) {
4907 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4908 externalSubsetName, sizeof(ENTITY));
4909 if (! entity) {
4910 /* The external subset name "#" will have already been
4911 * inserted into the hash table at the start of the
4912 * external entity parsing, so no allocation will happen
4913 * and lookup() cannot fail.
4914 */
4915 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4916 }
4917 if (parser->m_useForeignDTD)
4918 entity->base = parser->m_curBase;
4919 dtd->paramEntityRead = XML_FALSE;
4920 if (! parser->m_externalEntityRefHandler(
4921 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4922 entity->systemId, entity->publicId))
4923 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4924 if (dtd->paramEntityRead) {
4925 if (! dtd->standalone && parser->m_notStandaloneHandler
4926 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4927 return XML_ERROR_NOT_STANDALONE;
4928 }
4929 /* if we didn't read the foreign DTD then this means that there
4930 is no external subset and we must reset dtd->hasParamEntityRefs
4931 */
4932 else if (! parser->m_doctypeSysid)
4933 dtd->hasParamEntityRefs = hadParamEntityRefs;
4934 /* end of DTD - no need to update dtd->keepProcessing */
4935 }
4936 parser->m_useForeignDTD = XML_FALSE;
4937 }
4938 #endif /* XML_DTD */
4939 if (parser->m_endDoctypeDeclHandler) {
4940 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4941 handleDefault = XML_FALSE;
4942 }
4943 break;
4944 case XML_ROLE_INSTANCE_START:
4945 #ifdef XML_DTD
4946 /* if there is no DOCTYPE declaration then now is the
4947 last chance to read the foreign DTD
4948 */
4949 if (parser->m_useForeignDTD) {
4950 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4951 dtd->hasParamEntityRefs = XML_TRUE;
4952 if (parser->m_paramEntityParsing
4953 && parser->m_externalEntityRefHandler) {
4954 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4955 externalSubsetName, sizeof(ENTITY));
4956 if (! entity)
4957 return XML_ERROR_NO_MEMORY;
4958 entity->base = parser->m_curBase;
4959 dtd->paramEntityRead = XML_FALSE;
4960 if (! parser->m_externalEntityRefHandler(
4961 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4962 entity->systemId, entity->publicId))
4963 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4964 if (dtd->paramEntityRead) {
4965 if (! dtd->standalone && parser->m_notStandaloneHandler
4966 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4967 return XML_ERROR_NOT_STANDALONE;
4968 }
4969 /* if we didn't read the foreign DTD then this means that there
4970 is no external subset and we must reset dtd->hasParamEntityRefs
4971 */
4972 else
4973 dtd->hasParamEntityRefs = hadParamEntityRefs;
4974 /* end of DTD - no need to update dtd->keepProcessing */
4975 }
4976 }
4977 #endif /* XML_DTD */
4978 parser->m_processor = contentProcessor;
4979 return contentProcessor(parser, s, end, nextPtr);
4980 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4981 parser->m_declElementType = getElementType(parser, enc, s, next);
4982 if (! parser->m_declElementType)
4983 return XML_ERROR_NO_MEMORY;
4984 goto checkAttListDeclHandler;
4985 case XML_ROLE_ATTRIBUTE_NAME:
4986 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4987 if (! parser->m_declAttributeId)
4988 return XML_ERROR_NO_MEMORY;
4989 parser->m_declAttributeIsCdata = XML_FALSE;
4990 parser->m_declAttributeType = NULL;
4991 parser->m_declAttributeIsId = XML_FALSE;
4992 goto checkAttListDeclHandler;
4993 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4994 parser->m_declAttributeIsCdata = XML_TRUE;
4995 parser->m_declAttributeType = atypeCDATA;
4996 goto checkAttListDeclHandler;
4997 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4998 parser->m_declAttributeIsId = XML_TRUE;
4999 parser->m_declAttributeType = atypeID;
5000 goto checkAttListDeclHandler;
5001 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5002 parser->m_declAttributeType = atypeIDREF;
5003 goto checkAttListDeclHandler;
5004 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5005 parser->m_declAttributeType = atypeIDREFS;
5006 goto checkAttListDeclHandler;
5007 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5008 parser->m_declAttributeType = atypeENTITY;
5009 goto checkAttListDeclHandler;
5010 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5011 parser->m_declAttributeType = atypeENTITIES;
5012 goto checkAttListDeclHandler;
5013 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5014 parser->m_declAttributeType = atypeNMTOKEN;
5015 goto checkAttListDeclHandler;
5016 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5017 parser->m_declAttributeType = atypeNMTOKENS;
5018 checkAttListDeclHandler:
5019 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5020 handleDefault = XML_FALSE;
5021 break;
5022 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5023 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5024 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5025 const XML_Char *prefix;
5026 if (parser->m_declAttributeType) {
5027 prefix = enumValueSep;
5028 } else {
5029 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5030 : enumValueStart);
5031 }
5032 if (! poolAppendString(&parser->m_tempPool, prefix))
5033 return XML_ERROR_NO_MEMORY;
5034 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5035 return XML_ERROR_NO_MEMORY;
5036 parser->m_declAttributeType = parser->m_tempPool.start;
5037 handleDefault = XML_FALSE;
5038 }
5039 break;
5040 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5041 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5042 if (dtd->keepProcessing) {
5043 if (! defineAttribute(parser->m_declElementType,
5044 parser->m_declAttributeId,
5045 parser->m_declAttributeIsCdata,
5046 parser->m_declAttributeIsId, 0, parser))
5047 return XML_ERROR_NO_MEMORY;
5048 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5049 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5050 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5051 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5052 /* Enumerated or Notation type */
5053 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5054 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5055 return XML_ERROR_NO_MEMORY;
5056 parser->m_declAttributeType = parser->m_tempPool.start;
5057 poolFinish(&parser->m_tempPool);
5058 }
5059 *eventEndPP = s;
5060 parser->m_attlistDeclHandler(
5061 parser->m_handlerArg, parser->m_declElementType->name,
5062 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5063 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5064 handleDefault = XML_FALSE;
5065 }
5066 }
5067 poolClear(&parser->m_tempPool);
5068 break;
5069 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5070 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5071 if (dtd->keepProcessing) {
5072 const XML_Char *attVal;
5073 enum XML_Error result = storeAttributeValue(
5074 parser, enc, parser->m_declAttributeIsCdata,
5075 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5076 XML_ACCOUNT_NONE);
5077 if (result)
5078 return result;
5079 attVal = poolStart(&dtd->pool);
5080 poolFinish(&dtd->pool);
5081 /* ID attributes aren't allowed to have a default */
5082 if (! defineAttribute(
5083 parser->m_declElementType, parser->m_declAttributeId,
5084 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5085 return XML_ERROR_NO_MEMORY;
5086 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5087 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5088 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5089 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5090 /* Enumerated or Notation type */
5091 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5092 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5093 return XML_ERROR_NO_MEMORY;
5094 parser->m_declAttributeType = parser->m_tempPool.start;
5095 poolFinish(&parser->m_tempPool);
5096 }
5097 *eventEndPP = s;
5098 parser->m_attlistDeclHandler(
5099 parser->m_handlerArg, parser->m_declElementType->name,
5100 parser->m_declAttributeId->name, parser->m_declAttributeType,
5101 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5102 poolClear(&parser->m_tempPool);
5103 handleDefault = XML_FALSE;
5104 }
5105 }
5106 break;
5107 case XML_ROLE_ENTITY_VALUE:
5108 if (dtd->keepProcessing) {
5109 #if XML_GE == 1
5110 // This will store the given replacement text in
5111 // parser->m_declEntity->textPtr.
5112 enum XML_Error result
5113 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5114 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5115 if (parser->m_declEntity) {
5116 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5117 parser->m_declEntity->textLen
5118 = (int)(poolLength(&dtd->entityValuePool));
5119 poolFinish(&dtd->entityValuePool);
5120 if (parser->m_entityDeclHandler) {
5121 *eventEndPP = s;
5122 parser->m_entityDeclHandler(
5123 parser->m_handlerArg, parser->m_declEntity->name,
5124 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5125 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5126 handleDefault = XML_FALSE;
5127 }
5128 } else
5129 poolDiscard(&dtd->entityValuePool);
5130 if (result != XML_ERROR_NONE)
5131 return result;
5132 #else
5133 // This will store "&entity123;" in parser->m_declEntity->textPtr
5134 // to end up as "&entity123;" in the handler.
5135 if (parser->m_declEntity != NULL) {
5136 const enum XML_Error result
5137 = storeSelfEntityValue(parser, parser->m_declEntity);
5138 if (result != XML_ERROR_NONE)
5139 return result;
5140
5141 if (parser->m_entityDeclHandler) {
5142 *eventEndPP = s;
5143 parser->m_entityDeclHandler(
5144 parser->m_handlerArg, parser->m_declEntity->name,
5145 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5146 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5147 handleDefault = XML_FALSE;
5148 }
5149 }
5150 #endif
5151 }
5152 break;
5153 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5154 #ifdef XML_DTD
5155 parser->m_useForeignDTD = XML_FALSE;
5156 #endif /* XML_DTD */
5157 dtd->hasParamEntityRefs = XML_TRUE;
5158 if (parser->m_startDoctypeDeclHandler) {
5159 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5160 s + enc->minBytesPerChar,
5161 next - enc->minBytesPerChar);
5162 if (parser->m_doctypeSysid == NULL)
5163 return XML_ERROR_NO_MEMORY;
5164 poolFinish(&parser->m_tempPool);
5165 handleDefault = XML_FALSE;
5166 }
5167 #ifdef XML_DTD
5168 else
5169 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5170 for the case where no parser->m_startDoctypeDeclHandler is set */
5171 parser->m_doctypeSysid = externalSubsetName;
5172 #endif /* XML_DTD */
5173 if (! dtd->standalone
5174 #ifdef XML_DTD
5175 && ! parser->m_paramEntityParsing
5176 #endif /* XML_DTD */
5177 && parser->m_notStandaloneHandler
5178 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5179 return XML_ERROR_NOT_STANDALONE;
5180 #ifndef XML_DTD
5181 break;
5182 #else /* XML_DTD */
5183 if (! parser->m_declEntity) {
5184 parser->m_declEntity = (ENTITY *)lookup(
5185 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5186 if (! parser->m_declEntity)
5187 return XML_ERROR_NO_MEMORY;
5188 parser->m_declEntity->publicId = NULL;
5189 }
5190 #endif /* XML_DTD */
5191 /* fall through */
5192 case XML_ROLE_ENTITY_SYSTEM_ID:
5193 if (dtd->keepProcessing && parser->m_declEntity) {
5194 parser->m_declEntity->systemId
5195 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5196 next - enc->minBytesPerChar);
5197 if (! parser->m_declEntity->systemId)
5198 return XML_ERROR_NO_MEMORY;
5199 parser->m_declEntity->base = parser->m_curBase;
5200 poolFinish(&dtd->pool);
5201 /* Don't suppress the default handler if we fell through from
5202 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5203 */
5204 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5205 handleDefault = XML_FALSE;
5206 }
5207 break;
5208 case XML_ROLE_ENTITY_COMPLETE:
5209 #if XML_GE == 0
5210 // This will store "&entity123;" in entity->textPtr
5211 // to end up as "&entity123;" in the handler.
5212 if (parser->m_declEntity != NULL) {
5213 const enum XML_Error result
5214 = storeSelfEntityValue(parser, parser->m_declEntity);
5215 if (result != XML_ERROR_NONE)
5216 return result;
5217 }
5218 #endif
5219 if (dtd->keepProcessing && parser->m_declEntity
5220 && parser->m_entityDeclHandler) {
5221 *eventEndPP = s;
5222 parser->m_entityDeclHandler(
5223 parser->m_handlerArg, parser->m_declEntity->name,
5224 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5225 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5226 handleDefault = XML_FALSE;
5227 }
5228 break;
5229 case XML_ROLE_ENTITY_NOTATION_NAME:
5230 if (dtd->keepProcessing && parser->m_declEntity) {
5231 parser->m_declEntity->notation
5232 = poolStoreString(&dtd->pool, enc, s, next);
5233 if (! parser->m_declEntity->notation)
5234 return XML_ERROR_NO_MEMORY;
5235 poolFinish(&dtd->pool);
5236 if (parser->m_unparsedEntityDeclHandler) {
5237 *eventEndPP = s;
5238 parser->m_unparsedEntityDeclHandler(
5239 parser->m_handlerArg, parser->m_declEntity->name,
5240 parser->m_declEntity->base, parser->m_declEntity->systemId,
5241 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5242 handleDefault = XML_FALSE;
5243 } else if (parser->m_entityDeclHandler) {
5244 *eventEndPP = s;
5245 parser->m_entityDeclHandler(
5246 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5247 parser->m_declEntity->base, parser->m_declEntity->systemId,
5248 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5249 handleDefault = XML_FALSE;
5250 }
5251 }
5252 break;
5253 case XML_ROLE_GENERAL_ENTITY_NAME: {
5254 if (XmlPredefinedEntityName(enc, s, next)) {
5255 parser->m_declEntity = NULL;
5256 break;
5257 }
5258 if (dtd->keepProcessing) {
5259 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5260 if (! name)
5261 return XML_ERROR_NO_MEMORY;
5262 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5263 name, sizeof(ENTITY));
5264 if (! parser->m_declEntity)
5265 return XML_ERROR_NO_MEMORY;
5266 if (parser->m_declEntity->name != name) {
5267 poolDiscard(&dtd->pool);
5268 parser->m_declEntity = NULL;
5269 } else {
5270 poolFinish(&dtd->pool);
5271 parser->m_declEntity->publicId = NULL;
5272 parser->m_declEntity->is_param = XML_FALSE;
5273 /* if we have a parent parser or are reading an internal parameter
5274 entity, then the entity declaration is not considered "internal"
5275 */
5276 parser->m_declEntity->is_internal
5277 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5278 if (parser->m_entityDeclHandler)
5279 handleDefault = XML_FALSE;
5280 }
5281 } else {
5282 poolDiscard(&dtd->pool);
5283 parser->m_declEntity = NULL;
5284 }
5285 } break;
5286 case XML_ROLE_PARAM_ENTITY_NAME:
5287 #ifdef XML_DTD
5288 if (dtd->keepProcessing) {
5289 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5290 if (! name)
5291 return XML_ERROR_NO_MEMORY;
5292 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5293 name, sizeof(ENTITY));
5294 if (! parser->m_declEntity)
5295 return XML_ERROR_NO_MEMORY;
5296 if (parser->m_declEntity->name != name) {
5297 poolDiscard(&dtd->pool);
5298 parser->m_declEntity = NULL;
5299 } else {
5300 poolFinish(&dtd->pool);
5301 parser->m_declEntity->publicId = NULL;
5302 parser->m_declEntity->is_param = XML_TRUE;
5303 /* if we have a parent parser or are reading an internal parameter
5304 entity, then the entity declaration is not considered "internal"
5305 */
5306 parser->m_declEntity->is_internal
5307 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5308 if (parser->m_entityDeclHandler)
5309 handleDefault = XML_FALSE;
5310 }
5311 } else {
5312 poolDiscard(&dtd->pool);
5313 parser->m_declEntity = NULL;
5314 }
5315 #else /* not XML_DTD */
5316 parser->m_declEntity = NULL;
5317 #endif /* XML_DTD */
5318 break;
5319 case XML_ROLE_NOTATION_NAME:
5320 parser->m_declNotationPublicId = NULL;
5321 parser->m_declNotationName = NULL;
5322 if (parser->m_notationDeclHandler) {
5323 parser->m_declNotationName
5324 = poolStoreString(&parser->m_tempPool, enc, s, next);
5325 if (! parser->m_declNotationName)
5326 return XML_ERROR_NO_MEMORY;
5327 poolFinish(&parser->m_tempPool);
5328 handleDefault = XML_FALSE;
5329 }
5330 break;
5331 case XML_ROLE_NOTATION_PUBLIC_ID:
5332 if (! XmlIsPublicId(enc, s, next, eventPP))
5333 return XML_ERROR_PUBLICID;
5334 if (parser
5335 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5336 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5337 s + enc->minBytesPerChar,
5338 next - enc->minBytesPerChar);
5339 if (! tem)
5340 return XML_ERROR_NO_MEMORY;
5341 normalizePublicId(tem);
5342 parser->m_declNotationPublicId = tem;
5343 poolFinish(&parser->m_tempPool);
5344 handleDefault = XML_FALSE;
5345 }
5346 break;
5347 case XML_ROLE_NOTATION_SYSTEM_ID:
5348 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5349 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5350 s + enc->minBytesPerChar,
5351 next - enc->minBytesPerChar);
5352 if (! systemId)
5353 return XML_ERROR_NO_MEMORY;
5354 *eventEndPP = s;
5355 parser->m_notationDeclHandler(
5356 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5357 systemId, parser->m_declNotationPublicId);
5358 handleDefault = XML_FALSE;
5359 }
5360 poolClear(&parser->m_tempPool);
5361 break;
5362 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5363 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5364 *eventEndPP = s;
5365 parser->m_notationDeclHandler(
5366 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5367 0, parser->m_declNotationPublicId);
5368 handleDefault = XML_FALSE;
5369 }
5370 poolClear(&parser->m_tempPool);
5371 break;
5372 case XML_ROLE_ERROR:
5373 switch (tok) {
5374 case XML_TOK_PARAM_ENTITY_REF:
5375 /* PE references in internal subset are
5376 not allowed within declarations. */
5377 return XML_ERROR_PARAM_ENTITY_REF;
5378 case XML_TOK_XML_DECL:
5379 return XML_ERROR_MISPLACED_XML_PI;
5380 default:
5381 return XML_ERROR_SYNTAX;
5382 }
5383 #ifdef XML_DTD
5384 case XML_ROLE_IGNORE_SECT: {
5385 enum XML_Error result;
5386 if (parser->m_defaultHandler)
5387 reportDefault(parser, enc, s, next);
5388 handleDefault = XML_FALSE;
5389 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5390 if (result != XML_ERROR_NONE)
5391 return result;
5392 else if (! next) {
5393 parser->m_processor = ignoreSectionProcessor;
5394 return result;
5395 }
5396 } break;
5397 #endif /* XML_DTD */
5398 case XML_ROLE_GROUP_OPEN:
5399 if (parser->m_prologState.level >= parser->m_groupSize) {
5400 if (parser->m_groupSize) {
5401 {
5402 /* Detect and prevent integer overflow */
5403 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5404 return XML_ERROR_NO_MEMORY;
5405 }
5406
5407 char *const new_connector = (char *)REALLOC(
5408 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5409 if (new_connector == NULL) {
5410 parser->m_groupSize /= 2;
5411 return XML_ERROR_NO_MEMORY;
5412 }
5413 parser->m_groupConnector = new_connector;
5414 }
5415
5416 if (dtd->scaffIndex) {
5417 /* Detect and prevent integer overflow.
5418 * The preprocessor guard addresses the "always false" warning
5419 * from -Wtype-limits on platforms where
5420 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5421 #if UINT_MAX >= SIZE_MAX
5422 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5423 return XML_ERROR_NO_MEMORY;
5424 }
5425 #endif
5426
5427 int *const new_scaff_index = (int *)REALLOC(
5428 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5429 if (new_scaff_index == NULL)
5430 return XML_ERROR_NO_MEMORY;
5431 dtd->scaffIndex = new_scaff_index;
5432 }
5433 } else {
5434 parser->m_groupConnector
5435 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5436 if (! parser->m_groupConnector) {
5437 parser->m_groupSize = 0;
5438 return XML_ERROR_NO_MEMORY;
5439 }
5440 }
5441 }
5442 parser->m_groupConnector[parser->m_prologState.level] = 0;
5443 if (dtd->in_eldecl) {
5444 int myindex = nextScaffoldPart(parser);
5445 if (myindex < 0)
5446 return XML_ERROR_NO_MEMORY;
5447 assert(dtd->scaffIndex != NULL);
5448 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5449 dtd->scaffLevel++;
5450 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5451 if (parser->m_elementDeclHandler)
5452 handleDefault = XML_FALSE;
5453 }
5454 break;
5455 case XML_ROLE_GROUP_SEQUENCE:
5456 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5457 return XML_ERROR_SYNTAX;
5458 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5459 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5460 handleDefault = XML_FALSE;
5461 break;
5462 case XML_ROLE_GROUP_CHOICE:
5463 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5464 return XML_ERROR_SYNTAX;
5465 if (dtd->in_eldecl
5466 && ! parser->m_groupConnector[parser->m_prologState.level]
5467 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5468 != XML_CTYPE_MIXED)) {
5469 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5470 = XML_CTYPE_CHOICE;
5471 if (parser->m_elementDeclHandler)
5472 handleDefault = XML_FALSE;
5473 }
5474 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5475 break;
5476 case XML_ROLE_PARAM_ENTITY_REF:
5477 #ifdef XML_DTD
5478 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5479 dtd->hasParamEntityRefs = XML_TRUE;
5480 if (! parser->m_paramEntityParsing)
5481 dtd->keepProcessing = dtd->standalone;
5482 else {
5483 const XML_Char *name;
5484 ENTITY *entity;
5485 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5486 next - enc->minBytesPerChar);
5487 if (! name)
5488 return XML_ERROR_NO_MEMORY;
5489 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5490 poolDiscard(&dtd->pool);
5491 /* first, determine if a check for an existing declaration is needed;
5492 if yes, check that the entity exists, and that it is internal,
5493 otherwise call the skipped entity handler
5494 */
5495 if (parser->m_prologState.documentEntity
5496 && (dtd->standalone ? ! parser->m_openInternalEntities
5497 : ! dtd->hasParamEntityRefs)) {
5498 if (! entity)
5499 return XML_ERROR_UNDEFINED_ENTITY;
5500 else if (! entity->is_internal) {
5501 /* It's hard to exhaustively search the code to be sure,
5502 * but there doesn't seem to be a way of executing the
5503 * following line. There are two cases:
5504 *
5505 * If 'standalone' is false, the DTD must have no
5506 * parameter entities or we wouldn't have passed the outer
5507 * 'if' statement. That means the only entity in the hash
5508 * table is the external subset name "#" which cannot be
5509 * given as a parameter entity name in XML syntax, so the
5510 * lookup must have returned NULL and we don't even reach
5511 * the test for an internal entity.
5512 *
5513 * If 'standalone' is true, it does not seem to be
5514 * possible to create entities taking this code path that
5515 * are not internal entities, so fail the test above.
5516 *
5517 * Because this analysis is very uncertain, the code is
5518 * being left in place and merely removed from the
5519 * coverage test statistics.
5520 */
5521 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5522 }
5523 } else if (! entity) {
5524 dtd->keepProcessing = dtd->standalone;
5525 /* cannot report skipped entities in declarations */
5526 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5527 && parser->m_skippedEntityHandler) {
5528 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5529 handleDefault = XML_FALSE;
5530 }
5531 break;
5532 }
5533 if (entity->open)
5534 return XML_ERROR_RECURSIVE_ENTITY_REF;
5535 if (entity->textPtr) {
5536 enum XML_Error result;
5537 XML_Bool betweenDecl
5538 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5539 result = processInternalEntity(parser, entity, betweenDecl);
5540 if (result != XML_ERROR_NONE)
5541 return result;
5542 handleDefault = XML_FALSE;
5543 break;
5544 }
5545 if (parser->m_externalEntityRefHandler) {
5546 dtd->paramEntityRead = XML_FALSE;
5547 entity->open = XML_TRUE;
5548 entityTrackingOnOpen(parser, entity, __LINE__);
5549 if (! parser->m_externalEntityRefHandler(
5550 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5551 entity->systemId, entity->publicId)) {
5552 entityTrackingOnClose(parser, entity, __LINE__);
5553 entity->open = XML_FALSE;
5554 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5555 }
5556 entityTrackingOnClose(parser, entity, __LINE__);
5557 entity->open = XML_FALSE;
5558 handleDefault = XML_FALSE;
5559 if (! dtd->paramEntityRead) {
5560 dtd->keepProcessing = dtd->standalone;
5561 break;
5562 }
5563 } else {
5564 dtd->keepProcessing = dtd->standalone;
5565 break;
5566 }
5567 }
5568 #endif /* XML_DTD */
5569 if (! dtd->standalone && parser->m_notStandaloneHandler
5570 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5571 return XML_ERROR_NOT_STANDALONE;
5572 break;
5573
5574 /* Element declaration stuff */
5575
5576 case XML_ROLE_ELEMENT_NAME:
5577 if (parser->m_elementDeclHandler) {
5578 parser->m_declElementType = getElementType(parser, enc, s, next);
5579 if (! parser->m_declElementType)
5580 return XML_ERROR_NO_MEMORY;
5581 dtd->scaffLevel = 0;
5582 dtd->scaffCount = 0;
5583 dtd->in_eldecl = XML_TRUE;
5584 handleDefault = XML_FALSE;
5585 }
5586 break;
5587
5588 case XML_ROLE_CONTENT_ANY:
5589 case XML_ROLE_CONTENT_EMPTY:
5590 if (dtd->in_eldecl) {
5591 if (parser->m_elementDeclHandler) {
5592 XML_Content *content
5593 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5594 if (! content)
5595 return XML_ERROR_NO_MEMORY;
5596 content->quant = XML_CQUANT_NONE;
5597 content->name = NULL;
5598 content->numchildren = 0;
5599 content->children = NULL;
5600 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5601 : XML_CTYPE_EMPTY);
5602 *eventEndPP = s;
5603 parser->m_elementDeclHandler(
5604 parser->m_handlerArg, parser->m_declElementType->name, content);
5605 handleDefault = XML_FALSE;
5606 }
5607 dtd->in_eldecl = XML_FALSE;
5608 }
5609 break;
5610
5611 case XML_ROLE_CONTENT_PCDATA:
5612 if (dtd->in_eldecl) {
5613 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5614 = XML_CTYPE_MIXED;
5615 if (parser->m_elementDeclHandler)
5616 handleDefault = XML_FALSE;
5617 }
5618 break;
5619
5620 case XML_ROLE_CONTENT_ELEMENT:
5621 quant = XML_CQUANT_NONE;
5622 goto elementContent;
5623 case XML_ROLE_CONTENT_ELEMENT_OPT:
5624 quant = XML_CQUANT_OPT;
5625 goto elementContent;
5626 case XML_ROLE_CONTENT_ELEMENT_REP:
5627 quant = XML_CQUANT_REP;
5628 goto elementContent;
5629 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5630 quant = XML_CQUANT_PLUS;
5631 elementContent:
5632 if (dtd->in_eldecl) {
5633 ELEMENT_TYPE *el;
5634 const XML_Char *name;
5635 size_t nameLen;
5636 const char *nxt
5637 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5638 int myindex = nextScaffoldPart(parser);
5639 if (myindex < 0)
5640 return XML_ERROR_NO_MEMORY;
5641 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5642 dtd->scaffold[myindex].quant = quant;
5643 el = getElementType(parser, enc, s, nxt);
5644 if (! el)
5645 return XML_ERROR_NO_MEMORY;
5646 name = el->name;
5647 dtd->scaffold[myindex].name = name;
5648 nameLen = 0;
5649 for (; name[nameLen++];)
5650 ;
5651
5652 /* Detect and prevent integer overflow */
5653 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5654 return XML_ERROR_NO_MEMORY;
5655 }
5656
5657 dtd->contentStringLen += (unsigned)nameLen;
5658 if (parser->m_elementDeclHandler)
5659 handleDefault = XML_FALSE;
5660 }
5661 break;
5662
5663 case XML_ROLE_GROUP_CLOSE:
5664 quant = XML_CQUANT_NONE;
5665 goto closeGroup;
5666 case XML_ROLE_GROUP_CLOSE_OPT:
5667 quant = XML_CQUANT_OPT;
5668 goto closeGroup;
5669 case XML_ROLE_GROUP_CLOSE_REP:
5670 quant = XML_CQUANT_REP;
5671 goto closeGroup;
5672 case XML_ROLE_GROUP_CLOSE_PLUS:
5673 quant = XML_CQUANT_PLUS;
5674 closeGroup:
5675 if (dtd->in_eldecl) {
5676 if (parser->m_elementDeclHandler)
5677 handleDefault = XML_FALSE;
5678 dtd->scaffLevel--;
5679 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5680 if (dtd->scaffLevel == 0) {
5681 if (! handleDefault) {
5682 XML_Content *model = build_model(parser);
5683 if (! model)
5684 return XML_ERROR_NO_MEMORY;
5685 *eventEndPP = s;
5686 parser->m_elementDeclHandler(
5687 parser->m_handlerArg, parser->m_declElementType->name, model);
5688 }
5689 dtd->in_eldecl = XML_FALSE;
5690 dtd->contentStringLen = 0;
5691 }
5692 }
5693 break;
5694 /* End element declaration stuff */
5695
5696 case XML_ROLE_PI:
5697 if (! reportProcessingInstruction(parser, enc, s, next))
5698 return XML_ERROR_NO_MEMORY;
5699 handleDefault = XML_FALSE;
5700 break;
5701 case XML_ROLE_COMMENT:
5702 if (! reportComment(parser, enc, s, next))
5703 return XML_ERROR_NO_MEMORY;
5704 handleDefault = XML_FALSE;
5705 break;
5706 case XML_ROLE_NONE:
5707 switch (tok) {
5708 case XML_TOK_BOM:
5709 handleDefault = XML_FALSE;
5710 break;
5711 }
5712 break;
5713 case XML_ROLE_DOCTYPE_NONE:
5714 if (parser->m_startDoctypeDeclHandler)
5715 handleDefault = XML_FALSE;
5716 break;
5717 case XML_ROLE_ENTITY_NONE:
5718 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5719 handleDefault = XML_FALSE;
5720 break;
5721 case XML_ROLE_NOTATION_NONE:
5722 if (parser->m_notationDeclHandler)
5723 handleDefault = XML_FALSE;
5724 break;
5725 case XML_ROLE_ATTLIST_NONE:
5726 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5727 handleDefault = XML_FALSE;
5728 break;
5729 case XML_ROLE_ELEMENT_NONE:
5730 if (parser->m_elementDeclHandler)
5731 handleDefault = XML_FALSE;
5732 break;
5733 } /* end of big switch */
5734
5735 if (handleDefault && parser->m_defaultHandler)
5736 reportDefault(parser, enc, s, next);
5737
5738 switch (parser->m_parsingStatus.parsing) {
5739 case XML_SUSPENDED:
5740 *nextPtr = next;
5741 return XML_ERROR_NONE;
5742 case XML_FINISHED:
5743 return XML_ERROR_ABORTED;
5744 default:
5745 s = next;
5746 tok = XmlPrologTok(enc, s, end, &next);
5747 }
5748 }
5749 /* not reached */
5750 }
5751
5752 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5753 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5754 const char **nextPtr) {
5755 parser->m_processor = epilogProcessor;
5756 parser->m_eventPtr = s;
5757 for (;;) {
5758 const char *next = NULL;
5759 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5760 #if XML_GE == 1
5761 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5762 XML_ACCOUNT_DIRECT)) {
5763 accountingOnAbort(parser);
5764 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5765 }
5766 #endif
5767 parser->m_eventEndPtr = next;
5768 switch (tok) {
5769 /* report partial linebreak - it might be the last token */
5770 case -XML_TOK_PROLOG_S:
5771 if (parser->m_defaultHandler) {
5772 reportDefault(parser, parser->m_encoding, s, next);
5773 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5774 return XML_ERROR_ABORTED;
5775 }
5776 *nextPtr = next;
5777 return XML_ERROR_NONE;
5778 case XML_TOK_NONE:
5779 *nextPtr = s;
5780 return XML_ERROR_NONE;
5781 case XML_TOK_PROLOG_S:
5782 if (parser->m_defaultHandler)
5783 reportDefault(parser, parser->m_encoding, s, next);
5784 break;
5785 case XML_TOK_PI:
5786 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5787 return XML_ERROR_NO_MEMORY;
5788 break;
5789 case XML_TOK_COMMENT:
5790 if (! reportComment(parser, parser->m_encoding, s, next))
5791 return XML_ERROR_NO_MEMORY;
5792 break;
5793 case XML_TOK_INVALID:
5794 parser->m_eventPtr = next;
5795 return XML_ERROR_INVALID_TOKEN;
5796 case XML_TOK_PARTIAL:
5797 if (! parser->m_parsingStatus.finalBuffer) {
5798 *nextPtr = s;
5799 return XML_ERROR_NONE;
5800 }
5801 return XML_ERROR_UNCLOSED_TOKEN;
5802 case XML_TOK_PARTIAL_CHAR:
5803 if (! parser->m_parsingStatus.finalBuffer) {
5804 *nextPtr = s;
5805 return XML_ERROR_NONE;
5806 }
5807 return XML_ERROR_PARTIAL_CHAR;
5808 default:
5809 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5810 }
5811 parser->m_eventPtr = s = next;
5812 switch (parser->m_parsingStatus.parsing) {
5813 case XML_SUSPENDED:
5814 *nextPtr = next;
5815 return XML_ERROR_NONE;
5816 case XML_FINISHED:
5817 return XML_ERROR_ABORTED;
5818 default:;
5819 }
5820 }
5821 }
5822
5823 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5824 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5825 const char *textStart, *textEnd;
5826 const char *next;
5827 enum XML_Error result;
5828 OPEN_INTERNAL_ENTITY *openEntity;
5829
5830 if (parser->m_freeInternalEntities) {
5831 openEntity = parser->m_freeInternalEntities;
5832 parser->m_freeInternalEntities = openEntity->next;
5833 } else {
5834 openEntity
5835 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5836 if (! openEntity)
5837 return XML_ERROR_NO_MEMORY;
5838 }
5839 entity->open = XML_TRUE;
5840 #if XML_GE == 1
5841 entityTrackingOnOpen(parser, entity, __LINE__);
5842 #endif
5843 entity->processed = 0;
5844 openEntity->next = parser->m_openInternalEntities;
5845 parser->m_openInternalEntities = openEntity;
5846 openEntity->entity = entity;
5847 openEntity->startTagLevel = parser->m_tagLevel;
5848 openEntity->betweenDecl = betweenDecl;
5849 openEntity->internalEventPtr = NULL;
5850 openEntity->internalEventEndPtr = NULL;
5851 textStart = (const char *)entity->textPtr;
5852 textEnd = (const char *)(entity->textPtr + entity->textLen);
5853 /* Set a safe default value in case 'next' does not get set */
5854 next = textStart;
5855
5856 if (entity->is_param) {
5857 int tok
5858 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5859 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5860 tok, next, &next, XML_FALSE, XML_FALSE,
5861 XML_ACCOUNT_ENTITY_EXPANSION);
5862 } else {
5863 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5864 textStart, textEnd, &next, XML_FALSE,
5865 XML_ACCOUNT_ENTITY_EXPANSION);
5866 }
5867
5868 if (result == XML_ERROR_NONE) {
5869 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5870 entity->processed = (int)(next - textStart);
5871 parser->m_processor = internalEntityProcessor;
5872 } else if (parser->m_openInternalEntities->entity == entity) {
5873 #if XML_GE == 1
5874 entityTrackingOnClose(parser, entity, __LINE__);
5875 #endif /* XML_GE == 1 */
5876 entity->open = XML_FALSE;
5877 parser->m_openInternalEntities = openEntity->next;
5878 /* put openEntity back in list of free instances */
5879 openEntity->next = parser->m_freeInternalEntities;
5880 parser->m_freeInternalEntities = openEntity;
5881 }
5882 }
5883 return result;
5884 }
5885
5886 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5887 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5888 const char **nextPtr) {
5889 ENTITY *entity;
5890 const char *textStart, *textEnd;
5891 const char *next;
5892 enum XML_Error result;
5893 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5894 if (! openEntity)
5895 return XML_ERROR_UNEXPECTED_STATE;
5896
5897 entity = openEntity->entity;
5898 textStart = ((const char *)entity->textPtr) + entity->processed;
5899 textEnd = (const char *)(entity->textPtr + entity->textLen);
5900 /* Set a safe default value in case 'next' does not get set */
5901 next = textStart;
5902
5903 if (entity->is_param) {
5904 int tok
5905 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5906 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5907 tok, next, &next, XML_FALSE, XML_TRUE,
5908 XML_ACCOUNT_ENTITY_EXPANSION);
5909 } else {
5910 result = doContent(parser, openEntity->startTagLevel,
5911 parser->m_internalEncoding, textStart, textEnd, &next,
5912 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5913 }
5914
5915 if (result != XML_ERROR_NONE)
5916 return result;
5917
5918 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5919 entity->processed = (int)(next - (const char *)entity->textPtr);
5920 return result;
5921 }
5922
5923 #if XML_GE == 1
5924 entityTrackingOnClose(parser, entity, __LINE__);
5925 #endif
5926 entity->open = XML_FALSE;
5927 parser->m_openInternalEntities = openEntity->next;
5928 /* put openEntity back in list of free instances */
5929 openEntity->next = parser->m_freeInternalEntities;
5930 parser->m_freeInternalEntities = openEntity;
5931
5932 // If there are more open entities we want to stop right here and have the
5933 // upcoming call to XML_ResumeParser continue with entity content, or it would
5934 // be ignored altogether.
5935 if (parser->m_openInternalEntities != NULL
5936 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5937 return XML_ERROR_NONE;
5938 }
5939
5940 if (entity->is_param) {
5941 int tok;
5942 parser->m_processor = prologProcessor;
5943 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5944 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5945 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5946 XML_ACCOUNT_DIRECT);
5947 } else {
5948 parser->m_processor = contentProcessor;
5949 /* see externalEntityContentProcessor vs contentProcessor */
5950 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5951 parser->m_encoding, s, end, nextPtr,
5952 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5953 XML_ACCOUNT_DIRECT);
5954 if (result == XML_ERROR_NONE) {
5955 if (! storeRawNames(parser))
5956 return XML_ERROR_NO_MEMORY;
5957 }
5958 return result;
5959 }
5960 }
5961
5962 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5963 errorProcessor(XML_Parser parser, const char *s, const char *end,
5964 const char **nextPtr) {
5965 UNUSED_P(s);
5966 UNUSED_P(end);
5967 UNUSED_P(nextPtr);
5968 return parser->m_errorCode;
5969 }
5970
5971 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5972 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5973 const char *ptr, const char *end, STRING_POOL *pool,
5974 enum XML_Account account) {
5975 enum XML_Error result
5976 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5977 if (result)
5978 return result;
5979 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5980 poolChop(pool);
5981 if (! poolAppendChar(pool, XML_T('\0')))
5982 return XML_ERROR_NO_MEMORY;
5983 return XML_ERROR_NONE;
5984 }
5985
5986 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5987 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5988 const char *ptr, const char *end, STRING_POOL *pool,
5989 enum XML_Account account) {
5990 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5991 #ifndef XML_DTD
5992 UNUSED_P(account);
5993 #endif
5994
5995 for (;;) {
5996 const char *next
5997 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5998 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5999 #if XML_GE == 1
6000 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6001 accountingOnAbort(parser);
6002 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6003 }
6004 #endif
6005 switch (tok) {
6006 case XML_TOK_NONE:
6007 return XML_ERROR_NONE;
6008 case XML_TOK_INVALID:
6009 if (enc == parser->m_encoding)
6010 parser->m_eventPtr = next;
6011 return XML_ERROR_INVALID_TOKEN;
6012 case XML_TOK_PARTIAL:
6013 if (enc == parser->m_encoding)
6014 parser->m_eventPtr = ptr;
6015 return XML_ERROR_INVALID_TOKEN;
6016 case XML_TOK_CHAR_REF: {
6017 XML_Char buf[XML_ENCODE_MAX];
6018 int i;
6019 int n = XmlCharRefNumber(enc, ptr);
6020 if (n < 0) {
6021 if (enc == parser->m_encoding)
6022 parser->m_eventPtr = ptr;
6023 return XML_ERROR_BAD_CHAR_REF;
6024 }
6025 if (! isCdata && n == 0x20 /* space */
6026 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6027 break;
6028 n = XmlEncode(n, (ICHAR *)buf);
6029 /* The XmlEncode() functions can never return 0 here. That
6030 * error return happens if the code point passed in is either
6031 * negative or greater than or equal to 0x110000. The
6032 * XmlCharRefNumber() functions will all return a number
6033 * strictly less than 0x110000 or a negative value if an error
6034 * occurred. The negative value is intercepted above, so
6035 * XmlEncode() is never passed a value it might return an
6036 * error for.
6037 */
6038 for (i = 0; i < n; i++) {
6039 if (! poolAppendChar(pool, buf[i]))
6040 return XML_ERROR_NO_MEMORY;
6041 }
6042 } break;
6043 case XML_TOK_DATA_CHARS:
6044 if (! poolAppend(pool, enc, ptr, next))
6045 return XML_ERROR_NO_MEMORY;
6046 break;
6047 case XML_TOK_TRAILING_CR:
6048 next = ptr + enc->minBytesPerChar;
6049 /* fall through */
6050 case XML_TOK_ATTRIBUTE_VALUE_S:
6051 case XML_TOK_DATA_NEWLINE:
6052 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6053 break;
6054 if (! poolAppendChar(pool, 0x20))
6055 return XML_ERROR_NO_MEMORY;
6056 break;
6057 case XML_TOK_ENTITY_REF: {
6058 const XML_Char *name;
6059 ENTITY *entity;
6060 char checkEntityDecl;
6061 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6062 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6063 if (ch) {
6064 #if XML_GE == 1
6065 /* NOTE: We are replacing 4-6 characters original input for 1 character
6066 * so there is no amplification and hence recording without
6067 * protection. */
6068 accountingDiffTolerated(parser, tok, (char *)&ch,
6069 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6070 XML_ACCOUNT_ENTITY_EXPANSION);
6071 #endif /* XML_GE == 1 */
6072 if (! poolAppendChar(pool, ch))
6073 return XML_ERROR_NO_MEMORY;
6074 break;
6075 }
6076 name = poolStoreString(&parser->m_temp2Pool, enc,
6077 ptr + enc->minBytesPerChar,
6078 next - enc->minBytesPerChar);
6079 if (! name)
6080 return XML_ERROR_NO_MEMORY;
6081 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6082 poolDiscard(&parser->m_temp2Pool);
6083 /* First, determine if a check for an existing declaration is needed;
6084 if yes, check that the entity exists, and that it is internal.
6085 */
6086 if (pool == &dtd->pool) /* are we called from prolog? */
6087 checkEntityDecl =
6088 #ifdef XML_DTD
6089 parser->m_prologState.documentEntity &&
6090 #endif /* XML_DTD */
6091 (dtd->standalone ? ! parser->m_openInternalEntities
6092 : ! dtd->hasParamEntityRefs);
6093 else /* if (pool == &parser->m_tempPool): we are called from content */
6094 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6095 if (checkEntityDecl) {
6096 if (! entity)
6097 return XML_ERROR_UNDEFINED_ENTITY;
6098 else if (! entity->is_internal)
6099 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6100 } else if (! entity) {
6101 /* Cannot report skipped entity here - see comments on
6102 parser->m_skippedEntityHandler.
6103 if (parser->m_skippedEntityHandler)
6104 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6105 */
6106 /* Cannot call the default handler because this would be
6107 out of sync with the call to the startElementHandler.
6108 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6109 reportDefault(parser, enc, ptr, next);
6110 */
6111 break;
6112 }
6113 if (entity->open) {
6114 if (enc == parser->m_encoding) {
6115 /* It does not appear that this line can be executed.
6116 *
6117 * The "if (entity->open)" check catches recursive entity
6118 * definitions. In order to be called with an open
6119 * entity, it must have gone through this code before and
6120 * been through the recursive call to
6121 * appendAttributeValue() some lines below. That call
6122 * sets the local encoding ("enc") to the parser's
6123 * internal encoding (internal_utf8 or internal_utf16),
6124 * which can never be the same as the principle encoding.
6125 * It doesn't appear there is another code path that gets
6126 * here with entity->open being TRUE.
6127 *
6128 * Since it is not certain that this logic is watertight,
6129 * we keep the line and merely exclude it from coverage
6130 * tests.
6131 */
6132 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6133 }
6134 return XML_ERROR_RECURSIVE_ENTITY_REF;
6135 }
6136 if (entity->notation) {
6137 if (enc == parser->m_encoding)
6138 parser->m_eventPtr = ptr;
6139 return XML_ERROR_BINARY_ENTITY_REF;
6140 }
6141 if (! entity->textPtr) {
6142 if (enc == parser->m_encoding)
6143 parser->m_eventPtr = ptr;
6144 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6145 } else {
6146 enum XML_Error result;
6147 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6148 entity->open = XML_TRUE;
6149 #if XML_GE == 1
6150 entityTrackingOnOpen(parser, entity, __LINE__);
6151 #endif
6152 result = appendAttributeValue(parser, parser->m_internalEncoding,
6153 isCdata, (const char *)entity->textPtr,
6154 (const char *)textEnd, pool,
6155 XML_ACCOUNT_ENTITY_EXPANSION);
6156 #if XML_GE == 1
6157 entityTrackingOnClose(parser, entity, __LINE__);
6158 #endif
6159 entity->open = XML_FALSE;
6160 if (result)
6161 return result;
6162 }
6163 } break;
6164 default:
6165 /* The only token returned by XmlAttributeValueTok() that does
6166 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6167 * Getting that would require an entity name to contain an
6168 * incomplete XML character (e.g. \xE2\x82); however previous
6169 * tokenisers will have already recognised and rejected such
6170 * names before XmlAttributeValueTok() gets a look-in. This
6171 * default case should be retained as a safety net, but the code
6172 * excluded from coverage tests.
6173 *
6174 * LCOV_EXCL_START
6175 */
6176 if (enc == parser->m_encoding)
6177 parser->m_eventPtr = ptr;
6178 return XML_ERROR_UNEXPECTED_STATE;
6179 /* LCOV_EXCL_STOP */
6180 }
6181 ptr = next;
6182 }
6183 /* not reached */
6184 }
6185
6186 #if XML_GE == 1
6187 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)6188 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6189 const char *entityTextPtr, const char *entityTextEnd,
6190 enum XML_Account account) {
6191 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6192 STRING_POOL *pool = &(dtd->entityValuePool);
6193 enum XML_Error result = XML_ERROR_NONE;
6194 # ifdef XML_DTD
6195 int oldInEntityValue = parser->m_prologState.inEntityValue;
6196 parser->m_prologState.inEntityValue = 1;
6197 # else
6198 UNUSED_P(account);
6199 # endif /* XML_DTD */
6200 /* never return Null for the value argument in EntityDeclHandler,
6201 since this would indicate an external entity; therefore we
6202 have to make sure that entityValuePool.start is not null */
6203 if (! pool->blocks) {
6204 if (! poolGrow(pool))
6205 return XML_ERROR_NO_MEMORY;
6206 }
6207
6208 for (;;) {
6209 const char *next
6210 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6211 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6212
6213 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6214 account)) {
6215 accountingOnAbort(parser);
6216 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6217 goto endEntityValue;
6218 }
6219
6220 switch (tok) {
6221 case XML_TOK_PARAM_ENTITY_REF:
6222 # ifdef XML_DTD
6223 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6224 const XML_Char *name;
6225 ENTITY *entity;
6226 name = poolStoreString(&parser->m_tempPool, enc,
6227 entityTextPtr + enc->minBytesPerChar,
6228 next - enc->minBytesPerChar);
6229 if (! name) {
6230 result = XML_ERROR_NO_MEMORY;
6231 goto endEntityValue;
6232 }
6233 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6234 poolDiscard(&parser->m_tempPool);
6235 if (! entity) {
6236 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6237 /* cannot report skipped entity here - see comments on
6238 parser->m_skippedEntityHandler
6239 if (parser->m_skippedEntityHandler)
6240 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6241 */
6242 dtd->keepProcessing = dtd->standalone;
6243 goto endEntityValue;
6244 }
6245 if (entity->open || (entity == parser->m_declEntity)) {
6246 if (enc == parser->m_encoding)
6247 parser->m_eventPtr = entityTextPtr;
6248 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6249 goto endEntityValue;
6250 }
6251 if (entity->systemId) {
6252 if (parser->m_externalEntityRefHandler) {
6253 dtd->paramEntityRead = XML_FALSE;
6254 entity->open = XML_TRUE;
6255 entityTrackingOnOpen(parser, entity, __LINE__);
6256 if (! parser->m_externalEntityRefHandler(
6257 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6258 entity->systemId, entity->publicId)) {
6259 entityTrackingOnClose(parser, entity, __LINE__);
6260 entity->open = XML_FALSE;
6261 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6262 goto endEntityValue;
6263 }
6264 entityTrackingOnClose(parser, entity, __LINE__);
6265 entity->open = XML_FALSE;
6266 if (! dtd->paramEntityRead)
6267 dtd->keepProcessing = dtd->standalone;
6268 } else
6269 dtd->keepProcessing = dtd->standalone;
6270 } else {
6271 entity->open = XML_TRUE;
6272 entityTrackingOnOpen(parser, entity, __LINE__);
6273 result = storeEntityValue(
6274 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6275 (const char *)(entity->textPtr + entity->textLen),
6276 XML_ACCOUNT_ENTITY_EXPANSION);
6277 entityTrackingOnClose(parser, entity, __LINE__);
6278 entity->open = XML_FALSE;
6279 if (result)
6280 goto endEntityValue;
6281 }
6282 break;
6283 }
6284 # endif /* XML_DTD */
6285 /* In the internal subset, PE references are not legal
6286 within markup declarations, e.g entity values in this case. */
6287 parser->m_eventPtr = entityTextPtr;
6288 result = XML_ERROR_PARAM_ENTITY_REF;
6289 goto endEntityValue;
6290 case XML_TOK_NONE:
6291 result = XML_ERROR_NONE;
6292 goto endEntityValue;
6293 case XML_TOK_ENTITY_REF:
6294 case XML_TOK_DATA_CHARS:
6295 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6296 result = XML_ERROR_NO_MEMORY;
6297 goto endEntityValue;
6298 }
6299 break;
6300 case XML_TOK_TRAILING_CR:
6301 next = entityTextPtr + enc->minBytesPerChar;
6302 /* fall through */
6303 case XML_TOK_DATA_NEWLINE:
6304 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6305 result = XML_ERROR_NO_MEMORY;
6306 goto endEntityValue;
6307 }
6308 *(pool->ptr)++ = 0xA;
6309 break;
6310 case XML_TOK_CHAR_REF: {
6311 XML_Char buf[XML_ENCODE_MAX];
6312 int i;
6313 int n = XmlCharRefNumber(enc, entityTextPtr);
6314 if (n < 0) {
6315 if (enc == parser->m_encoding)
6316 parser->m_eventPtr = entityTextPtr;
6317 result = XML_ERROR_BAD_CHAR_REF;
6318 goto endEntityValue;
6319 }
6320 n = XmlEncode(n, (ICHAR *)buf);
6321 /* The XmlEncode() functions can never return 0 here. That
6322 * error return happens if the code point passed in is either
6323 * negative or greater than or equal to 0x110000. The
6324 * XmlCharRefNumber() functions will all return a number
6325 * strictly less than 0x110000 or a negative value if an error
6326 * occurred. The negative value is intercepted above, so
6327 * XmlEncode() is never passed a value it might return an
6328 * error for.
6329 */
6330 for (i = 0; i < n; i++) {
6331 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6332 result = XML_ERROR_NO_MEMORY;
6333 goto endEntityValue;
6334 }
6335 *(pool->ptr)++ = buf[i];
6336 }
6337 } break;
6338 case XML_TOK_PARTIAL:
6339 if (enc == parser->m_encoding)
6340 parser->m_eventPtr = entityTextPtr;
6341 result = XML_ERROR_INVALID_TOKEN;
6342 goto endEntityValue;
6343 case XML_TOK_INVALID:
6344 if (enc == parser->m_encoding)
6345 parser->m_eventPtr = next;
6346 result = XML_ERROR_INVALID_TOKEN;
6347 goto endEntityValue;
6348 default:
6349 /* This default case should be unnecessary -- all the tokens
6350 * that XmlEntityValueTok() can return have their own explicit
6351 * cases -- but should be retained for safety. We do however
6352 * exclude it from the coverage statistics.
6353 *
6354 * LCOV_EXCL_START
6355 */
6356 if (enc == parser->m_encoding)
6357 parser->m_eventPtr = entityTextPtr;
6358 result = XML_ERROR_UNEXPECTED_STATE;
6359 goto endEntityValue;
6360 /* LCOV_EXCL_STOP */
6361 }
6362 entityTextPtr = next;
6363 }
6364 endEntityValue:
6365 # ifdef XML_DTD
6366 parser->m_prologState.inEntityValue = oldInEntityValue;
6367 # endif /* XML_DTD */
6368 return result;
6369 }
6370
6371 #else /* XML_GE == 0 */
6372
6373 static enum XML_Error
storeSelfEntityValue(XML_Parser parser,ENTITY * entity)6374 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6375 // This will store "&entity123;" in entity->textPtr
6376 // to end up as "&entity123;" in the handler.
6377 const char *const entity_start = "&";
6378 const char *const entity_end = ";";
6379
6380 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6381 if (! poolAppendString(pool, entity_start)
6382 || ! poolAppendString(pool, entity->name)
6383 || ! poolAppendString(pool, entity_end)) {
6384 poolDiscard(pool);
6385 return XML_ERROR_NO_MEMORY;
6386 }
6387
6388 entity->textPtr = poolStart(pool);
6389 entity->textLen = (int)(poolLength(pool));
6390 poolFinish(pool);
6391
6392 return XML_ERROR_NONE;
6393 }
6394
6395 #endif /* XML_GE == 0 */
6396
6397 static void FASTCALL
normalizeLines(XML_Char * s)6398 normalizeLines(XML_Char *s) {
6399 XML_Char *p;
6400 for (;; s++) {
6401 if (*s == XML_T('\0'))
6402 return;
6403 if (*s == 0xD)
6404 break;
6405 }
6406 p = s;
6407 do {
6408 if (*s == 0xD) {
6409 *p++ = 0xA;
6410 if (*++s == 0xA)
6411 s++;
6412 } else
6413 *p++ = *s++;
6414 } while (*s);
6415 *p = XML_T('\0');
6416 }
6417
6418 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6419 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6420 const char *start, const char *end) {
6421 const XML_Char *target;
6422 XML_Char *data;
6423 const char *tem;
6424 if (! parser->m_processingInstructionHandler) {
6425 if (parser->m_defaultHandler)
6426 reportDefault(parser, enc, start, end);
6427 return 1;
6428 }
6429 start += enc->minBytesPerChar * 2;
6430 tem = start + XmlNameLength(enc, start);
6431 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6432 if (! target)
6433 return 0;
6434 poolFinish(&parser->m_tempPool);
6435 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6436 end - enc->minBytesPerChar * 2);
6437 if (! data)
6438 return 0;
6439 normalizeLines(data);
6440 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6441 poolClear(&parser->m_tempPool);
6442 return 1;
6443 }
6444
6445 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6446 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6447 const char *end) {
6448 XML_Char *data;
6449 if (! parser->m_commentHandler) {
6450 if (parser->m_defaultHandler)
6451 reportDefault(parser, enc, start, end);
6452 return 1;
6453 }
6454 data = poolStoreString(&parser->m_tempPool, enc,
6455 start + enc->minBytesPerChar * 4,
6456 end - enc->minBytesPerChar * 3);
6457 if (! data)
6458 return 0;
6459 normalizeLines(data);
6460 parser->m_commentHandler(parser->m_handlerArg, data);
6461 poolClear(&parser->m_tempPool);
6462 return 1;
6463 }
6464
6465 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6466 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6467 const char *end) {
6468 if (MUST_CONVERT(enc, s)) {
6469 enum XML_Convert_Result convert_res;
6470 const char **eventPP;
6471 const char **eventEndPP;
6472 if (enc == parser->m_encoding) {
6473 eventPP = &parser->m_eventPtr;
6474 eventEndPP = &parser->m_eventEndPtr;
6475 } else {
6476 /* To get here, two things must be true; the parser must be
6477 * using a character encoding that is not the same as the
6478 * encoding passed in, and the encoding passed in must need
6479 * conversion to the internal format (UTF-8 unless XML_UNICODE
6480 * is defined). The only occasions on which the encoding passed
6481 * in is not the same as the parser's encoding are when it is
6482 * the internal encoding (e.g. a previously defined parameter
6483 * entity, already converted to internal format). This by
6484 * definition doesn't need conversion, so the whole branch never
6485 * gets executed.
6486 *
6487 * For safety's sake we don't delete these lines and merely
6488 * exclude them from coverage statistics.
6489 *
6490 * LCOV_EXCL_START
6491 */
6492 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6493 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6494 /* LCOV_EXCL_STOP */
6495 }
6496 do {
6497 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6498 convert_res
6499 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6500 *eventEndPP = s;
6501 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6502 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6503 *eventPP = s;
6504 } while ((convert_res != XML_CONVERT_COMPLETED)
6505 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6506 } else
6507 parser->m_defaultHandler(
6508 parser->m_handlerArg, (const XML_Char *)s,
6509 (int)((const XML_Char *)end - (const XML_Char *)s));
6510 }
6511
6512 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6513 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6514 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6515 DEFAULT_ATTRIBUTE *att;
6516 if (value || isId) {
6517 /* The handling of default attributes gets messed up if we have
6518 a default which duplicates a non-default. */
6519 int i;
6520 for (i = 0; i < type->nDefaultAtts; i++)
6521 if (attId == type->defaultAtts[i].id)
6522 return 1;
6523 if (isId && ! type->idAtt && ! attId->xmlns)
6524 type->idAtt = attId;
6525 }
6526 if (type->nDefaultAtts == type->allocDefaultAtts) {
6527 if (type->allocDefaultAtts == 0) {
6528 type->allocDefaultAtts = 8;
6529 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6530 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6531 if (! type->defaultAtts) {
6532 type->allocDefaultAtts = 0;
6533 return 0;
6534 }
6535 } else {
6536 DEFAULT_ATTRIBUTE *temp;
6537
6538 /* Detect and prevent integer overflow */
6539 if (type->allocDefaultAtts > INT_MAX / 2) {
6540 return 0;
6541 }
6542
6543 int count = type->allocDefaultAtts * 2;
6544
6545 /* Detect and prevent integer overflow.
6546 * The preprocessor guard addresses the "always false" warning
6547 * from -Wtype-limits on platforms where
6548 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6549 #if UINT_MAX >= SIZE_MAX
6550 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6551 return 0;
6552 }
6553 #endif
6554
6555 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6556 (count * sizeof(DEFAULT_ATTRIBUTE)));
6557 if (temp == NULL)
6558 return 0;
6559 type->allocDefaultAtts = count;
6560 type->defaultAtts = temp;
6561 }
6562 }
6563 att = type->defaultAtts + type->nDefaultAtts;
6564 att->id = attId;
6565 att->value = value;
6566 att->isCdata = isCdata;
6567 if (! isCdata)
6568 attId->maybeTokenized = XML_TRUE;
6569 type->nDefaultAtts += 1;
6570 return 1;
6571 }
6572
6573 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6574 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6575 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6576 const XML_Char *name;
6577 for (name = elementType->name; *name; name++) {
6578 if (*name == XML_T(ASCII_COLON)) {
6579 PREFIX *prefix;
6580 const XML_Char *s;
6581 for (s = elementType->name; s != name; s++) {
6582 if (! poolAppendChar(&dtd->pool, *s))
6583 return 0;
6584 }
6585 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6586 return 0;
6587 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6588 sizeof(PREFIX));
6589 if (! prefix)
6590 return 0;
6591 if (prefix->name == poolStart(&dtd->pool))
6592 poolFinish(&dtd->pool);
6593 else
6594 poolDiscard(&dtd->pool);
6595 elementType->prefix = prefix;
6596 break;
6597 }
6598 }
6599 return 1;
6600 }
6601
6602 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6603 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6604 const char *end) {
6605 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6606 ATTRIBUTE_ID *id;
6607 const XML_Char *name;
6608 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6609 return NULL;
6610 name = poolStoreString(&dtd->pool, enc, start, end);
6611 if (! name)
6612 return NULL;
6613 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6614 ++name;
6615 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6616 sizeof(ATTRIBUTE_ID));
6617 if (! id)
6618 return NULL;
6619 if (id->name != name)
6620 poolDiscard(&dtd->pool);
6621 else {
6622 poolFinish(&dtd->pool);
6623 if (! parser->m_ns)
6624 ;
6625 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6626 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6627 && name[4] == XML_T(ASCII_s)
6628 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6629 if (name[5] == XML_T('\0'))
6630 id->prefix = &dtd->defaultPrefix;
6631 else
6632 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6633 sizeof(PREFIX));
6634 id->xmlns = XML_TRUE;
6635 } else {
6636 int i;
6637 for (i = 0; name[i]; i++) {
6638 /* attributes without prefix are *not* in the default namespace */
6639 if (name[i] == XML_T(ASCII_COLON)) {
6640 int j;
6641 for (j = 0; j < i; j++) {
6642 if (! poolAppendChar(&dtd->pool, name[j]))
6643 return NULL;
6644 }
6645 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6646 return NULL;
6647 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6648 poolStart(&dtd->pool), sizeof(PREFIX));
6649 if (! id->prefix)
6650 return NULL;
6651 if (id->prefix->name == poolStart(&dtd->pool))
6652 poolFinish(&dtd->pool);
6653 else
6654 poolDiscard(&dtd->pool);
6655 break;
6656 }
6657 }
6658 }
6659 }
6660 return id;
6661 }
6662
6663 #define CONTEXT_SEP XML_T(ASCII_FF)
6664
6665 static const XML_Char *
getContext(XML_Parser parser)6666 getContext(XML_Parser parser) {
6667 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6668 HASH_TABLE_ITER iter;
6669 XML_Bool needSep = XML_FALSE;
6670
6671 if (dtd->defaultPrefix.binding) {
6672 int i;
6673 int len;
6674 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6675 return NULL;
6676 len = dtd->defaultPrefix.binding->uriLen;
6677 if (parser->m_namespaceSeparator)
6678 len--;
6679 for (i = 0; i < len; i++) {
6680 if (! poolAppendChar(&parser->m_tempPool,
6681 dtd->defaultPrefix.binding->uri[i])) {
6682 /* Because of memory caching, I don't believe this line can be
6683 * executed.
6684 *
6685 * This is part of a loop copying the default prefix binding
6686 * URI into the parser's temporary string pool. Previously,
6687 * that URI was copied into the same string pool, with a
6688 * terminating NUL character, as part of setContext(). When
6689 * the pool was cleared, that leaves a block definitely big
6690 * enough to hold the URI on the free block list of the pool.
6691 * The URI copy in getContext() therefore cannot run out of
6692 * memory.
6693 *
6694 * If the pool is used between the setContext() and
6695 * getContext() calls, the worst it can do is leave a bigger
6696 * block on the front of the free list. Given that this is
6697 * all somewhat inobvious and program logic can be changed, we
6698 * don't delete the line but we do exclude it from the test
6699 * coverage statistics.
6700 */
6701 return NULL; /* LCOV_EXCL_LINE */
6702 }
6703 }
6704 needSep = XML_TRUE;
6705 }
6706
6707 hashTableIterInit(&iter, &(dtd->prefixes));
6708 for (;;) {
6709 int i;
6710 int len;
6711 const XML_Char *s;
6712 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6713 if (! prefix)
6714 break;
6715 if (! prefix->binding) {
6716 /* This test appears to be (justifiable) paranoia. There does
6717 * not seem to be a way of injecting a prefix without a binding
6718 * that doesn't get errored long before this function is called.
6719 * The test should remain for safety's sake, so we instead
6720 * exclude the following line from the coverage statistics.
6721 */
6722 continue; /* LCOV_EXCL_LINE */
6723 }
6724 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6725 return NULL;
6726 for (s = prefix->name; *s; s++)
6727 if (! poolAppendChar(&parser->m_tempPool, *s))
6728 return NULL;
6729 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6730 return NULL;
6731 len = prefix->binding->uriLen;
6732 if (parser->m_namespaceSeparator)
6733 len--;
6734 for (i = 0; i < len; i++)
6735 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6736 return NULL;
6737 needSep = XML_TRUE;
6738 }
6739
6740 hashTableIterInit(&iter, &(dtd->generalEntities));
6741 for (;;) {
6742 const XML_Char *s;
6743 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6744 if (! e)
6745 break;
6746 if (! e->open)
6747 continue;
6748 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6749 return NULL;
6750 for (s = e->name; *s; s++)
6751 if (! poolAppendChar(&parser->m_tempPool, *s))
6752 return 0;
6753 needSep = XML_TRUE;
6754 }
6755
6756 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6757 return NULL;
6758 return parser->m_tempPool.start;
6759 }
6760
/* Apply a context string to the parser.

   The string is scanned as a sequence of entries separated by
   CONTEXT_SEP:
     - an entry without '=' names a general entity; if it is found in
       dtd->generalEntities it is marked open;
     - an entry "prefix=uri" adds a binding for that prefix to
       m_inheritedBindings; an empty prefix stands for the default
       prefix.

   Characters are accumulated in the parser's temp pool until a
   separator, '=' or the terminating NUL is seen.

   Returns XML_FALSE if context is NULL, on allocation failure, or if
   addBinding() reports an error; XML_TRUE otherwise. */
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
  if (context == NULL) {
    return XML_FALSE;
  }

  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  /* "s" is the scan position; "context" trails behind and marks the
     start of the entry that is still being processed. */
  const XML_Char *s = context;

  while (*context != XML_T('\0')) {
    if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
      /* End of an entry without '=': the temp pool holds an entity name. */
      ENTITY *e;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      e = (ENTITY *)lookup(parser, &dtd->generalEntities,
                           poolStart(&parser->m_tempPool), 0);
      if (e)
        e->open = XML_TRUE;
      if (*s != XML_T('\0'))
        s++;
      context = s;
      poolDiscard(&parser->m_tempPool);
    } else if (*s == XML_T(ASCII_EQUALS)) {
      /* The temp pool holds the prefix (possibly empty); the URI follows. */
      PREFIX *prefix;
      if (poolLength(&parser->m_tempPool) == 0)
        prefix = &dtd->defaultPrefix;
      else {
        if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
          return XML_FALSE;
        prefix
            = (PREFIX *)lookup(parser, &dtd->prefixes,
                               poolStart(&parser->m_tempPool), sizeof(PREFIX));
        if (! prefix)
          return XML_FALSE;
        /* A newly created entry still points into the temp pool; give
           it a copy of the name in the DTD pool instead. */
        if (prefix->name == poolStart(&parser->m_tempPool)) {
          prefix->name = poolCopyString(&dtd->pool, prefix->name);
          if (! prefix->name)
            return XML_FALSE;
        }
        poolDiscard(&parser->m_tempPool);
      }
      /* Collect the URI up to the next separator or the end. */
      for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
           context++)
        if (! poolAppendChar(&parser->m_tempPool, *context))
          return XML_FALSE;
      if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
        return XML_FALSE;
      if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
                     &parser->m_inheritedBindings)
          != XML_ERROR_NONE)
        return XML_FALSE;
      poolDiscard(&parser->m_tempPool);
      if (*context != XML_T('\0'))
        ++context;
      s = context;
    } else {
      /* Ordinary character: part of an entity name or a prefix. */
      if (! poolAppendChar(&parser->m_tempPool, *s))
        return XML_FALSE;
      s++;
    }
  }
  return XML_TRUE;
}
6824
6825 static void FASTCALL
normalizePublicId(XML_Char * publicId)6826 normalizePublicId(XML_Char *publicId) {
6827 XML_Char *p = publicId;
6828 XML_Char *s;
6829 for (s = publicId; *s; s++) {
6830 switch (*s) {
6831 case 0x20:
6832 case 0xD:
6833 case 0xA:
6834 if (p != publicId && p[-1] != 0x20)
6835 *p++ = 0x20;
6836 break;
6837 default:
6838 *p++ = *s;
6839 }
6840 }
6841 if (p != publicId && p[-1] == 0x20)
6842 --p;
6843 *p = XML_T('\0');
6844 }
6845
6846 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6847 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6848 DTD *p = ms->malloc_fcn(sizeof(DTD));
6849 if (p == NULL)
6850 return p;
6851 poolInit(&(p->pool), ms);
6852 poolInit(&(p->entityValuePool), ms);
6853 hashTableInit(&(p->generalEntities), ms);
6854 hashTableInit(&(p->elementTypes), ms);
6855 hashTableInit(&(p->attributeIds), ms);
6856 hashTableInit(&(p->prefixes), ms);
6857 #ifdef XML_DTD
6858 p->paramEntityRead = XML_FALSE;
6859 hashTableInit(&(p->paramEntities), ms);
6860 #endif /* XML_DTD */
6861 p->defaultPrefix.name = NULL;
6862 p->defaultPrefix.binding = NULL;
6863
6864 p->in_eldecl = XML_FALSE;
6865 p->scaffIndex = NULL;
6866 p->scaffold = NULL;
6867 p->scaffLevel = 0;
6868 p->scaffSize = 0;
6869 p->scaffCount = 0;
6870 p->contentStringLen = 0;
6871
6872 p->keepProcessing = XML_TRUE;
6873 p->hasParamEntityRefs = XML_FALSE;
6874 p->standalone = XML_FALSE;
6875 return p;
6876 }
6877
6878 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6879 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6880 HASH_TABLE_ITER iter;
6881 hashTableIterInit(&iter, &(p->elementTypes));
6882 for (;;) {
6883 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6884 if (! e)
6885 break;
6886 if (e->allocDefaultAtts != 0)
6887 ms->free_fcn(e->defaultAtts);
6888 }
6889 hashTableClear(&(p->generalEntities));
6890 #ifdef XML_DTD
6891 p->paramEntityRead = XML_FALSE;
6892 hashTableClear(&(p->paramEntities));
6893 #endif /* XML_DTD */
6894 hashTableClear(&(p->elementTypes));
6895 hashTableClear(&(p->attributeIds));
6896 hashTableClear(&(p->prefixes));
6897 poolClear(&(p->pool));
6898 poolClear(&(p->entityValuePool));
6899 p->defaultPrefix.name = NULL;
6900 p->defaultPrefix.binding = NULL;
6901
6902 p->in_eldecl = XML_FALSE;
6903
6904 ms->free_fcn(p->scaffIndex);
6905 p->scaffIndex = NULL;
6906 ms->free_fcn(p->scaffold);
6907 p->scaffold = NULL;
6908
6909 p->scaffLevel = 0;
6910 p->scaffSize = 0;
6911 p->scaffCount = 0;
6912 p->contentStringLen = 0;
6913
6914 p->keepProcessing = XML_TRUE;
6915 p->hasParamEntityRefs = XML_FALSE;
6916 p->standalone = XML_FALSE;
6917 }
6918
6919 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6920 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6921 HASH_TABLE_ITER iter;
6922 hashTableIterInit(&iter, &(p->elementTypes));
6923 for (;;) {
6924 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6925 if (! e)
6926 break;
6927 if (e->allocDefaultAtts != 0)
6928 ms->free_fcn(e->defaultAtts);
6929 }
6930 hashTableDestroy(&(p->generalEntities));
6931 #ifdef XML_DTD
6932 hashTableDestroy(&(p->paramEntities));
6933 #endif /* XML_DTD */
6934 hashTableDestroy(&(p->elementTypes));
6935 hashTableDestroy(&(p->attributeIds));
6936 hashTableDestroy(&(p->prefixes));
6937 poolDestroy(&(p->pool));
6938 poolDestroy(&(p->entityValuePool));
6939 if (isDocEntity) {
6940 ms->free_fcn(p->scaffIndex);
6941 ms->free_fcn(p->scaffold);
6942 }
6943 ms->free_fcn(p);
6944 }
6945
/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Prefixes, attribute ids, element types (with their default
   attributes) and the entity tables are duplicated into newDtd, with
   all strings copied into newDtd->pool.  Pointers between the copied
   objects are re-established by looking the names up in the new tables.
   The scaffolding members are not duplicated: newDtd receives the same
   pointers as oldDtd.

   oldParser is only passed on to lookup(); ms supplies the allocator
   for the default attribute arrays.
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        const XML_Memory_Handling_Suite *ms) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Step over the scratch character so "name" points at the real name. */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* Map the old prefix to its counterpart in the new DTD. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts
          > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
        return 0;
      }
#endif
      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    /* The new array is sized exactly to the number of entries in use. */
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */
7084
7085 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)7086 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7087 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7088 HASH_TABLE_ITER iter;
7089 const XML_Char *cachedOldBase = NULL;
7090 const XML_Char *cachedNewBase = NULL;
7091
7092 hashTableIterInit(&iter, oldTable);
7093
7094 for (;;) {
7095 ENTITY *newE;
7096 const XML_Char *name;
7097 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7098 if (! oldE)
7099 break;
7100 name = poolCopyString(newPool, oldE->name);
7101 if (! name)
7102 return 0;
7103 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7104 if (! newE)
7105 return 0;
7106 if (oldE->systemId) {
7107 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7108 if (! tem)
7109 return 0;
7110 newE->systemId = tem;
7111 if (oldE->base) {
7112 if (oldE->base == cachedOldBase)
7113 newE->base = cachedNewBase;
7114 else {
7115 cachedOldBase = oldE->base;
7116 tem = poolCopyString(newPool, cachedOldBase);
7117 if (! tem)
7118 return 0;
7119 cachedNewBase = newE->base = tem;
7120 }
7121 }
7122 if (oldE->publicId) {
7123 tem = poolCopyString(newPool, oldE->publicId);
7124 if (! tem)
7125 return 0;
7126 newE->publicId = tem;
7127 }
7128 } else {
7129 const XML_Char *tem
7130 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7131 if (! tem)
7132 return 0;
7133 newE->textPtr = tem;
7134 newE->textLen = oldE->textLen;
7135 }
7136 if (oldE->notation) {
7137 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7138 if (! tem)
7139 return 0;
7140 newE->notation = tem;
7141 }
7142 newE->is_param = oldE->is_param;
7143 newE->is_internal = oldE->is_internal;
7144 }
7145 return 1;
7146 }
7147
/* log2 of the slot count lookup() allocates for a hash table whose size is
   still 0: (size_t)1 << INIT_POWER slots. */
#define INIT_POWER 6
7149
7150 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)7151 keyeq(KEY s1, KEY s2) {
7152 for (; *s1 == *s2; s1++, s2++)
7153 if (*s1 == 0)
7154 return XML_TRUE;
7155 return XML_FALSE;
7156 }
7157
7158 static size_t
keylen(KEY s)7159 keylen(KEY s) {
7160 size_t len = 0;
7161 for (; *s; s++, len++)
7162 ;
7163 return len;
7164 }
7165
7166 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)7167 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7168 key->k[0] = 0;
7169 key->k[1] = get_hash_secret_salt(parser);
7170 }
7171
7172 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)7173 hash(XML_Parser parser, KEY s) {
7174 struct siphash state;
7175 struct sipkey key;
7176 (void)sip24_valid;
7177 copy_salt_to_sipkey(parser, &key);
7178 sip24_init(&state, &key);
7179 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7180 return (unsigned long)sip24_final(&state);
7181 }
7182
/* Find the entry named `name` in `table`, optionally creating it.
 *
 * - If an entry whose name compares equal (keyeq) exists, it is returned.
 * - Otherwise, if createSize is 0, NULL is returned and nothing changes.
 * - Otherwise a new entry of createSize bytes is allocated, zero-filled,
 *   given `name` as its name (the pointer is stored, not a copy), inserted,
 *   and returned.
 *
 * NULL is also returned when an allocation fails or when the table cannot
 * grow any further.  The table uses open addressing; its slot array is
 * allocated lazily on the first insertion and doubled once it is half full.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: allocate the initial slot array (only when inserting) */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Roll back so that the table still looks empty */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    /* All slots are free, so the home slot can be used without probing */
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0; /* 0 means "probe step not computed yet" */
    i = h & mask;
    /* Probe until the key or a free slot is found */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      /* Step backwards through the array, wrapping around below index 0 */
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the larger array */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* Find the free slot for the new key in the resized array */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* i now indexes a free slot: create the new entry there */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7271
7272 static void FASTCALL
hashTableClear(HASH_TABLE * table)7273 hashTableClear(HASH_TABLE *table) {
7274 size_t i;
7275 for (i = 0; i < table->size; i++) {
7276 table->mem->free_fcn(table->v[i]);
7277 table->v[i] = NULL;
7278 }
7279 table->used = 0;
7280 }
7281
7282 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)7283 hashTableDestroy(HASH_TABLE *table) {
7284 size_t i;
7285 for (i = 0; i < table->size; i++)
7286 table->mem->free_fcn(table->v[i]);
7287 table->mem->free_fcn(table->v);
7288 }
7289
7290 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)7291 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7292 p->power = 0;
7293 p->size = 0;
7294 p->used = 0;
7295 p->v = NULL;
7296 p->mem = ms;
7297 }
7298
7299 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7300 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7301 iter->p = table->v;
7302 iter->end = iter->p ? iter->p + table->size : NULL;
7303 }
7304
7305 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7306 hashTableIterNext(HASH_TABLE_ITER *iter) {
7307 while (iter->p != iter->end) {
7308 NAMED *tem = *(iter->p)++;
7309 if (tem)
7310 return tem;
7311 }
7312 return NULL;
7313 }
7314
7315 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7316 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7317 pool->blocks = NULL;
7318 pool->freeBlocks = NULL;
7319 pool->start = NULL;
7320 pool->ptr = NULL;
7321 pool->end = NULL;
7322 pool->mem = ms;
7323 }
7324
7325 static void FASTCALL
poolClear(STRING_POOL * pool)7326 poolClear(STRING_POOL *pool) {
7327 if (! pool->freeBlocks)
7328 pool->freeBlocks = pool->blocks;
7329 else {
7330 BLOCK *p = pool->blocks;
7331 while (p) {
7332 BLOCK *tem = p->next;
7333 p->next = pool->freeBlocks;
7334 pool->freeBlocks = p;
7335 p = tem;
7336 }
7337 }
7338 pool->blocks = NULL;
7339 pool->start = NULL;
7340 pool->ptr = NULL;
7341 pool->end = NULL;
7342 }
7343
7344 static void FASTCALL
poolDestroy(STRING_POOL * pool)7345 poolDestroy(STRING_POOL *pool) {
7346 BLOCK *p = pool->blocks;
7347 while (p) {
7348 BLOCK *tem = p->next;
7349 pool->mem->free_fcn(p);
7350 p = tem;
7351 }
7352 p = pool->freeBlocks;
7353 while (p) {
7354 BLOCK *tem = p->next;
7355 pool->mem->free_fcn(p);
7356 p = tem;
7357 }
7358 }
7359
7360 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7361 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7362 const char *end) {
7363 if (! pool->ptr && ! poolGrow(pool))
7364 return NULL;
7365 for (;;) {
7366 const enum XML_Convert_Result convert_res = XmlConvert(
7367 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7368 if ((convert_res == XML_CONVERT_COMPLETED)
7369 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7370 break;
7371 if (! poolGrow(pool))
7372 return NULL;
7373 }
7374 return pool->start;
7375 }
7376
7377 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7378 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7379 do {
7380 if (! poolAppendChar(pool, *s))
7381 return NULL;
7382 } while (*s++);
7383 s = pool->start;
7384 poolFinish(pool);
7385 return s;
7386 }
7387
7388 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7389 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7390 if (! pool->ptr && ! poolGrow(pool)) {
7391 /* The following line is unreachable given the current usage of
7392 * poolCopyStringN(). Currently it is called from exactly one
7393 * place to copy the text of a simple general entity. By that
7394 * point, the name of the entity is already stored in the pool, so
7395 * pool->ptr cannot be NULL.
7396 *
7397 * If poolCopyStringN() is used elsewhere as it well might be,
7398 * this line may well become executable again. Regardless, this
7399 * sort of check shouldn't be removed lightly, so we just exclude
7400 * it from the coverage statistics.
7401 */
7402 return NULL; /* LCOV_EXCL_LINE */
7403 }
7404 for (; n > 0; --n, s++) {
7405 if (! poolAppendChar(pool, *s))
7406 return NULL;
7407 }
7408 s = pool->start;
7409 poolFinish(pool);
7410 return s;
7411 }
7412
7413 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7414 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7415 while (*s) {
7416 if (! poolAppendChar(pool, *s))
7417 return NULL;
7418 s++;
7419 }
7420 return pool->start;
7421 }
7422
7423 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7424 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7425 const char *end) {
7426 if (! poolAppend(pool, enc, ptr, end))
7427 return NULL;
7428 if (pool->ptr == pool->end && ! poolGrow(pool))
7429 return NULL;
7430 *(pool->ptr)++ = 0;
7431 return pool->start;
7432 }
7433
7434 static size_t
poolBytesToAllocateFor(int blockSize)7435 poolBytesToAllocateFor(int blockSize) {
7436 /* Unprotected math would be:
7437 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7438 **
7439 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7440 ** For a + b * c we check b * c in isolation first, so that addition of a
7441 ** on top has no chance of making us accept a small non-negative number
7442 */
7443 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7444
7445 if (blockSize <= 0)
7446 return 0;
7447
7448 if (blockSize > (int)(INT_MAX / stretch))
7449 return 0;
7450
7451 {
7452 const int stretchedBlockSize = blockSize * (int)stretch;
7453 const int bytesToAllocate
7454 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7455 if (bytesToAllocate < 0)
7456 return 0;
7457
7458 return (size_t)bytesToAllocate;
7459 }
7460 }
7461
/* Make room for more characters in the pool's current (unfinished) string.
 *
 * Returns XML_TRUE on success and XML_FALSE if memory could not be
 * obtained or a size computation would overflow.  On success pool->start,
 * pool->ptr and pool->end refer to a block with free space, and the
 * characters of the current string have been carried over.
 *
 * Strategy, in order of preference:
 *  1. no current string and a free block exists: reuse the first free block;
 *  2. the first free block is larger than the current block: move the
 *     current block's content into it;
 *  3. the current string starts at the beginning of the newest block:
 *     realloc that block to twice its size;
 *  4. otherwise: allocate a fresh block and copy the current string over.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* Case 1: take the first free block as the (only) block in use */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* Case 2: relink the first free block to the front of the used list
         and move the data from pool->start up to pool->end into it */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* Case 3: the current string owns the whole newest block; double it */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than INT_MAX/2
       * bytes have already been allocated.  This isn't readily
       * testable, since it is unlikely that an average machine will
       * have that much memory, so we exclude it from the coverage
       * statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    /* The block may have moved: re-derive all pointers from the new one */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Case 4: allocate a new block and copy the current string into it */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry over only the characters written so far (start up to ptr) */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7565
/* Reserve the next free CONTENT_SCAFFOLD slot of the parser's DTD and
 * return its index, or -1 if memory could not be obtained or a size
 * computation would overflow.
 *
 * dtd->scaffIndex is allocated on first use and dtd->scaffold is allocated
 * or doubled when full.  If dtd->scaffLevel is non-zero, the new node is
 * appended to the child list of the node at scaffIndex[scaffLevel - 1].
 * The new node's firstchild, lastchild, childcnt and nextsib are zeroed;
 * its other fields are left untouched.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
      return -1;
    }
#endif
    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* Scaffold array is full (or not allocated yet) */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = (CONTENT_SCAFFOLD *)REALLOC(
          parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize *= 2;
    } else {
      temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
                                                    * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    /* Link the new node in as the last child of its parent */
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    /* A lastchild of 0 is treated as "no child yet" */
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}
7635
/* Build the XML_Content tree for the scaffold currently held in
 * parser->m_dtd.
 *
 * Returns a single allocation (obtained through MALLOC) that holds
 * dtd->scaffCount XML_Content nodes followed by the name strings, or NULL
 * if the size computation would overflow or the allocation fails.  The
 * scaffold itself is only read, not modified.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must not overflow either */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabyte, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The strings start right behind the last tree node */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, to the string area */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      /* From here on numchildren holds the real child count again */
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
7762
7763 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7764 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7765 const char *end) {
7766 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7767 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7768 ELEMENT_TYPE *ret;
7769
7770 if (! name)
7771 return NULL;
7772 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7773 sizeof(ELEMENT_TYPE));
7774 if (! ret)
7775 return NULL;
7776 if (ret->name != name)
7777 poolDiscard(&dtd->pool);
7778 else {
7779 poolFinish(&dtd->pool);
7780 if (! setElementTypePrefix(parser, ret))
7781 return NULL;
7782 }
7783 return ret;
7784 }
7785
7786 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7787 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7788 size_t charsRequired = 0;
7789 XML_Char *result;
7790
7791 /* First determine how long the string is */
7792 while (s[charsRequired] != 0) {
7793 charsRequired++;
7794 }
7795 /* Include the terminator */
7796 charsRequired++;
7797
7798 /* Now allocate space for the copy */
7799 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7800 if (result == NULL)
7801 return NULL;
7802 /* Copy the original into place */
7803 memcpy(result, s, charsRequired * sizeof(XML_Char));
7804 return result;
7805 }
7806
7807 #if XML_GE == 1
7808
7809 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7810 accountingGetCurrentAmplification(XML_Parser rootParser) {
7811 // 1.........1.........12 => 22
7812 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
7813 const XmlBigCount countBytesOutput
7814 = rootParser->m_accounting.countBytesDirect
7815 + rootParser->m_accounting.countBytesIndirect;
7816 const float amplificationFactor
7817 = rootParser->m_accounting.countBytesDirect
7818 ? (countBytesOutput
7819 / (float)(rootParser->m_accounting.countBytesDirect))
7820 : ((lenOfShortestInclude
7821 + rootParser->m_accounting.countBytesIndirect)
7822 / (float)lenOfShortestInclude);
7823 assert(! rootParser->m_parentParser);
7824 return amplificationFactor;
7825 }
7826
7827 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7828 accountingReportStats(XML_Parser originParser, const char *epilog) {
7829 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7830 assert(! rootParser->m_parentParser);
7831
7832 if (rootParser->m_accounting.debugLevel == 0u) {
7833 return;
7834 }
7835
7836 const float amplificationFactor
7837 = accountingGetCurrentAmplification(rootParser);
7838 fprintf(stderr,
7839 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7840 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7841 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7842 rootParser->m_accounting.countBytesIndirect,
7843 (double)amplificationFactor, epilog);
7844 }
7845
7846 static void
accountingOnAbort(XML_Parser originParser)7847 accountingOnAbort(XML_Parser originParser) {
7848 accountingReportStats(originParser, " ABORTING\n");
7849 }
7850
7851 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7852 accountingReportDiff(XML_Parser rootParser,
7853 unsigned int levelsAwayFromRootParser, const char *before,
7854 const char *after, ptrdiff_t bytesMore, int source_line,
7855 enum XML_Account account) {
7856 assert(! rootParser->m_parentParser);
7857
7858 fprintf(stderr,
7859 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7860 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7861 levelsAwayFromRootParser, source_line, 10, "");
7862
7863 const char ellipis[] = "[..]";
7864 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7865 const unsigned int contextLength = 10;
7866
7867 /* Note: Performance is of no concern here */
7868 const char *walker = before;
7869 if ((rootParser->m_accounting.debugLevel >= 3u)
7870 || (after - before)
7871 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7872 for (; walker < after; walker++) {
7873 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7874 }
7875 } else {
7876 for (; walker < before + contextLength; walker++) {
7877 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7878 }
7879 fprintf(stderr, ellipis);
7880 walker = after - contextLength;
7881 for (; walker < after; walker++) {
7882 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7883 }
7884 }
7885 fprintf(stderr, "\"\n");
7886 }
7887
7888 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7889 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7890 const char *after, int source_line,
7891 enum XML_Account account) {
7892 /* Note: We need to check the token type *first* to be sure that
7893 * we can even access variable <after>, safely.
7894 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7895 switch (tok) {
7896 case XML_TOK_INVALID:
7897 case XML_TOK_PARTIAL:
7898 case XML_TOK_PARTIAL_CHAR:
7899 case XML_TOK_NONE:
7900 return XML_TRUE;
7901 }
7902
7903 if (account == XML_ACCOUNT_NONE)
7904 return XML_TRUE; /* because these bytes have been accounted for, already */
7905
7906 unsigned int levelsAwayFromRootParser;
7907 const XML_Parser rootParser
7908 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7909 assert(! rootParser->m_parentParser);
7910
7911 const int isDirect
7912 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7913 const ptrdiff_t bytesMore = after - before;
7914
7915 XmlBigCount *const additionTarget
7916 = isDirect ? &rootParser->m_accounting.countBytesDirect
7917 : &rootParser->m_accounting.countBytesIndirect;
7918
7919 /* Detect and avoid integer overflow */
7920 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7921 return XML_FALSE;
7922 *additionTarget += bytesMore;
7923
7924 const XmlBigCount countBytesOutput
7925 = rootParser->m_accounting.countBytesDirect
7926 + rootParser->m_accounting.countBytesIndirect;
7927 const float amplificationFactor
7928 = accountingGetCurrentAmplification(rootParser);
7929 const XML_Bool tolerated
7930 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7931 || (amplificationFactor
7932 <= rootParser->m_accounting.maximumAmplificationFactor);
7933
7934 if (rootParser->m_accounting.debugLevel >= 2u) {
7935 accountingReportStats(rootParser, "");
7936 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7937 bytesMore, source_line, account);
7938 }
7939
7940 return tolerated;
7941 }
7942
7943 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7944 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7945 if (! parser)
7946 return 0;
7947 return parser->m_accounting.countBytesDirect;
7948 }
7949
7950 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7951 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7952 if (! parser)
7953 return 0;
7954 return parser->m_accounting.countBytesIndirect;
7955 }
7956
7957 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7958 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7959 const char *action, int sourceLine) {
7960 assert(! rootParser->m_parentParser);
7961 if (rootParser->m_entity_stats.debugLevel == 0u)
7962 return;
7963
7964 # if defined(XML_UNICODE)
7965 const char *const entityName = "[..]";
7966 # else
7967 const char *const entityName = entity->name;
7968 # endif
7969
7970 fprintf(
7971 stderr,
7972 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7973 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7974 rootParser->m_entity_stats.currentDepth,
7975 rootParser->m_entity_stats.maximumDepthSeen,
7976 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7977 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7978 sourceLine);
7979 }
7980
7981 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7982 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7983 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7984 assert(! rootParser->m_parentParser);
7985
7986 rootParser->m_entity_stats.countEverOpened++;
7987 rootParser->m_entity_stats.currentDepth++;
7988 if (rootParser->m_entity_stats.currentDepth
7989 > rootParser->m_entity_stats.maximumDepthSeen) {
7990 rootParser->m_entity_stats.maximumDepthSeen++;
7991 }
7992
7993 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7994 }
7995
7996 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7997 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7998 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7999 assert(! rootParser->m_parentParser);
8000
8001 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8002 rootParser->m_entity_stats.currentDepth--;
8003 }
8004
8005 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)8006 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8007 XML_Parser rootParser = parser;
8008 unsigned int stepsTakenUpwards = 0;
8009 while (rootParser->m_parentParser) {
8010 rootParser = rootParser->m_parentParser;
8011 stepsTakenUpwards++;
8012 }
8013 assert(! rootParser->m_parentParser);
8014 if (outLevelDiff != NULL) {
8015 *outLevelDiff = stepsTakenUpwards;
8016 }
8017 return rootParser;
8018 }
8019
/* Maps a single byte to a constant, NUL-terminated, human-readable string.

   - Bytes 32..126 map to themselves, except that the double quote and the
     backslash are returned preceded by a backslash.
   - Byte 0 maps to "\0", 9 to "\t", 10 to "\n" and 13 to "\r"
     (each as a literal backslash followed by the letter/digit).
   - Every other byte maps to "\x" followed by its value in uppercase
     hexadecimal without zero padding (e.g. 1 -> "\x1", 255 -> "\xFF").

   The returned pointer refers to a string literal and must not be modified
   or freed by the caller. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value; the comment at the start of each row gives
     the index of the row's first entry. */
  static const char *const printable[] = {
      /* 0x00 */ "\\0",   "\\x1",  "\\x2",  "\\x3",
      /* 0x04 */ "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /* 0x08 */ "\\x8",  "\\t",   "\\n",   "\\xB",
      /* 0x0C */ "\\xC",  "\\r",   "\\xE",  "\\xF",
      /* 0x10 */ "\\x10", "\\x11", "\\x12", "\\x13",
      /* 0x14 */ "\\x14", "\\x15", "\\x16", "\\x17",
      /* 0x18 */ "\\x18", "\\x19", "\\x1A", "\\x1B",
      /* 0x1C */ "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */ " ",     "!",     "\\\"",  "#",
      /* 0x24 */ "$",     "%",     "&",     "'",
      /* 0x28 */ "(",     ")",     "*",     "+",
      /* 0x2C */ ",",     "-",     ".",     "/",
      /* 0x30 */ "0",     "1",     "2",     "3",
      /* 0x34 */ "4",     "5",     "6",     "7",
      /* 0x38 */ "8",     "9",     ":",     ";",
      /* 0x3C */ "<",     "=",     ">",     "?",
      /* 0x40 */ "@",     "A",     "B",     "C",
      /* 0x44 */ "D",     "E",     "F",     "G",
      /* 0x48 */ "H",     "I",     "J",     "K",
      /* 0x4C */ "L",     "M",     "N",     "O",
      /* 0x50 */ "P",     "Q",     "R",     "S",
      /* 0x54 */ "T",     "U",     "V",     "W",
      /* 0x58 */ "X",     "Y",     "Z",     "[",
      /* 0x5C */ "\\\\",  "]",     "^",     "_",
      /* 0x60 */ "`",     "a",     "b",     "c",
      /* 0x64 */ "d",     "e",     "f",     "g",
      /* 0x68 */ "h",     "i",     "j",     "k",
      /* 0x6C */ "l",     "m",     "n",     "o",
      /* 0x70 */ "p",     "q",     "r",     "s",
      /* 0x74 */ "t",     "u",     "v",     "w",
      /* 0x78 */ "x",     "y",     "z",     "{",
      /* 0x7C */ "|",     "}",     "~",     "\\x7F",
      /* 0x80 */ "\\x80", "\\x81", "\\x82", "\\x83",
      /* 0x84 */ "\\x84", "\\x85", "\\x86", "\\x87",
      /* 0x88 */ "\\x88", "\\x89", "\\x8A", "\\x8B",
      /* 0x8C */ "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */ "\\x90", "\\x91", "\\x92", "\\x93",
      /* 0x94 */ "\\x94", "\\x95", "\\x96", "\\x97",
      /* 0x98 */ "\\x98", "\\x99", "\\x9A", "\\x9B",
      /* 0x9C */ "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3",
      /* 0xA4 */ "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 0xA8 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB",
      /* 0xAC */ "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3",
      /* 0xB4 */ "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 0xB8 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB",
      /* 0xBC */ "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3",
      /* 0xC4 */ "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 0xC8 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB",
      /* 0xCC */ "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3",
      /* 0xD4 */ "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 0xD8 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB",
      /* 0xDC */ "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3",
      /* 0xE4 */ "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 0xE8 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB",
      /* 0xEC */ "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3",
      /* 0xF4 */ "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 0xF8 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB",
      /* 0xFC */ "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };

  /* Widen first so the bounds check is a plain unsigned comparison. */
  const unsigned int index = c;
  if (index >= sizeof(printable) / sizeof(printable[0])) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return printable[index];
}
8541
8542 #endif /* XML_GE == 1 */
8543
/* Reads a debug level from the environment variable named `variableName`.

   Returns `defaultDebugLevel` when the variable is not set, or when
   strtoul (base 10) reports an error via errno, consumes no characters,
   or leaves unparsed characters behind. Otherwise returns the value
   produced by strtoul.

   Side effect: whenever the variable is set, errno is 0 on return (it is
   cleared before the conversion and again after a failed one). */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const text = getenv(variableName);
  if (text == NULL) {
    return defaultDebugLevel;
  }

  char *end = NULL;
  errno = 0; /* so that a failure inside strtoul can be detected below */
  const unsigned long parsed = strtoul(text, &end, 10);

  /* Accept only a conversion that raised no error, consumed at least one
     character, and ran up to the terminating NUL. */
  if ((errno == 0) && (end != text) && (end[0] == '\0')) {
    return parsed;
  }

  errno = 0;
  return defaultDebugLevel;
}
8562