1 /* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+) 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> 38 Copyright (c) 2022 Jann Horn 
<jannh@google.com> 39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 42 Copyright (c) 2024 Berkay Eren Ürün <berkay.ueruen@siemens.com> 43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 44 Licensed under the MIT license: 45 46 Permission is hereby granted, free of charge, to any person obtaining 47 a copy of this software and associated documentation files (the 48 "Software"), to deal in the Software without restriction, including 49 without limitation the rights to use, copy, modify, merge, publish, 50 distribute, sublicense, and/or sell copies of the Software, and to permit 51 persons to whom the Software is furnished to do so, subject to the 52 following conditions: 53 54 The above copyright notice and this permission notice shall be included 55 in all copies or substantial portions of the Software. 56 57 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 58 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 59 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 60 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 61 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 62 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 63 USE OR OTHER DEALINGS IN THE SOFTWARE. 64 */ 65 66 #define XML_BUILDING_EXPAT 1 67 68 #include "expat_config.h" 69 70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 71 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 72 #endif 73 74 #if defined(XML_DTD) && XML_GE == 0 75 # error Either undefine XML_DTD or define XML_GE to 1. 76 #endif 77 78 #if ! 
defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ 79 || (XML_CONTEXT_BYTES + 0 < 0) 80 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 81 #endif 82 83 #if defined(HAVE_SYSCALL_GETRANDOM) 84 # if ! defined(_GNU_SOURCE) 85 # define _GNU_SOURCE 1 /* syscall prototype */ 86 # endif 87 #endif 88 89 #ifdef _WIN32 90 /* force stdlib to define rand_s() */ 91 # if ! defined(_CRT_RAND_S) 92 # define _CRT_RAND_S 93 # endif 94 #endif 95 96 #include <stdbool.h> 97 #include <stddef.h> 98 #include <string.h> /* memset(), memcpy() */ 99 #include <assert.h> 100 #include <limits.h> /* UINT_MAX */ 101 #include <stdio.h> /* fprintf */ 102 #include <stdlib.h> /* getenv, rand_s */ 103 #include <stdint.h> /* uintptr_t */ 104 #include <math.h> /* isnan */ 105 106 #ifdef _WIN32 107 # define getpid GetCurrentProcessId 108 #else 109 # include <sys/time.h> /* gettimeofday() */ 110 # include <sys/types.h> /* getpid() */ 111 # include <unistd.h> /* getpid() */ 112 # include <fcntl.h> /* O_RDONLY */ 113 # include <errno.h> 114 #endif 115 116 #ifdef _WIN32 117 # include "winconfig.h" 118 #endif 119 120 #include "ascii.h" 121 #include "expat.h" 122 #include "siphash.h" 123 124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 125 # if defined(HAVE_GETRANDOM) 126 # include <sys/random.h> /* getrandom */ 127 # else 128 # include <unistd.h> /* syscall */ 129 # include <sys/syscall.h> /* SYS_getrandom */ 130 # endif 131 # if ! defined(GRND_NONBLOCK) 132 # define GRND_NONBLOCK 0x0001 133 # endif /* defined(GRND_NONBLOCK) */ 134 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 135 136 #if defined(HAVE_LIBBSD) \ 137 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) 138 # include <bsd/stdlib.h> 139 #endif 140 141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 142 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 143 #endif 144 145 #if ! 
defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ 146 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ 147 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ 148 && ! defined(XML_POOR_ENTROPY) 149 # error You do not have support for any sources of high quality entropy \ 150 enabled. For end user security, that is probably not what you want. \ 151 \ 152 Your options include: \ 153 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 154 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 155 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 156 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 157 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ 158 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ 159 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 160 * Windows >=Vista (rand_s): _WIN32. \ 161 \ 162 If insist on not using any of these, bypass this error by defining \ 163 XML_POOR_ENTROPY; you have been warned. \ 164 \ 165 If you have reasons to patch this detection code away or need changes \ 166 to the build system, please open a bug. Thank you! 167 #endif 168 169 #ifdef XML_UNICODE 170 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 171 # define XmlConvert XmlUtf16Convert 172 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 173 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 174 # define XmlEncode XmlUtf16Encode 175 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) 176 typedef unsigned short ICHAR; 177 #else 178 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 179 # define XmlConvert XmlUtf8Convert 180 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 181 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS 182 # define XmlEncode XmlUtf8Encode 183 # define MUST_CONVERT(enc, s) (! 
(enc)->isUtf8) 184 typedef char ICHAR; 185 #endif 186 187 #ifndef XML_NS 188 189 # define XmlInitEncodingNS XmlInitEncoding 190 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding 191 # undef XmlGetInternalEncodingNS 192 # define XmlGetInternalEncodingNS XmlGetInternalEncoding 193 # define XmlParseXmlDeclNS XmlParseXmlDecl 194 195 #endif 196 197 #ifdef XML_UNICODE 198 199 # ifdef XML_UNICODE_WCHAR_T 200 # define XML_T(x) (const wchar_t) x 201 # define XML_L(x) L##x 202 # else 203 # define XML_T(x) (const unsigned short)x 204 # define XML_L(x) x 205 # endif 206 207 #else 208 209 # define XML_T(x) x 210 # define XML_L(x) x 211 212 #endif 213 214 /* Round up n to be a multiple of sz, where sz is a power of 2. */ 215 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) 216 217 /* Do safe (NULL-aware) pointer arithmetic */ 218 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) 219 220 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) 221 222 #include "internal.h" 223 #include "xmltok.h" 224 #include "xmlrole.h" 225 226 typedef const XML_Char *KEY; 227 228 typedef struct { 229 KEY name; 230 } NAMED; 231 232 typedef struct { 233 NAMED **v; 234 unsigned char power; 235 size_t size; 236 size_t used; 237 const XML_Memory_Handling_Suite *mem; 238 } HASH_TABLE; 239 240 static size_t keylen(KEY s); 241 242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); 243 244 /* For probing (after a collision) we need a step size relative prime 245 to the hash table size, which is a power of 2. We use double-hashing, 246 since we can calculate a second hash value cheaply by taking those bits 247 of the first hash value that were discarded (masked out) when the table 248 index was calculated: index = hash & mask, where mask = table->size - 1. 249 We limit the maximum step size to table->size / 4 (mask >> 2) and make 250 it odd, since odd numbers are always relative prime to a power of 2. 
251 */ 252 #define SECOND_HASH(hash, mask, power) \ 253 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) 254 #define PROBE_STEP(hash, mask, power) \ 255 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) 256 257 typedef struct { 258 NAMED **p; 259 NAMED **end; 260 } HASH_TABLE_ITER; 261 262 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 263 #define INIT_DATA_BUF_SIZE 1024 264 #define INIT_ATTS_SIZE 16 265 #define INIT_ATTS_VERSION 0xFFFFFFFF 266 #define INIT_BLOCK_SIZE 1024 267 #define INIT_BUFFER_SIZE 1024 268 269 #define EXPAND_SPARE 24 270 271 typedef struct binding { 272 struct prefix *prefix; 273 struct binding *nextTagBinding; 274 struct binding *prevPrefixBinding; 275 const struct attribute_id *attId; 276 XML_Char *uri; 277 int uriLen; 278 int uriAlloc; 279 } BINDING; 280 281 typedef struct prefix { 282 const XML_Char *name; 283 BINDING *binding; 284 } PREFIX; 285 286 typedef struct { 287 const XML_Char *str; 288 const XML_Char *localPart; 289 const XML_Char *prefix; 290 int strLen; 291 int uriLen; 292 int prefixLen; 293 } TAG_NAME; 294 295 /* TAG represents an open element. 296 The name of the element is stored in both the document and API 297 encodings. The memory buffer 'buf' is a separately-allocated 298 memory area which stores the name. During the XML_Parse()/ 299 XML_ParseBuffer() when the element is open, the memory for the 'raw' 300 version of the name (in the document encoding) is shared with the 301 document buffer. If the element is open across calls to 302 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 303 contain the 'raw' name as well. 304 305 A parser reuses these structures, maintaining a list of allocated 306 TAG objects in a free list. 
307 */ 308 typedef struct tag { 309 struct tag *parent; /* parent of this element */ 310 const char *rawName; /* tagName in the original encoding */ 311 int rawNameLength; 312 TAG_NAME name; /* tagName in the API encoding */ 313 char *buf; /* buffer for name components */ 314 char *bufEnd; /* end of the buffer */ 315 BINDING *bindings; 316 } TAG; 317 318 typedef struct { 319 const XML_Char *name; 320 const XML_Char *textPtr; 321 int textLen; /* length in XML_Chars */ 322 int processed; /* # of processed bytes - when suspended */ 323 const XML_Char *systemId; 324 const XML_Char *base; 325 const XML_Char *publicId; 326 const XML_Char *notation; 327 XML_Bool open; 328 XML_Bool is_param; 329 XML_Bool is_internal; /* true if declared in internal subset outside PE */ 330 } ENTITY; 331 332 typedef struct { 333 enum XML_Content_Type type; 334 enum XML_Content_Quant quant; 335 const XML_Char *name; 336 int firstchild; 337 int lastchild; 338 int childcnt; 339 int nextsib; 340 } CONTENT_SCAFFOLD; 341 342 #define INIT_SCAFFOLD_ELEMENTS 32 343 344 typedef struct block { 345 struct block *next; 346 int size; 347 XML_Char s[1]; 348 } BLOCK; 349 350 typedef struct { 351 BLOCK *blocks; 352 BLOCK *freeBlocks; 353 const XML_Char *end; 354 XML_Char *ptr; 355 XML_Char *start; 356 const XML_Memory_Handling_Suite *mem; 357 } STRING_POOL; 358 359 /* The XML_Char before the name is used to determine whether 360 an attribute has been specified. 
*/ 361 typedef struct attribute_id { 362 XML_Char *name; 363 PREFIX *prefix; 364 XML_Bool maybeTokenized; 365 XML_Bool xmlns; 366 } ATTRIBUTE_ID; 367 368 typedef struct { 369 const ATTRIBUTE_ID *id; 370 XML_Bool isCdata; 371 const XML_Char *value; 372 } DEFAULT_ATTRIBUTE; 373 374 typedef struct { 375 unsigned long version; 376 unsigned long hash; 377 const XML_Char *uriName; 378 } NS_ATT; 379 380 typedef struct { 381 const XML_Char *name; 382 PREFIX *prefix; 383 const ATTRIBUTE_ID *idAtt; 384 int nDefaultAtts; 385 int allocDefaultAtts; 386 DEFAULT_ATTRIBUTE *defaultAtts; 387 } ELEMENT_TYPE; 388 389 typedef struct { 390 HASH_TABLE generalEntities; 391 HASH_TABLE elementTypes; 392 HASH_TABLE attributeIds; 393 HASH_TABLE prefixes; 394 STRING_POOL pool; 395 STRING_POOL entityValuePool; 396 /* false once a parameter entity reference has been skipped */ 397 XML_Bool keepProcessing; 398 /* true once an internal or external PE reference has been encountered; 399 this includes the reference to an external subset */ 400 XML_Bool hasParamEntityRefs; 401 XML_Bool standalone; 402 #ifdef XML_DTD 403 /* indicates if external PE has been read */ 404 XML_Bool paramEntityRead; 405 HASH_TABLE paramEntities; 406 #endif /* XML_DTD */ 407 PREFIX defaultPrefix; 408 /* === scaffolding for building content model === */ 409 XML_Bool in_eldecl; 410 CONTENT_SCAFFOLD *scaffold; 411 unsigned contentStringLen; 412 unsigned scaffSize; 413 unsigned scaffCount; 414 int scaffLevel; 415 int *scaffIndex; 416 } DTD; 417 418 typedef struct open_internal_entity { 419 const char *internalEventPtr; 420 const char *internalEventEndPtr; 421 struct open_internal_entity *next; 422 ENTITY *entity; 423 int startTagLevel; 424 XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 425 } OPEN_INTERNAL_ENTITY; 426 427 enum XML_Account { 428 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ 429 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 430 expansion */ 431 
XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 432 }; 433 434 #if XML_GE == 1 435 typedef unsigned long long XmlBigCount; 436 typedef struct accounting { 437 XmlBigCount countBytesDirect; 438 XmlBigCount countBytesIndirect; 439 unsigned long debugLevel; 440 float maximumAmplificationFactor; // >=1.0 441 unsigned long long activationThresholdBytes; 442 } ACCOUNTING; 443 444 typedef struct entity_stats { 445 unsigned int countEverOpened; 446 unsigned int currentDepth; 447 unsigned int maximumDepthSeen; 448 unsigned long debugLevel; 449 } ENTITY_STATS; 450 #endif /* XML_GE == 1 */ 451 452 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, 453 const char *end, const char **endPtr); 454 455 static Processor prologProcessor; 456 static Processor prologInitProcessor; 457 static Processor contentProcessor; 458 static Processor cdataSectionProcessor; 459 #ifdef XML_DTD 460 static Processor ignoreSectionProcessor; 461 static Processor externalParEntProcessor; 462 static Processor externalParEntInitProcessor; 463 static Processor entityValueProcessor; 464 static Processor entityValueInitProcessor; 465 #endif /* XML_DTD */ 466 static Processor epilogProcessor; 467 static Processor errorProcessor; 468 static Processor externalEntityInitProcessor; 469 static Processor externalEntityInitProcessor2; 470 static Processor externalEntityInitProcessor3; 471 static Processor externalEntityContentProcessor; 472 static Processor internalEntityProcessor; 473 474 static enum XML_Error handleUnknownEncoding(XML_Parser parser, 475 const XML_Char *encodingName); 476 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 477 const char *s, const char *next); 478 static enum XML_Error initializeEncoding(XML_Parser parser); 479 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, 480 const char *s, const char *end, int tok, 481 const char *next, const char **nextPtr, 482 XML_Bool haveMore, XML_Bool 
allowClosingDoctype, 483 enum XML_Account account); 484 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, 485 XML_Bool betweenDecl); 486 static enum XML_Error doContent(XML_Parser parser, int startTagLevel, 487 const ENCODING *enc, const char *start, 488 const char *end, const char **endPtr, 489 XML_Bool haveMore, enum XML_Account account); 490 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, 491 const char **startPtr, const char *end, 492 const char **nextPtr, XML_Bool haveMore, 493 enum XML_Account account); 494 #ifdef XML_DTD 495 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, 496 const char **startPtr, const char *end, 497 const char **nextPtr, XML_Bool haveMore); 498 #endif /* XML_DTD */ 499 500 static void freeBindings(XML_Parser parser, BINDING *bindings); 501 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 502 const char *attStr, TAG_NAME *tagNamePtr, 503 BINDING **bindingsPtr, 504 enum XML_Account account); 505 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, 506 const ATTRIBUTE_ID *attId, const XML_Char *uri, 507 BINDING **bindingsPtr); 508 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, 509 XML_Bool isCdata, XML_Bool isId, 510 const XML_Char *value, XML_Parser parser); 511 static enum XML_Error storeAttributeValue(XML_Parser parser, 512 const ENCODING *enc, XML_Bool isCdata, 513 const char *ptr, const char *end, 514 STRING_POOL *pool, 515 enum XML_Account account); 516 static enum XML_Error appendAttributeValue(XML_Parser parser, 517 const ENCODING *enc, 518 XML_Bool isCdata, const char *ptr, 519 const char *end, STRING_POOL *pool, 520 enum XML_Account account); 521 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, 522 const char *start, const char *end); 523 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); 524 #if XML_GE == 1 525 static enum XML_Error 
storeEntityValue(XML_Parser parser, const ENCODING *enc, 526 const char *start, const char *end, 527 enum XML_Account account); 528 #else 529 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); 530 #endif 531 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 532 const char *start, const char *end); 533 static int reportComment(XML_Parser parser, const ENCODING *enc, 534 const char *start, const char *end); 535 static void reportDefault(XML_Parser parser, const ENCODING *enc, 536 const char *start, const char *end); 537 538 static const XML_Char *getContext(XML_Parser parser); 539 static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 540 541 static void FASTCALL normalizePublicId(XML_Char *s); 542 543 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); 544 /* do not call if m_parentParser != NULL */ 545 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); 546 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, 547 const XML_Memory_Handling_Suite *ms); 548 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 549 const XML_Memory_Handling_Suite *ms); 550 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 551 STRING_POOL *newPool, const HASH_TABLE *oldTable); 552 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, 553 size_t createSize); 554 static void FASTCALL hashTableInit(HASH_TABLE *table, 555 const XML_Memory_Handling_Suite *ms); 556 static void FASTCALL hashTableClear(HASH_TABLE *table); 557 static void FASTCALL hashTableDestroy(HASH_TABLE *table); 558 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, 559 const HASH_TABLE *table); 560 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); 561 562 static void FASTCALL poolInit(STRING_POOL *pool, 563 const XML_Memory_Handling_Suite *ms); 564 static void FASTCALL poolClear(STRING_POOL *pool); 565 static void FASTCALL poolDestroy(STRING_POOL 
*pool); 566 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 567 const char *ptr, const char *end); 568 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 569 const char *ptr, const char *end); 570 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); 571 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, 572 const XML_Char *s); 573 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, 574 int n); 575 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, 576 const XML_Char *s); 577 578 static int FASTCALL nextScaffoldPart(XML_Parser parser); 579 static XML_Content *build_model(XML_Parser parser); 580 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, 581 const char *ptr, const char *end); 582 583 static XML_Char *copyString(const XML_Char *s, 584 const XML_Memory_Handling_Suite *memsuite); 585 586 static unsigned long generate_hash_secret_salt(XML_Parser parser); 587 static XML_Bool startParsing(XML_Parser parser); 588 589 static XML_Parser parserCreate(const XML_Char *encodingName, 590 const XML_Memory_Handling_Suite *memsuite, 591 const XML_Char *nameSep, DTD *dtd); 592 593 static void parserInit(XML_Parser parser, const XML_Char *encodingName); 594 595 #if XML_GE == 1 596 static float accountingGetCurrentAmplification(XML_Parser rootParser); 597 static void accountingReportStats(XML_Parser originParser, const char *epilog); 598 static void accountingOnAbort(XML_Parser originParser); 599 static void accountingReportDiff(XML_Parser rootParser, 600 unsigned int levelsAwayFromRootParser, 601 const char *before, const char *after, 602 ptrdiff_t bytesMore, int source_line, 603 enum XML_Account account); 604 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, 605 const char *before, const char *after, 606 int source_line, 607 enum XML_Account account); 608 609 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, 
610 const char *action, int sourceLine); 611 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, 612 int sourceLine); 613 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, 614 int sourceLine); 615 616 static XML_Parser getRootParserOf(XML_Parser parser, 617 unsigned int *outLevelDiff); 618 #endif /* XML_GE == 1 */ 619 620 static unsigned long getDebugLevel(const char *variableName, 621 unsigned long defaultDebugLevel); 622 623 #define poolStart(pool) ((pool)->start) 624 #define poolLength(pool) ((pool)->ptr - (pool)->start) 625 #define poolChop(pool) ((void)--(pool->ptr)) 626 #define poolLastChar(pool) (((pool)->ptr)[-1]) 627 #define poolDiscard(pool) ((pool)->ptr = (pool)->start) 628 #define poolFinish(pool) ((pool)->start = (pool)->ptr) 629 #define poolAppendChar(pool, c) \ 630 (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ 631 ? 0 \ 632 : ((*((pool)->ptr)++ = c), 1)) 633 634 #if ! defined(XML_TESTING) 635 const 636 #endif 637 XML_Bool g_reparseDeferralEnabledDefault 638 = XML_TRUE; // write ONLY in runtests.c 639 #if defined(XML_TESTING) 640 unsigned int g_bytesScanned = 0; // used for testing only 641 #endif 642 643 struct XML_ParserStruct { 644 /* The first member must be m_userData so that the XML_GetUserData 645 macro works. 
*/ 646 void *m_userData; 647 void *m_handlerArg; 648 649 // How the four parse buffer pointers below relate in time and space: 650 // 651 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim 652 // | | | | 653 // <--parsed-->| | | 654 // <---parsing--->| | 655 // <--unoccupied-->| 656 // <---------total-malloced/realloced-------->| 657 658 char *m_buffer; // malloc/realloc base pointer of parse buffer 659 const XML_Memory_Handling_Suite m_mem; 660 const char *m_bufferPtr; // first character to be parsed 661 char *m_bufferEnd; // past last character to be parsed 662 const char *m_bufferLim; // allocated end of m_buffer 663 664 XML_Index m_parseEndByteIndex; 665 const char *m_parseEndPtr; 666 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ 667 XML_Bool m_reparseDeferralEnabled; 668 int m_lastBufferRequestSize; 669 XML_Char *m_dataBuf; 670 XML_Char *m_dataBufEnd; 671 XML_StartElementHandler m_startElementHandler; 672 XML_EndElementHandler m_endElementHandler; 673 XML_CharacterDataHandler m_characterDataHandler; 674 XML_ProcessingInstructionHandler m_processingInstructionHandler; 675 XML_CommentHandler m_commentHandler; 676 XML_StartCdataSectionHandler m_startCdataSectionHandler; 677 XML_EndCdataSectionHandler m_endCdataSectionHandler; 678 XML_DefaultHandler m_defaultHandler; 679 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 680 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 681 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 682 XML_NotationDeclHandler m_notationDeclHandler; 683 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 684 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 685 XML_NotStandaloneHandler m_notStandaloneHandler; 686 XML_ExternalEntityRefHandler m_externalEntityRefHandler; 687 XML_Parser m_externalEntityRefHandlerArg; 688 XML_SkippedEntityHandler m_skippedEntityHandler; 689 XML_UnknownEncodingHandler m_unknownEncodingHandler; 690 XML_ElementDeclHandler m_elementDeclHandler; 691 
XML_AttlistDeclHandler m_attlistDeclHandler; 692 XML_EntityDeclHandler m_entityDeclHandler; 693 XML_XmlDeclHandler m_xmlDeclHandler; 694 const ENCODING *m_encoding; 695 INIT_ENCODING m_initEncoding; 696 const ENCODING *m_internalEncoding; 697 const XML_Char *m_protocolEncodingName; 698 XML_Bool m_ns; 699 XML_Bool m_ns_triplets; 700 void *m_unknownEncodingMem; 701 void *m_unknownEncodingData; 702 void *m_unknownEncodingHandlerData; 703 void(XMLCALL *m_unknownEncodingRelease)(void *); 704 PROLOG_STATE m_prologState; 705 Processor *m_processor; 706 enum XML_Error m_errorCode; 707 const char *m_eventPtr; 708 const char *m_eventEndPtr; 709 const char *m_positionPtr; 710 OPEN_INTERNAL_ENTITY *m_openInternalEntities; 711 OPEN_INTERNAL_ENTITY *m_freeInternalEntities; 712 XML_Bool m_defaultExpandInternalEntities; 713 int m_tagLevel; 714 ENTITY *m_declEntity; 715 const XML_Char *m_doctypeName; 716 const XML_Char *m_doctypeSysid; 717 const XML_Char *m_doctypePubid; 718 const XML_Char *m_declAttributeType; 719 const XML_Char *m_declNotationName; 720 const XML_Char *m_declNotationPublicId; 721 ELEMENT_TYPE *m_declElementType; 722 ATTRIBUTE_ID *m_declAttributeId; 723 XML_Bool m_declAttributeIsCdata; 724 XML_Bool m_declAttributeIsId; 725 DTD *m_dtd; 726 const XML_Char *m_curBase; 727 TAG *m_tagStack; 728 TAG *m_freeTagList; 729 BINDING *m_inheritedBindings; 730 BINDING *m_freeBindingList; 731 int m_attsSize; 732 int m_nSpecifiedAtts; 733 int m_idAttIndex; 734 ATTRIBUTE *m_atts; 735 NS_ATT *m_nsAtts; 736 unsigned long m_nsAttsVersion; 737 unsigned char m_nsAttsPower; 738 #ifdef XML_ATTR_INFO 739 XML_AttrInfo *m_attInfo; 740 #endif 741 POSITION m_position; 742 STRING_POOL m_tempPool; 743 STRING_POOL m_temp2Pool; 744 char *m_groupConnector; 745 unsigned int m_groupSize; 746 XML_Char m_namespaceSeparator; 747 XML_Parser m_parentParser; 748 XML_ParsingStatus m_parsingStatus; 749 #ifdef XML_DTD 750 XML_Bool m_isParamEntity; 751 XML_Bool m_useForeignDTD; 752 enum XML_ParamEntityParsing 
m_paramEntityParsing; 753 #endif 754 unsigned long m_hash_secret_salt; 755 #if XML_GE == 1 756 ACCOUNTING m_accounting; 757 ENTITY_STATS m_entity_stats; 758 #endif 759 }; 760 761 #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 762 #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 763 #define FREE(parser, p) (parser->m_mem.free_fcn((p))) 764 765 XML_Parser XMLCALL 766 XML_ParserCreate(const XML_Char *encodingName) { 767 return XML_ParserCreate_MM(encodingName, NULL, NULL); 768 } 769 770 XML_Parser XMLCALL 771 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { 772 XML_Char tmp[2] = {nsSep, 0}; 773 return XML_ParserCreate_MM(encodingName, NULL, tmp); 774 } 775 776 // "xml=http://www.w3.org/XML/1998/namespace" 777 static const XML_Char implicitContext[] 778 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, 779 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 780 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, 781 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 782 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, 783 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, 784 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, 785 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, 786 '\0'}; 787 788 /* To avoid warnings about unused functions: */ 789 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) 790 791 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 792 793 /* Obtain entropy on Linux 3.17+ */ 794 static int 795 writeRandomBytes_getrandom_nonblock(void *target, size_t count) { 796 int success = 0; /* full count bytes written? 
*/ 797 size_t bytesWrittenTotal = 0; 798 const unsigned int getrandomFlags = GRND_NONBLOCK; 799 800 do { 801 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 802 const size_t bytesToWrite = count - bytesWrittenTotal; 803 804 const int bytesWrittenMore = 805 # if defined(HAVE_GETRANDOM) 806 getrandom(currentTarget, bytesToWrite, getrandomFlags); 807 # else 808 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); 809 # endif 810 811 if (bytesWrittenMore > 0) { 812 bytesWrittenTotal += bytesWrittenMore; 813 if (bytesWrittenTotal >= count) 814 success = 1; 815 } 816 } while (! success && (errno == EINTR)); 817 818 return success; 819 } 820 821 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 822 823 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) 824 825 /* Extract entropy from /dev/urandom */ 826 static int 827 writeRandomBytes_dev_urandom(void *target, size_t count) { 828 int success = 0; /* full count bytes written? */ 829 size_t bytesWrittenTotal = 0; 830 831 const int fd = open("/dev/urandom", O_RDONLY); 832 if (fd < 0) { 833 return 0; 834 } 835 836 do { 837 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 838 const size_t bytesToWrite = count - bytesWrittenTotal; 839 840 const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); 841 842 if (bytesWrittenMore > 0) { 843 bytesWrittenTotal += bytesWrittenMore; 844 if (bytesWrittenTotal >= count) 845 success = 1; 846 } 847 } while (! success && (errno == EINTR)); 848 849 close(fd); 850 return success; 851 } 852 853 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 854 855 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 856 857 #if defined(HAVE_ARC4RANDOM) && ! 
defined(HAVE_ARC4RANDOM_BUF) 858 859 static void 860 writeRandomBytes_arc4random(void *target, size_t count) { 861 size_t bytesWrittenTotal = 0; 862 863 while (bytesWrittenTotal < count) { 864 const uint32_t random32 = arc4random(); 865 size_t i = 0; 866 867 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 868 i++, bytesWrittenTotal++) { 869 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 870 ((uint8_t *)target)[bytesWrittenTotal] = random8; 871 } 872 } 873 } 874 875 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ 876 877 #ifdef _WIN32 878 879 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), 880 as it didn't declare it in its header prior to version 5.3.0 of its 881 runtime package (mingwrt, containing stdlib.h). The upstream fix 882 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ 883 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ 884 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) 885 __declspec(dllimport) int rand_s(unsigned int *); 886 # endif 887 888 /* Obtain entropy on Windows using the rand_s() function which 889 * generates cryptographically secure random numbers. Internally it 890 * uses RtlGenRandom API which is present in Windows XP and later. 891 */ 892 static int 893 writeRandomBytes_rand_s(void *target, size_t count) { 894 size_t bytesWrittenTotal = 0; 895 896 while (bytesWrittenTotal < count) { 897 unsigned int random32 = 0; 898 size_t i = 0; 899 900 if (rand_s(&random32)) 901 return 0; /* failure */ 902 903 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 904 i++, bytesWrittenTotal++) { 905 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 906 ((uint8_t *)target)[bytesWrittenTotal] = random8; 907 } 908 } 909 return 1; /* success */ 910 } 911 912 #endif /* _WIN32 */ 913 914 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) 915 916 static unsigned long 917 gather_time_entropy(void) { 918 # ifdef _WIN32 919 FILETIME ft; 920 GetSystemTimeAsFileTime(&ft); /* never fails */ 921 return ft.dwHighDateTime ^ ft.dwLowDateTime; 922 # else 923 struct timeval tv; 924 int gettimeofday_res; 925 926 gettimeofday_res = gettimeofday(&tv, NULL); 927 928 # if defined(NDEBUG) 929 (void)gettimeofday_res; 930 # else 931 assert(gettimeofday_res == 0); 932 # endif /* defined(NDEBUG) */ 933 934 /* Microseconds time is <20 bits entropy */ 935 return tv.tv_usec; 936 # endif 937 } 938 939 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 940 941 static unsigned long 942 ENTROPY_DEBUG(const char *label, unsigned long entropy) { 943 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { 944 fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, 945 (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); 946 } 947 return entropy; 948 } 949 950 static unsigned long 951 generate_hash_secret_salt(XML_Parser parser) { 952 unsigned long entropy; 953 (void)parser; 954 955 /* "Failproof" high quality providers: */ 956 #if defined(HAVE_ARC4RANDOM_BUF) 957 arc4random_buf(&entropy, sizeof(entropy)); 958 return ENTROPY_DEBUG("arc4random_buf", entropy); 959 #elif defined(HAVE_ARC4RANDOM) 960 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); 961 return ENTROPY_DEBUG("arc4random", entropy); 962 #else 963 /* Try high quality providers first .. */ 964 # ifdef _WIN32 965 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { 966 return ENTROPY_DEBUG("rand_s", entropy); 967 } 968 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 969 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { 970 return ENTROPY_DEBUG("getrandom", entropy); 971 } 972 # endif 973 # if ! 
defined(_WIN32) && defined(XML_DEV_URANDOM) 974 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { 975 return ENTROPY_DEBUG("/dev/urandom", entropy); 976 } 977 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 978 /* .. and self-made low quality for backup: */ 979 980 /* Process ID is 0 bits entropy if attacker has local access */ 981 entropy = gather_time_entropy() ^ getpid(); 982 983 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ 984 if (sizeof(unsigned long) == 4) { 985 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); 986 } else { 987 return ENTROPY_DEBUG("fallback(8)", 988 entropy * (unsigned long)2305843009213693951ULL); 989 } 990 #endif 991 } 992 993 static unsigned long 994 get_hash_secret_salt(XML_Parser parser) { 995 if (parser->m_parentParser != NULL) 996 return get_hash_secret_salt(parser->m_parentParser); 997 return parser->m_hash_secret_salt; 998 } 999 1000 static enum XML_Error 1001 callProcessor(XML_Parser parser, const char *start, const char *end, 1002 const char **endPtr) { 1003 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); 1004 1005 if (parser->m_reparseDeferralEnabled 1006 && ! parser->m_parsingStatus.finalBuffer) { 1007 // Heuristic: don't try to parse a partial token again until the amount of 1008 // available data has increased significantly. 1009 const size_t had_before = parser->m_partialTokenBytesBefore; 1010 // ...but *do* try anyway if we're close to causing a reallocation. 
1011 size_t available_buffer 1012 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 1013 #if XML_CONTEXT_BYTES > 0 1014 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); 1015 #endif 1016 available_buffer 1017 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); 1018 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok 1019 const bool enough 1020 = (have_now >= 2 * had_before) 1021 || ((size_t)parser->m_lastBufferRequestSize > available_buffer); 1022 1023 if (! enough) { 1024 *endPtr = start; // callers may expect this to be set 1025 return XML_ERROR_NONE; 1026 } 1027 } 1028 #if defined(XML_TESTING) 1029 g_bytesScanned += (unsigned)have_now; 1030 #endif 1031 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); 1032 if (ret == XML_ERROR_NONE) { 1033 // if we consumed nothing, remember what we had on this parse attempt. 1034 if (*endPtr == start) { 1035 parser->m_partialTokenBytesBefore = have_now; 1036 } else { 1037 parser->m_partialTokenBytesBefore = 0; 1038 } 1039 } 1040 return ret; 1041 } 1042 1043 static XML_Bool /* only valid for root parser */ 1044 startParsing(XML_Parser parser) { 1045 /* hash functions must be initialized before setContext() is called */ 1046 if (parser->m_hash_secret_salt == 0) 1047 parser->m_hash_secret_salt = generate_hash_secret_salt(parser); 1048 if (parser->m_ns) { 1049 /* implicit context only set for root parser, since child 1050 parsers (i.e. 
external entity parsers) will inherit it 1051 */ 1052 return setContext(parser, implicitContext); 1053 } 1054 return XML_TRUE; 1055 } 1056 1057 XML_Parser XMLCALL 1058 XML_ParserCreate_MM(const XML_Char *encodingName, 1059 const XML_Memory_Handling_Suite *memsuite, 1060 const XML_Char *nameSep) { 1061 return parserCreate(encodingName, memsuite, nameSep, NULL); 1062 } 1063 1064 static XML_Parser 1065 parserCreate(const XML_Char *encodingName, 1066 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, 1067 DTD *dtd) { 1068 XML_Parser parser; 1069 1070 if (memsuite) { 1071 XML_Memory_Handling_Suite *mtemp; 1072 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); 1073 if (parser != NULL) { 1074 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1075 mtemp->malloc_fcn = memsuite->malloc_fcn; 1076 mtemp->realloc_fcn = memsuite->realloc_fcn; 1077 mtemp->free_fcn = memsuite->free_fcn; 1078 } 1079 } else { 1080 XML_Memory_Handling_Suite *mtemp; 1081 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); 1082 if (parser != NULL) { 1083 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1084 mtemp->malloc_fcn = malloc; 1085 mtemp->realloc_fcn = realloc; 1086 mtemp->free_fcn = free; 1087 } 1088 } 1089 1090 if (! 
parser) 1091 return parser; 1092 1093 parser->m_buffer = NULL; 1094 parser->m_bufferLim = NULL; 1095 1096 parser->m_attsSize = INIT_ATTS_SIZE; 1097 parser->m_atts 1098 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); 1099 if (parser->m_atts == NULL) { 1100 FREE(parser, parser); 1101 return NULL; 1102 } 1103 #ifdef XML_ATTR_INFO 1104 parser->m_attInfo = (XML_AttrInfo *)MALLOC( 1105 parser, parser->m_attsSize * sizeof(XML_AttrInfo)); 1106 if (parser->m_attInfo == NULL) { 1107 FREE(parser, parser->m_atts); 1108 FREE(parser, parser); 1109 return NULL; 1110 } 1111 #endif 1112 parser->m_dataBuf 1113 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); 1114 if (parser->m_dataBuf == NULL) { 1115 FREE(parser, parser->m_atts); 1116 #ifdef XML_ATTR_INFO 1117 FREE(parser, parser->m_attInfo); 1118 #endif 1119 FREE(parser, parser); 1120 return NULL; 1121 } 1122 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; 1123 1124 if (dtd) 1125 parser->m_dtd = dtd; 1126 else { 1127 parser->m_dtd = dtdCreate(&parser->m_mem); 1128 if (parser->m_dtd == NULL) { 1129 FREE(parser, parser->m_dataBuf); 1130 FREE(parser, parser->m_atts); 1131 #ifdef XML_ATTR_INFO 1132 FREE(parser, parser->m_attInfo); 1133 #endif 1134 FREE(parser, parser); 1135 return NULL; 1136 } 1137 } 1138 1139 parser->m_freeBindingList = NULL; 1140 parser->m_freeTagList = NULL; 1141 parser->m_freeInternalEntities = NULL; 1142 1143 parser->m_groupSize = 0; 1144 parser->m_groupConnector = NULL; 1145 1146 parser->m_unknownEncodingHandler = NULL; 1147 parser->m_unknownEncodingHandlerData = NULL; 1148 1149 parser->m_namespaceSeparator = ASCII_EXCL; 1150 parser->m_ns = XML_FALSE; 1151 parser->m_ns_triplets = XML_FALSE; 1152 1153 parser->m_nsAtts = NULL; 1154 parser->m_nsAttsVersion = 0; 1155 parser->m_nsAttsPower = 0; 1156 1157 parser->m_protocolEncodingName = NULL; 1158 1159 poolInit(&parser->m_tempPool, &(parser->m_mem)); 1160 poolInit(&parser->m_temp2Pool, &(parser->m_mem)); 1161 
parserInit(parser, encodingName); 1162 1163 if (encodingName && ! parser->m_protocolEncodingName) { 1164 if (dtd) { 1165 // We need to stop the upcoming call to XML_ParserFree from happily 1166 // destroying parser->m_dtd because the DTD is shared with the parent 1167 // parser and the only guard that keeps XML_ParserFree from destroying 1168 // parser->m_dtd is parser->m_isParamEntity but it will be set to 1169 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). 1170 parser->m_dtd = NULL; 1171 } 1172 XML_ParserFree(parser); 1173 return NULL; 1174 } 1175 1176 if (nameSep) { 1177 parser->m_ns = XML_TRUE; 1178 parser->m_internalEncoding = XmlGetInternalEncodingNS(); 1179 parser->m_namespaceSeparator = *nameSep; 1180 } else { 1181 parser->m_internalEncoding = XmlGetInternalEncoding(); 1182 } 1183 1184 return parser; 1185 } 1186 1187 static void 1188 parserInit(XML_Parser parser, const XML_Char *encodingName) { 1189 parser->m_processor = prologInitProcessor; 1190 XmlPrologStateInit(&parser->m_prologState); 1191 if (encodingName != NULL) { 1192 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1193 } 1194 parser->m_curBase = NULL; 1195 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); 1196 parser->m_userData = NULL; 1197 parser->m_handlerArg = NULL; 1198 parser->m_startElementHandler = NULL; 1199 parser->m_endElementHandler = NULL; 1200 parser->m_characterDataHandler = NULL; 1201 parser->m_processingInstructionHandler = NULL; 1202 parser->m_commentHandler = NULL; 1203 parser->m_startCdataSectionHandler = NULL; 1204 parser->m_endCdataSectionHandler = NULL; 1205 parser->m_defaultHandler = NULL; 1206 parser->m_startDoctypeDeclHandler = NULL; 1207 parser->m_endDoctypeDeclHandler = NULL; 1208 parser->m_unparsedEntityDeclHandler = NULL; 1209 parser->m_notationDeclHandler = NULL; 1210 parser->m_startNamespaceDeclHandler = NULL; 1211 parser->m_endNamespaceDeclHandler = NULL; 1212 parser->m_notStandaloneHandler = 
NULL; 1213 parser->m_externalEntityRefHandler = NULL; 1214 parser->m_externalEntityRefHandlerArg = parser; 1215 parser->m_skippedEntityHandler = NULL; 1216 parser->m_elementDeclHandler = NULL; 1217 parser->m_attlistDeclHandler = NULL; 1218 parser->m_entityDeclHandler = NULL; 1219 parser->m_xmlDeclHandler = NULL; 1220 parser->m_bufferPtr = parser->m_buffer; 1221 parser->m_bufferEnd = parser->m_buffer; 1222 parser->m_parseEndByteIndex = 0; 1223 parser->m_parseEndPtr = NULL; 1224 parser->m_partialTokenBytesBefore = 0; 1225 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; 1226 parser->m_lastBufferRequestSize = 0; 1227 parser->m_declElementType = NULL; 1228 parser->m_declAttributeId = NULL; 1229 parser->m_declEntity = NULL; 1230 parser->m_doctypeName = NULL; 1231 parser->m_doctypeSysid = NULL; 1232 parser->m_doctypePubid = NULL; 1233 parser->m_declAttributeType = NULL; 1234 parser->m_declNotationName = NULL; 1235 parser->m_declNotationPublicId = NULL; 1236 parser->m_declAttributeIsCdata = XML_FALSE; 1237 parser->m_declAttributeIsId = XML_FALSE; 1238 memset(&parser->m_position, 0, sizeof(POSITION)); 1239 parser->m_errorCode = XML_ERROR_NONE; 1240 parser->m_eventPtr = NULL; 1241 parser->m_eventEndPtr = NULL; 1242 parser->m_positionPtr = NULL; 1243 parser->m_openInternalEntities = NULL; 1244 parser->m_defaultExpandInternalEntities = XML_TRUE; 1245 parser->m_tagLevel = 0; 1246 parser->m_tagStack = NULL; 1247 parser->m_inheritedBindings = NULL; 1248 parser->m_nSpecifiedAtts = 0; 1249 parser->m_unknownEncodingMem = NULL; 1250 parser->m_unknownEncodingRelease = NULL; 1251 parser->m_unknownEncodingData = NULL; 1252 parser->m_parentParser = NULL; 1253 parser->m_parsingStatus.parsing = XML_INITIALIZED; 1254 #ifdef XML_DTD 1255 parser->m_isParamEntity = XML_FALSE; 1256 parser->m_useForeignDTD = XML_FALSE; 1257 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 1258 #endif 1259 parser->m_hash_secret_salt = 0; 1260 1261 #if XML_GE == 1 1262 
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); 1263 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); 1264 parser->m_accounting.maximumAmplificationFactor 1265 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; 1266 parser->m_accounting.activationThresholdBytes 1267 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; 1268 1269 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); 1270 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); 1271 #endif 1272 } 1273 1274 /* moves list of bindings to m_freeBindingList */ 1275 static void FASTCALL 1276 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { 1277 while (bindings) { 1278 BINDING *b = bindings; 1279 bindings = bindings->nextTagBinding; 1280 b->nextTagBinding = parser->m_freeBindingList; 1281 parser->m_freeBindingList = b; 1282 } 1283 } 1284 1285 XML_Bool XMLCALL 1286 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { 1287 TAG *tStk; 1288 OPEN_INTERNAL_ENTITY *openEntityList; 1289 1290 if (parser == NULL) 1291 return XML_FALSE; 1292 1293 if (parser->m_parentParser) 1294 return XML_FALSE; 1295 /* move m_tagStack to m_freeTagList */ 1296 tStk = parser->m_tagStack; 1297 while (tStk) { 1298 TAG *tag = tStk; 1299 tStk = tStk->parent; 1300 tag->parent = parser->m_freeTagList; 1301 moveToFreeBindingList(parser, tag->bindings); 1302 tag->bindings = NULL; 1303 parser->m_freeTagList = tag; 1304 } 1305 /* move m_openInternalEntities to m_freeInternalEntities */ 1306 openEntityList = parser->m_openInternalEntities; 1307 while (openEntityList) { 1308 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1309 openEntityList = openEntity->next; 1310 openEntity->next = parser->m_freeInternalEntities; 1311 parser->m_freeInternalEntities = openEntity; 1312 } 1313 moveToFreeBindingList(parser, parser->m_inheritedBindings); 1314 FREE(parser, parser->m_unknownEncodingMem); 1315 if 
(parser->m_unknownEncodingRelease) 1316 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1317 poolClear(&parser->m_tempPool); 1318 poolClear(&parser->m_temp2Pool); 1319 FREE(parser, (void *)parser->m_protocolEncodingName); 1320 parser->m_protocolEncodingName = NULL; 1321 parserInit(parser, encodingName); 1322 dtdReset(parser->m_dtd, &parser->m_mem); 1323 return XML_TRUE; 1324 } 1325 1326 enum XML_Status XMLCALL 1327 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { 1328 if (parser == NULL) 1329 return XML_STATUS_ERROR; 1330 /* Block after XML_Parse()/XML_ParseBuffer() has been called. 1331 XXX There's no way for the caller to determine which of the 1332 XXX possible error cases caused the XML_STATUS_ERROR return. 1333 */ 1334 if (parser->m_parsingStatus.parsing == XML_PARSING 1335 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1336 return XML_STATUS_ERROR; 1337 1338 /* Get rid of any previous encoding name */ 1339 FREE(parser, (void *)parser->m_protocolEncodingName); 1340 1341 if (encodingName == NULL) 1342 /* No new encoding name */ 1343 parser->m_protocolEncodingName = NULL; 1344 else { 1345 /* Copy the new encoding name into allocated memory */ 1346 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1347 if (! 
parser->m_protocolEncodingName) 1348 return XML_STATUS_ERROR; 1349 } 1350 return XML_STATUS_OK; 1351 } 1352 1353 XML_Parser XMLCALL 1354 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, 1355 const XML_Char *encodingName) { 1356 XML_Parser parser = oldParser; 1357 DTD *newDtd = NULL; 1358 DTD *oldDtd; 1359 XML_StartElementHandler oldStartElementHandler; 1360 XML_EndElementHandler oldEndElementHandler; 1361 XML_CharacterDataHandler oldCharacterDataHandler; 1362 XML_ProcessingInstructionHandler oldProcessingInstructionHandler; 1363 XML_CommentHandler oldCommentHandler; 1364 XML_StartCdataSectionHandler oldStartCdataSectionHandler; 1365 XML_EndCdataSectionHandler oldEndCdataSectionHandler; 1366 XML_DefaultHandler oldDefaultHandler; 1367 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; 1368 XML_NotationDeclHandler oldNotationDeclHandler; 1369 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; 1370 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; 1371 XML_NotStandaloneHandler oldNotStandaloneHandler; 1372 XML_ExternalEntityRefHandler oldExternalEntityRefHandler; 1373 XML_SkippedEntityHandler oldSkippedEntityHandler; 1374 XML_UnknownEncodingHandler oldUnknownEncodingHandler; 1375 XML_ElementDeclHandler oldElementDeclHandler; 1376 XML_AttlistDeclHandler oldAttlistDeclHandler; 1377 XML_EntityDeclHandler oldEntityDeclHandler; 1378 XML_XmlDeclHandler oldXmlDeclHandler; 1379 ELEMENT_TYPE *oldDeclElementType; 1380 1381 void *oldUserData; 1382 void *oldHandlerArg; 1383 XML_Bool oldDefaultExpandInternalEntities; 1384 XML_Parser oldExternalEntityRefHandlerArg; 1385 #ifdef XML_DTD 1386 enum XML_ParamEntityParsing oldParamEntityParsing; 1387 int oldInEntityValue; 1388 #endif 1389 XML_Bool oldns_triplets; 1390 /* Note that the new parser shares the same hash secret as the old 1391 parser, so that dtdCopy and copyEntityTable can lookup values 1392 from hash tables associated with either parser without us having 1393 to worry 
which hash secrets each table has. 1394 */ 1395 unsigned long oldhash_secret_salt; 1396 XML_Bool oldReparseDeferralEnabled; 1397 1398 /* Validate the oldParser parameter before we pull everything out of it */ 1399 if (oldParser == NULL) 1400 return NULL; 1401 1402 /* Stash the original parser contents on the stack */ 1403 oldDtd = parser->m_dtd; 1404 oldStartElementHandler = parser->m_startElementHandler; 1405 oldEndElementHandler = parser->m_endElementHandler; 1406 oldCharacterDataHandler = parser->m_characterDataHandler; 1407 oldProcessingInstructionHandler = parser->m_processingInstructionHandler; 1408 oldCommentHandler = parser->m_commentHandler; 1409 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; 1410 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; 1411 oldDefaultHandler = parser->m_defaultHandler; 1412 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; 1413 oldNotationDeclHandler = parser->m_notationDeclHandler; 1414 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; 1415 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; 1416 oldNotStandaloneHandler = parser->m_notStandaloneHandler; 1417 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; 1418 oldSkippedEntityHandler = parser->m_skippedEntityHandler; 1419 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; 1420 oldElementDeclHandler = parser->m_elementDeclHandler; 1421 oldAttlistDeclHandler = parser->m_attlistDeclHandler; 1422 oldEntityDeclHandler = parser->m_entityDeclHandler; 1423 oldXmlDeclHandler = parser->m_xmlDeclHandler; 1424 oldDeclElementType = parser->m_declElementType; 1425 1426 oldUserData = parser->m_userData; 1427 oldHandlerArg = parser->m_handlerArg; 1428 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; 1429 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; 1430 #ifdef XML_DTD 1431 oldParamEntityParsing = parser->m_paramEntityParsing; 1432 
oldInEntityValue = parser->m_prologState.inEntityValue; 1433 #endif 1434 oldns_triplets = parser->m_ns_triplets; 1435 /* Note that the new parser shares the same hash secret as the old 1436 parser, so that dtdCopy and copyEntityTable can lookup values 1437 from hash tables associated with either parser without us having 1438 to worry which hash secrets each table has. 1439 */ 1440 oldhash_secret_salt = parser->m_hash_secret_salt; 1441 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; 1442 1443 #ifdef XML_DTD 1444 if (! context) 1445 newDtd = oldDtd; 1446 #endif /* XML_DTD */ 1447 1448 /* Note that the magical uses of the pre-processor to make field 1449 access look more like C++ require that `parser' be overwritten 1450 here. This makes this function more painful to follow than it 1451 would be otherwise. 1452 */ 1453 if (parser->m_ns) { 1454 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; 1455 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); 1456 } else { 1457 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); 1458 } 1459 1460 if (! 
parser) 1461 return NULL; 1462 1463 parser->m_startElementHandler = oldStartElementHandler; 1464 parser->m_endElementHandler = oldEndElementHandler; 1465 parser->m_characterDataHandler = oldCharacterDataHandler; 1466 parser->m_processingInstructionHandler = oldProcessingInstructionHandler; 1467 parser->m_commentHandler = oldCommentHandler; 1468 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; 1469 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; 1470 parser->m_defaultHandler = oldDefaultHandler; 1471 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; 1472 parser->m_notationDeclHandler = oldNotationDeclHandler; 1473 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; 1474 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; 1475 parser->m_notStandaloneHandler = oldNotStandaloneHandler; 1476 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; 1477 parser->m_skippedEntityHandler = oldSkippedEntityHandler; 1478 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; 1479 parser->m_elementDeclHandler = oldElementDeclHandler; 1480 parser->m_attlistDeclHandler = oldAttlistDeclHandler; 1481 parser->m_entityDeclHandler = oldEntityDeclHandler; 1482 parser->m_xmlDeclHandler = oldXmlDeclHandler; 1483 parser->m_declElementType = oldDeclElementType; 1484 parser->m_userData = oldUserData; 1485 if (oldUserData == oldHandlerArg) 1486 parser->m_handlerArg = parser->m_userData; 1487 else 1488 parser->m_handlerArg = parser; 1489 if (oldExternalEntityRefHandlerArg != oldParser) 1490 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; 1491 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; 1492 parser->m_ns_triplets = oldns_triplets; 1493 parser->m_hash_secret_salt = oldhash_secret_salt; 1494 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; 1495 parser->m_parentParser = oldParser; 1496 #ifdef XML_DTD 1497 parser->m_paramEntityParsing = 
oldParamEntityParsing; 1498 parser->m_prologState.inEntityValue = oldInEntityValue; 1499 if (context) { 1500 #endif /* XML_DTD */ 1501 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) 1502 || ! setContext(parser, context)) { 1503 XML_ParserFree(parser); 1504 return NULL; 1505 } 1506 parser->m_processor = externalEntityInitProcessor; 1507 #ifdef XML_DTD 1508 } else { 1509 /* The DTD instance referenced by parser->m_dtd is shared between the 1510 document's root parser and external PE parsers, therefore one does not 1511 need to call setContext. In addition, one also *must* not call 1512 setContext, because this would overwrite existing prefix->binding 1513 pointers in parser->m_dtd with ones that get destroyed with the external 1514 PE parser. This would leave those prefixes with dangling pointers. 1515 */ 1516 parser->m_isParamEntity = XML_TRUE; 1517 XmlPrologStateInitExternalEntity(&parser->m_prologState); 1518 parser->m_processor = externalParEntInitProcessor; 1519 } 1520 #endif /* XML_DTD */ 1521 return parser; 1522 } 1523 1524 static void FASTCALL 1525 destroyBindings(BINDING *bindings, XML_Parser parser) { 1526 for (;;) { 1527 BINDING *b = bindings; 1528 if (! 
b) 1529 break; 1530 bindings = b->nextTagBinding; 1531 FREE(parser, b->uri); 1532 FREE(parser, b); 1533 } 1534 } 1535 1536 void XMLCALL 1537 XML_ParserFree(XML_Parser parser) { 1538 TAG *tagList; 1539 OPEN_INTERNAL_ENTITY *entityList; 1540 if (parser == NULL) 1541 return; 1542 /* free m_tagStack and m_freeTagList */ 1543 tagList = parser->m_tagStack; 1544 for (;;) { 1545 TAG *p; 1546 if (tagList == NULL) { 1547 if (parser->m_freeTagList == NULL) 1548 break; 1549 tagList = parser->m_freeTagList; 1550 parser->m_freeTagList = NULL; 1551 } 1552 p = tagList; 1553 tagList = tagList->parent; 1554 FREE(parser, p->buf); 1555 destroyBindings(p->bindings, parser); 1556 FREE(parser, p); 1557 } 1558 /* free m_openInternalEntities and m_freeInternalEntities */ 1559 entityList = parser->m_openInternalEntities; 1560 for (;;) { 1561 OPEN_INTERNAL_ENTITY *openEntity; 1562 if (entityList == NULL) { 1563 if (parser->m_freeInternalEntities == NULL) 1564 break; 1565 entityList = parser->m_freeInternalEntities; 1566 parser->m_freeInternalEntities = NULL; 1567 } 1568 openEntity = entityList; 1569 entityList = entityList->next; 1570 FREE(parser, openEntity); 1571 } 1572 1573 destroyBindings(parser->m_freeBindingList, parser); 1574 destroyBindings(parser->m_inheritedBindings, parser); 1575 poolDestroy(&parser->m_tempPool); 1576 poolDestroy(&parser->m_temp2Pool); 1577 FREE(parser, (void *)parser->m_protocolEncodingName); 1578 #ifdef XML_DTD 1579 /* external parameter entity parsers share the DTD structure 1580 parser->m_dtd with the root parser, so we must not destroy it 1581 */ 1582 if (! parser->m_isParamEntity && parser->m_dtd) 1583 #else 1584 if (parser->m_dtd) 1585 #endif /* XML_DTD */ 1586 dtdDestroy(parser->m_dtd, (XML_Bool)! 
parser->m_parentParser, 1587 &parser->m_mem); 1588 FREE(parser, (void *)parser->m_atts); 1589 #ifdef XML_ATTR_INFO 1590 FREE(parser, (void *)parser->m_attInfo); 1591 #endif 1592 FREE(parser, parser->m_groupConnector); 1593 FREE(parser, parser->m_buffer); 1594 FREE(parser, parser->m_dataBuf); 1595 FREE(parser, parser->m_nsAtts); 1596 FREE(parser, parser->m_unknownEncodingMem); 1597 if (parser->m_unknownEncodingRelease) 1598 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1599 FREE(parser, parser); 1600 } 1601 1602 void XMLCALL 1603 XML_UseParserAsHandlerArg(XML_Parser parser) { 1604 if (parser != NULL) 1605 parser->m_handlerArg = parser; 1606 } 1607 1608 enum XML_Error XMLCALL 1609 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 1610 if (parser == NULL) 1611 return XML_ERROR_INVALID_ARGUMENT; 1612 #ifdef XML_DTD 1613 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1614 if (parser->m_parsingStatus.parsing == XML_PARSING 1615 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1616 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 1617 parser->m_useForeignDTD = useDTD; 1618 return XML_ERROR_NONE; 1619 #else 1620 UNUSED_P(useDTD); 1621 return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 1622 #endif 1623 } 1624 1625 void XMLCALL 1626 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 1627 if (parser == NULL) 1628 return; 1629 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1630 if (parser->m_parsingStatus.parsing == XML_PARSING 1631 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1632 return; 1633 parser->m_ns_triplets = do_nst ? 
XML_TRUE : XML_FALSE; 1634 } 1635 1636 void XMLCALL 1637 XML_SetUserData(XML_Parser parser, void *p) { 1638 if (parser == NULL) 1639 return; 1640 if (parser->m_handlerArg == parser->m_userData) 1641 parser->m_handlerArg = parser->m_userData = p; 1642 else 1643 parser->m_userData = p; 1644 } 1645 1646 enum XML_Status XMLCALL 1647 XML_SetBase(XML_Parser parser, const XML_Char *p) { 1648 if (parser == NULL) 1649 return XML_STATUS_ERROR; 1650 if (p) { 1651 p = poolCopyString(&parser->m_dtd->pool, p); 1652 if (! p) 1653 return XML_STATUS_ERROR; 1654 parser->m_curBase = p; 1655 } else 1656 parser->m_curBase = NULL; 1657 return XML_STATUS_OK; 1658 } 1659 1660 const XML_Char *XMLCALL 1661 XML_GetBase(XML_Parser parser) { 1662 if (parser == NULL) 1663 return NULL; 1664 return parser->m_curBase; 1665 } 1666 1667 int XMLCALL 1668 XML_GetSpecifiedAttributeCount(XML_Parser parser) { 1669 if (parser == NULL) 1670 return -1; 1671 return parser->m_nSpecifiedAtts; 1672 } 1673 1674 int XMLCALL 1675 XML_GetIdAttributeIndex(XML_Parser parser) { 1676 if (parser == NULL) 1677 return -1; 1678 return parser->m_idAttIndex; 1679 } 1680 1681 #ifdef XML_ATTR_INFO 1682 const XML_AttrInfo *XMLCALL 1683 XML_GetAttributeInfo(XML_Parser parser) { 1684 if (parser == NULL) 1685 return NULL; 1686 return parser->m_attInfo; 1687 } 1688 #endif 1689 1690 void XMLCALL 1691 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 1692 XML_EndElementHandler end) { 1693 if (parser == NULL) 1694 return; 1695 parser->m_startElementHandler = start; 1696 parser->m_endElementHandler = end; 1697 } 1698 1699 void XMLCALL 1700 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 1701 if (parser != NULL) 1702 parser->m_startElementHandler = start; 1703 } 1704 1705 void XMLCALL 1706 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 1707 if (parser != NULL) 1708 parser->m_endElementHandler = end; 1709 } 1710 1711 void XMLCALL 1712 
XML_SetCharacterDataHandler(XML_Parser parser, 1713 XML_CharacterDataHandler handler) { 1714 if (parser != NULL) 1715 parser->m_characterDataHandler = handler; 1716 } 1717 1718 void XMLCALL 1719 XML_SetProcessingInstructionHandler(XML_Parser parser, 1720 XML_ProcessingInstructionHandler handler) { 1721 if (parser != NULL) 1722 parser->m_processingInstructionHandler = handler; 1723 } 1724 1725 void XMLCALL 1726 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 1727 if (parser != NULL) 1728 parser->m_commentHandler = handler; 1729 } 1730 1731 void XMLCALL 1732 XML_SetCdataSectionHandler(XML_Parser parser, 1733 XML_StartCdataSectionHandler start, 1734 XML_EndCdataSectionHandler end) { 1735 if (parser == NULL) 1736 return; 1737 parser->m_startCdataSectionHandler = start; 1738 parser->m_endCdataSectionHandler = end; 1739 } 1740 1741 void XMLCALL 1742 XML_SetStartCdataSectionHandler(XML_Parser parser, 1743 XML_StartCdataSectionHandler start) { 1744 if (parser != NULL) 1745 parser->m_startCdataSectionHandler = start; 1746 } 1747 1748 void XMLCALL 1749 XML_SetEndCdataSectionHandler(XML_Parser parser, 1750 XML_EndCdataSectionHandler end) { 1751 if (parser != NULL) 1752 parser->m_endCdataSectionHandler = end; 1753 } 1754 1755 void XMLCALL 1756 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 1757 if (parser == NULL) 1758 return; 1759 parser->m_defaultHandler = handler; 1760 parser->m_defaultExpandInternalEntities = XML_FALSE; 1761 } 1762 1763 void XMLCALL 1764 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 1765 if (parser == NULL) 1766 return; 1767 parser->m_defaultHandler = handler; 1768 parser->m_defaultExpandInternalEntities = XML_TRUE; 1769 } 1770 1771 void XMLCALL 1772 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 1773 XML_EndDoctypeDeclHandler end) { 1774 if (parser == NULL) 1775 return; 1776 parser->m_startDoctypeDeclHandler = start; 1777 
parser->m_endDoctypeDeclHandler = end; 1778 } 1779 1780 void XMLCALL 1781 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 1782 XML_StartDoctypeDeclHandler start) { 1783 if (parser != NULL) 1784 parser->m_startDoctypeDeclHandler = start; 1785 } 1786 1787 void XMLCALL 1788 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 1789 if (parser != NULL) 1790 parser->m_endDoctypeDeclHandler = end; 1791 } 1792 1793 void XMLCALL 1794 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 1795 XML_UnparsedEntityDeclHandler handler) { 1796 if (parser != NULL) 1797 parser->m_unparsedEntityDeclHandler = handler; 1798 } 1799 1800 void XMLCALL 1801 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 1802 if (parser != NULL) 1803 parser->m_notationDeclHandler = handler; 1804 } 1805 1806 void XMLCALL 1807 XML_SetNamespaceDeclHandler(XML_Parser parser, 1808 XML_StartNamespaceDeclHandler start, 1809 XML_EndNamespaceDeclHandler end) { 1810 if (parser == NULL) 1811 return; 1812 parser->m_startNamespaceDeclHandler = start; 1813 parser->m_endNamespaceDeclHandler = end; 1814 } 1815 1816 void XMLCALL 1817 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 1818 XML_StartNamespaceDeclHandler start) { 1819 if (parser != NULL) 1820 parser->m_startNamespaceDeclHandler = start; 1821 } 1822 1823 void XMLCALL 1824 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 1825 XML_EndNamespaceDeclHandler end) { 1826 if (parser != NULL) 1827 parser->m_endNamespaceDeclHandler = end; 1828 } 1829 1830 void XMLCALL 1831 XML_SetNotStandaloneHandler(XML_Parser parser, 1832 XML_NotStandaloneHandler handler) { 1833 if (parser != NULL) 1834 parser->m_notStandaloneHandler = handler; 1835 } 1836 1837 void XMLCALL 1838 XML_SetExternalEntityRefHandler(XML_Parser parser, 1839 XML_ExternalEntityRefHandler handler) { 1840 if (parser != NULL) 1841 parser->m_externalEntityRefHandler = handler; 1842 } 1843 1844 void XMLCALL 1845 
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { 1846 if (parser == NULL) 1847 return; 1848 if (arg) 1849 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 1850 else 1851 parser->m_externalEntityRefHandlerArg = parser; 1852 } 1853 1854 void XMLCALL 1855 XML_SetSkippedEntityHandler(XML_Parser parser, 1856 XML_SkippedEntityHandler handler) { 1857 if (parser != NULL) 1858 parser->m_skippedEntityHandler = handler; 1859 } 1860 1861 void XMLCALL 1862 XML_SetUnknownEncodingHandler(XML_Parser parser, 1863 XML_UnknownEncodingHandler handler, void *data) { 1864 if (parser == NULL) 1865 return; 1866 parser->m_unknownEncodingHandler = handler; 1867 parser->m_unknownEncodingHandlerData = data; 1868 } 1869 1870 void XMLCALL 1871 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 1872 if (parser != NULL) 1873 parser->m_elementDeclHandler = eldecl; 1874 } 1875 1876 void XMLCALL 1877 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 1878 if (parser != NULL) 1879 parser->m_attlistDeclHandler = attdecl; 1880 } 1881 1882 void XMLCALL 1883 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 1884 if (parser != NULL) 1885 parser->m_entityDeclHandler = handler; 1886 } 1887 1888 void XMLCALL 1889 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 1890 if (parser != NULL) 1891 parser->m_xmlDeclHandler = handler; 1892 } 1893 1894 int XMLCALL 1895 XML_SetParamEntityParsing(XML_Parser parser, 1896 enum XML_ParamEntityParsing peParsing) { 1897 if (parser == NULL) 1898 return 0; 1899 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1900 if (parser->m_parsingStatus.parsing == XML_PARSING 1901 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1902 return 0; 1903 #ifdef XML_DTD 1904 parser->m_paramEntityParsing = peParsing; 1905 return 1; 1906 #else 1907 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 1908 #endif 1909 } 1910 1911 int XMLCALL 1912 
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 1913 if (parser == NULL) 1914 return 0; 1915 if (parser->m_parentParser) 1916 return XML_SetHashSalt(parser->m_parentParser, hash_salt); 1917 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1918 if (parser->m_parsingStatus.parsing == XML_PARSING 1919 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1920 return 0; 1921 parser->m_hash_secret_salt = hash_salt; 1922 return 1; 1923 } 1924 1925 enum XML_Status XMLCALL 1926 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { 1927 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { 1928 if (parser != NULL) 1929 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 1930 return XML_STATUS_ERROR; 1931 } 1932 switch (parser->m_parsingStatus.parsing) { 1933 case XML_SUSPENDED: 1934 parser->m_errorCode = XML_ERROR_SUSPENDED; 1935 return XML_STATUS_ERROR; 1936 case XML_FINISHED: 1937 parser->m_errorCode = XML_ERROR_FINISHED; 1938 return XML_STATUS_ERROR; 1939 case XML_INITIALIZED: 1940 if (parser->m_parentParser == NULL && ! startParsing(parser)) { 1941 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1942 return XML_STATUS_ERROR; 1943 } 1944 /* fall through */ 1945 default: 1946 parser->m_parsingStatus.parsing = XML_PARSING; 1947 } 1948 1949 #if XML_CONTEXT_BYTES == 0 1950 if (parser->m_bufferPtr == parser->m_bufferEnd) { 1951 const char *end; 1952 int nLeftOver; 1953 enum XML_Status result; 1954 /* Detect overflow (a+b > MAX <==> b > MAX-a) */ 1955 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { 1956 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1957 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 1958 parser->m_processor = errorProcessor; 1959 return XML_STATUS_ERROR; 1960 } 1961 // though this isn't a buffer request, we assume that `len` is the app's 1962 // preferred buffer fill size, and therefore save it here. 
1963 parser->m_lastBufferRequestSize = len; 1964 parser->m_parseEndByteIndex += len; 1965 parser->m_positionPtr = s; 1966 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 1967 1968 parser->m_errorCode 1969 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); 1970 1971 if (parser->m_errorCode != XML_ERROR_NONE) { 1972 parser->m_eventEndPtr = parser->m_eventPtr; 1973 parser->m_processor = errorProcessor; 1974 return XML_STATUS_ERROR; 1975 } else { 1976 switch (parser->m_parsingStatus.parsing) { 1977 case XML_SUSPENDED: 1978 result = XML_STATUS_SUSPENDED; 1979 break; 1980 case XML_INITIALIZED: 1981 case XML_PARSING: 1982 if (isFinal) { 1983 parser->m_parsingStatus.parsing = XML_FINISHED; 1984 return XML_STATUS_OK; 1985 } 1986 /* fall through */ 1987 default: 1988 result = XML_STATUS_OK; 1989 } 1990 } 1991 1992 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, 1993 &parser->m_position); 1994 nLeftOver = s + len - end; 1995 if (nLeftOver) { 1996 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED 1997 // (and XML_ERROR_FINISHED) from XML_GetBuffer. 1998 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; 1999 parser->m_parsingStatus.parsing = XML_PARSING; 2000 void *const temp = XML_GetBuffer(parser, nLeftOver); 2001 parser->m_parsingStatus.parsing = originalStatus; 2002 // GetBuffer may have overwritten this, but we want to remember what the 2003 // app requested, not how many bytes were left over after parsing. 2004 parser->m_lastBufferRequestSize = len; 2005 if (temp == NULL) { 2006 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
2007 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2008 parser->m_processor = errorProcessor; 2009 return XML_STATUS_ERROR; 2010 } 2011 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we 2012 // don't have any data to preserve, and can copy straight into the start 2013 // of the buffer rather than the GetBuffer return pointer (which may be 2014 // pointing further into the allocated buffer). 2015 memcpy(parser->m_buffer, end, nLeftOver); 2016 } 2017 parser->m_bufferPtr = parser->m_buffer; 2018 parser->m_bufferEnd = parser->m_buffer + nLeftOver; 2019 parser->m_positionPtr = parser->m_bufferPtr; 2020 parser->m_parseEndPtr = parser->m_bufferEnd; 2021 parser->m_eventPtr = parser->m_bufferPtr; 2022 parser->m_eventEndPtr = parser->m_bufferPtr; 2023 return result; 2024 } 2025 #endif /* XML_CONTEXT_BYTES == 0 */ 2026 void *buff = XML_GetBuffer(parser, len); 2027 if (buff == NULL) 2028 return XML_STATUS_ERROR; 2029 if (len > 0) { 2030 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above 2031 memcpy(buff, s, len); 2032 } 2033 return XML_ParseBuffer(parser, len, isFinal); 2034 } 2035 2036 enum XML_Status XMLCALL 2037 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { 2038 const char *start; 2039 enum XML_Status result = XML_STATUS_OK; 2040 2041 if (parser == NULL) 2042 return XML_STATUS_ERROR; 2043 2044 if (len < 0) { 2045 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2046 return XML_STATUS_ERROR; 2047 } 2048 2049 switch (parser->m_parsingStatus.parsing) { 2050 case XML_SUSPENDED: 2051 parser->m_errorCode = XML_ERROR_SUSPENDED; 2052 return XML_STATUS_ERROR; 2053 case XML_FINISHED: 2054 parser->m_errorCode = XML_ERROR_FINISHED; 2055 return XML_STATUS_ERROR; 2056 case XML_INITIALIZED: 2057 /* Has someone called XML_GetBuffer successfully before? */ 2058 if (! parser->m_bufferPtr) { 2059 parser->m_errorCode = XML_ERROR_NO_BUFFER; 2060 return XML_STATUS_ERROR; 2061 } 2062 2063 if (parser->m_parentParser == NULL && ! 
startParsing(parser)) { 2064 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2065 return XML_STATUS_ERROR; 2066 } 2067 /* fall through */ 2068 default: 2069 parser->m_parsingStatus.parsing = XML_PARSING; 2070 } 2071 2072 start = parser->m_bufferPtr; 2073 parser->m_positionPtr = start; 2074 parser->m_bufferEnd += len; 2075 parser->m_parseEndPtr = parser->m_bufferEnd; 2076 parser->m_parseEndByteIndex += len; 2077 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2078 2079 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, 2080 &parser->m_bufferPtr); 2081 2082 if (parser->m_errorCode != XML_ERROR_NONE) { 2083 parser->m_eventEndPtr = parser->m_eventPtr; 2084 parser->m_processor = errorProcessor; 2085 return XML_STATUS_ERROR; 2086 } else { 2087 switch (parser->m_parsingStatus.parsing) { 2088 case XML_SUSPENDED: 2089 result = XML_STATUS_SUSPENDED; 2090 break; 2091 case XML_INITIALIZED: 2092 case XML_PARSING: 2093 if (isFinal) { 2094 parser->m_parsingStatus.parsing = XML_FINISHED; 2095 return result; 2096 } 2097 default:; /* should not happen */ 2098 } 2099 } 2100 2101 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2102 parser->m_bufferPtr, &parser->m_position); 2103 parser->m_positionPtr = parser->m_bufferPtr; 2104 return result; 2105 } 2106 2107 void *XMLCALL 2108 XML_GetBuffer(XML_Parser parser, int len) { 2109 if (parser == NULL) 2110 return NULL; 2111 if (len < 0) { 2112 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2113 return NULL; 2114 } 2115 switch (parser->m_parsingStatus.parsing) { 2116 case XML_SUSPENDED: 2117 parser->m_errorCode = XML_ERROR_SUSPENDED; 2118 return NULL; 2119 case XML_FINISHED: 2120 parser->m_errorCode = XML_ERROR_FINISHED; 2121 return NULL; 2122 default:; 2123 } 2124 2125 // whether or not the request succeeds, `len` seems to be the app's preferred 2126 // buffer fill size; remember it. 
2127 parser->m_lastBufferRequestSize = len; 2128 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) 2129 || parser->m_buffer == NULL) { 2130 #if XML_CONTEXT_BYTES > 0 2131 int keep; 2132 #endif /* XML_CONTEXT_BYTES > 0 */ 2133 /* Do not invoke signed arithmetic overflow: */ 2134 int neededSize = (int)((unsigned)len 2135 + (unsigned)EXPAT_SAFE_PTR_DIFF( 2136 parser->m_bufferEnd, parser->m_bufferPtr)); 2137 if (neededSize < 0) { 2138 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2139 return NULL; 2140 } 2141 #if XML_CONTEXT_BYTES > 0 2142 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 2143 if (keep > XML_CONTEXT_BYTES) 2144 keep = XML_CONTEXT_BYTES; 2145 /* Detect and prevent integer overflow */ 2146 if (keep > INT_MAX - neededSize) { 2147 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2148 return NULL; 2149 } 2150 neededSize += keep; 2151 #endif /* XML_CONTEXT_BYTES > 0 */ 2152 if (parser->m_buffer && parser->m_bufferPtr 2153 && neededSize 2154 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { 2155 #if XML_CONTEXT_BYTES > 0 2156 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { 2157 int offset 2158 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) 2159 - keep; 2160 /* The buffer pointers cannot be NULL here; we have at least some bytes 2161 * in the buffer */ 2162 memmove(parser->m_buffer, &parser->m_buffer[offset], 2163 parser->m_bufferEnd - parser->m_bufferPtr + keep); 2164 parser->m_bufferEnd -= offset; 2165 parser->m_bufferPtr -= offset; 2166 } 2167 #else 2168 memmove(parser->m_buffer, parser->m_bufferPtr, 2169 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2170 parser->m_bufferEnd 2171 = parser->m_buffer 2172 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2173 parser->m_bufferPtr = parser->m_buffer; 2174 #endif /* XML_CONTEXT_BYTES > 0 */ 2175 } else { 2176 char *newBuf; 2177 int bufferSize 2178 = 
(int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); 2179 if (bufferSize == 0) 2180 bufferSize = INIT_BUFFER_SIZE; 2181 do { 2182 /* Do not invoke signed arithmetic overflow: */ 2183 bufferSize = (int)(2U * (unsigned)bufferSize); 2184 } while (bufferSize < neededSize && bufferSize > 0); 2185 if (bufferSize <= 0) { 2186 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2187 return NULL; 2188 } 2189 newBuf = (char *)MALLOC(parser, bufferSize); 2190 if (newBuf == 0) { 2191 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2192 return NULL; 2193 } 2194 parser->m_bufferLim = newBuf + bufferSize; 2195 #if XML_CONTEXT_BYTES > 0 2196 if (parser->m_bufferPtr) { 2197 memcpy(newBuf, &parser->m_bufferPtr[-keep], 2198 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2199 + keep); 2200 FREE(parser, parser->m_buffer); 2201 parser->m_buffer = newBuf; 2202 parser->m_bufferEnd 2203 = parser->m_buffer 2204 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2205 + keep; 2206 parser->m_bufferPtr = parser->m_buffer + keep; 2207 } else { 2208 /* This must be a brand new buffer with no data in it yet */ 2209 parser->m_bufferEnd = newBuf; 2210 parser->m_bufferPtr = parser->m_buffer = newBuf; 2211 } 2212 #else 2213 if (parser->m_bufferPtr) { 2214 memcpy(newBuf, parser->m_bufferPtr, 2215 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2216 FREE(parser, parser->m_buffer); 2217 parser->m_bufferEnd 2218 = newBuf 2219 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2220 } else { 2221 /* This must be a brand new buffer with no data in it yet */ 2222 parser->m_bufferEnd = newBuf; 2223 } 2224 parser->m_bufferPtr = parser->m_buffer = newBuf; 2225 #endif /* XML_CONTEXT_BYTES > 0 */ 2226 } 2227 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2228 parser->m_positionPtr = NULL; 2229 } 2230 return parser->m_bufferEnd; 2231 } 2232 2233 enum XML_Status XMLCALL 2234 XML_StopParser(XML_Parser parser, XML_Bool resumable) { 2235 if (parser == NULL) 
2236 return XML_STATUS_ERROR; 2237 switch (parser->m_parsingStatus.parsing) { 2238 case XML_INITIALIZED: 2239 parser->m_errorCode = XML_ERROR_NOT_STARTED; 2240 return XML_STATUS_ERROR; 2241 case XML_SUSPENDED: 2242 if (resumable) { 2243 parser->m_errorCode = XML_ERROR_SUSPENDED; 2244 return XML_STATUS_ERROR; 2245 } 2246 parser->m_parsingStatus.parsing = XML_FINISHED; 2247 break; 2248 case XML_FINISHED: 2249 parser->m_errorCode = XML_ERROR_FINISHED; 2250 return XML_STATUS_ERROR; 2251 case XML_PARSING: 2252 if (resumable) { 2253 #ifdef XML_DTD 2254 if (parser->m_isParamEntity) { 2255 parser->m_errorCode = XML_ERROR_SUSPEND_PE; 2256 return XML_STATUS_ERROR; 2257 } 2258 #endif 2259 parser->m_parsingStatus.parsing = XML_SUSPENDED; 2260 } else 2261 parser->m_parsingStatus.parsing = XML_FINISHED; 2262 break; 2263 default: 2264 assert(0); 2265 } 2266 return XML_STATUS_OK; 2267 } 2268 2269 enum XML_Status XMLCALL 2270 XML_ResumeParser(XML_Parser parser) { 2271 enum XML_Status result = XML_STATUS_OK; 2272 2273 if (parser == NULL) 2274 return XML_STATUS_ERROR; 2275 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { 2276 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; 2277 return XML_STATUS_ERROR; 2278 } 2279 parser->m_parsingStatus.parsing = XML_PARSING; 2280 2281 parser->m_errorCode = callProcessor( 2282 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); 2283 2284 if (parser->m_errorCode != XML_ERROR_NONE) { 2285 parser->m_eventEndPtr = parser->m_eventPtr; 2286 parser->m_processor = errorProcessor; 2287 return XML_STATUS_ERROR; 2288 } else { 2289 switch (parser->m_parsingStatus.parsing) { 2290 case XML_SUSPENDED: 2291 result = XML_STATUS_SUSPENDED; 2292 break; 2293 case XML_INITIALIZED: 2294 case XML_PARSING: 2295 if (parser->m_parsingStatus.finalBuffer) { 2296 parser->m_parsingStatus.parsing = XML_FINISHED; 2297 return result; 2298 } 2299 default:; 2300 } 2301 } 2302 2303 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2304 
parser->m_bufferPtr, &parser->m_position); 2305 parser->m_positionPtr = parser->m_bufferPtr; 2306 return result; 2307 } 2308 2309 void XMLCALL 2310 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { 2311 if (parser == NULL) 2312 return; 2313 assert(status != NULL); 2314 *status = parser->m_parsingStatus; 2315 } 2316 2317 enum XML_Error XMLCALL 2318 XML_GetErrorCode(XML_Parser parser) { 2319 if (parser == NULL) 2320 return XML_ERROR_INVALID_ARGUMENT; 2321 return parser->m_errorCode; 2322 } 2323 2324 XML_Index XMLCALL 2325 XML_GetCurrentByteIndex(XML_Parser parser) { 2326 if (parser == NULL) 2327 return -1; 2328 if (parser->m_eventPtr) 2329 return (XML_Index)(parser->m_parseEndByteIndex 2330 - (parser->m_parseEndPtr - parser->m_eventPtr)); 2331 return -1; 2332 } 2333 2334 int XMLCALL 2335 XML_GetCurrentByteCount(XML_Parser parser) { 2336 if (parser == NULL) 2337 return 0; 2338 if (parser->m_eventEndPtr && parser->m_eventPtr) 2339 return (int)(parser->m_eventEndPtr - parser->m_eventPtr); 2340 return 0; 2341 } 2342 2343 const char *XMLCALL 2344 XML_GetInputContext(XML_Parser parser, int *offset, int *size) { 2345 #if XML_CONTEXT_BYTES > 0 2346 if (parser == NULL) 2347 return NULL; 2348 if (parser->m_eventPtr && parser->m_buffer) { 2349 if (offset != NULL) 2350 *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2351 if (size != NULL) 2352 *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2353 return parser->m_buffer; 2354 } 2355 #else 2356 (void)parser; 2357 (void)offset; 2358 (void)size; 2359 #endif /* XML_CONTEXT_BYTES > 0 */ 2360 return (const char *)0; 2361 } 2362 2363 XML_Size XMLCALL 2364 XML_GetCurrentLineNumber(XML_Parser parser) { 2365 if (parser == NULL) 2366 return 0; 2367 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2368 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2369 parser->m_eventPtr, &parser->m_position); 2370 parser->m_positionPtr = parser->m_eventPtr; 2371 } 2372 return 
parser->m_position.lineNumber + 1; 2373 } 2374 2375 XML_Size XMLCALL 2376 XML_GetCurrentColumnNumber(XML_Parser parser) { 2377 if (parser == NULL) 2378 return 0; 2379 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2380 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2381 parser->m_eventPtr, &parser->m_position); 2382 parser->m_positionPtr = parser->m_eventPtr; 2383 } 2384 return parser->m_position.columnNumber; 2385 } 2386 2387 void XMLCALL 2388 XML_FreeContentModel(XML_Parser parser, XML_Content *model) { 2389 if (parser != NULL) 2390 FREE(parser, model); 2391 } 2392 2393 void *XMLCALL 2394 XML_MemMalloc(XML_Parser parser, size_t size) { 2395 if (parser == NULL) 2396 return NULL; 2397 return MALLOC(parser, size); 2398 } 2399 2400 void *XMLCALL 2401 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { 2402 if (parser == NULL) 2403 return NULL; 2404 return REALLOC(parser, ptr, size); 2405 } 2406 2407 void XMLCALL 2408 XML_MemFree(XML_Parser parser, void *ptr) { 2409 if (parser != NULL) 2410 FREE(parser, ptr); 2411 } 2412 2413 void XMLCALL 2414 XML_DefaultCurrent(XML_Parser parser) { 2415 if (parser == NULL) 2416 return; 2417 if (parser->m_defaultHandler) { 2418 if (parser->m_openInternalEntities) 2419 reportDefault(parser, parser->m_internalEncoding, 2420 parser->m_openInternalEntities->internalEventPtr, 2421 parser->m_openInternalEntities->internalEventEndPtr); 2422 else 2423 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, 2424 parser->m_eventEndPtr); 2425 } 2426 } 2427 2428 const XML_LChar *XMLCALL 2429 XML_ErrorString(enum XML_Error code) { 2430 switch (code) { 2431 case XML_ERROR_NONE: 2432 return NULL; 2433 case XML_ERROR_NO_MEMORY: 2434 return XML_L("out of memory"); 2435 case XML_ERROR_SYNTAX: 2436 return XML_L("syntax error"); 2437 case XML_ERROR_NO_ELEMENTS: 2438 return XML_L("no element found"); 2439 case XML_ERROR_INVALID_TOKEN: 2440 return XML_L("not well-formed (invalid token)"); 2441 case 
XML_ERROR_UNCLOSED_TOKEN: 2442 return XML_L("unclosed token"); 2443 case XML_ERROR_PARTIAL_CHAR: 2444 return XML_L("partial character"); 2445 case XML_ERROR_TAG_MISMATCH: 2446 return XML_L("mismatched tag"); 2447 case XML_ERROR_DUPLICATE_ATTRIBUTE: 2448 return XML_L("duplicate attribute"); 2449 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2450 return XML_L("junk after document element"); 2451 case XML_ERROR_PARAM_ENTITY_REF: 2452 return XML_L("illegal parameter entity reference"); 2453 case XML_ERROR_UNDEFINED_ENTITY: 2454 return XML_L("undefined entity"); 2455 case XML_ERROR_RECURSIVE_ENTITY_REF: 2456 return XML_L("recursive entity reference"); 2457 case XML_ERROR_ASYNC_ENTITY: 2458 return XML_L("asynchronous entity"); 2459 case XML_ERROR_BAD_CHAR_REF: 2460 return XML_L("reference to invalid character number"); 2461 case XML_ERROR_BINARY_ENTITY_REF: 2462 return XML_L("reference to binary entity"); 2463 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: 2464 return XML_L("reference to external entity in attribute"); 2465 case XML_ERROR_MISPLACED_XML_PI: 2466 return XML_L("XML or text declaration not at start of entity"); 2467 case XML_ERROR_UNKNOWN_ENCODING: 2468 return XML_L("unknown encoding"); 2469 case XML_ERROR_INCORRECT_ENCODING: 2470 return XML_L("encoding specified in XML declaration is incorrect"); 2471 case XML_ERROR_UNCLOSED_CDATA_SECTION: 2472 return XML_L("unclosed CDATA section"); 2473 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: 2474 return XML_L("error in processing external entity reference"); 2475 case XML_ERROR_NOT_STANDALONE: 2476 return XML_L("document is not standalone"); 2477 case XML_ERROR_UNEXPECTED_STATE: 2478 return XML_L("unexpected parser state - please send a bug report"); 2479 case XML_ERROR_ENTITY_DECLARED_IN_PE: 2480 return XML_L("entity declared in parameter entity"); 2481 case XML_ERROR_FEATURE_REQUIRES_XML_DTD: 2482 return XML_L("requested feature requires XML_DTD support in Expat"); 2483 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: 2484 
return XML_L("cannot change setting once parsing has begun"); 2485 /* Added in 1.95.7. */ 2486 case XML_ERROR_UNBOUND_PREFIX: 2487 return XML_L("unbound prefix"); 2488 /* Added in 1.95.8. */ 2489 case XML_ERROR_UNDECLARING_PREFIX: 2490 return XML_L("must not undeclare prefix"); 2491 case XML_ERROR_INCOMPLETE_PE: 2492 return XML_L("incomplete markup in parameter entity"); 2493 case XML_ERROR_XML_DECL: 2494 return XML_L("XML declaration not well-formed"); 2495 case XML_ERROR_TEXT_DECL: 2496 return XML_L("text declaration not well-formed"); 2497 case XML_ERROR_PUBLICID: 2498 return XML_L("illegal character(s) in public id"); 2499 case XML_ERROR_SUSPENDED: 2500 return XML_L("parser suspended"); 2501 case XML_ERROR_NOT_SUSPENDED: 2502 return XML_L("parser not suspended"); 2503 case XML_ERROR_ABORTED: 2504 return XML_L("parsing aborted"); 2505 case XML_ERROR_FINISHED: 2506 return XML_L("parsing finished"); 2507 case XML_ERROR_SUSPEND_PE: 2508 return XML_L("cannot suspend in external parameter entity"); 2509 /* Added in 2.0.0. */ 2510 case XML_ERROR_RESERVED_PREFIX_XML: 2511 return XML_L( 2512 "reserved prefix (xml) must not be undeclared or bound to another namespace name"); 2513 case XML_ERROR_RESERVED_PREFIX_XMLNS: 2514 return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); 2515 case XML_ERROR_RESERVED_NAMESPACE_URI: 2516 return XML_L( 2517 "prefix must not be bound to one of the reserved namespace names"); 2518 /* Added in 2.2.5. */ 2519 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ 2520 return XML_L("invalid argument"); 2521 /* Added in 2.3.0. */ 2522 case XML_ERROR_NO_BUFFER: 2523 return XML_L( 2524 "a successful prior call to function XML_GetBuffer is required"); 2525 /* Added in 2.4.0. */ 2526 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: 2527 return XML_L( 2528 "limit on input amplification factor (from DTD and entities) breached"); 2529 /* Added in 2.6.4. 
*/ 2530 case XML_ERROR_NOT_STARTED: 2531 return XML_L("parser not started"); 2532 } 2533 return NULL; 2534 } 2535 2536 const XML_LChar *XMLCALL 2537 XML_ExpatVersion(void) { 2538 /* V1 is used to string-ize the version number. However, it would 2539 string-ize the actual version macro *names* unless we get them 2540 substituted before being passed to V1. CPP is defined to expand 2541 a macro, then rescan for more expansions. Thus, we use V2 to expand 2542 the version macros, then CPP will expand the resulting V1() macro 2543 with the correct numerals. */ 2544 /* ### I'm assuming cpp is portable in this respect... */ 2545 2546 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) 2547 #define V2(a, b, c) XML_L("expat_") V1(a, b, c) 2548 2549 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); 2550 2551 #undef V1 2552 #undef V2 2553 } 2554 2555 XML_Expat_Version XMLCALL 2556 XML_ExpatVersionInfo(void) { 2557 XML_Expat_Version version; 2558 2559 version.major = XML_MAJOR_VERSION; 2560 version.minor = XML_MINOR_VERSION; 2561 version.micro = XML_MICRO_VERSION; 2562 2563 return version; 2564 } 2565 2566 const XML_Feature *XMLCALL 2567 XML_GetFeatureList(void) { 2568 static const XML_Feature features[] = { 2569 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), 2570 sizeof(XML_Char)}, 2571 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), 2572 sizeof(XML_LChar)}, 2573 #ifdef XML_UNICODE 2574 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, 2575 #endif 2576 #ifdef XML_UNICODE_WCHAR_T 2577 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, 2578 #endif 2579 #ifdef XML_DTD 2580 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, 2581 #endif 2582 #if XML_CONTEXT_BYTES > 0 2583 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), 2584 XML_CONTEXT_BYTES}, 2585 #endif 2586 #ifdef XML_MIN_SIZE 2587 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, 2588 #endif 2589 #ifdef XML_NS 2590 {XML_FEATURE_NS, XML_L("XML_NS"), 0}, 2591 
#endif 2592 #ifdef XML_LARGE_SIZE 2593 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, 2594 #endif 2595 #ifdef XML_ATTR_INFO 2596 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, 2597 #endif 2598 #if XML_GE == 1 2599 /* Added in Expat 2.4.0 for XML_DTD defined and 2600 * added in Expat 2.6.0 for XML_GE == 1. */ 2601 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, 2602 XML_L("XML_BLAP_MAX_AMP"), 2603 (long int) 2604 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, 2605 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, 2606 XML_L("XML_BLAP_ACT_THRES"), 2607 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, 2608 /* Added in Expat 2.6.0. */ 2609 {XML_FEATURE_GE, XML_L("XML_GE"), 0}, 2610 #endif 2611 {XML_FEATURE_END, NULL, 0}}; 2612 2613 return features; 2614 } 2615 2616 #if XML_GE == 1 2617 XML_Bool XMLCALL 2618 XML_SetBillionLaughsAttackProtectionMaximumAmplification( 2619 XML_Parser parser, float maximumAmplificationFactor) { 2620 if ((parser == NULL) || (parser->m_parentParser != NULL) 2621 || isnan(maximumAmplificationFactor) 2622 || (maximumAmplificationFactor < 1.0f)) { 2623 return XML_FALSE; 2624 } 2625 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; 2626 return XML_TRUE; 2627 } 2628 2629 XML_Bool XMLCALL 2630 XML_SetBillionLaughsAttackProtectionActivationThreshold( 2631 XML_Parser parser, unsigned long long activationThresholdBytes) { 2632 if ((parser == NULL) || (parser->m_parentParser != NULL)) { 2633 return XML_FALSE; 2634 } 2635 parser->m_accounting.activationThresholdBytes = activationThresholdBytes; 2636 return XML_TRUE; 2637 } 2638 #endif /* XML_GE == 1 */ 2639 2640 XML_Bool XMLCALL 2641 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { 2642 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { 2643 parser->m_reparseDeferralEnabled = enabled; 2644 return XML_TRUE; 2645 } 2646 return 
XML_FALSE; 2647 } 2648 2649 /* Initially tag->rawName always points into the parse buffer; 2650 for those TAG instances opened while the current parse buffer was 2651 processed, and not yet closed, we need to store tag->rawName in a more 2652 permanent location, since the parse buffer is about to be discarded. 2653 */ 2654 static XML_Bool 2655 storeRawNames(XML_Parser parser) { 2656 TAG *tag = parser->m_tagStack; 2657 while (tag) { 2658 int bufSize; 2659 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); 2660 size_t rawNameLen; 2661 char *rawNameBuf = tag->buf + nameLen; 2662 /* Stop if already stored. Since m_tagStack is a stack, we can stop 2663 at the first entry that has already been copied; everything 2664 below it in the stack is already been accounted for in a 2665 previous call to this function. 2666 */ 2667 if (tag->rawName == rawNameBuf) 2668 break; 2669 /* For reuse purposes we need to ensure that the 2670 size of tag->buf is a multiple of sizeof(XML_Char). 2671 */ 2672 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); 2673 /* Detect and prevent integer overflow. 
*/ 2674 if (rawNameLen > (size_t)INT_MAX - nameLen) 2675 return XML_FALSE; 2676 bufSize = nameLen + (int)rawNameLen; 2677 if (bufSize > tag->bufEnd - tag->buf) { 2678 char *temp = (char *)REALLOC(parser, tag->buf, bufSize); 2679 if (temp == NULL) 2680 return XML_FALSE; 2681 /* if tag->name.str points to tag->buf (only when namespace 2682 processing is off) then we have to update it 2683 */ 2684 if (tag->name.str == (XML_Char *)tag->buf) 2685 tag->name.str = (XML_Char *)temp; 2686 /* if tag->name.localPart is set (when namespace processing is on) 2687 then update it as well, since it will always point into tag->buf 2688 */ 2689 if (tag->name.localPart) 2690 tag->name.localPart 2691 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); 2692 tag->buf = temp; 2693 tag->bufEnd = temp + bufSize; 2694 rawNameBuf = temp + nameLen; 2695 } 2696 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); 2697 tag->rawName = rawNameBuf; 2698 tag = tag->parent; 2699 } 2700 return XML_TRUE; 2701 } 2702 2703 static enum XML_Error PTRCALL 2704 contentProcessor(XML_Parser parser, const char *start, const char *end, 2705 const char **endPtr) { 2706 enum XML_Error result = doContent( 2707 parser, 0, parser->m_encoding, start, end, endPtr, 2708 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 2709 if (result == XML_ERROR_NONE) { 2710 if (! 
storeRawNames(parser)) 2711 return XML_ERROR_NO_MEMORY; 2712 } 2713 return result; 2714 } 2715 2716 static enum XML_Error PTRCALL 2717 externalEntityInitProcessor(XML_Parser parser, const char *start, 2718 const char *end, const char **endPtr) { 2719 enum XML_Error result = initializeEncoding(parser); 2720 if (result != XML_ERROR_NONE) 2721 return result; 2722 parser->m_processor = externalEntityInitProcessor2; 2723 return externalEntityInitProcessor2(parser, start, end, endPtr); 2724 } 2725 2726 static enum XML_Error PTRCALL 2727 externalEntityInitProcessor2(XML_Parser parser, const char *start, 2728 const char *end, const char **endPtr) { 2729 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 2730 int tok = XmlContentTok(parser->m_encoding, start, end, &next); 2731 switch (tok) { 2732 case XML_TOK_BOM: 2733 #if XML_GE == 1 2734 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, 2735 XML_ACCOUNT_DIRECT)) { 2736 accountingOnAbort(parser); 2737 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2738 } 2739 #endif /* XML_GE == 1 */ 2740 2741 /* If we are at the end of the buffer, this would cause the next stage, 2742 i.e. externalEntityInitProcessor3, to pass control directly to 2743 doContent (by detecting XML_TOK_NONE) without processing any xml text 2744 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. 2745 */ 2746 if (next == end && ! parser->m_parsingStatus.finalBuffer) { 2747 *endPtr = next; 2748 return XML_ERROR_NONE; 2749 } 2750 start = next; 2751 break; 2752 case XML_TOK_PARTIAL: 2753 if (! parser->m_parsingStatus.finalBuffer) { 2754 *endPtr = start; 2755 return XML_ERROR_NONE; 2756 } 2757 parser->m_eventPtr = start; 2758 return XML_ERROR_UNCLOSED_TOKEN; 2759 case XML_TOK_PARTIAL_CHAR: 2760 if (! 
parser->m_parsingStatus.finalBuffer) { 2761 *endPtr = start; 2762 return XML_ERROR_NONE; 2763 } 2764 parser->m_eventPtr = start; 2765 return XML_ERROR_PARTIAL_CHAR; 2766 } 2767 parser->m_processor = externalEntityInitProcessor3; 2768 return externalEntityInitProcessor3(parser, start, end, endPtr); 2769 } 2770 2771 static enum XML_Error PTRCALL 2772 externalEntityInitProcessor3(XML_Parser parser, const char *start, 2773 const char *end, const char **endPtr) { 2774 int tok; 2775 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 2776 parser->m_eventPtr = start; 2777 tok = XmlContentTok(parser->m_encoding, start, end, &next); 2778 /* Note: These bytes are accounted later in: 2779 - processXmlDecl 2780 - externalEntityContentProcessor 2781 */ 2782 parser->m_eventEndPtr = next; 2783 2784 switch (tok) { 2785 case XML_TOK_XML_DECL: { 2786 enum XML_Error result; 2787 result = processXmlDecl(parser, 1, start, next); 2788 if (result != XML_ERROR_NONE) 2789 return result; 2790 switch (parser->m_parsingStatus.parsing) { 2791 case XML_SUSPENDED: 2792 *endPtr = next; 2793 return XML_ERROR_NONE; 2794 case XML_FINISHED: 2795 return XML_ERROR_ABORTED; 2796 default: 2797 start = next; 2798 } 2799 } break; 2800 case XML_TOK_PARTIAL: 2801 if (! parser->m_parsingStatus.finalBuffer) { 2802 *endPtr = start; 2803 return XML_ERROR_NONE; 2804 } 2805 return XML_ERROR_UNCLOSED_TOKEN; 2806 case XML_TOK_PARTIAL_CHAR: 2807 if (! 
parser->m_parsingStatus.finalBuffer) { 2808 *endPtr = start; 2809 return XML_ERROR_NONE; 2810 } 2811 return XML_ERROR_PARTIAL_CHAR; 2812 } 2813 parser->m_processor = externalEntityContentProcessor; 2814 parser->m_tagLevel = 1; 2815 return externalEntityContentProcessor(parser, start, end, endPtr); 2816 } 2817 2818 static enum XML_Error PTRCALL 2819 externalEntityContentProcessor(XML_Parser parser, const char *start, 2820 const char *end, const char **endPtr) { 2821 enum XML_Error result 2822 = doContent(parser, 1, parser->m_encoding, start, end, endPtr, 2823 (XML_Bool)! parser->m_parsingStatus.finalBuffer, 2824 XML_ACCOUNT_ENTITY_EXPANSION); 2825 if (result == XML_ERROR_NONE) { 2826 if (! storeRawNames(parser)) 2827 return XML_ERROR_NO_MEMORY; 2828 } 2829 return result; 2830 } 2831 2832 static enum XML_Error 2833 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, 2834 const char *s, const char *end, const char **nextPtr, 2835 XML_Bool haveMore, enum XML_Account account) { 2836 /* save one level of indirection */ 2837 DTD *const dtd = parser->m_dtd; 2838 2839 const char **eventPP; 2840 const char **eventEndPP; 2841 if (enc == parser->m_encoding) { 2842 eventPP = &parser->m_eventPtr; 2843 eventEndPP = &parser->m_eventEndPtr; 2844 } else { 2845 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 2846 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 2847 } 2848 *eventPP = s; 2849 2850 for (;;) { 2851 const char *next = s; /* XmlContentTok doesn't always set the last arg */ 2852 int tok = XmlContentTok(enc, s, end, &next); 2853 #if XML_GE == 1 2854 const char *accountAfter 2855 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) 2856 ? (haveMore ? s /* i.e. 0 bytes */ : end) 2857 : next; 2858 if (! 
accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, 2859 account)) { 2860 accountingOnAbort(parser); 2861 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2862 } 2863 #endif 2864 *eventEndPP = next; 2865 switch (tok) { 2866 case XML_TOK_TRAILING_CR: 2867 if (haveMore) { 2868 *nextPtr = s; 2869 return XML_ERROR_NONE; 2870 } 2871 *eventEndPP = end; 2872 if (parser->m_characterDataHandler) { 2873 XML_Char c = 0xA; 2874 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 2875 } else if (parser->m_defaultHandler) 2876 reportDefault(parser, enc, s, end); 2877 /* We are at the end of the final buffer, should we check for 2878 XML_SUSPENDED, XML_FINISHED? 2879 */ 2880 if (startTagLevel == 0) 2881 return XML_ERROR_NO_ELEMENTS; 2882 if (parser->m_tagLevel != startTagLevel) 2883 return XML_ERROR_ASYNC_ENTITY; 2884 *nextPtr = end; 2885 return XML_ERROR_NONE; 2886 case XML_TOK_NONE: 2887 if (haveMore) { 2888 *nextPtr = s; 2889 return XML_ERROR_NONE; 2890 } 2891 if (startTagLevel > 0) { 2892 if (parser->m_tagLevel != startTagLevel) 2893 return XML_ERROR_ASYNC_ENTITY; 2894 *nextPtr = s; 2895 return XML_ERROR_NONE; 2896 } 2897 return XML_ERROR_NO_ELEMENTS; 2898 case XML_TOK_INVALID: 2899 *eventPP = next; 2900 return XML_ERROR_INVALID_TOKEN; 2901 case XML_TOK_PARTIAL: 2902 if (haveMore) { 2903 *nextPtr = s; 2904 return XML_ERROR_NONE; 2905 } 2906 return XML_ERROR_UNCLOSED_TOKEN; 2907 case XML_TOK_PARTIAL_CHAR: 2908 if (haveMore) { 2909 *nextPtr = s; 2910 return XML_ERROR_NONE; 2911 } 2912 return XML_ERROR_PARTIAL_CHAR; 2913 case XML_TOK_ENTITY_REF: { 2914 const XML_Char *name; 2915 ENTITY *entity; 2916 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 2917 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 2918 if (ch) { 2919 #if XML_GE == 1 2920 /* NOTE: We are replacing 4-6 characters original input for 1 character 2921 * so there is no amplification and hence recording without 2922 * protection. 
*/ 2923 accountingDiffTolerated(parser, tok, (char *)&ch, 2924 ((char *)&ch) + sizeof(XML_Char), __LINE__, 2925 XML_ACCOUNT_ENTITY_EXPANSION); 2926 #endif /* XML_GE == 1 */ 2927 if (parser->m_characterDataHandler) 2928 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); 2929 else if (parser->m_defaultHandler) 2930 reportDefault(parser, enc, s, next); 2931 break; 2932 } 2933 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 2934 next - enc->minBytesPerChar); 2935 if (! name) 2936 return XML_ERROR_NO_MEMORY; 2937 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 2938 poolDiscard(&dtd->pool); 2939 /* First, determine if a check for an existing declaration is needed; 2940 if yes, check that the entity exists, and that it is internal, 2941 otherwise call the skipped entity or default handler. 2942 */ 2943 if (! dtd->hasParamEntityRefs || dtd->standalone) { 2944 if (! entity) 2945 return XML_ERROR_UNDEFINED_ENTITY; 2946 else if (! entity->is_internal) 2947 return XML_ERROR_ENTITY_DECLARED_IN_PE; 2948 } else if (! entity) { 2949 if (parser->m_skippedEntityHandler) 2950 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 2951 else if (parser->m_defaultHandler) 2952 reportDefault(parser, enc, s, next); 2953 break; 2954 } 2955 if (entity->open) 2956 return XML_ERROR_RECURSIVE_ENTITY_REF; 2957 if (entity->notation) 2958 return XML_ERROR_BINARY_ENTITY_REF; 2959 if (entity->textPtr) { 2960 enum XML_Error result; 2961 if (! 
parser->m_defaultExpandInternalEntities) { 2962 if (parser->m_skippedEntityHandler) 2963 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 2964 0); 2965 else if (parser->m_defaultHandler) 2966 reportDefault(parser, enc, s, next); 2967 break; 2968 } 2969 result = processInternalEntity(parser, entity, XML_FALSE); 2970 if (result != XML_ERROR_NONE) 2971 return result; 2972 } else if (parser->m_externalEntityRefHandler) { 2973 const XML_Char *context; 2974 entity->open = XML_TRUE; 2975 context = getContext(parser); 2976 entity->open = XML_FALSE; 2977 if (! context) 2978 return XML_ERROR_NO_MEMORY; 2979 if (! parser->m_externalEntityRefHandler( 2980 parser->m_externalEntityRefHandlerArg, context, entity->base, 2981 entity->systemId, entity->publicId)) 2982 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 2983 poolDiscard(&parser->m_tempPool); 2984 } else if (parser->m_defaultHandler) 2985 reportDefault(parser, enc, s, next); 2986 break; 2987 } 2988 case XML_TOK_START_TAG_NO_ATTS: 2989 /* fall through */ 2990 case XML_TOK_START_TAG_WITH_ATTS: { 2991 TAG *tag; 2992 enum XML_Error result; 2993 XML_Char *toPtr; 2994 if (parser->m_freeTagList) { 2995 tag = parser->m_freeTagList; 2996 parser->m_freeTagList = parser->m_freeTagList->parent; 2997 } else { 2998 tag = (TAG *)MALLOC(parser, sizeof(TAG)); 2999 if (! tag) 3000 return XML_ERROR_NO_MEMORY; 3001 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); 3002 if (! 
tag->buf) { 3003 FREE(parser, tag); 3004 return XML_ERROR_NO_MEMORY; 3005 } 3006 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; 3007 } 3008 tag->bindings = NULL; 3009 tag->parent = parser->m_tagStack; 3010 parser->m_tagStack = tag; 3011 tag->name.localPart = NULL; 3012 tag->name.prefix = NULL; 3013 tag->rawName = s + enc->minBytesPerChar; 3014 tag->rawNameLength = XmlNameLength(enc, tag->rawName); 3015 ++parser->m_tagLevel; 3016 { 3017 const char *rawNameEnd = tag->rawName + tag->rawNameLength; 3018 const char *fromPtr = tag->rawName; 3019 toPtr = (XML_Char *)tag->buf; 3020 for (;;) { 3021 int bufSize; 3022 int convLen; 3023 const enum XML_Convert_Result convert_res 3024 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, 3025 (ICHAR *)tag->bufEnd - 1); 3026 convLen = (int)(toPtr - (XML_Char *)tag->buf); 3027 if ((fromPtr >= rawNameEnd) 3028 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { 3029 tag->name.strLen = convLen; 3030 break; 3031 } 3032 bufSize = (int)(tag->bufEnd - tag->buf) << 1; 3033 { 3034 char *temp = (char *)REALLOC(parser, tag->buf, bufSize); 3035 if (temp == NULL) 3036 return XML_ERROR_NO_MEMORY; 3037 tag->buf = temp; 3038 tag->bufEnd = temp + bufSize; 3039 toPtr = (XML_Char *)temp + convLen; 3040 } 3041 } 3042 } 3043 tag->name.str = (XML_Char *)tag->buf; 3044 *toPtr = XML_T('\0'); 3045 result 3046 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); 3047 if (result) 3048 return result; 3049 if (parser->m_startElementHandler) 3050 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, 3051 (const XML_Char **)parser->m_atts); 3052 else if (parser->m_defaultHandler) 3053 reportDefault(parser, enc, s, next); 3054 poolClear(&parser->m_tempPool); 3055 break; 3056 } 3057 case XML_TOK_EMPTY_ELEMENT_NO_ATTS: 3058 /* fall through */ 3059 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { 3060 const char *rawName = s + enc->minBytesPerChar; 3061 enum XML_Error result; 3062 BINDING *bindings = NULL; 3063 XML_Bool noElmHandlers = 
XML_TRUE; 3064 TAG_NAME name; 3065 name.str = poolStoreString(&parser->m_tempPool, enc, rawName, 3066 rawName + XmlNameLength(enc, rawName)); 3067 if (! name.str) 3068 return XML_ERROR_NO_MEMORY; 3069 poolFinish(&parser->m_tempPool); 3070 result = storeAtts(parser, enc, s, &name, &bindings, 3071 XML_ACCOUNT_NONE /* token spans whole start tag */); 3072 if (result != XML_ERROR_NONE) { 3073 freeBindings(parser, bindings); 3074 return result; 3075 } 3076 poolFinish(&parser->m_tempPool); 3077 if (parser->m_startElementHandler) { 3078 parser->m_startElementHandler(parser->m_handlerArg, name.str, 3079 (const XML_Char **)parser->m_atts); 3080 noElmHandlers = XML_FALSE; 3081 } 3082 if (parser->m_endElementHandler) { 3083 if (parser->m_startElementHandler) 3084 *eventPP = *eventEndPP; 3085 parser->m_endElementHandler(parser->m_handlerArg, name.str); 3086 noElmHandlers = XML_FALSE; 3087 } 3088 if (noElmHandlers && parser->m_defaultHandler) 3089 reportDefault(parser, enc, s, next); 3090 poolClear(&parser->m_tempPool); 3091 freeBindings(parser, bindings); 3092 } 3093 if ((parser->m_tagLevel == 0) 3094 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3095 if (parser->m_parsingStatus.parsing == XML_SUSPENDED) 3096 parser->m_processor = epilogProcessor; 3097 else 3098 return epilogProcessor(parser, next, end, nextPtr); 3099 } 3100 break; 3101 case XML_TOK_END_TAG: 3102 if (parser->m_tagLevel == startTagLevel) 3103 return XML_ERROR_ASYNC_ENTITY; 3104 else { 3105 int len; 3106 const char *rawName; 3107 TAG *tag = parser->m_tagStack; 3108 rawName = s + enc->minBytesPerChar * 2; 3109 len = XmlNameLength(enc, rawName); 3110 if (len != tag->rawNameLength 3111 || memcmp(tag->rawName, rawName, len) != 0) { 3112 *eventPP = rawName; 3113 return XML_ERROR_TAG_MISMATCH; 3114 } 3115 parser->m_tagStack = tag->parent; 3116 tag->parent = parser->m_freeTagList; 3117 parser->m_freeTagList = tag; 3118 --parser->m_tagLevel; 3119 if (parser->m_endElementHandler) { 3120 const XML_Char 
*localPart; 3121 const XML_Char *prefix; 3122 XML_Char *uri; 3123 localPart = tag->name.localPart; 3124 if (parser->m_ns && localPart) { 3125 /* localPart and prefix may have been overwritten in 3126 tag->name.str, since this points to the binding->uri 3127 buffer which gets reused; so we have to add them again 3128 */ 3129 uri = (XML_Char *)tag->name.str + tag->name.uriLen; 3130 /* don't need to check for space - already done in storeAtts() */ 3131 while (*localPart) 3132 *uri++ = *localPart++; 3133 prefix = tag->name.prefix; 3134 if (parser->m_ns_triplets && prefix) { 3135 *uri++ = parser->m_namespaceSeparator; 3136 while (*prefix) 3137 *uri++ = *prefix++; 3138 } 3139 *uri = XML_T('\0'); 3140 } 3141 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); 3142 } else if (parser->m_defaultHandler) 3143 reportDefault(parser, enc, s, next); 3144 while (tag->bindings) { 3145 BINDING *b = tag->bindings; 3146 if (parser->m_endNamespaceDeclHandler) 3147 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, 3148 b->prefix->name); 3149 tag->bindings = tag->bindings->nextTagBinding; 3150 b->nextTagBinding = parser->m_freeBindingList; 3151 parser->m_freeBindingList = b; 3152 b->prefix->binding = b->prevPrefixBinding; 3153 } 3154 if ((parser->m_tagLevel == 0) 3155 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3156 if (parser->m_parsingStatus.parsing == XML_SUSPENDED) 3157 parser->m_processor = epilogProcessor; 3158 else 3159 return epilogProcessor(parser, next, end, nextPtr); 3160 } 3161 } 3162 break; 3163 case XML_TOK_CHAR_REF: { 3164 int n = XmlCharRefNumber(enc, s); 3165 if (n < 0) 3166 return XML_ERROR_BAD_CHAR_REF; 3167 if (parser->m_characterDataHandler) { 3168 XML_Char buf[XML_ENCODE_MAX]; 3169 parser->m_characterDataHandler(parser->m_handlerArg, buf, 3170 XmlEncode(n, (ICHAR *)buf)); 3171 } else if (parser->m_defaultHandler) 3172 reportDefault(parser, enc, s, next); 3173 } break; 3174 case XML_TOK_XML_DECL: 3175 return XML_ERROR_MISPLACED_XML_PI; 
3176 case XML_TOK_DATA_NEWLINE: 3177 if (parser->m_characterDataHandler) { 3178 XML_Char c = 0xA; 3179 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 3180 } else if (parser->m_defaultHandler) 3181 reportDefault(parser, enc, s, next); 3182 break; 3183 case XML_TOK_CDATA_SECT_OPEN: { 3184 enum XML_Error result; 3185 if (parser->m_startCdataSectionHandler) 3186 parser->m_startCdataSectionHandler(parser->m_handlerArg); 3187 /* BEGIN disabled code */ 3188 /* Suppose you doing a transformation on a document that involves 3189 changing only the character data. You set up a defaultHandler 3190 and a characterDataHandler. The defaultHandler simply copies 3191 characters through. The characterDataHandler does the 3192 transformation and writes the characters out escaping them as 3193 necessary. This case will fail to work if we leave out the 3194 following two lines (because & and < inside CDATA sections will 3195 be incorrectly escaped). 3196 3197 However, now we have a start/endCdataSectionHandler, so it seems 3198 easier to let the user deal with this. 3199 */ 3200 else if ((0) && parser->m_characterDataHandler) 3201 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3202 0); 3203 /* END disabled code */ 3204 else if (parser->m_defaultHandler) 3205 reportDefault(parser, enc, s, next); 3206 result 3207 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); 3208 if (result != XML_ERROR_NONE) 3209 return result; 3210 else if (! 
next) { 3211 parser->m_processor = cdataSectionProcessor; 3212 return result; 3213 } 3214 } break; 3215 case XML_TOK_TRAILING_RSQB: 3216 if (haveMore) { 3217 *nextPtr = s; 3218 return XML_ERROR_NONE; 3219 } 3220 if (parser->m_characterDataHandler) { 3221 if (MUST_CONVERT(enc, s)) { 3222 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3223 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3224 parser->m_characterDataHandler( 3225 parser->m_handlerArg, parser->m_dataBuf, 3226 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3227 } else 3228 parser->m_characterDataHandler( 3229 parser->m_handlerArg, (const XML_Char *)s, 3230 (int)((const XML_Char *)end - (const XML_Char *)s)); 3231 } else if (parser->m_defaultHandler) 3232 reportDefault(parser, enc, s, end); 3233 /* We are at the end of the final buffer, should we check for 3234 XML_SUSPENDED, XML_FINISHED? 3235 */ 3236 if (startTagLevel == 0) { 3237 *eventPP = end; 3238 return XML_ERROR_NO_ELEMENTS; 3239 } 3240 if (parser->m_tagLevel != startTagLevel) { 3241 *eventPP = end; 3242 return XML_ERROR_ASYNC_ENTITY; 3243 } 3244 *nextPtr = end; 3245 return XML_ERROR_NONE; 3246 case XML_TOK_DATA_CHARS: { 3247 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 3248 if (charDataHandler) { 3249 if (MUST_CONVERT(enc, s)) { 3250 for (;;) { 3251 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3252 const enum XML_Convert_Result convert_res = XmlConvert( 3253 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3254 *eventEndPP = s; 3255 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3256 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3257 if ((convert_res == XML_CONVERT_COMPLETED) 3258 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 3259 break; 3260 *eventPP = s; 3261 } 3262 } else 3263 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 3264 (int)((const XML_Char *)next - (const XML_Char *)s)); 3265 } else if (parser->m_defaultHandler) 3266 reportDefault(parser, enc, s, next); 
3267 } break; 3268 case XML_TOK_PI: 3269 if (! reportProcessingInstruction(parser, enc, s, next)) 3270 return XML_ERROR_NO_MEMORY; 3271 break; 3272 case XML_TOK_COMMENT: 3273 if (! reportComment(parser, enc, s, next)) 3274 return XML_ERROR_NO_MEMORY; 3275 break; 3276 default: 3277 /* All of the tokens produced by XmlContentTok() have their own 3278 * explicit cases, so this default is not strictly necessary. 3279 * However it is a useful safety net, so we retain the code and 3280 * simply exclude it from the coverage tests. 3281 * 3282 * LCOV_EXCL_START 3283 */ 3284 if (parser->m_defaultHandler) 3285 reportDefault(parser, enc, s, next); 3286 break; 3287 /* LCOV_EXCL_STOP */ 3288 } 3289 *eventPP = s = next; 3290 switch (parser->m_parsingStatus.parsing) { 3291 case XML_SUSPENDED: 3292 *nextPtr = next; 3293 return XML_ERROR_NONE; 3294 case XML_FINISHED: 3295 return XML_ERROR_ABORTED; 3296 default:; 3297 } 3298 } 3299 /* not reached */ 3300 } 3301 3302 /* This function does not call free() on the allocated memory, merely 3303 * moving it to the parser's m_freeBindingList where it can be freed or 3304 * reused as appropriate. 3305 */ 3306 static void 3307 freeBindings(XML_Parser parser, BINDING *bindings) { 3308 while (bindings) { 3309 BINDING *b = bindings; 3310 3311 /* m_startNamespaceDeclHandler will have been called for this 3312 * binding in addBindings(), so call the end handler now. 
3313 */ 3314 if (parser->m_endNamespaceDeclHandler) 3315 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); 3316 3317 bindings = bindings->nextTagBinding; 3318 b->nextTagBinding = parser->m_freeBindingList; 3319 parser->m_freeBindingList = b; 3320 b->prefix->binding = b->prevPrefixBinding; 3321 } 3322 } 3323 3324 /* Precondition: all arguments must be non-NULL; 3325 Purpose: 3326 - normalize attributes 3327 - check attributes for well-formedness 3328 - generate namespace aware attribute names (URI, prefix) 3329 - build list of attributes for startElementHandler 3330 - default attributes 3331 - process namespace declarations (check and report them) 3332 - generate namespace aware element name (URI, prefix) 3333 */ 3334 static enum XML_Error 3335 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, 3336 TAG_NAME *tagNamePtr, BINDING **bindingsPtr, 3337 enum XML_Account account) { 3338 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 3339 ELEMENT_TYPE *elementType; 3340 int nDefaultAtts; 3341 const XML_Char **appAtts; /* the attribute list for the application */ 3342 int attIndex = 0; 3343 int prefixLen; 3344 int i; 3345 int n; 3346 XML_Char *uri; 3347 int nPrefixes = 0; 3348 BINDING *binding; 3349 const XML_Char *localPart; 3350 3351 /* lookup the element type name */ 3352 elementType 3353 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); 3354 if (! elementType) { 3355 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); 3356 if (! name) 3357 return XML_ERROR_NO_MEMORY; 3358 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 3359 sizeof(ELEMENT_TYPE)); 3360 if (! elementType) 3361 return XML_ERROR_NO_MEMORY; 3362 if (parser->m_ns && ! 
setElementTypePrefix(parser, elementType)) 3363 return XML_ERROR_NO_MEMORY; 3364 } 3365 nDefaultAtts = elementType->nDefaultAtts; 3366 3367 /* get the attributes from the tokenizer */ 3368 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); 3369 3370 /* Detect and prevent integer overflow */ 3371 if (n > INT_MAX - nDefaultAtts) { 3372 return XML_ERROR_NO_MEMORY; 3373 } 3374 3375 if (n + nDefaultAtts > parser->m_attsSize) { 3376 int oldAttsSize = parser->m_attsSize; 3377 ATTRIBUTE *temp; 3378 #ifdef XML_ATTR_INFO 3379 XML_AttrInfo *temp2; 3380 #endif 3381 3382 /* Detect and prevent integer overflow */ 3383 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) 3384 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { 3385 return XML_ERROR_NO_MEMORY; 3386 } 3387 3388 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; 3389 3390 /* Detect and prevent integer overflow. 3391 * The preprocessor guard addresses the "always false" warning 3392 * from -Wtype-limits on platforms where 3393 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3394 #if UINT_MAX >= SIZE_MAX 3395 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { 3396 parser->m_attsSize = oldAttsSize; 3397 return XML_ERROR_NO_MEMORY; 3398 } 3399 #endif 3400 3401 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, 3402 parser->m_attsSize * sizeof(ATTRIBUTE)); 3403 if (temp == NULL) { 3404 parser->m_attsSize = oldAttsSize; 3405 return XML_ERROR_NO_MEMORY; 3406 } 3407 parser->m_atts = temp; 3408 #ifdef XML_ATTR_INFO 3409 /* Detect and prevent integer overflow. 3410 * The preprocessor guard addresses the "always false" warning 3411 * from -Wtype-limits on platforms where 3412 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 3413 # if UINT_MAX >= SIZE_MAX 3414 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { 3415 parser->m_attsSize = oldAttsSize; 3416 return XML_ERROR_NO_MEMORY; 3417 } 3418 # endif 3419 3420 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, 3421 parser->m_attsSize * sizeof(XML_AttrInfo)); 3422 if (temp2 == NULL) { 3423 parser->m_attsSize = oldAttsSize; 3424 return XML_ERROR_NO_MEMORY; 3425 } 3426 parser->m_attInfo = temp2; 3427 #endif 3428 if (n > oldAttsSize) 3429 XmlGetAttributes(enc, attStr, n, parser->m_atts); 3430 } 3431 3432 appAtts = (const XML_Char **)parser->m_atts; 3433 for (i = 0; i < n; i++) { 3434 ATTRIBUTE *currAtt = &parser->m_atts[i]; 3435 #ifdef XML_ATTR_INFO 3436 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; 3437 #endif 3438 /* add the name and value to the attribute list */ 3439 ATTRIBUTE_ID *attId 3440 = getAttributeId(parser, enc, currAtt->name, 3441 currAtt->name + XmlNameLength(enc, currAtt->name)); 3442 if (! attId) 3443 return XML_ERROR_NO_MEMORY; 3444 #ifdef XML_ATTR_INFO 3445 currAttInfo->nameStart 3446 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); 3447 currAttInfo->nameEnd 3448 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); 3449 currAttInfo->valueStart = parser->m_parseEndByteIndex 3450 - (parser->m_parseEndPtr - currAtt->valuePtr); 3451 currAttInfo->valueEnd = parser->m_parseEndByteIndex 3452 - (parser->m_parseEndPtr - currAtt->valueEnd); 3453 #endif 3454 /* Detect duplicate attributes by their QNames. This does not work when 3455 namespace processing is turned on and different prefixes for the same 3456 namespace are used. For this case we have a check further down. 3457 */ 3458 if ((attId->name)[-1]) { 3459 if (enc == parser->m_encoding) 3460 parser->m_eventPtr = parser->m_atts[i].name; 3461 return XML_ERROR_DUPLICATE_ATTRIBUTE; 3462 } 3463 (attId->name)[-1] = 1; 3464 appAtts[attIndex++] = attId->name; 3465 if (! 
parser->m_atts[i].normalized) { 3466 enum XML_Error result; 3467 XML_Bool isCdata = XML_TRUE; 3468 3469 /* figure out whether declared as other than CDATA */ 3470 if (attId->maybeTokenized) { 3471 int j; 3472 for (j = 0; j < nDefaultAtts; j++) { 3473 if (attId == elementType->defaultAtts[j].id) { 3474 isCdata = elementType->defaultAtts[j].isCdata; 3475 break; 3476 } 3477 } 3478 } 3479 3480 /* normalize the attribute value */ 3481 result = storeAttributeValue( 3482 parser, enc, isCdata, parser->m_atts[i].valuePtr, 3483 parser->m_atts[i].valueEnd, &parser->m_tempPool, account); 3484 if (result) 3485 return result; 3486 appAtts[attIndex] = poolStart(&parser->m_tempPool); 3487 poolFinish(&parser->m_tempPool); 3488 } else { 3489 /* the value did not need normalizing */ 3490 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, 3491 parser->m_atts[i].valuePtr, 3492 parser->m_atts[i].valueEnd); 3493 if (appAtts[attIndex] == 0) 3494 return XML_ERROR_NO_MEMORY; 3495 poolFinish(&parser->m_tempPool); 3496 } 3497 /* handle prefixed attribute names */ 3498 if (attId->prefix) { 3499 if (attId->xmlns) { 3500 /* deal with namespace declarations here */ 3501 enum XML_Error result = addBinding(parser, attId->prefix, attId, 3502 appAtts[attIndex], bindingsPtr); 3503 if (result) 3504 return result; 3505 --attIndex; 3506 } else { 3507 /* deal with other prefixed names later */ 3508 attIndex++; 3509 nPrefixes++; 3510 (attId->name)[-1] = 2; 3511 } 3512 } else 3513 attIndex++; 3514 } 3515 3516 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ 3517 parser->m_nSpecifiedAtts = attIndex; 3518 if (elementType->idAtt && (elementType->idAtt->name)[-1]) { 3519 for (i = 0; i < attIndex; i += 2) 3520 if (appAtts[i] == elementType->idAtt->name) { 3521 parser->m_idAttIndex = i; 3522 break; 3523 } 3524 } else 3525 parser->m_idAttIndex = -1; 3526 3527 /* do attribute defaulting */ 3528 for (i = 0; i < nDefaultAtts; i++) { 3529 const DEFAULT_ATTRIBUTE *da = 
elementType->defaultAtts + i; 3530 if (! (da->id->name)[-1] && da->value) { 3531 if (da->id->prefix) { 3532 if (da->id->xmlns) { 3533 enum XML_Error result = addBinding(parser, da->id->prefix, da->id, 3534 da->value, bindingsPtr); 3535 if (result) 3536 return result; 3537 } else { 3538 (da->id->name)[-1] = 2; 3539 nPrefixes++; 3540 appAtts[attIndex++] = da->id->name; 3541 appAtts[attIndex++] = da->value; 3542 } 3543 } else { 3544 (da->id->name)[-1] = 1; 3545 appAtts[attIndex++] = da->id->name; 3546 appAtts[attIndex++] = da->value; 3547 } 3548 } 3549 } 3550 appAtts[attIndex] = 0; 3551 3552 /* expand prefixed attribute names, check for duplicates, 3553 and clear flags that say whether attributes were specified */ 3554 i = 0; 3555 if (nPrefixes) { 3556 int j; /* hash table index */ 3557 unsigned long version = parser->m_nsAttsVersion; 3558 3559 /* Detect and prevent invalid shift */ 3560 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { 3561 return XML_ERROR_NO_MEMORY; 3562 } 3563 3564 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; 3565 unsigned char oldNsAttsPower = parser->m_nsAttsPower; 3566 /* size of hash table must be at least 2 * (# of prefixed attributes) */ 3567 if ((nPrefixes << 1) 3568 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ 3569 NS_ATT *temp; 3570 /* hash table size must also be a power of 2 and >= 8 */ 3571 while (nPrefixes >> parser->m_nsAttsPower++) 3572 ; 3573 if (parser->m_nsAttsPower < 3) 3574 parser->m_nsAttsPower = 3; 3575 3576 /* Detect and prevent invalid shift */ 3577 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { 3578 /* Restore actual size of memory in m_nsAtts */ 3579 parser->m_nsAttsPower = oldNsAttsPower; 3580 return XML_ERROR_NO_MEMORY; 3581 } 3582 3583 nsAttsSize = 1u << parser->m_nsAttsPower; 3584 3585 /* Detect and prevent integer overflow. 
3586 * The preprocessor guard addresses the "always false" warning 3587 * from -Wtype-limits on platforms where 3588 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3589 #if UINT_MAX >= SIZE_MAX 3590 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { 3591 /* Restore actual size of memory in m_nsAtts */ 3592 parser->m_nsAttsPower = oldNsAttsPower; 3593 return XML_ERROR_NO_MEMORY; 3594 } 3595 #endif 3596 3597 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, 3598 nsAttsSize * sizeof(NS_ATT)); 3599 if (! temp) { 3600 /* Restore actual size of memory in m_nsAtts */ 3601 parser->m_nsAttsPower = oldNsAttsPower; 3602 return XML_ERROR_NO_MEMORY; 3603 } 3604 parser->m_nsAtts = temp; 3605 version = 0; /* force re-initialization of m_nsAtts hash table */ 3606 } 3607 /* using a version flag saves us from initializing m_nsAtts every time */ 3608 if (! version) { /* initialize version flags when version wraps around */ 3609 version = INIT_ATTS_VERSION; 3610 for (j = nsAttsSize; j != 0;) 3611 parser->m_nsAtts[--j].version = version; 3612 } 3613 parser->m_nsAttsVersion = --version; 3614 3615 /* expand prefixed names and check for duplicates */ 3616 for (; i < attIndex; i += 2) { 3617 const XML_Char *s = appAtts[i]; 3618 if (s[-1] == 2) { /* prefixed */ 3619 ATTRIBUTE_ID *id; 3620 const BINDING *b; 3621 unsigned long uriHash; 3622 struct siphash sip_state; 3623 struct sipkey sip_key; 3624 3625 copy_salt_to_sipkey(parser, &sip_key); 3626 sip24_init(&sip_state, &sip_key); 3627 3628 ((XML_Char *)s)[-1] = 0; /* clear flag */ 3629 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); 3630 if (! id || ! id->prefix) { 3631 /* This code is walking through the appAtts array, dealing 3632 * with (in this case) a prefixed attribute name. To be in 3633 * the array, the attribute must have already been bound, so 3634 * has to have passed through the hash table lookup once 3635 * already. 
That implies that an entry for it already 3636 * exists, so the lookup above will return a pointer to 3637 * already allocated memory. There is no opportunaity for 3638 * the allocator to fail, so the condition above cannot be 3639 * fulfilled. 3640 * 3641 * Since it is difficult to be certain that the above 3642 * analysis is complete, we retain the test and merely 3643 * remove the code from coverage tests. 3644 */ 3645 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 3646 } 3647 b = id->prefix->binding; 3648 if (! b) 3649 return XML_ERROR_UNBOUND_PREFIX; 3650 3651 for (j = 0; j < b->uriLen; j++) { 3652 const XML_Char c = b->uri[j]; 3653 if (! poolAppendChar(&parser->m_tempPool, c)) 3654 return XML_ERROR_NO_MEMORY; 3655 } 3656 3657 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); 3658 3659 while (*s++ != XML_T(ASCII_COLON)) 3660 ; 3661 3662 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); 3663 3664 do { /* copies null terminator */ 3665 if (! poolAppendChar(&parser->m_tempPool, *s)) 3666 return XML_ERROR_NO_MEMORY; 3667 } while (*s++); 3668 3669 uriHash = (unsigned long)sip24_final(&sip_state); 3670 3671 { /* Check hash table for duplicate of expanded name (uriName). 3672 Derived from code in lookup(parser, HASH_TABLE *table, ...). 3673 */ 3674 unsigned char step = 0; 3675 unsigned long mask = nsAttsSize - 1; 3676 j = uriHash & mask; /* index into hash table */ 3677 while (parser->m_nsAtts[j].version == version) { 3678 /* for speed we compare stored hash values first */ 3679 if (uriHash == parser->m_nsAtts[j].hash) { 3680 const XML_Char *s1 = poolStart(&parser->m_tempPool); 3681 const XML_Char *s2 = parser->m_nsAtts[j].uriName; 3682 /* s1 is null terminated, but not s2 */ 3683 for (; *s1 == *s2 && *s1 != 0; s1++, s2++) 3684 ; 3685 if (*s1 == 0) 3686 return XML_ERROR_DUPLICATE_ATTRIBUTE; 3687 } 3688 if (! step) 3689 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); 3690 j < step ? 
(j += nsAttsSize - step) : (j -= step); 3691 } 3692 } 3693 3694 if (parser->m_ns_triplets) { /* append namespace separator and prefix */ 3695 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; 3696 s = b->prefix->name; 3697 do { 3698 if (! poolAppendChar(&parser->m_tempPool, *s)) 3699 return XML_ERROR_NO_MEMORY; 3700 } while (*s++); 3701 } 3702 3703 /* store expanded name in attribute list */ 3704 s = poolStart(&parser->m_tempPool); 3705 poolFinish(&parser->m_tempPool); 3706 appAtts[i] = s; 3707 3708 /* fill empty slot with new version, uriName and hash value */ 3709 parser->m_nsAtts[j].version = version; 3710 parser->m_nsAtts[j].hash = uriHash; 3711 parser->m_nsAtts[j].uriName = s; 3712 3713 if (! --nPrefixes) { 3714 i += 2; 3715 break; 3716 } 3717 } else /* not prefixed */ 3718 ((XML_Char *)s)[-1] = 0; /* clear flag */ 3719 } 3720 } 3721 /* clear flags for the remaining attributes */ 3722 for (; i < attIndex; i += 2) 3723 ((XML_Char *)(appAtts[i]))[-1] = 0; 3724 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) 3725 binding->attId->name[-1] = 0; 3726 3727 if (! parser->m_ns) 3728 return XML_ERROR_NONE; 3729 3730 /* expand the element type name */ 3731 if (elementType->prefix) { 3732 binding = elementType->prefix->binding; 3733 if (! 
binding) 3734 return XML_ERROR_UNBOUND_PREFIX; 3735 localPart = tagNamePtr->str; 3736 while (*localPart++ != XML_T(ASCII_COLON)) 3737 ; 3738 } else if (dtd->defaultPrefix.binding) { 3739 binding = dtd->defaultPrefix.binding; 3740 localPart = tagNamePtr->str; 3741 } else 3742 return XML_ERROR_NONE; 3743 prefixLen = 0; 3744 if (parser->m_ns_triplets && binding->prefix->name) { 3745 for (; binding->prefix->name[prefixLen++];) 3746 ; /* prefixLen includes null terminator */ 3747 } 3748 tagNamePtr->localPart = localPart; 3749 tagNamePtr->uriLen = binding->uriLen; 3750 tagNamePtr->prefix = binding->prefix->name; 3751 tagNamePtr->prefixLen = prefixLen; 3752 for (i = 0; localPart[i++];) 3753 ; /* i includes null terminator */ 3754 3755 /* Detect and prevent integer overflow */ 3756 if (binding->uriLen > INT_MAX - prefixLen 3757 || i > INT_MAX - (binding->uriLen + prefixLen)) { 3758 return XML_ERROR_NO_MEMORY; 3759 } 3760 3761 n = i + binding->uriLen + prefixLen; 3762 if (n > binding->uriAlloc) { 3763 TAG *p; 3764 3765 /* Detect and prevent integer overflow */ 3766 if (n > INT_MAX - EXPAND_SPARE) { 3767 return XML_ERROR_NO_MEMORY; 3768 } 3769 /* Detect and prevent integer overflow. 3770 * The preprocessor guard addresses the "always false" warning 3771 * from -Wtype-limits on platforms where 3772 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3773 #if UINT_MAX >= SIZE_MAX 3774 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 3775 return XML_ERROR_NO_MEMORY; 3776 } 3777 #endif 3778 3779 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); 3780 if (! 
uri) 3781 return XML_ERROR_NO_MEMORY; 3782 binding->uriAlloc = n + EXPAND_SPARE; 3783 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); 3784 for (p = parser->m_tagStack; p; p = p->parent) 3785 if (p->name.str == binding->uri) 3786 p->name.str = uri; 3787 FREE(parser, binding->uri); 3788 binding->uri = uri; 3789 } 3790 /* if m_namespaceSeparator != '\0' then uri includes it already */ 3791 uri = binding->uri + binding->uriLen; 3792 memcpy(uri, localPart, i * sizeof(XML_Char)); 3793 /* we always have a namespace separator between localPart and prefix */ 3794 if (prefixLen) { 3795 uri += i - 1; 3796 *uri = parser->m_namespaceSeparator; /* replace null terminator */ 3797 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); 3798 } 3799 tagNamePtr->str = binding->uri; 3800 return XML_ERROR_NONE; 3801 } 3802 3803 static XML_Bool 3804 is_rfc3986_uri_char(XML_Char candidate) { 3805 // For the RFC 3986 ANBF grammar see 3806 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A 3807 3808 switch (candidate) { 3809 // From rule "ALPHA" (uppercase half) 3810 case 'A': 3811 case 'B': 3812 case 'C': 3813 case 'D': 3814 case 'E': 3815 case 'F': 3816 case 'G': 3817 case 'H': 3818 case 'I': 3819 case 'J': 3820 case 'K': 3821 case 'L': 3822 case 'M': 3823 case 'N': 3824 case 'O': 3825 case 'P': 3826 case 'Q': 3827 case 'R': 3828 case 'S': 3829 case 'T': 3830 case 'U': 3831 case 'V': 3832 case 'W': 3833 case 'X': 3834 case 'Y': 3835 case 'Z': 3836 3837 // From rule "ALPHA" (lowercase half) 3838 case 'a': 3839 case 'b': 3840 case 'c': 3841 case 'd': 3842 case 'e': 3843 case 'f': 3844 case 'g': 3845 case 'h': 3846 case 'i': 3847 case 'j': 3848 case 'k': 3849 case 'l': 3850 case 'm': 3851 case 'n': 3852 case 'o': 3853 case 'p': 3854 case 'q': 3855 case 'r': 3856 case 's': 3857 case 't': 3858 case 'u': 3859 case 'v': 3860 case 'w': 3861 case 'x': 3862 case 'y': 3863 case 'z': 3864 3865 // From rule "DIGIT" 3866 case '0': 3867 case '1': 3868 case '2': 
3869 case '3': 3870 case '4': 3871 case '5': 3872 case '6': 3873 case '7': 3874 case '8': 3875 case '9': 3876 3877 // From rule "pct-encoded" 3878 case '%': 3879 3880 // From rule "unreserved" 3881 case '-': 3882 case '.': 3883 case '_': 3884 case '~': 3885 3886 // From rule "gen-delims" 3887 case ':': 3888 case '/': 3889 case '?': 3890 case '#': 3891 case '[': 3892 case ']': 3893 case '@': 3894 3895 // From rule "sub-delims" 3896 case '!': 3897 case '$': 3898 case '&': 3899 case '\'': 3900 case '(': 3901 case ')': 3902 case '*': 3903 case '+': 3904 case ',': 3905 case ';': 3906 case '=': 3907 return XML_TRUE; 3908 3909 default: 3910 return XML_FALSE; 3911 } 3912 } 3913 3914 /* addBinding() overwrites the value of prefix->binding without checking. 3915 Therefore one must keep track of the old value outside of addBinding(). 3916 */ 3917 static enum XML_Error 3918 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, 3919 const XML_Char *uri, BINDING **bindingsPtr) { 3920 // "http://www.w3.org/XML/1998/namespace" 3921 static const XML_Char xmlNamespace[] 3922 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 3923 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, 3924 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, 3925 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, 3926 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, 3927 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, 3928 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, 3929 ASCII_e, '\0'}; 3930 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 3931 // "http://www.w3.org/2000/xmlns/" 3932 static const XML_Char xmlnsNamespace[] 3933 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 3934 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 3935 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 3936 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 3937 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 3938 
static const int xmlnsLen 3939 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 3940 3941 XML_Bool mustBeXML = XML_FALSE; 3942 XML_Bool isXML = XML_TRUE; 3943 XML_Bool isXMLNS = XML_TRUE; 3944 3945 BINDING *b; 3946 int len; 3947 3948 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 3949 if (*uri == XML_T('\0') && prefix->name) 3950 return XML_ERROR_UNDECLARING_PREFIX; 3951 3952 if (prefix->name && prefix->name[0] == XML_T(ASCII_x) 3953 && prefix->name[1] == XML_T(ASCII_m) 3954 && prefix->name[2] == XML_T(ASCII_l)) { 3955 /* Not allowed to bind xmlns */ 3956 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) 3957 && prefix->name[5] == XML_T('\0')) 3958 return XML_ERROR_RESERVED_PREFIX_XMLNS; 3959 3960 if (prefix->name[3] == XML_T('\0')) 3961 mustBeXML = XML_TRUE; 3962 } 3963 3964 for (len = 0; uri[len]; len++) { 3965 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) 3966 isXML = XML_FALSE; 3967 3968 if (! mustBeXML && isXMLNS 3969 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) 3970 isXMLNS = XML_FALSE; 3971 3972 // NOTE: While Expat does not validate namespace URIs against RFC 3986 3973 // today (and is not REQUIRED to do so with regard to the XML 1.0 3974 // namespaces specification) we have to at least make sure, that 3975 // the application on top of Expat (that is likely splitting expanded 3976 // element names ("qualified names") of form 3977 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 3978 // in its element handler code) cannot be confused by an attacker 3979 // putting additional namespace separator characters into namespace 3980 // declarations. That would be ambiguous and not to be expected. 
3981 // 3982 // While the HTML API docs of function XML_ParserCreateNS have been 3983 // advising against use of a namespace separator character that can 3984 // appear in a URI for >20 years now, some widespread applications 3985 // are using URI characters (':' (colon) in particular) for a 3986 // namespace separator, in practice. To keep these applications 3987 // functional, we only reject namespaces URIs containing the 3988 // application-chosen namespace separator if the chosen separator 3989 // is a non-URI character with regard to RFC 3986. 3990 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) 3991 && ! is_rfc3986_uri_char(uri[len])) { 3992 return XML_ERROR_SYNTAX; 3993 } 3994 } 3995 isXML = isXML && len == xmlLen; 3996 isXMLNS = isXMLNS && len == xmlnsLen; 3997 3998 if (mustBeXML != isXML) 3999 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML 4000 : XML_ERROR_RESERVED_NAMESPACE_URI; 4001 4002 if (isXMLNS) 4003 return XML_ERROR_RESERVED_NAMESPACE_URI; 4004 4005 if (parser->m_namespaceSeparator) 4006 len++; 4007 if (parser->m_freeBindingList) { 4008 b = parser->m_freeBindingList; 4009 if (len > b->uriAlloc) { 4010 /* Detect and prevent integer overflow */ 4011 if (len > INT_MAX - EXPAND_SPARE) { 4012 return XML_ERROR_NO_MEMORY; 4013 } 4014 4015 /* Detect and prevent integer overflow. 4016 * The preprocessor guard addresses the "always false" warning 4017 * from -Wtype-limits on platforms where 4018 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 4019 #if UINT_MAX >= SIZE_MAX 4020 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4021 return XML_ERROR_NO_MEMORY; 4022 } 4023 #endif 4024 4025 XML_Char *temp = (XML_Char *)REALLOC( 4026 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4027 if (temp == NULL) 4028 return XML_ERROR_NO_MEMORY; 4029 b->uri = temp; 4030 b->uriAlloc = len + EXPAND_SPARE; 4031 } 4032 parser->m_freeBindingList = b->nextTagBinding; 4033 } else { 4034 b = (BINDING *)MALLOC(parser, sizeof(BINDING)); 4035 if (! b) 4036 return XML_ERROR_NO_MEMORY; 4037 4038 /* Detect and prevent integer overflow */ 4039 if (len > INT_MAX - EXPAND_SPARE) { 4040 return XML_ERROR_NO_MEMORY; 4041 } 4042 /* Detect and prevent integer overflow. 4043 * The preprocessor guard addresses the "always false" warning 4044 * from -Wtype-limits on platforms where 4045 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4046 #if UINT_MAX >= SIZE_MAX 4047 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4048 return XML_ERROR_NO_MEMORY; 4049 } 4050 #endif 4051 4052 b->uri 4053 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4054 if (! 
b->uri) { 4055 FREE(parser, b); 4056 return XML_ERROR_NO_MEMORY; 4057 } 4058 b->uriAlloc = len + EXPAND_SPARE; 4059 } 4060 b->uriLen = len; 4061 memcpy(b->uri, uri, len * sizeof(XML_Char)); 4062 if (parser->m_namespaceSeparator) 4063 b->uri[len - 1] = parser->m_namespaceSeparator; 4064 b->prefix = prefix; 4065 b->attId = attId; 4066 b->prevPrefixBinding = prefix->binding; 4067 /* NULL binding when default namespace undeclared */ 4068 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4069 prefix->binding = NULL; 4070 else 4071 prefix->binding = b; 4072 b->nextTagBinding = *bindingsPtr; 4073 *bindingsPtr = b; 4074 /* if attId == NULL then we are not starting a namespace scope */ 4075 if (attId && parser->m_startNamespaceDeclHandler) 4076 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, 4077 prefix->binding ? uri : 0); 4078 return XML_ERROR_NONE; 4079 } 4080 4081 /* The idea here is to avoid using stack for each CDATA section when 4082 the whole file is parsed with one call. 4083 */ 4084 static enum XML_Error PTRCALL 4085 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, 4086 const char **endPtr) { 4087 enum XML_Error result = doCdataSection( 4088 parser, parser->m_encoding, &start, end, endPtr, 4089 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4090 if (result != XML_ERROR_NONE) 4091 return result; 4092 if (start) { 4093 if (parser->m_parentParser) { /* we are parsing an external entity */ 4094 parser->m_processor = externalEntityContentProcessor; 4095 return externalEntityContentProcessor(parser, start, end, endPtr); 4096 } else { 4097 parser->m_processor = contentProcessor; 4098 return contentProcessor(parser, start, end, endPtr); 4099 } 4100 } 4101 return result; 4102 } 4103 4104 /* startPtr gets set to non-null if the section is closed, and to null if 4105 the section is not yet closed. 
*/
/* Tokenizes the input from *startPtr up to end with XmlCdataSectionTok()
   and dispatches every token:
     - character data and newlines go to the character data handler (run
       through XmlConvert() into m_dataBuf first when MUST_CONVERT() asks
       for it), or else to the default handler;
     - the closing token goes to the end-CDATA-section handler, or else to
       the default handler, and ends the function.

   enc       encoding used for tokenizing; if it is not parser->m_encoding,
             event positions are kept in m_openInternalEntities instead of
             in the parser itself
   startPtr  in: first byte to look at; out: position after the closing
             token, or NULL if the section was not closed
   end       end of the input buffer
   nextPtr   out: position at which parsing has to resume
   haveMore  if true, an incomplete token at the end of the buffer is not
             an error: *nextPtr is set to its start and XML_ERROR_NONE is
             returned
   account   handed to accountingDiffTolerated() (XML_GE == 1 builds only)

   Returns XML_ERROR_NONE when the section was closed, when more input is
   awaited, or when parsing was suspended; XML_ERROR_ABORTED when
   m_parsingStatus.parsing is XML_FINISHED; otherwise the error matching
   the offending token.
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
               const char *end, const char **nextPtr, XML_Bool haveMore,
               enum XML_Account account) {
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;
  /* stays NULL unless the closing token is found below */
  *startPtr = NULL;

  for (;;) {
    const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
    int tok = XmlCdataSectionTok(enc, s, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#else
    UNUSED_P(account);
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_CDATA_SECT_CLOSE:
      if (parser->m_endCdataSectionHandler)
        parser->m_endCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* see comment under XML_TOK_CDATA_SECT_OPEN */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      *startPtr = next;
      *nextPtr = next;
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      else
        return XML_ERROR_NONE;
    case XML_TOK_DATA_NEWLINE:
      /* the handler always sees a single 0xA, whatever the input held */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* convert and report in pieces of at most one m_dataBuf each */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = next;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_PARTIAL:
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
      /* Every token returned by XmlCdataSectionTok() has its own
       * explicit case, so this default case will never be executed.
       * We retain it as a safety net and exclude it from the coverage
       * statistics.
       *
       * LCOV_EXCL_START
       */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
      /* LCOV_EXCL_STOP */
    }

    /* token handled; move on, unless a handler stopped the parser */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}

#ifdef XML_DTD

/* Entry point with the same signature as the functions stored in
   parser->m_processor; it continues an IGNORE section on the buffer
   [start, end).

   The buffer is handed to doIgnoreSection().  An error from there is
   returned unchanged.  If the section was closed (doIgnoreSection() left
   `start` non-NULL), prologProcessor is reinstalled and run on the input
   from `start` on.

   The idea here is to avoid using stack for each IGNORE section when
   the whole file is parsed with one call.
*/
static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
                       const char **endPtr) {
  enum XML_Error result
      = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
                        (XML_Bool)! parser->m_parsingStatus.finalBuffer);
  if (result != XML_ERROR_NONE)
    return result;
  if (start) {
    parser->m_processor = prologProcessor;
    return prologProcessor(parser, start, end, endPtr);
  }
  return result;
}

/* Makes a single XmlIgnoreSectionTok() call on the input from *startPtr up
   to end and acts on the token it returns.

   startPtr gets set to non-null if the section is closed, and to null
   if the section is not yet closed.

   A closed section is passed to the default handler (if one is set) and
   *nextPtr is set to the position after it.  If haveMore is true, an
   incomplete token is not an error: *nextPtr is set to its start and
   XML_ERROR_NONE is returned.
*/
static enum XML_Error
doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
                const char *end, const char **nextPtr, XML_Bool haveMore) {
  const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
  int tok;
  const char *s = *startPtr;
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    *eventPP = s;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    /* It's not entirely clear, but it seems the following two lines
     * of code cannot be executed.  The only occasions on which 'enc'
     * is not 'encoding' are when this function is called
     * from the internal entity processing, and IGNORE sections are an
     * error in internal entities.
     *
     * Since it really isn't clear that this is true, we keep the code
     * and just remove it from our coverage tests.
     *
     * LCOV_EXCL_START
     */
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
    /* LCOV_EXCL_STOP */
  }
  *eventPP = s;
  /* stays NULL unless the section turns out to be closed */
  *startPtr = NULL;
  tok = XmlIgnoreSectionTok(enc, s, end, &next);
#  if XML_GE == 1
  if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                XML_ACCOUNT_DIRECT)) {
    accountingOnAbort(parser);
    return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
  }
#  endif
  *eventEndPP = next;
  switch (tok) {
  case XML_TOK_IGNORE_SECT:
    if (parser->m_defaultHandler)
      reportDefault(parser, enc, s, next);
    *startPtr = next;
    *nextPtr = next;
    if (parser->m_parsingStatus.parsing == XML_FINISHED)
      return XML_ERROR_ABORTED;
    else
      return XML_ERROR_NONE;
  case XML_TOK_INVALID:
    *eventPP = next;
    return XML_ERROR_INVALID_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (haveMore) {
      *nextPtr = s;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_PARTIAL_CHAR;
  case XML_TOK_PARTIAL:
  case XML_TOK_NONE:
    if (haveMore) {
      *nextPtr = s;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
  default:
    /* All of the tokens that XmlIgnoreSectionTok() returns have
     * explicit cases to handle them, so this default case is never
     * executed.  We keep it as a safety net anyway, and remove it
     * from our test coverage statistics.
*
     * LCOV_EXCL_START
     */
    *eventPP = next;
    return XML_ERROR_UNEXPECTED_STATE;
    /* LCOV_EXCL_STOP */
  }
  /* not reached */
}

#endif /* XML_DTD */

/* Sets up parser->m_initEncoding and parser->m_encoding from the protocol
   encoding name by calling XmlInitEncodingNS() (namespace-aware parser) or
   XmlInitEncoding().

   With XML_UNICODE the XML_Char name is first narrowed into a local char
   buffer; a name longer than 127 characters, or one holding a character
   outside 7-bit ASCII, is passed on as the empty string instead.

   Returns XML_ERROR_NONE if the init call returns non-zero, otherwise the
   result of handleUnknownEncoding() for the protocol encoding name.
*/
static enum XML_Error
initializeEncoding(XML_Parser parser) {
  const char *s;
#ifdef XML_UNICODE
  char encodingBuf[128];
  /* See comments about `protocolEncodingName` in parserInit() */
  if (! parser->m_protocolEncodingName)
    s = NULL;
  else {
    int i;
    for (i = 0; parser->m_protocolEncodingName[i]; i++) {
      if (i == sizeof(encodingBuf) - 1
          || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
        /* too long or not plain ASCII: hand over an empty name */
        encodingBuf[0] = '\0';
        break;
      }
      encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
    }
    encodingBuf[i] = '\0';
    s = encodingBuf;
  }
#else
  s = parser->m_protocolEncodingName;
#endif
  if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
          &parser->m_initEncoding, &parser->m_encoding, s))
    return XML_ERROR_NONE;
  return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
}

/* Handles the XML declaration -- or, when isGeneralTextEntity is non-zero,
   the text declaration -- found in [s, next).

   In order:
     1. the bytes are accounted for (XML_GE == 1 builds only);
     2. the declaration is parsed; failure yields XML_ERROR_TEXT_DECL or
        XML_ERROR_XML_DECL depending on isGeneralTextEntity;
     3. standalone="yes" in an XML declaration is recorded in the DTD, and
        with XML_DTD it turns "parse parameter entities unless standalone"
        into "never";
     4. the declaration is reported to the XML declaration handler (version
        and encoding name are copied into m_temp2Pool for that), or else to
        the default handler;
     5. if no protocol encoding name is set, the declared encoding is
        either adopted (after a consistency check against the encoding in
        use) or looked up through handleUnknownEncoding().

   Returns XML_ERROR_NONE on success.
*/
static enum XML_Error
processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
               const char *next) {
  const char *encodingName = NULL;
  const XML_Char *storedEncName = NULL;
  const ENCODING *newEncoding = NULL;
  const char *version = NULL;
  const char *versionend = NULL;
  const XML_Char *storedversion = NULL;
  int standalone = -1;

#if XML_GE == 1
  if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
                                XML_ACCOUNT_DIRECT)) {
    accountingOnAbort(parser);
    return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
  }
#endif

  if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
          isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
          &version, &versionend, &encodingName, &newEncoding, &standalone)) {
    if (isGeneralTextEntity)
      return XML_ERROR_TEXT_DECL;
    else
      return XML_ERROR_XML_DECL;
  }
  if (! isGeneralTextEntity && standalone == 1) {
    parser->m_dtd->standalone = XML_TRUE;
#ifdef XML_DTD
    if (parser->m_paramEntityParsing
        == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
      parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif /* XML_DTD */
  }
  if (parser->m_xmlDeclHandler) {
    if (encodingName != NULL) {
      storedEncName = poolStoreString(
          &parser->m_temp2Pool, parser->m_encoding, encodingName,
          encodingName + XmlNameLength(parser->m_encoding, encodingName));
      if (! storedEncName)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_temp2Pool);
    }
    if (version) {
      storedversion
          = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
                            versionend - parser->m_encoding->minBytesPerChar);
      if (! storedversion)
        return XML_ERROR_NO_MEMORY;
    }
    parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
                             standalone);
  } else if (parser->m_defaultHandler)
    reportDefault(parser, parser->m_encoding, s, next);
  /* a protocol encoding name, when set, overrides the declared encoding */
  if (parser->m_protocolEncodingName == NULL) {
    if (newEncoding) {
      /* Check that the specified encoding does not conflict with what
       * the parser has already deduced.  Do we have the same number
       * of bytes in the smallest representation of a character?  If
       * this is UTF-16, is it the same endianness?
*/
      if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
          || (newEncoding->minBytesPerChar == 2
              && newEncoding != parser->m_encoding)) {
        parser->m_eventPtr = encodingName;
        return XML_ERROR_INCORRECT_ENCODING;
      }
      parser->m_encoding = newEncoding;
    } else if (encodingName) {
      enum XML_Error result;
      /* the name was not stored above when no XML declaration handler is set */
      if (! storedEncName) {
        storedEncName = poolStoreString(
            &parser->m_temp2Pool, parser->m_encoding, encodingName,
            encodingName + XmlNameLength(parser->m_encoding, encodingName));
        if (! storedEncName)
          return XML_ERROR_NO_MEMORY;
      }
      result = handleUnknownEncoding(parser, storedEncName);
      poolClear(&parser->m_temp2Pool);
      if (result == XML_ERROR_UNKNOWN_ENCODING)
        parser->m_eventPtr = encodingName;
      return result;
    }
  }

  if (storedEncName || storedversion)
    poolClear(&parser->m_temp2Pool);

  return XML_ERROR_NONE;
}

/* Asks the application's unknown-encoding handler to describe the encoding
   called encodingName and, if it does, switches the parser over to it.

   The XML_Encoding passed to the handler starts out with every map entry
   set to -1 and with convert, data and release set to NULL.  If the
   handler returns non-zero, XmlSizeOfUnknownEncoding() bytes are allocated
   into parser->m_unknownEncodingMem and XmlInitUnknownEncodingNS() or
   XmlInitUnknownEncoding() builds the ENCODING in them.  On success the
   parser keeps info.data and info.release and uses the new encoding.

   Returns XML_ERROR_NONE on success, XML_ERROR_NO_MEMORY if the allocation
   fails, and XML_ERROR_UNKNOWN_ENCODING if no handler is set, the handler
   returns zero, or the ENCODING cannot be built.  On every failure after
   the handler has run, info.release (when set) is called on info.data.
*/
static enum XML_Error
handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
  if (parser->m_unknownEncodingHandler) {
    XML_Encoding info;
    int i;
    for (i = 0; i < 256; i++)
      info.map[i] = -1;
    info.convert = NULL;
    info.data = NULL;
    info.release = NULL;
    if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
                                         encodingName, &info)) {
      ENCODING *enc;
      parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
      if (! parser->m_unknownEncodingMem) {
        if (info.release)
          info.release(info.data);
        return XML_ERROR_NO_MEMORY;
      }
      enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
          parser->m_unknownEncodingMem, info.map, info.convert, info.data);
      if (enc) {
        parser->m_unknownEncodingData = info.data;
        parser->m_unknownEncodingRelease = info.release;
        parser->m_encoding = enc;
        return XML_ERROR_NONE;
      }
    }
    if (info.release != NULL)
      info.release(info.data);
  }
  return XML_ERROR_UNKNOWN_ENCODING;
}

/* Runs initializeEncoding() and, if that succeeds, installs prologProcessor
   as the parser's processor and runs it on the same input [s, end).
   An error from initializeEncoding() is returned unchanged.
*/
static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser, const char *s, const char *end,
                    const char **nextPtr) {
  enum XML_Error result = initializeEncoding(parser);
  if (result != XML_ERROR_NONE)
    return result;
  parser->m_processor = prologProcessor;
  return prologProcessor(parser, s, end, nextPtr);
}

#ifdef XML_DTD

/* Runs initializeEncoding(), marks the external parameter entity as read,
   and then installs and runs the processor for the rest of the input:
   entityValueInitProcessor when the prolog state says we are inside an
   entity value, externalParEntProcessor otherwise.
   An error from initializeEncoding() is returned unchanged.
*/
static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
                            const char **nextPtr) {
  enum XML_Error result = initializeEncoding(parser);
  if (result != XML_ERROR_NONE)
    return result;

  /* we know now that XML_Parse(Buffer) has been called,
     so we consider the external parameter entity read */
  parser->m_dtd->paramEntityRead = XML_TRUE;

  if (parser->m_prologState.inEntityValue) {
    parser->m_processor = entityValueInitProcessor;
    return entityValueInitProcessor(parser, s, end, nextPtr);
  } else {
    parser->m_processor = externalParEntProcessor;
    return externalParEntProcessor(parser, s, end, nextPtr);
  }
}

/* Scans [s, end) token by token with XmlPrologTok().

   A leading byte order mark is consumed.  A text declaration is handed to
   processXmlDecl(), after which entityValueProcessor takes over.  When the
   end of the input is reached, everything from s on is stored through
   storeEntityValue().  If this is not the final buffer, an incomplete or
   missing token makes the function return XML_ERROR_NONE with *nextPtr
   set to s, so that more input can be supplied.
*/
static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
                         const char **nextPtr) {
  int tok;
  const char *start = s;
  const char *next = start;
  parser->m_eventPtr = start;

  for (;;) {
    tok = XmlPrologTok(parser->m_encoding, start, end, &next);
    /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
       -
storeEntityValue
       - processXmlDecl
    */
    parser->m_eventEndPtr = next;
    if (tok <= 0) {
      if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case XML_TOK_NONE: /* start == end */
      default:
        break;
      }
      /* found end of entity value - can store it now */
      return storeEntityValue(parser, parser->m_encoding, s, end,
                              XML_ACCOUNT_DIRECT);
    } else if (tok == XML_TOK_XML_DECL) {
      enum XML_Error result;
      result = processXmlDecl(parser, 0, start, next);
      if (result != XML_ERROR_NONE)
        return result;
      /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED.  For
       * that to happen, a parameter entity parsing handler must have attempted
       * to suspend the parser, which fails and raises an error.  The parser can
       * be aborted, but can't be suspended.
       */
      if (parser->m_parsingStatus.parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
      *nextPtr = next;
      /* stop scanning for text declaration - we found one */
      parser->m_processor = entityValueProcessor;
      return entityValueProcessor(parser, next, end, nextPtr);
    }
    /* XmlPrologTok has now set the encoding based on the BOM it found, and we
       must move s and nextPtr forward to consume the BOM.

       If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
       would leave the BOM in the buffer and return. On the next call to this
       function, our XmlPrologTok call would return XML_TOK_INVALID, since it
       is not valid to have multiple BOMs.
    */
    else if (tok == XML_TOK_BOM) {
#  if XML_GE == 1
      if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                    XML_ACCOUNT_DIRECT)) {
        accountingOnAbort(parser);
        return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      }
#  endif

      *nextPtr = next;
      s = next;
    }
    /* If we get this token, we have the start of what might be a
       normal tag, but not a declaration (i.e. it doesn't begin with
       "<!").  In a DTD context, that isn't legal.
    */
    else if (tok == XML_TOK_INSTANCE_START) {
      *nextPtr = next;
      return XML_ERROR_SYNTAX;
    }
    start = next;
    parser->m_eventPtr = start;
  }
}

/* Reads the first token of [s, end) with XmlPrologTok(), steps over a
   leading byte order mark (accounting for its bytes), then installs
   prologProcessor as the parser's processor and continues in doProlog()
   with the token at hand.

   If this is not the final buffer, an incomplete or missing first token
   makes the function return XML_ERROR_NONE with *nextPtr set to s.  In
   the final buffer, invalid, unclosed and partial-character tokens are
   returned as the matching error.
*/
static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
                        const char **nextPtr) {
  const char *next = s;
  int tok;

  tok = XmlPrologTok(parser->m_encoding, s, end, &next);
  if (tok <= 0) {
    if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
      *nextPtr = s;
      return XML_ERROR_NONE;
    }
    switch (tok) {
    case XML_TOK_INVALID:
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_NONE: /* start == end */
    default:
      break;
    }
  }
  /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
     However, when parsing an external subset, doProlog will not accept a BOM
     as valid, and report a syntax error, so we have to skip the BOM, and
     account for the BOM bytes.
  */
  else if (tok == XML_TOK_BOM) {
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                  XML_ACCOUNT_DIRECT)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }

    s = next;
    tok = XmlPrologTok(parser->m_encoding, s, end, &next);
  }

  parser->m_processor = prologProcessor;
  return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
                  (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
                  XML_ACCOUNT_DIRECT);
}

/* Scans [s, end) token by token with XmlPrologTok() until the end of the
   input is reached, then stores everything from s on through
   storeEntityValue().

   If this is not the final buffer, an incomplete or missing token makes
   the function return XML_ERROR_NONE with *nextPtr set to s.  In the
   final buffer, invalid, unclosed and partial-character tokens are
   returned as the matching error.
*/
static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser, const char *s, const char *end,
                     const char **nextPtr) {
  const char *start = s;
  const char *next = s;
  const ENCODING *enc = parser->m_encoding;
  int tok;

  for (;;) {
    tok = XmlPrologTok(enc, start, end, &next);
    /* Note: These bytes are accounted later in:
       - storeEntityValue
    */
    if (tok <= 0) {
      if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case XML_TOK_NONE: /* start == end */
      default:
        break;
      }
      /* found end of entity value - can store it now */
      return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
    }
    start = next;
  }
}

#endif /* XML_DTD */

/* Reads the first token of [s, end) with XmlPrologTok() and hands it,
   together with the buffer, to doProlog().  More input is expected
   (haveMore) unless this is the final buffer.
*/
static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser, const char *s, const char *end,
                const char **nextPtr) {
  const char *next = s;
  int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
  return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
                  (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
                  XML_ACCOUNT_DIRECT);
}

static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
         int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
         XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
  static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
  static const XML_Char atypeCDATA[]
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
  static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
  static const XML_Char atypeIDREF[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
  static const XML_Char atypeIDREFS[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
  static const XML_Char atypeENTITY[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
  static const XML_Char atypeENTITIES[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
         ASCII_I, ASCII_E, ASCII_S, '\0'};
  static const XML_Char atypeNMTOKEN[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
  static const XML_Char atypeNMTOKENS[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
         ASCII_E, ASCII_N, ASCII_S, '\0'};
  static const XML_Char notationPrefix[]
      = {ASCII_N, ASCII_O, ASCII_T, ASCII_A,      ASCII_T,
         ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
  static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
  static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};

#ifndef XML_DTD
  UNUSED_P(account);
#endif

  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  const char **eventPP;
  const char **eventEndPP;
  enum XML_Content_Quant quant;

  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP =
&(parser->m_openInternalEntities->internalEventPtr); 4743 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4744 } 4745 4746 for (;;) { 4747 int role; 4748 XML_Bool handleDefault = XML_TRUE; 4749 *eventPP = s; 4750 *eventEndPP = next; 4751 if (tok <= 0) { 4752 if (haveMore && tok != XML_TOK_INVALID) { 4753 *nextPtr = s; 4754 return XML_ERROR_NONE; 4755 } 4756 switch (tok) { 4757 case XML_TOK_INVALID: 4758 *eventPP = next; 4759 return XML_ERROR_INVALID_TOKEN; 4760 case XML_TOK_PARTIAL: 4761 return XML_ERROR_UNCLOSED_TOKEN; 4762 case XML_TOK_PARTIAL_CHAR: 4763 return XML_ERROR_PARTIAL_CHAR; 4764 case -XML_TOK_PROLOG_S: 4765 tok = -tok; 4766 break; 4767 case XML_TOK_NONE: 4768 #ifdef XML_DTD 4769 /* for internal PE NOT referenced between declarations */ 4770 if (enc != parser->m_encoding 4771 && ! parser->m_openInternalEntities->betweenDecl) { 4772 *nextPtr = s; 4773 return XML_ERROR_NONE; 4774 } 4775 /* WFC: PE Between Declarations - must check that PE contains 4776 complete markup, not only for external PEs, but also for 4777 internal PEs if the reference occurs between declarations. 4778 */ 4779 if (parser->m_isParamEntity || enc != parser->m_encoding) { 4780 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) 4781 == XML_ROLE_ERROR) 4782 return XML_ERROR_INCOMPLETE_PE; 4783 *nextPtr = s; 4784 return XML_ERROR_NONE; 4785 } 4786 #endif /* XML_DTD */ 4787 return XML_ERROR_NO_ELEMENTS; 4788 default: 4789 tok = -tok; 4790 next = end; 4791 break; 4792 } 4793 } 4794 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); 4795 #if XML_GE == 1 4796 switch (role) { 4797 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor 4798 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl 4799 # ifdef XML_DTD 4800 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl 4801 # endif 4802 break; 4803 default: 4804 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4805 accountingOnAbort(parser); 4806 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4807 } 4808 } 4809 #endif 4810 switch (role) { 4811 case XML_ROLE_XML_DECL: { 4812 enum XML_Error result = processXmlDecl(parser, 0, s, next); 4813 if (result != XML_ERROR_NONE) 4814 return result; 4815 enc = parser->m_encoding; 4816 handleDefault = XML_FALSE; 4817 } break; 4818 case XML_ROLE_DOCTYPE_NAME: 4819 if (parser->m_startDoctypeDeclHandler) { 4820 parser->m_doctypeName 4821 = poolStoreString(&parser->m_tempPool, enc, s, next); 4822 if (! parser->m_doctypeName) 4823 return XML_ERROR_NO_MEMORY; 4824 poolFinish(&parser->m_tempPool); 4825 parser->m_doctypePubid = NULL; 4826 handleDefault = XML_FALSE; 4827 } 4828 parser->m_doctypeSysid = NULL; /* always initialize to NULL */ 4829 break; 4830 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: 4831 if (parser->m_startDoctypeDeclHandler) { 4832 parser->m_startDoctypeDeclHandler( 4833 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 4834 parser->m_doctypePubid, 1); 4835 parser->m_doctypeName = NULL; 4836 poolClear(&parser->m_tempPool); 4837 handleDefault = XML_FALSE; 4838 } 4839 break; 4840 #ifdef XML_DTD 4841 case XML_ROLE_TEXT_DECL: { 4842 enum XML_Error result = processXmlDecl(parser, 1, s, next); 4843 if (result != XML_ERROR_NONE) 4844 return result; 4845 enc = parser->m_encoding; 4846 handleDefault = XML_FALSE; 4847 } break; 4848 #endif /* XML_DTD */ 4849 case XML_ROLE_DOCTYPE_PUBLIC_ID: 4850 #ifdef XML_DTD 4851 parser->m_useForeignDTD = XML_FALSE; 4852 parser->m_declEntity = (ENTITY *)lookup( 4853 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 4854 if (! parser->m_declEntity) 4855 return XML_ERROR_NO_MEMORY; 4856 #endif /* XML_DTD */ 4857 dtd->hasParamEntityRefs = XML_TRUE; 4858 if (parser->m_startDoctypeDeclHandler) { 4859 XML_Char *pubId; 4860 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 4861 return XML_ERROR_PUBLICID; 4862 pubId = poolStoreString(&parser->m_tempPool, enc, 4863 s + enc->minBytesPerChar, 4864 next - enc->minBytesPerChar); 4865 if (! pubId) 4866 return XML_ERROR_NO_MEMORY; 4867 normalizePublicId(pubId); 4868 poolFinish(&parser->m_tempPool); 4869 parser->m_doctypePubid = pubId; 4870 handleDefault = XML_FALSE; 4871 goto alreadyChecked; 4872 } 4873 /* fall through */ 4874 case XML_ROLE_ENTITY_PUBLIC_ID: 4875 if (! XmlIsPublicId(enc, s, next, eventPP)) 4876 return XML_ERROR_PUBLICID; 4877 alreadyChecked: 4878 if (dtd->keepProcessing && parser->m_declEntity) { 4879 XML_Char *tem 4880 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 4881 next - enc->minBytesPerChar); 4882 if (! tem) 4883 return XML_ERROR_NO_MEMORY; 4884 normalizePublicId(tem); 4885 parser->m_declEntity->publicId = tem; 4886 poolFinish(&dtd->pool); 4887 /* Don't suppress the default handler if we fell through from 4888 * the XML_ROLE_DOCTYPE_PUBLIC_ID case. 
4889 */ 4890 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) 4891 handleDefault = XML_FALSE; 4892 } 4893 break; 4894 case XML_ROLE_DOCTYPE_CLOSE: 4895 if (allowClosingDoctype != XML_TRUE) { 4896 /* Must not close doctype from within expanded parameter entities */ 4897 return XML_ERROR_INVALID_TOKEN; 4898 } 4899 4900 if (parser->m_doctypeName) { 4901 parser->m_startDoctypeDeclHandler( 4902 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 4903 parser->m_doctypePubid, 0); 4904 poolClear(&parser->m_tempPool); 4905 handleDefault = XML_FALSE; 4906 } 4907 /* parser->m_doctypeSysid will be non-NULL in the case of a previous 4908 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler 4909 was not set, indicating an external subset 4910 */ 4911 #ifdef XML_DTD 4912 if (parser->m_doctypeSysid || parser->m_useForeignDTD) { 4913 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 4914 dtd->hasParamEntityRefs = XML_TRUE; 4915 if (parser->m_paramEntityParsing 4916 && parser->m_externalEntityRefHandler) { 4917 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 4918 externalSubsetName, sizeof(ENTITY)); 4919 if (! entity) { 4920 /* The external subset name "#" will have already been 4921 * inserted into the hash table at the start of the 4922 * external entity parsing, so no allocation will happen 4923 * and lookup() cannot fail. 4924 */ 4925 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 4926 } 4927 if (parser->m_useForeignDTD) 4928 entity->base = parser->m_curBase; 4929 dtd->paramEntityRead = XML_FALSE; 4930 if (! parser->m_externalEntityRefHandler( 4931 parser->m_externalEntityRefHandlerArg, 0, entity->base, 4932 entity->systemId, entity->publicId)) 4933 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 4934 if (dtd->paramEntityRead) { 4935 if (! dtd->standalone && parser->m_notStandaloneHandler 4936 && ! 
parser->m_notStandaloneHandler(parser->m_handlerArg)) 4937 return XML_ERROR_NOT_STANDALONE; 4938 } 4939 /* if we didn't read the foreign DTD then this means that there 4940 is no external subset and we must reset dtd->hasParamEntityRefs 4941 */ 4942 else if (! parser->m_doctypeSysid) 4943 dtd->hasParamEntityRefs = hadParamEntityRefs; 4944 /* end of DTD - no need to update dtd->keepProcessing */ 4945 } 4946 parser->m_useForeignDTD = XML_FALSE; 4947 } 4948 #endif /* XML_DTD */ 4949 if (parser->m_endDoctypeDeclHandler) { 4950 parser->m_endDoctypeDeclHandler(parser->m_handlerArg); 4951 handleDefault = XML_FALSE; 4952 } 4953 break; 4954 case XML_ROLE_INSTANCE_START: 4955 #ifdef XML_DTD 4956 /* if there is no DOCTYPE declaration then now is the 4957 last chance to read the foreign DTD 4958 */ 4959 if (parser->m_useForeignDTD) { 4960 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 4961 dtd->hasParamEntityRefs = XML_TRUE; 4962 if (parser->m_paramEntityParsing 4963 && parser->m_externalEntityRefHandler) { 4964 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 4965 externalSubsetName, sizeof(ENTITY)); 4966 if (! entity) 4967 return XML_ERROR_NO_MEMORY; 4968 entity->base = parser->m_curBase; 4969 dtd->paramEntityRead = XML_FALSE; 4970 if (! parser->m_externalEntityRefHandler( 4971 parser->m_externalEntityRefHandlerArg, 0, entity->base, 4972 entity->systemId, entity->publicId)) 4973 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 4974 if (dtd->paramEntityRead) { 4975 if (! dtd->standalone && parser->m_notStandaloneHandler 4976 && ! 
parser->m_notStandaloneHandler(parser->m_handlerArg)) 4977 return XML_ERROR_NOT_STANDALONE; 4978 } 4979 /* if we didn't read the foreign DTD then this means that there 4980 is no external subset and we must reset dtd->hasParamEntityRefs 4981 */ 4982 else 4983 dtd->hasParamEntityRefs = hadParamEntityRefs; 4984 /* end of DTD - no need to update dtd->keepProcessing */ 4985 } 4986 } 4987 #endif /* XML_DTD */ 4988 parser->m_processor = contentProcessor; 4989 return contentProcessor(parser, s, end, nextPtr); 4990 case XML_ROLE_ATTLIST_ELEMENT_NAME: 4991 parser->m_declElementType = getElementType(parser, enc, s, next); 4992 if (! parser->m_declElementType) 4993 return XML_ERROR_NO_MEMORY; 4994 goto checkAttListDeclHandler; 4995 case XML_ROLE_ATTRIBUTE_NAME: 4996 parser->m_declAttributeId = getAttributeId(parser, enc, s, next); 4997 if (! parser->m_declAttributeId) 4998 return XML_ERROR_NO_MEMORY; 4999 parser->m_declAttributeIsCdata = XML_FALSE; 5000 parser->m_declAttributeType = NULL; 5001 parser->m_declAttributeIsId = XML_FALSE; 5002 goto checkAttListDeclHandler; 5003 case XML_ROLE_ATTRIBUTE_TYPE_CDATA: 5004 parser->m_declAttributeIsCdata = XML_TRUE; 5005 parser->m_declAttributeType = atypeCDATA; 5006 goto checkAttListDeclHandler; 5007 case XML_ROLE_ATTRIBUTE_TYPE_ID: 5008 parser->m_declAttributeIsId = XML_TRUE; 5009 parser->m_declAttributeType = atypeID; 5010 goto checkAttListDeclHandler; 5011 case XML_ROLE_ATTRIBUTE_TYPE_IDREF: 5012 parser->m_declAttributeType = atypeIDREF; 5013 goto checkAttListDeclHandler; 5014 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: 5015 parser->m_declAttributeType = atypeIDREFS; 5016 goto checkAttListDeclHandler; 5017 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: 5018 parser->m_declAttributeType = atypeENTITY; 5019 goto checkAttListDeclHandler; 5020 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: 5021 parser->m_declAttributeType = atypeENTITIES; 5022 goto checkAttListDeclHandler; 5023 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: 5024 parser->m_declAttributeType = 
atypeNMTOKEN; 5025 goto checkAttListDeclHandler; 5026 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: 5027 parser->m_declAttributeType = atypeNMTOKENS; 5028 checkAttListDeclHandler: 5029 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 5030 handleDefault = XML_FALSE; 5031 break; 5032 case XML_ROLE_ATTRIBUTE_ENUM_VALUE: 5033 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: 5034 if (dtd->keepProcessing && parser->m_attlistDeclHandler) { 5035 const XML_Char *prefix; 5036 if (parser->m_declAttributeType) { 5037 prefix = enumValueSep; 5038 } else { 5039 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix 5040 : enumValueStart); 5041 } 5042 if (! poolAppendString(&parser->m_tempPool, prefix)) 5043 return XML_ERROR_NO_MEMORY; 5044 if (! poolAppend(&parser->m_tempPool, enc, s, next)) 5045 return XML_ERROR_NO_MEMORY; 5046 parser->m_declAttributeType = parser->m_tempPool.start; 5047 handleDefault = XML_FALSE; 5048 } 5049 break; 5050 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: 5051 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: 5052 if (dtd->keepProcessing) { 5053 if (! defineAttribute(parser->m_declElementType, 5054 parser->m_declAttributeId, 5055 parser->m_declAttributeIsCdata, 5056 parser->m_declAttributeIsId, 0, parser)) 5057 return XML_ERROR_NO_MEMORY; 5058 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5059 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5060 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5061 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5062 /* Enumerated or Notation type */ 5063 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5064 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5065 return XML_ERROR_NO_MEMORY; 5066 parser->m_declAttributeType = parser->m_tempPool.start; 5067 poolFinish(&parser->m_tempPool); 5068 } 5069 *eventEndPP = s; 5070 parser->m_attlistDeclHandler( 5071 parser->m_handlerArg, parser->m_declElementType->name, 5072 parser->m_declAttributeId->name, parser->m_declAttributeType, 0, 5073 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); 5074 handleDefault = XML_FALSE; 5075 } 5076 } 5077 poolClear(&parser->m_tempPool); 5078 break; 5079 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: 5080 case XML_ROLE_FIXED_ATTRIBUTE_VALUE: 5081 if (dtd->keepProcessing) { 5082 const XML_Char *attVal; 5083 enum XML_Error result = storeAttributeValue( 5084 parser, enc, parser->m_declAttributeIsCdata, 5085 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, 5086 XML_ACCOUNT_NONE); 5087 if (result) 5088 return result; 5089 attVal = poolStart(&dtd->pool); 5090 poolFinish(&dtd->pool); 5091 /* ID attributes aren't allowed to have a default */ 5092 if (! defineAttribute( 5093 parser->m_declElementType, parser->m_declAttributeId, 5094 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) 5095 return XML_ERROR_NO_MEMORY; 5096 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5097 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5098 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5099 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5100 /* Enumerated or Notation type */ 5101 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5102 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5103 return XML_ERROR_NO_MEMORY; 5104 parser->m_declAttributeType = parser->m_tempPool.start; 5105 poolFinish(&parser->m_tempPool); 5106 } 5107 *eventEndPP = s; 5108 parser->m_attlistDeclHandler( 5109 parser->m_handlerArg, parser->m_declElementType->name, 5110 parser->m_declAttributeId->name, parser->m_declAttributeType, 5111 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); 5112 poolClear(&parser->m_tempPool); 5113 handleDefault = XML_FALSE; 5114 } 5115 } 5116 break; 5117 case XML_ROLE_ENTITY_VALUE: 5118 if (dtd->keepProcessing) { 5119 #if XML_GE == 1 5120 // This will store the given replacement text in 5121 // parser->m_declEntity->textPtr. 5122 enum XML_Error result 5123 = storeEntityValue(parser, enc, s + enc->minBytesPerChar, 5124 next - enc->minBytesPerChar, XML_ACCOUNT_NONE); 5125 if (parser->m_declEntity) { 5126 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); 5127 parser->m_declEntity->textLen 5128 = (int)(poolLength(&dtd->entityValuePool)); 5129 poolFinish(&dtd->entityValuePool); 5130 if (parser->m_entityDeclHandler) { 5131 *eventEndPP = s; 5132 parser->m_entityDeclHandler( 5133 parser->m_handlerArg, parser->m_declEntity->name, 5134 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5135 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5136 handleDefault = XML_FALSE; 5137 } 5138 } else 5139 poolDiscard(&dtd->entityValuePool); 5140 if (result != XML_ERROR_NONE) 5141 return result; 5142 #else 5143 // This will store "&entity123;" in parser->m_declEntity->textPtr 5144 // to end up as "&entity123;" in the handler. 
5145 if (parser->m_declEntity != NULL) { 5146 const enum XML_Error result 5147 = storeSelfEntityValue(parser, parser->m_declEntity); 5148 if (result != XML_ERROR_NONE) 5149 return result; 5150 5151 if (parser->m_entityDeclHandler) { 5152 *eventEndPP = s; 5153 parser->m_entityDeclHandler( 5154 parser->m_handlerArg, parser->m_declEntity->name, 5155 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5156 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5157 handleDefault = XML_FALSE; 5158 } 5159 } 5160 #endif 5161 } 5162 break; 5163 case XML_ROLE_DOCTYPE_SYSTEM_ID: 5164 #ifdef XML_DTD 5165 parser->m_useForeignDTD = XML_FALSE; 5166 #endif /* XML_DTD */ 5167 dtd->hasParamEntityRefs = XML_TRUE; 5168 if (parser->m_startDoctypeDeclHandler) { 5169 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, 5170 s + enc->minBytesPerChar, 5171 next - enc->minBytesPerChar); 5172 if (parser->m_doctypeSysid == NULL) 5173 return XML_ERROR_NO_MEMORY; 5174 poolFinish(&parser->m_tempPool); 5175 handleDefault = XML_FALSE; 5176 } 5177 #ifdef XML_DTD 5178 else 5179 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL 5180 for the case where no parser->m_startDoctypeDeclHandler is set */ 5181 parser->m_doctypeSysid = externalSubsetName; 5182 #endif /* XML_DTD */ 5183 if (! dtd->standalone 5184 #ifdef XML_DTD 5185 && ! parser->m_paramEntityParsing 5186 #endif /* XML_DTD */ 5187 && parser->m_notStandaloneHandler 5188 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5189 return XML_ERROR_NOT_STANDALONE; 5190 #ifndef XML_DTD 5191 break; 5192 #else /* XML_DTD */ 5193 if (! parser->m_declEntity) { 5194 parser->m_declEntity = (ENTITY *)lookup( 5195 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5196 if (! 
parser->m_declEntity) 5197 return XML_ERROR_NO_MEMORY; 5198 parser->m_declEntity->publicId = NULL; 5199 } 5200 #endif /* XML_DTD */ 5201 /* fall through */ 5202 case XML_ROLE_ENTITY_SYSTEM_ID: 5203 if (dtd->keepProcessing && parser->m_declEntity) { 5204 parser->m_declEntity->systemId 5205 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5206 next - enc->minBytesPerChar); 5207 if (! parser->m_declEntity->systemId) 5208 return XML_ERROR_NO_MEMORY; 5209 parser->m_declEntity->base = parser->m_curBase; 5210 poolFinish(&dtd->pool); 5211 /* Don't suppress the default handler if we fell through from 5212 * the XML_ROLE_DOCTYPE_SYSTEM_ID case. 5213 */ 5214 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) 5215 handleDefault = XML_FALSE; 5216 } 5217 break; 5218 case XML_ROLE_ENTITY_COMPLETE: 5219 #if XML_GE == 0 5220 // This will store "&entity123;" in entity->textPtr 5221 // to end up as "&entity123;" in the handler. 5222 if (parser->m_declEntity != NULL) { 5223 const enum XML_Error result 5224 = storeSelfEntityValue(parser, parser->m_declEntity); 5225 if (result != XML_ERROR_NONE) 5226 return result; 5227 } 5228 #endif 5229 if (dtd->keepProcessing && parser->m_declEntity 5230 && parser->m_entityDeclHandler) { 5231 *eventEndPP = s; 5232 parser->m_entityDeclHandler( 5233 parser->m_handlerArg, parser->m_declEntity->name, 5234 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, 5235 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); 5236 handleDefault = XML_FALSE; 5237 } 5238 break; 5239 case XML_ROLE_ENTITY_NOTATION_NAME: 5240 if (dtd->keepProcessing && parser->m_declEntity) { 5241 parser->m_declEntity->notation 5242 = poolStoreString(&dtd->pool, enc, s, next); 5243 if (! 
parser->m_declEntity->notation) 5244 return XML_ERROR_NO_MEMORY; 5245 poolFinish(&dtd->pool); 5246 if (parser->m_unparsedEntityDeclHandler) { 5247 *eventEndPP = s; 5248 parser->m_unparsedEntityDeclHandler( 5249 parser->m_handlerArg, parser->m_declEntity->name, 5250 parser->m_declEntity->base, parser->m_declEntity->systemId, 5251 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5252 handleDefault = XML_FALSE; 5253 } else if (parser->m_entityDeclHandler) { 5254 *eventEndPP = s; 5255 parser->m_entityDeclHandler( 5256 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, 5257 parser->m_declEntity->base, parser->m_declEntity->systemId, 5258 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5259 handleDefault = XML_FALSE; 5260 } 5261 } 5262 break; 5263 case XML_ROLE_GENERAL_ENTITY_NAME: { 5264 if (XmlPredefinedEntityName(enc, s, next)) { 5265 parser->m_declEntity = NULL; 5266 break; 5267 } 5268 if (dtd->keepProcessing) { 5269 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5270 if (! name) 5271 return XML_ERROR_NO_MEMORY; 5272 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, 5273 name, sizeof(ENTITY)); 5274 if (! parser->m_declEntity) 5275 return XML_ERROR_NO_MEMORY; 5276 if (parser->m_declEntity->name != name) { 5277 poolDiscard(&dtd->pool); 5278 parser->m_declEntity = NULL; 5279 } else { 5280 poolFinish(&dtd->pool); 5281 parser->m_declEntity->publicId = NULL; 5282 parser->m_declEntity->is_param = XML_FALSE; 5283 /* if we have a parent parser or are reading an internal parameter 5284 entity, then the entity declaration is not considered "internal" 5285 */ 5286 parser->m_declEntity->is_internal 5287 = ! 
(parser->m_parentParser || parser->m_openInternalEntities); 5288 if (parser->m_entityDeclHandler) 5289 handleDefault = XML_FALSE; 5290 } 5291 } else { 5292 poolDiscard(&dtd->pool); 5293 parser->m_declEntity = NULL; 5294 } 5295 } break; 5296 case XML_ROLE_PARAM_ENTITY_NAME: 5297 #ifdef XML_DTD 5298 if (dtd->keepProcessing) { 5299 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5300 if (! name) 5301 return XML_ERROR_NO_MEMORY; 5302 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5303 name, sizeof(ENTITY)); 5304 if (! parser->m_declEntity) 5305 return XML_ERROR_NO_MEMORY; 5306 if (parser->m_declEntity->name != name) { 5307 poolDiscard(&dtd->pool); 5308 parser->m_declEntity = NULL; 5309 } else { 5310 poolFinish(&dtd->pool); 5311 parser->m_declEntity->publicId = NULL; 5312 parser->m_declEntity->is_param = XML_TRUE; 5313 /* if we have a parent parser or are reading an internal parameter 5314 entity, then the entity declaration is not considered "internal" 5315 */ 5316 parser->m_declEntity->is_internal 5317 = ! (parser->m_parentParser || parser->m_openInternalEntities); 5318 if (parser->m_entityDeclHandler) 5319 handleDefault = XML_FALSE; 5320 } 5321 } else { 5322 poolDiscard(&dtd->pool); 5323 parser->m_declEntity = NULL; 5324 } 5325 #else /* not XML_DTD */ 5326 parser->m_declEntity = NULL; 5327 #endif /* XML_DTD */ 5328 break; 5329 case XML_ROLE_NOTATION_NAME: 5330 parser->m_declNotationPublicId = NULL; 5331 parser->m_declNotationName = NULL; 5332 if (parser->m_notationDeclHandler) { 5333 parser->m_declNotationName 5334 = poolStoreString(&parser->m_tempPool, enc, s, next); 5335 if (! parser->m_declNotationName) 5336 return XML_ERROR_NO_MEMORY; 5337 poolFinish(&parser->m_tempPool); 5338 handleDefault = XML_FALSE; 5339 } 5340 break; 5341 case XML_ROLE_NOTATION_PUBLIC_ID: 5342 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 5343 return XML_ERROR_PUBLICID; 5344 if (parser 5345 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ 5346 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, 5347 s + enc->minBytesPerChar, 5348 next - enc->minBytesPerChar); 5349 if (! tem) 5350 return XML_ERROR_NO_MEMORY; 5351 normalizePublicId(tem); 5352 parser->m_declNotationPublicId = tem; 5353 poolFinish(&parser->m_tempPool); 5354 handleDefault = XML_FALSE; 5355 } 5356 break; 5357 case XML_ROLE_NOTATION_SYSTEM_ID: 5358 if (parser->m_declNotationName && parser->m_notationDeclHandler) { 5359 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, 5360 s + enc->minBytesPerChar, 5361 next - enc->minBytesPerChar); 5362 if (! systemId) 5363 return XML_ERROR_NO_MEMORY; 5364 *eventEndPP = s; 5365 parser->m_notationDeclHandler( 5366 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5367 systemId, parser->m_declNotationPublicId); 5368 handleDefault = XML_FALSE; 5369 } 5370 poolClear(&parser->m_tempPool); 5371 break; 5372 case XML_ROLE_NOTATION_NO_SYSTEM_ID: 5373 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { 5374 *eventEndPP = s; 5375 parser->m_notationDeclHandler( 5376 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5377 0, parser->m_declNotationPublicId); 5378 handleDefault = XML_FALSE; 5379 } 5380 poolClear(&parser->m_tempPool); 5381 break; 5382 case XML_ROLE_ERROR: 5383 switch (tok) { 5384 case XML_TOK_PARAM_ENTITY_REF: 5385 /* PE references in internal subset are 5386 not allowed within declarations. 
*/ 5387 return XML_ERROR_PARAM_ENTITY_REF; 5388 case XML_TOK_XML_DECL: 5389 return XML_ERROR_MISPLACED_XML_PI; 5390 default: 5391 return XML_ERROR_SYNTAX; 5392 } 5393 #ifdef XML_DTD 5394 case XML_ROLE_IGNORE_SECT: { 5395 enum XML_Error result; 5396 if (parser->m_defaultHandler) 5397 reportDefault(parser, enc, s, next); 5398 handleDefault = XML_FALSE; 5399 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); 5400 if (result != XML_ERROR_NONE) 5401 return result; 5402 else if (! next) { 5403 parser->m_processor = ignoreSectionProcessor; 5404 return result; 5405 } 5406 } break; 5407 #endif /* XML_DTD */ 5408 case XML_ROLE_GROUP_OPEN: 5409 if (parser->m_prologState.level >= parser->m_groupSize) { 5410 if (parser->m_groupSize) { 5411 { 5412 /* Detect and prevent integer overflow */ 5413 if (parser->m_groupSize > (unsigned int)(-1) / 2u) { 5414 return XML_ERROR_NO_MEMORY; 5415 } 5416 5417 char *const new_connector = (char *)REALLOC( 5418 parser, parser->m_groupConnector, parser->m_groupSize *= 2); 5419 if (new_connector == NULL) { 5420 parser->m_groupSize /= 2; 5421 return XML_ERROR_NO_MEMORY; 5422 } 5423 parser->m_groupConnector = new_connector; 5424 } 5425 5426 if (dtd->scaffIndex) { 5427 /* Detect and prevent integer overflow. 5428 * The preprocessor guard addresses the "always false" warning 5429 * from -Wtype-limits on platforms where 5430 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 5431 #if UINT_MAX >= SIZE_MAX 5432 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { 5433 return XML_ERROR_NO_MEMORY; 5434 } 5435 #endif 5436 5437 int *const new_scaff_index = (int *)REALLOC( 5438 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); 5439 if (new_scaff_index == NULL) 5440 return XML_ERROR_NO_MEMORY; 5441 dtd->scaffIndex = new_scaff_index; 5442 } 5443 } else { 5444 parser->m_groupConnector 5445 = (char *)MALLOC(parser, parser->m_groupSize = 32); 5446 if (! 
parser->m_groupConnector) { 5447 parser->m_groupSize = 0; 5448 return XML_ERROR_NO_MEMORY; 5449 } 5450 } 5451 } 5452 parser->m_groupConnector[parser->m_prologState.level] = 0; 5453 if (dtd->in_eldecl) { 5454 int myindex = nextScaffoldPart(parser); 5455 if (myindex < 0) 5456 return XML_ERROR_NO_MEMORY; 5457 assert(dtd->scaffIndex != NULL); 5458 dtd->scaffIndex[dtd->scaffLevel] = myindex; 5459 dtd->scaffLevel++; 5460 dtd->scaffold[myindex].type = XML_CTYPE_SEQ; 5461 if (parser->m_elementDeclHandler) 5462 handleDefault = XML_FALSE; 5463 } 5464 break; 5465 case XML_ROLE_GROUP_SEQUENCE: 5466 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) 5467 return XML_ERROR_SYNTAX; 5468 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; 5469 if (dtd->in_eldecl && parser->m_elementDeclHandler) 5470 handleDefault = XML_FALSE; 5471 break; 5472 case XML_ROLE_GROUP_CHOICE: 5473 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) 5474 return XML_ERROR_SYNTAX; 5475 if (dtd->in_eldecl 5476 && ! parser->m_groupConnector[parser->m_prologState.level] 5477 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5478 != XML_CTYPE_MIXED)) { 5479 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5480 = XML_CTYPE_CHOICE; 5481 if (parser->m_elementDeclHandler) 5482 handleDefault = XML_FALSE; 5483 } 5484 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; 5485 break; 5486 case XML_ROLE_PARAM_ENTITY_REF: 5487 #ifdef XML_DTD 5488 case XML_ROLE_INNER_PARAM_ENTITY_REF: 5489 dtd->hasParamEntityRefs = XML_TRUE; 5490 if (! parser->m_paramEntityParsing) 5491 dtd->keepProcessing = dtd->standalone; 5492 else { 5493 const XML_Char *name; 5494 ENTITY *entity; 5495 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5496 next - enc->minBytesPerChar); 5497 if (! 
name) 5498 return XML_ERROR_NO_MEMORY; 5499 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 5500 poolDiscard(&dtd->pool); 5501 /* first, determine if a check for an existing declaration is needed; 5502 if yes, check that the entity exists, and that it is internal, 5503 otherwise call the skipped entity handler 5504 */ 5505 if (parser->m_prologState.documentEntity 5506 && (dtd->standalone ? ! parser->m_openInternalEntities 5507 : ! dtd->hasParamEntityRefs)) { 5508 if (! entity) 5509 return XML_ERROR_UNDEFINED_ENTITY; 5510 else if (! entity->is_internal) { 5511 /* It's hard to exhaustively search the code to be sure, 5512 * but there doesn't seem to be a way of executing the 5513 * following line. There are two cases: 5514 * 5515 * If 'standalone' is false, the DTD must have no 5516 * parameter entities or we wouldn't have passed the outer 5517 * 'if' statement. That means the only entity in the hash 5518 * table is the external subset name "#" which cannot be 5519 * given as a parameter entity name in XML syntax, so the 5520 * lookup must have returned NULL and we don't even reach 5521 * the test for an internal entity. 5522 * 5523 * If 'standalone' is true, it does not seem to be 5524 * possible to create entities taking this code path that 5525 * are not internal entities, so fail the test above. 5526 * 5527 * Because this analysis is very uncertain, the code is 5528 * being left in place and merely removed from the 5529 * coverage test statistics. 5530 */ 5531 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ 5532 } 5533 } else if (! 
entity) { 5534 dtd->keepProcessing = dtd->standalone; 5535 /* cannot report skipped entities in declarations */ 5536 if ((role == XML_ROLE_PARAM_ENTITY_REF) 5537 && parser->m_skippedEntityHandler) { 5538 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); 5539 handleDefault = XML_FALSE; 5540 } 5541 break; 5542 } 5543 if (entity->open) 5544 return XML_ERROR_RECURSIVE_ENTITY_REF; 5545 if (entity->textPtr) { 5546 enum XML_Error result; 5547 XML_Bool betweenDecl 5548 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); 5549 result = processInternalEntity(parser, entity, betweenDecl); 5550 if (result != XML_ERROR_NONE) 5551 return result; 5552 handleDefault = XML_FALSE; 5553 break; 5554 } 5555 if (parser->m_externalEntityRefHandler) { 5556 dtd->paramEntityRead = XML_FALSE; 5557 entity->open = XML_TRUE; 5558 entityTrackingOnOpen(parser, entity, __LINE__); 5559 if (! parser->m_externalEntityRefHandler( 5560 parser->m_externalEntityRefHandlerArg, 0, entity->base, 5561 entity->systemId, entity->publicId)) { 5562 entityTrackingOnClose(parser, entity, __LINE__); 5563 entity->open = XML_FALSE; 5564 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5565 } 5566 entityTrackingOnClose(parser, entity, __LINE__); 5567 entity->open = XML_FALSE; 5568 handleDefault = XML_FALSE; 5569 if (! dtd->paramEntityRead) { 5570 dtd->keepProcessing = dtd->standalone; 5571 break; 5572 } 5573 } else { 5574 dtd->keepProcessing = dtd->standalone; 5575 break; 5576 } 5577 } 5578 #endif /* XML_DTD */ 5579 if (! dtd->standalone && parser->m_notStandaloneHandler 5580 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5581 return XML_ERROR_NOT_STANDALONE; 5582 break; 5583 5584 /* Element declaration stuff */ 5585 5586 case XML_ROLE_ELEMENT_NAME: 5587 if (parser->m_elementDeclHandler) { 5588 parser->m_declElementType = getElementType(parser, enc, s, next); 5589 if (! 
parser->m_declElementType) 5590 return XML_ERROR_NO_MEMORY; 5591 dtd->scaffLevel = 0; 5592 dtd->scaffCount = 0; 5593 dtd->in_eldecl = XML_TRUE; 5594 handleDefault = XML_FALSE; 5595 } 5596 break; 5597 5598 case XML_ROLE_CONTENT_ANY: 5599 case XML_ROLE_CONTENT_EMPTY: 5600 if (dtd->in_eldecl) { 5601 if (parser->m_elementDeclHandler) { 5602 XML_Content *content 5603 = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); 5604 if (! content) 5605 return XML_ERROR_NO_MEMORY; 5606 content->quant = XML_CQUANT_NONE; 5607 content->name = NULL; 5608 content->numchildren = 0; 5609 content->children = NULL; 5610 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY 5611 : XML_CTYPE_EMPTY); 5612 *eventEndPP = s; 5613 parser->m_elementDeclHandler( 5614 parser->m_handlerArg, parser->m_declElementType->name, content); 5615 handleDefault = XML_FALSE; 5616 } 5617 dtd->in_eldecl = XML_FALSE; 5618 } 5619 break; 5620 5621 case XML_ROLE_CONTENT_PCDATA: 5622 if (dtd->in_eldecl) { 5623 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5624 = XML_CTYPE_MIXED; 5625 if (parser->m_elementDeclHandler) 5626 handleDefault = XML_FALSE; 5627 } 5628 break; 5629 5630 case XML_ROLE_CONTENT_ELEMENT: 5631 quant = XML_CQUANT_NONE; 5632 goto elementContent; 5633 case XML_ROLE_CONTENT_ELEMENT_OPT: 5634 quant = XML_CQUANT_OPT; 5635 goto elementContent; 5636 case XML_ROLE_CONTENT_ELEMENT_REP: 5637 quant = XML_CQUANT_REP; 5638 goto elementContent; 5639 case XML_ROLE_CONTENT_ELEMENT_PLUS: 5640 quant = XML_CQUANT_PLUS; 5641 elementContent: 5642 if (dtd->in_eldecl) { 5643 ELEMENT_TYPE *el; 5644 const XML_Char *name; 5645 size_t nameLen; 5646 const char *nxt 5647 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); 5648 int myindex = nextScaffoldPart(parser); 5649 if (myindex < 0) 5650 return XML_ERROR_NO_MEMORY; 5651 dtd->scaffold[myindex].type = XML_CTYPE_NAME; 5652 dtd->scaffold[myindex].quant = quant; 5653 el = getElementType(parser, enc, s, nxt); 5654 if (! 
el) 5655 return XML_ERROR_NO_MEMORY; 5656 name = el->name; 5657 dtd->scaffold[myindex].name = name; 5658 nameLen = 0; 5659 for (; name[nameLen++];) 5660 ; 5661 5662 /* Detect and prevent integer overflow */ 5663 if (nameLen > UINT_MAX - dtd->contentStringLen) { 5664 return XML_ERROR_NO_MEMORY; 5665 } 5666 5667 dtd->contentStringLen += (unsigned)nameLen; 5668 if (parser->m_elementDeclHandler) 5669 handleDefault = XML_FALSE; 5670 } 5671 break; 5672 5673 case XML_ROLE_GROUP_CLOSE: 5674 quant = XML_CQUANT_NONE; 5675 goto closeGroup; 5676 case XML_ROLE_GROUP_CLOSE_OPT: 5677 quant = XML_CQUANT_OPT; 5678 goto closeGroup; 5679 case XML_ROLE_GROUP_CLOSE_REP: 5680 quant = XML_CQUANT_REP; 5681 goto closeGroup; 5682 case XML_ROLE_GROUP_CLOSE_PLUS: 5683 quant = XML_CQUANT_PLUS; 5684 closeGroup: 5685 if (dtd->in_eldecl) { 5686 if (parser->m_elementDeclHandler) 5687 handleDefault = XML_FALSE; 5688 dtd->scaffLevel--; 5689 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; 5690 if (dtd->scaffLevel == 0) { 5691 if (! handleDefault) { 5692 XML_Content *model = build_model(parser); 5693 if (! model) 5694 return XML_ERROR_NO_MEMORY; 5695 *eventEndPP = s; 5696 parser->m_elementDeclHandler( 5697 parser->m_handlerArg, parser->m_declElementType->name, model); 5698 } 5699 dtd->in_eldecl = XML_FALSE; 5700 dtd->contentStringLen = 0; 5701 } 5702 } 5703 break; 5704 /* End element declaration stuff */ 5705 5706 case XML_ROLE_PI: 5707 if (! reportProcessingInstruction(parser, enc, s, next)) 5708 return XML_ERROR_NO_MEMORY; 5709 handleDefault = XML_FALSE; 5710 break; 5711 case XML_ROLE_COMMENT: 5712 if (! 
reportComment(parser, enc, s, next)) 5713 return XML_ERROR_NO_MEMORY; 5714 handleDefault = XML_FALSE; 5715 break; 5716 case XML_ROLE_NONE: 5717 switch (tok) { 5718 case XML_TOK_BOM: 5719 handleDefault = XML_FALSE; 5720 break; 5721 } 5722 break; 5723 case XML_ROLE_DOCTYPE_NONE: 5724 if (parser->m_startDoctypeDeclHandler) 5725 handleDefault = XML_FALSE; 5726 break; 5727 case XML_ROLE_ENTITY_NONE: 5728 if (dtd->keepProcessing && parser->m_entityDeclHandler) 5729 handleDefault = XML_FALSE; 5730 break; 5731 case XML_ROLE_NOTATION_NONE: 5732 if (parser->m_notationDeclHandler) 5733 handleDefault = XML_FALSE; 5734 break; 5735 case XML_ROLE_ATTLIST_NONE: 5736 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 5737 handleDefault = XML_FALSE; 5738 break; 5739 case XML_ROLE_ELEMENT_NONE: 5740 if (parser->m_elementDeclHandler) 5741 handleDefault = XML_FALSE; 5742 break; 5743 } /* end of big switch */ 5744 5745 if (handleDefault && parser->m_defaultHandler) 5746 reportDefault(parser, enc, s, next); 5747 5748 switch (parser->m_parsingStatus.parsing) { 5749 case XML_SUSPENDED: 5750 *nextPtr = next; 5751 return XML_ERROR_NONE; 5752 case XML_FINISHED: 5753 return XML_ERROR_ABORTED; 5754 default: 5755 s = next; 5756 tok = XmlPrologTok(enc, s, end, &next); 5757 } 5758 } 5759 /* not reached */ 5760 } 5761 5762 static enum XML_Error PTRCALL 5763 epilogProcessor(XML_Parser parser, const char *s, const char *end, 5764 const char **nextPtr) { 5765 parser->m_processor = epilogProcessor; 5766 parser->m_eventPtr = s; 5767 for (;;) { 5768 const char *next = NULL; 5769 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5770 #if XML_GE == 1 5771 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5772 XML_ACCOUNT_DIRECT)) { 5773 accountingOnAbort(parser); 5774 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5775 } 5776 #endif 5777 parser->m_eventEndPtr = next; 5778 switch (tok) { 5779 /* report partial linebreak - it might be the last token */ 5780 case -XML_TOK_PROLOG_S: 5781 if (parser->m_defaultHandler) { 5782 reportDefault(parser, parser->m_encoding, s, next); 5783 if (parser->m_parsingStatus.parsing == XML_FINISHED) 5784 return XML_ERROR_ABORTED; 5785 } 5786 *nextPtr = next; 5787 return XML_ERROR_NONE; 5788 case XML_TOK_NONE: 5789 *nextPtr = s; 5790 return XML_ERROR_NONE; 5791 case XML_TOK_PROLOG_S: 5792 if (parser->m_defaultHandler) 5793 reportDefault(parser, parser->m_encoding, s, next); 5794 break; 5795 case XML_TOK_PI: 5796 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) 5797 return XML_ERROR_NO_MEMORY; 5798 break; 5799 case XML_TOK_COMMENT: 5800 if (! reportComment(parser, parser->m_encoding, s, next)) 5801 return XML_ERROR_NO_MEMORY; 5802 break; 5803 case XML_TOK_INVALID: 5804 parser->m_eventPtr = next; 5805 return XML_ERROR_INVALID_TOKEN; 5806 case XML_TOK_PARTIAL: 5807 if (! parser->m_parsingStatus.finalBuffer) { 5808 *nextPtr = s; 5809 return XML_ERROR_NONE; 5810 } 5811 return XML_ERROR_UNCLOSED_TOKEN; 5812 case XML_TOK_PARTIAL_CHAR: 5813 if (! 
parser->m_parsingStatus.finalBuffer) { 5814 *nextPtr = s; 5815 return XML_ERROR_NONE; 5816 } 5817 return XML_ERROR_PARTIAL_CHAR; 5818 default: 5819 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 5820 } 5821 parser->m_eventPtr = s = next; 5822 switch (parser->m_parsingStatus.parsing) { 5823 case XML_SUSPENDED: 5824 *nextPtr = next; 5825 return XML_ERROR_NONE; 5826 case XML_FINISHED: 5827 return XML_ERROR_ABORTED; 5828 default:; 5829 } 5830 } 5831 } 5832 5833 static enum XML_Error 5834 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { 5835 const char *textStart, *textEnd; 5836 const char *next; 5837 enum XML_Error result; 5838 OPEN_INTERNAL_ENTITY *openEntity; 5839 5840 if (parser->m_freeInternalEntities) { 5841 openEntity = parser->m_freeInternalEntities; 5842 parser->m_freeInternalEntities = openEntity->next; 5843 } else { 5844 openEntity 5845 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); 5846 if (! openEntity) 5847 return XML_ERROR_NO_MEMORY; 5848 } 5849 entity->open = XML_TRUE; 5850 #if XML_GE == 1 5851 entityTrackingOnOpen(parser, entity, __LINE__); 5852 #endif 5853 entity->processed = 0; 5854 openEntity->next = parser->m_openInternalEntities; 5855 parser->m_openInternalEntities = openEntity; 5856 openEntity->entity = entity; 5857 openEntity->startTagLevel = parser->m_tagLevel; 5858 openEntity->betweenDecl = betweenDecl; 5859 openEntity->internalEventPtr = NULL; 5860 openEntity->internalEventEndPtr = NULL; 5861 textStart = (const char *)entity->textPtr; 5862 textEnd = (const char *)(entity->textPtr + entity->textLen); 5863 /* Set a safe default value in case 'next' does not get set */ 5864 next = textStart; 5865 5866 if (entity->is_param) { 5867 int tok 5868 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5869 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5870 tok, next, &next, XML_FALSE, XML_FALSE, 5871 XML_ACCOUNT_ENTITY_EXPANSION); 5872 } else { 5873 result = 
doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, 5874 textStart, textEnd, &next, XML_FALSE, 5875 XML_ACCOUNT_ENTITY_EXPANSION); 5876 } 5877 5878 if (result == XML_ERROR_NONE) { 5879 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5880 entity->processed = (int)(next - textStart); 5881 parser->m_processor = internalEntityProcessor; 5882 } else if (parser->m_openInternalEntities->entity == entity) { 5883 #if XML_GE == 1 5884 entityTrackingOnClose(parser, entity, __LINE__); 5885 #endif /* XML_GE == 1 */ 5886 entity->open = XML_FALSE; 5887 parser->m_openInternalEntities = openEntity->next; 5888 /* put openEntity back in list of free instances */ 5889 openEntity->next = parser->m_freeInternalEntities; 5890 parser->m_freeInternalEntities = openEntity; 5891 } 5892 } 5893 return result; 5894 } 5895 5896 static enum XML_Error PTRCALL 5897 internalEntityProcessor(XML_Parser parser, const char *s, const char *end, 5898 const char **nextPtr) { 5899 ENTITY *entity; 5900 const char *textStart, *textEnd; 5901 const char *next; 5902 enum XML_Error result; 5903 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; 5904 if (! 
openEntity) 5905 return XML_ERROR_UNEXPECTED_STATE; 5906 5907 entity = openEntity->entity; 5908 textStart = ((const char *)entity->textPtr) + entity->processed; 5909 textEnd = (const char *)(entity->textPtr + entity->textLen); 5910 /* Set a safe default value in case 'next' does not get set */ 5911 next = textStart; 5912 5913 if (entity->is_param) { 5914 int tok 5915 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5916 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5917 tok, next, &next, XML_FALSE, XML_TRUE, 5918 XML_ACCOUNT_ENTITY_EXPANSION); 5919 } else { 5920 result = doContent(parser, openEntity->startTagLevel, 5921 parser->m_internalEncoding, textStart, textEnd, &next, 5922 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); 5923 } 5924 5925 if (result != XML_ERROR_NONE) 5926 return result; 5927 5928 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5929 entity->processed = (int)(next - (const char *)entity->textPtr); 5930 return result; 5931 } 5932 5933 #if XML_GE == 1 5934 entityTrackingOnClose(parser, entity, __LINE__); 5935 #endif 5936 entity->open = XML_FALSE; 5937 parser->m_openInternalEntities = openEntity->next; 5938 /* put openEntity back in list of free instances */ 5939 openEntity->next = parser->m_freeInternalEntities; 5940 parser->m_freeInternalEntities = openEntity; 5941 5942 // If there are more open entities we want to stop right here and have the 5943 // upcoming call to XML_ResumeParser continue with entity content, or it would 5944 // be ignored altogether. 5945 if (parser->m_openInternalEntities != NULL 5946 && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5947 return XML_ERROR_NONE; 5948 } 5949 5950 if (entity->is_param) { 5951 int tok; 5952 parser->m_processor = prologProcessor; 5953 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5954 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5955 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, 5956 XML_ACCOUNT_DIRECT); 5957 } else { 5958 parser->m_processor = contentProcessor; 5959 /* see externalEntityContentProcessor vs contentProcessor */ 5960 result = doContent(parser, parser->m_parentParser ? 1 : 0, 5961 parser->m_encoding, s, end, nextPtr, 5962 (XML_Bool)! parser->m_parsingStatus.finalBuffer, 5963 XML_ACCOUNT_DIRECT); 5964 if (result == XML_ERROR_NONE) { 5965 if (! storeRawNames(parser)) 5966 return XML_ERROR_NO_MEMORY; 5967 } 5968 return result; 5969 } 5970 } 5971 5972 static enum XML_Error PTRCALL 5973 errorProcessor(XML_Parser parser, const char *s, const char *end, 5974 const char **nextPtr) { 5975 UNUSED_P(s); 5976 UNUSED_P(end); 5977 UNUSED_P(nextPtr); 5978 return parser->m_errorCode; 5979 } 5980 5981 static enum XML_Error 5982 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5983 const char *ptr, const char *end, STRING_POOL *pool, 5984 enum XML_Account account) { 5985 enum XML_Error result 5986 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); 5987 if (result) 5988 return result; 5989 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) 5990 poolChop(pool); 5991 if (! poolAppendChar(pool, XML_T('\0'))) 5992 return XML_ERROR_NO_MEMORY; 5993 return XML_ERROR_NONE; 5994 } 5995 5996 static enum XML_Error 5997 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5998 const char *ptr, const char *end, STRING_POOL *pool, 5999 enum XML_Account account) { 6000 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6001 #ifndef XML_DTD 6002 UNUSED_P(account); 6003 #endif 6004 6005 for (;;) { 6006 const char *next 6007 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ 6008 int tok = XmlAttributeValueTok(enc, ptr, end, &next); 6009 #if XML_GE == 1 6010 if (! 
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { 6011 accountingOnAbort(parser); 6012 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6013 } 6014 #endif 6015 switch (tok) { 6016 case XML_TOK_NONE: 6017 return XML_ERROR_NONE; 6018 case XML_TOK_INVALID: 6019 if (enc == parser->m_encoding) 6020 parser->m_eventPtr = next; 6021 return XML_ERROR_INVALID_TOKEN; 6022 case XML_TOK_PARTIAL: 6023 if (enc == parser->m_encoding) 6024 parser->m_eventPtr = ptr; 6025 return XML_ERROR_INVALID_TOKEN; 6026 case XML_TOK_CHAR_REF: { 6027 XML_Char buf[XML_ENCODE_MAX]; 6028 int i; 6029 int n = XmlCharRefNumber(enc, ptr); 6030 if (n < 0) { 6031 if (enc == parser->m_encoding) 6032 parser->m_eventPtr = ptr; 6033 return XML_ERROR_BAD_CHAR_REF; 6034 } 6035 if (! isCdata && n == 0x20 /* space */ 6036 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6037 break; 6038 n = XmlEncode(n, (ICHAR *)buf); 6039 /* The XmlEncode() functions can never return 0 here. That 6040 * error return happens if the code point passed in is either 6041 * negative or greater than or equal to 0x110000. The 6042 * XmlCharRefNumber() functions will all return a number 6043 * strictly less than 0x110000 or a negative value if an error 6044 * occurred. The negative value is intercepted above, so 6045 * XmlEncode() is never passed a value it might return an 6046 * error for. 6047 */ 6048 for (i = 0; i < n; i++) { 6049 if (! poolAppendChar(pool, buf[i])) 6050 return XML_ERROR_NO_MEMORY; 6051 } 6052 } break; 6053 case XML_TOK_DATA_CHARS: 6054 if (! poolAppend(pool, enc, ptr, next)) 6055 return XML_ERROR_NO_MEMORY; 6056 break; 6057 case XML_TOK_TRAILING_CR: 6058 next = ptr + enc->minBytesPerChar; 6059 /* fall through */ 6060 case XML_TOK_ATTRIBUTE_VALUE_S: 6061 case XML_TOK_DATA_NEWLINE: 6062 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6063 break; 6064 if (! 
poolAppendChar(pool, 0x20)) 6065 return XML_ERROR_NO_MEMORY; 6066 break; 6067 case XML_TOK_ENTITY_REF: { 6068 const XML_Char *name; 6069 ENTITY *entity; 6070 char checkEntityDecl; 6071 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 6072 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 6073 if (ch) { 6074 #if XML_GE == 1 6075 /* NOTE: We are replacing 4-6 characters original input for 1 character 6076 * so there is no amplification and hence recording without 6077 * protection. */ 6078 accountingDiffTolerated(parser, tok, (char *)&ch, 6079 ((char *)&ch) + sizeof(XML_Char), __LINE__, 6080 XML_ACCOUNT_ENTITY_EXPANSION); 6081 #endif /* XML_GE == 1 */ 6082 if (! poolAppendChar(pool, ch)) 6083 return XML_ERROR_NO_MEMORY; 6084 break; 6085 } 6086 name = poolStoreString(&parser->m_temp2Pool, enc, 6087 ptr + enc->minBytesPerChar, 6088 next - enc->minBytesPerChar); 6089 if (! name) 6090 return XML_ERROR_NO_MEMORY; 6091 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 6092 poolDiscard(&parser->m_temp2Pool); 6093 /* First, determine if a check for an existing declaration is needed; 6094 if yes, check that the entity exists, and that it is internal. 6095 */ 6096 if (pool == &dtd->pool) /* are we called from prolog? */ 6097 checkEntityDecl = 6098 #ifdef XML_DTD 6099 parser->m_prologState.documentEntity && 6100 #endif /* XML_DTD */ 6101 (dtd->standalone ? ! parser->m_openInternalEntities 6102 : ! dtd->hasParamEntityRefs); 6103 else /* if (pool == &parser->m_tempPool): we are called from content */ 6104 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; 6105 if (checkEntityDecl) { 6106 if (! entity) 6107 return XML_ERROR_UNDEFINED_ENTITY; 6108 else if (! entity->is_internal) 6109 return XML_ERROR_ENTITY_DECLARED_IN_PE; 6110 } else if (! entity) { 6111 /* Cannot report skipped entity here - see comments on 6112 parser->m_skippedEntityHandler. 
6113 if (parser->m_skippedEntityHandler) 6114 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6115 */ 6116 /* Cannot call the default handler because this would be 6117 out of sync with the call to the startElementHandler. 6118 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) 6119 reportDefault(parser, enc, ptr, next); 6120 */ 6121 break; 6122 } 6123 if (entity->open) { 6124 if (enc == parser->m_encoding) { 6125 /* It does not appear that this line can be executed. 6126 * 6127 * The "if (entity->open)" check catches recursive entity 6128 * definitions. In order to be called with an open 6129 * entity, it must have gone through this code before and 6130 * been through the recursive call to 6131 * appendAttributeValue() some lines below. That call 6132 * sets the local encoding ("enc") to the parser's 6133 * internal encoding (internal_utf8 or internal_utf16), 6134 * which can never be the same as the principle encoding. 6135 * It doesn't appear there is another code path that gets 6136 * here with entity->open being TRUE. 6137 * 6138 * Since it is not certain that this logic is watertight, 6139 * we keep the line and merely exclude it from coverage 6140 * tests. 6141 */ 6142 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ 6143 } 6144 return XML_ERROR_RECURSIVE_ENTITY_REF; 6145 } 6146 if (entity->notation) { 6147 if (enc == parser->m_encoding) 6148 parser->m_eventPtr = ptr; 6149 return XML_ERROR_BINARY_ENTITY_REF; 6150 } 6151 if (! 
entity->textPtr) { 6152 if (enc == parser->m_encoding) 6153 parser->m_eventPtr = ptr; 6154 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 6155 } else { 6156 enum XML_Error result; 6157 const XML_Char *textEnd = entity->textPtr + entity->textLen; 6158 entity->open = XML_TRUE; 6159 #if XML_GE == 1 6160 entityTrackingOnOpen(parser, entity, __LINE__); 6161 #endif 6162 result = appendAttributeValue(parser, parser->m_internalEncoding, 6163 isCdata, (const char *)entity->textPtr, 6164 (const char *)textEnd, pool, 6165 XML_ACCOUNT_ENTITY_EXPANSION); 6166 #if XML_GE == 1 6167 entityTrackingOnClose(parser, entity, __LINE__); 6168 #endif 6169 entity->open = XML_FALSE; 6170 if (result) 6171 return result; 6172 } 6173 } break; 6174 default: 6175 /* The only token returned by XmlAttributeValueTok() that does 6176 * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6177 * Getting that would require an entity name to contain an 6178 * incomplete XML character (e.g. \xE2\x82); however previous 6179 * tokenisers will have already recognised and rejected such 6180 * names before XmlAttributeValueTok() gets a look-in. This 6181 * default case should be retained as a safety net, but the code 6182 * excluded from coverage tests. 
6183 * 6184 * LCOV_EXCL_START 6185 */ 6186 if (enc == parser->m_encoding) 6187 parser->m_eventPtr = ptr; 6188 return XML_ERROR_UNEXPECTED_STATE; 6189 /* LCOV_EXCL_STOP */ 6190 } 6191 ptr = next; 6192 } 6193 /* not reached */ 6194 } 6195 6196 #if XML_GE == 1 6197 static enum XML_Error 6198 storeEntityValue(XML_Parser parser, const ENCODING *enc, 6199 const char *entityTextPtr, const char *entityTextEnd, 6200 enum XML_Account account) { 6201 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6202 STRING_POOL *pool = &(dtd->entityValuePool); 6203 enum XML_Error result = XML_ERROR_NONE; 6204 # ifdef XML_DTD 6205 int oldInEntityValue = parser->m_prologState.inEntityValue; 6206 parser->m_prologState.inEntityValue = 1; 6207 # else 6208 UNUSED_P(account); 6209 # endif /* XML_DTD */ 6210 /* never return Null for the value argument in EntityDeclHandler, 6211 since this would indicate an external entity; therefore we 6212 have to make sure that entityValuePool.start is not null */ 6213 if (! pool->blocks) { 6214 if (! poolGrow(pool)) 6215 return XML_ERROR_NO_MEMORY; 6216 } 6217 6218 for (;;) { 6219 const char *next 6220 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ 6221 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); 6222 6223 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, 6224 account)) { 6225 accountingOnAbort(parser); 6226 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6227 goto endEntityValue; 6228 } 6229 6230 switch (tok) { 6231 case XML_TOK_PARAM_ENTITY_REF: 6232 # ifdef XML_DTD 6233 if (parser->m_isParamEntity || enc != parser->m_encoding) { 6234 const XML_Char *name; 6235 ENTITY *entity; 6236 name = poolStoreString(&parser->m_tempPool, enc, 6237 entityTextPtr + enc->minBytesPerChar, 6238 next - enc->minBytesPerChar); 6239 if (! 
name) { 6240 result = XML_ERROR_NO_MEMORY; 6241 goto endEntityValue; 6242 } 6243 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 6244 poolDiscard(&parser->m_tempPool); 6245 if (! entity) { 6246 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ 6247 /* cannot report skipped entity here - see comments on 6248 parser->m_skippedEntityHandler 6249 if (parser->m_skippedEntityHandler) 6250 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6251 */ 6252 dtd->keepProcessing = dtd->standalone; 6253 goto endEntityValue; 6254 } 6255 if (entity->open || (entity == parser->m_declEntity)) { 6256 if (enc == parser->m_encoding) 6257 parser->m_eventPtr = entityTextPtr; 6258 result = XML_ERROR_RECURSIVE_ENTITY_REF; 6259 goto endEntityValue; 6260 } 6261 if (entity->systemId) { 6262 if (parser->m_externalEntityRefHandler) { 6263 dtd->paramEntityRead = XML_FALSE; 6264 entity->open = XML_TRUE; 6265 entityTrackingOnOpen(parser, entity, __LINE__); 6266 if (! parser->m_externalEntityRefHandler( 6267 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6268 entity->systemId, entity->publicId)) { 6269 entityTrackingOnClose(parser, entity, __LINE__); 6270 entity->open = XML_FALSE; 6271 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6272 goto endEntityValue; 6273 } 6274 entityTrackingOnClose(parser, entity, __LINE__); 6275 entity->open = XML_FALSE; 6276 if (! 
dtd->paramEntityRead) 6277 dtd->keepProcessing = dtd->standalone; 6278 } else 6279 dtd->keepProcessing = dtd->standalone; 6280 } else { 6281 entity->open = XML_TRUE; 6282 entityTrackingOnOpen(parser, entity, __LINE__); 6283 result = storeEntityValue( 6284 parser, parser->m_internalEncoding, (const char *)entity->textPtr, 6285 (const char *)(entity->textPtr + entity->textLen), 6286 XML_ACCOUNT_ENTITY_EXPANSION); 6287 entityTrackingOnClose(parser, entity, __LINE__); 6288 entity->open = XML_FALSE; 6289 if (result) 6290 goto endEntityValue; 6291 } 6292 break; 6293 } 6294 # endif /* XML_DTD */ 6295 /* In the internal subset, PE references are not legal 6296 within markup declarations, e.g entity values in this case. */ 6297 parser->m_eventPtr = entityTextPtr; 6298 result = XML_ERROR_PARAM_ENTITY_REF; 6299 goto endEntityValue; 6300 case XML_TOK_NONE: 6301 result = XML_ERROR_NONE; 6302 goto endEntityValue; 6303 case XML_TOK_ENTITY_REF: 6304 case XML_TOK_DATA_CHARS: 6305 if (! poolAppend(pool, enc, entityTextPtr, next)) { 6306 result = XML_ERROR_NO_MEMORY; 6307 goto endEntityValue; 6308 } 6309 break; 6310 case XML_TOK_TRAILING_CR: 6311 next = entityTextPtr + enc->minBytesPerChar; 6312 /* fall through */ 6313 case XML_TOK_DATA_NEWLINE: 6314 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6315 result = XML_ERROR_NO_MEMORY; 6316 goto endEntityValue; 6317 } 6318 *(pool->ptr)++ = 0xA; 6319 break; 6320 case XML_TOK_CHAR_REF: { 6321 XML_Char buf[XML_ENCODE_MAX]; 6322 int i; 6323 int n = XmlCharRefNumber(enc, entityTextPtr); 6324 if (n < 0) { 6325 if (enc == parser->m_encoding) 6326 parser->m_eventPtr = entityTextPtr; 6327 result = XML_ERROR_BAD_CHAR_REF; 6328 goto endEntityValue; 6329 } 6330 n = XmlEncode(n, (ICHAR *)buf); 6331 /* The XmlEncode() functions can never return 0 here. That 6332 * error return happens if the code point passed in is either 6333 * negative or greater than or equal to 0x110000. 
The 6334 * XmlCharRefNumber() functions will all return a number 6335 * strictly less than 0x110000 or a negative value if an error 6336 * occurred. The negative value is intercepted above, so 6337 * XmlEncode() is never passed a value it might return an 6338 * error for. 6339 */ 6340 for (i = 0; i < n; i++) { 6341 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6342 result = XML_ERROR_NO_MEMORY; 6343 goto endEntityValue; 6344 } 6345 *(pool->ptr)++ = buf[i]; 6346 } 6347 } break; 6348 case XML_TOK_PARTIAL: 6349 if (enc == parser->m_encoding) 6350 parser->m_eventPtr = entityTextPtr; 6351 result = XML_ERROR_INVALID_TOKEN; 6352 goto endEntityValue; 6353 case XML_TOK_INVALID: 6354 if (enc == parser->m_encoding) 6355 parser->m_eventPtr = next; 6356 result = XML_ERROR_INVALID_TOKEN; 6357 goto endEntityValue; 6358 default: 6359 /* This default case should be unnecessary -- all the tokens 6360 * that XmlEntityValueTok() can return have their own explicit 6361 * cases -- but should be retained for safety. We do however 6362 * exclude it from the coverage statistics. 6363 * 6364 * LCOV_EXCL_START 6365 */ 6366 if (enc == parser->m_encoding) 6367 parser->m_eventPtr = entityTextPtr; 6368 result = XML_ERROR_UNEXPECTED_STATE; 6369 goto endEntityValue; 6370 /* LCOV_EXCL_STOP */ 6371 } 6372 entityTextPtr = next; 6373 } 6374 endEntityValue: 6375 # ifdef XML_DTD 6376 parser->m_prologState.inEntityValue = oldInEntityValue; 6377 # endif /* XML_DTD */ 6378 return result; 6379 } 6380 6381 #else /* XML_GE == 0 */ 6382 6383 static enum XML_Error 6384 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { 6385 // This will store "&entity123;" in entity->textPtr 6386 // to end up as "&entity123;" in the handler. 6387 const char *const entity_start = "&"; 6388 const char *const entity_end = ";"; 6389 6390 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); 6391 if (! poolAppendString(pool, entity_start) 6392 || ! poolAppendString(pool, entity->name) 6393 || ! 
poolAppendString(pool, entity_end)) { 6394 poolDiscard(pool); 6395 return XML_ERROR_NO_MEMORY; 6396 } 6397 6398 entity->textPtr = poolStart(pool); 6399 entity->textLen = (int)(poolLength(pool)); 6400 poolFinish(pool); 6401 6402 return XML_ERROR_NONE; 6403 } 6404 6405 #endif /* XML_GE == 0 */ 6406 6407 static void FASTCALL 6408 normalizeLines(XML_Char *s) { 6409 XML_Char *p; 6410 for (;; s++) { 6411 if (*s == XML_T('\0')) 6412 return; 6413 if (*s == 0xD) 6414 break; 6415 } 6416 p = s; 6417 do { 6418 if (*s == 0xD) { 6419 *p++ = 0xA; 6420 if (*++s == 0xA) 6421 s++; 6422 } else 6423 *p++ = *s++; 6424 } while (*s); 6425 *p = XML_T('\0'); 6426 } 6427 6428 static int 6429 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 6430 const char *start, const char *end) { 6431 const XML_Char *target; 6432 XML_Char *data; 6433 const char *tem; 6434 if (! parser->m_processingInstructionHandler) { 6435 if (parser->m_defaultHandler) 6436 reportDefault(parser, enc, start, end); 6437 return 1; 6438 } 6439 start += enc->minBytesPerChar * 2; 6440 tem = start + XmlNameLength(enc, start); 6441 target = poolStoreString(&parser->m_tempPool, enc, start, tem); 6442 if (! target) 6443 return 0; 6444 poolFinish(&parser->m_tempPool); 6445 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), 6446 end - enc->minBytesPerChar * 2); 6447 if (! data) 6448 return 0; 6449 normalizeLines(data); 6450 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 6451 poolClear(&parser->m_tempPool); 6452 return 1; 6453 } 6454 6455 static int 6456 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, 6457 const char *end) { 6458 XML_Char *data; 6459 if (! parser->m_commentHandler) { 6460 if (parser->m_defaultHandler) 6461 reportDefault(parser, enc, start, end); 6462 return 1; 6463 } 6464 data = poolStoreString(&parser->m_tempPool, enc, 6465 start + enc->minBytesPerChar * 4, 6466 end - enc->minBytesPerChar * 3); 6467 if (! 
data) 6468 return 0; 6469 normalizeLines(data); 6470 parser->m_commentHandler(parser->m_handlerArg, data); 6471 poolClear(&parser->m_tempPool); 6472 return 1; 6473 } 6474 6475 static void 6476 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, 6477 const char *end) { 6478 if (MUST_CONVERT(enc, s)) { 6479 enum XML_Convert_Result convert_res; 6480 const char **eventPP; 6481 const char **eventEndPP; 6482 if (enc == parser->m_encoding) { 6483 eventPP = &parser->m_eventPtr; 6484 eventEndPP = &parser->m_eventEndPtr; 6485 } else { 6486 /* To get here, two things must be true; the parser must be 6487 * using a character encoding that is not the same as the 6488 * encoding passed in, and the encoding passed in must need 6489 * conversion to the internal format (UTF-8 unless XML_UNICODE 6490 * is defined). The only occasions on which the encoding passed 6491 * in is not the same as the parser's encoding are when it is 6492 * the internal encoding (e.g. a previously defined parameter 6493 * entity, already converted to internal format). This by 6494 * definition doesn't need conversion, so the whole branch never 6495 * gets executed. 6496 * 6497 * For safety's sake we don't delete these lines and merely 6498 * exclude them from coverage statistics. 
6499 * 6500 * LCOV_EXCL_START 6501 */ 6502 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 6503 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 6504 /* LCOV_EXCL_STOP */ 6505 } 6506 do { 6507 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 6508 convert_res 6509 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 6510 *eventEndPP = s; 6511 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, 6512 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 6513 *eventPP = s; 6514 } while ((convert_res != XML_CONVERT_COMPLETED) 6515 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); 6516 } else 6517 parser->m_defaultHandler( 6518 parser->m_handlerArg, (const XML_Char *)s, 6519 (int)((const XML_Char *)end - (const XML_Char *)s)); 6520 } 6521 6522 static int 6523 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, 6524 XML_Bool isId, const XML_Char *value, XML_Parser parser) { 6525 DEFAULT_ATTRIBUTE *att; 6526 if (value || isId) { 6527 /* The handling of default attributes gets messed up if we have 6528 a default which duplicates a non-default. */ 6529 int i; 6530 for (i = 0; i < type->nDefaultAtts; i++) 6531 if (attId == type->defaultAtts[i].id) 6532 return 1; 6533 if (isId && ! type->idAtt && ! attId->xmlns) 6534 type->idAtt = attId; 6535 } 6536 if (type->nDefaultAtts == type->allocDefaultAtts) { 6537 if (type->allocDefaultAtts == 0) { 6538 type->allocDefaultAtts = 8; 6539 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( 6540 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 6541 if (! type->defaultAtts) { 6542 type->allocDefaultAtts = 0; 6543 return 0; 6544 } 6545 } else { 6546 DEFAULT_ATTRIBUTE *temp; 6547 6548 /* Detect and prevent integer overflow */ 6549 if (type->allocDefaultAtts > INT_MAX / 2) { 6550 return 0; 6551 } 6552 6553 int count = type->allocDefaultAtts * 2; 6554 6555 /* Detect and prevent integer overflow. 
6556 * The preprocessor guard addresses the "always false" warning 6557 * from -Wtype-limits on platforms where 6558 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 6559 #if UINT_MAX >= SIZE_MAX 6560 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { 6561 return 0; 6562 } 6563 #endif 6564 6565 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, 6566 (count * sizeof(DEFAULT_ATTRIBUTE))); 6567 if (temp == NULL) 6568 return 0; 6569 type->allocDefaultAtts = count; 6570 type->defaultAtts = temp; 6571 } 6572 } 6573 att = type->defaultAtts + type->nDefaultAtts; 6574 att->id = attId; 6575 att->value = value; 6576 att->isCdata = isCdata; 6577 if (! isCdata) 6578 attId->maybeTokenized = XML_TRUE; 6579 type->nDefaultAtts += 1; 6580 return 1; 6581 } 6582 6583 static int 6584 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { 6585 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6586 const XML_Char *name; 6587 for (name = elementType->name; *name; name++) { 6588 if (*name == XML_T(ASCII_COLON)) { 6589 PREFIX *prefix; 6590 const XML_Char *s; 6591 for (s = elementType->name; s != name; s++) { 6592 if (! poolAppendChar(&dtd->pool, *s)) 6593 return 0; 6594 } 6595 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6596 return 0; 6597 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), 6598 sizeof(PREFIX)); 6599 if (! prefix) 6600 return 0; 6601 if (prefix->name == poolStart(&dtd->pool)) 6602 poolFinish(&dtd->pool); 6603 else 6604 poolDiscard(&dtd->pool); 6605 elementType->prefix = prefix; 6606 break; 6607 } 6608 } 6609 return 1; 6610 } 6611 6612 static ATTRIBUTE_ID * 6613 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, 6614 const char *end) { 6615 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6616 ATTRIBUTE_ID *id; 6617 const XML_Char *name; 6618 if (! 
poolAppendChar(&dtd->pool, XML_T('\0'))) 6619 return NULL; 6620 name = poolStoreString(&dtd->pool, enc, start, end); 6621 if (! name) 6622 return NULL; 6623 /* skip quotation mark - its storage will be reused (like in name[-1]) */ 6624 ++name; 6625 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, 6626 sizeof(ATTRIBUTE_ID)); 6627 if (! id) 6628 return NULL; 6629 if (id->name != name) 6630 poolDiscard(&dtd->pool); 6631 else { 6632 poolFinish(&dtd->pool); 6633 if (! parser->m_ns) 6634 ; 6635 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) 6636 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) 6637 && name[4] == XML_T(ASCII_s) 6638 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 6639 if (name[5] == XML_T('\0')) 6640 id->prefix = &dtd->defaultPrefix; 6641 else 6642 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, 6643 sizeof(PREFIX)); 6644 id->xmlns = XML_TRUE; 6645 } else { 6646 int i; 6647 for (i = 0; name[i]; i++) { 6648 /* attributes without prefix are *not* in the default namespace */ 6649 if (name[i] == XML_T(ASCII_COLON)) { 6650 int j; 6651 for (j = 0; j < i; j++) { 6652 if (! poolAppendChar(&dtd->pool, name[j])) 6653 return NULL; 6654 } 6655 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6656 return NULL; 6657 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, 6658 poolStart(&dtd->pool), sizeof(PREFIX)); 6659 if (! id->prefix) 6660 return NULL; 6661 if (id->prefix->name == poolStart(&dtd->pool)) 6662 poolFinish(&dtd->pool); 6663 else 6664 poolDiscard(&dtd->pool); 6665 break; 6666 } 6667 } 6668 } 6669 } 6670 return id; 6671 } 6672 6673 #define CONTEXT_SEP XML_T(ASCII_FF) 6674 6675 static const XML_Char * 6676 getContext(XML_Parser parser) { 6677 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6678 HASH_TABLE_ITER iter; 6679 XML_Bool needSep = XML_FALSE; 6680 6681 if (dtd->defaultPrefix.binding) { 6682 int i; 6683 int len; 6684 if (! 
poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6685 return NULL; 6686 len = dtd->defaultPrefix.binding->uriLen; 6687 if (parser->m_namespaceSeparator) 6688 len--; 6689 for (i = 0; i < len; i++) { 6690 if (! poolAppendChar(&parser->m_tempPool, 6691 dtd->defaultPrefix.binding->uri[i])) { 6692 /* Because of memory caching, I don't believe this line can be 6693 * executed. 6694 * 6695 * This is part of a loop copying the default prefix binding 6696 * URI into the parser's temporary string pool. Previously, 6697 * that URI was copied into the same string pool, with a 6698 * terminating NUL character, as part of setContext(). When 6699 * the pool was cleared, that leaves a block definitely big 6700 * enough to hold the URI on the free block list of the pool. 6701 * The URI copy in getContext() therefore cannot run out of 6702 * memory. 6703 * 6704 * If the pool is used between the setContext() and 6705 * getContext() calls, the worst it can do is leave a bigger 6706 * block on the front of the free list. Given that this is 6707 * all somewhat inobvious and program logic can be changed, we 6708 * don't delete the line but we do exclude it from the test 6709 * coverage statistics. 6710 */ 6711 return NULL; /* LCOV_EXCL_LINE */ 6712 } 6713 } 6714 needSep = XML_TRUE; 6715 } 6716 6717 hashTableIterInit(&iter, &(dtd->prefixes)); 6718 for (;;) { 6719 int i; 6720 int len; 6721 const XML_Char *s; 6722 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); 6723 if (! prefix) 6724 break; 6725 if (! prefix->binding) { 6726 /* This test appears to be (justifiable) paranoia. There does 6727 * not seem to be a way of injecting a prefix without a binding 6728 * that doesn't get errored long before this function is called. 6729 * The test should remain for safety's sake, so we instead 6730 * exclude the following line from the coverage statistics. 6731 */ 6732 continue; /* LCOV_EXCL_LINE */ 6733 } 6734 if (needSep && ! 
poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6735 return NULL; 6736 for (s = prefix->name; *s; s++) 6737 if (! poolAppendChar(&parser->m_tempPool, *s)) 6738 return NULL; 6739 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6740 return NULL; 6741 len = prefix->binding->uriLen; 6742 if (parser->m_namespaceSeparator) 6743 len--; 6744 for (i = 0; i < len; i++) 6745 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) 6746 return NULL; 6747 needSep = XML_TRUE; 6748 } 6749 6750 hashTableIterInit(&iter, &(dtd->generalEntities)); 6751 for (;;) { 6752 const XML_Char *s; 6753 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 6754 if (! e) 6755 break; 6756 if (! e->open) 6757 continue; 6758 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6759 return NULL; 6760 for (s = e->name; *s; s++) 6761 if (! poolAppendChar(&parser->m_tempPool, *s)) 6762 return 0; 6763 needSep = XML_TRUE; 6764 } 6765 6766 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6767 return NULL; 6768 return parser->m_tempPool.start; 6769 } 6770 6771 static XML_Bool 6772 setContext(XML_Parser parser, const XML_Char *context) { 6773 if (context == NULL) { 6774 return XML_FALSE; 6775 } 6776 6777 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6778 const XML_Char *s = context; 6779 6780 while (*context != XML_T('\0')) { 6781 if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 6782 ENTITY *e; 6783 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6784 return XML_FALSE; 6785 e = (ENTITY *)lookup(parser, &dtd->generalEntities, 6786 poolStart(&parser->m_tempPool), 0); 6787 if (e) 6788 e->open = XML_TRUE; 6789 if (*s != XML_T('\0')) 6790 s++; 6791 context = s; 6792 poolDiscard(&parser->m_tempPool); 6793 } else if (*s == XML_T(ASCII_EQUALS)) { 6794 PREFIX *prefix; 6795 if (poolLength(&parser->m_tempPool) == 0) 6796 prefix = &dtd->defaultPrefix; 6797 else { 6798 if (! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6799 return XML_FALSE; 6800 prefix 6801 = (PREFIX *)lookup(parser, &dtd->prefixes, 6802 poolStart(&parser->m_tempPool), sizeof(PREFIX)); 6803 if (! prefix) 6804 return XML_FALSE; 6805 if (prefix->name == poolStart(&parser->m_tempPool)) { 6806 prefix->name = poolCopyString(&dtd->pool, prefix->name); 6807 if (! prefix->name) 6808 return XML_FALSE; 6809 } 6810 poolDiscard(&parser->m_tempPool); 6811 } 6812 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); 6813 context++) 6814 if (! poolAppendChar(&parser->m_tempPool, *context)) 6815 return XML_FALSE; 6816 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6817 return XML_FALSE; 6818 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), 6819 &parser->m_inheritedBindings) 6820 != XML_ERROR_NONE) 6821 return XML_FALSE; 6822 poolDiscard(&parser->m_tempPool); 6823 if (*context != XML_T('\0')) 6824 ++context; 6825 s = context; 6826 } else { 6827 if (! poolAppendChar(&parser->m_tempPool, *s)) 6828 return XML_FALSE; 6829 s++; 6830 } 6831 } 6832 return XML_TRUE; 6833 } 6834 6835 static void FASTCALL 6836 normalizePublicId(XML_Char *publicId) { 6837 XML_Char *p = publicId; 6838 XML_Char *s; 6839 for (s = publicId; *s; s++) { 6840 switch (*s) { 6841 case 0x20: 6842 case 0xD: 6843 case 0xA: 6844 if (p != publicId && p[-1] != 0x20) 6845 *p++ = 0x20; 6846 break; 6847 default: 6848 *p++ = *s; 6849 } 6850 } 6851 if (p != publicId && p[-1] == 0x20) 6852 --p; 6853 *p = XML_T('\0'); 6854 } 6855 6856 static DTD * 6857 dtdCreate(const XML_Memory_Handling_Suite *ms) { 6858 DTD *p = ms->malloc_fcn(sizeof(DTD)); 6859 if (p == NULL) 6860 return p; 6861 poolInit(&(p->pool), ms); 6862 poolInit(&(p->entityValuePool), ms); 6863 hashTableInit(&(p->generalEntities), ms); 6864 hashTableInit(&(p->elementTypes), ms); 6865 hashTableInit(&(p->attributeIds), ms); 6866 hashTableInit(&(p->prefixes), ms); 6867 #ifdef XML_DTD 6868 p->paramEntityRead = XML_FALSE; 6869 
hashTableInit(&(p->paramEntities), ms); 6870 #endif /* XML_DTD */ 6871 p->defaultPrefix.name = NULL; 6872 p->defaultPrefix.binding = NULL; 6873 6874 p->in_eldecl = XML_FALSE; 6875 p->scaffIndex = NULL; 6876 p->scaffold = NULL; 6877 p->scaffLevel = 0; 6878 p->scaffSize = 0; 6879 p->scaffCount = 0; 6880 p->contentStringLen = 0; 6881 6882 p->keepProcessing = XML_TRUE; 6883 p->hasParamEntityRefs = XML_FALSE; 6884 p->standalone = XML_FALSE; 6885 return p; 6886 } 6887 6888 static void 6889 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { 6890 HASH_TABLE_ITER iter; 6891 hashTableIterInit(&iter, &(p->elementTypes)); 6892 for (;;) { 6893 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6894 if (! e) 6895 break; 6896 if (e->allocDefaultAtts != 0) 6897 ms->free_fcn(e->defaultAtts); 6898 } 6899 hashTableClear(&(p->generalEntities)); 6900 #ifdef XML_DTD 6901 p->paramEntityRead = XML_FALSE; 6902 hashTableClear(&(p->paramEntities)); 6903 #endif /* XML_DTD */ 6904 hashTableClear(&(p->elementTypes)); 6905 hashTableClear(&(p->attributeIds)); 6906 hashTableClear(&(p->prefixes)); 6907 poolClear(&(p->pool)); 6908 poolClear(&(p->entityValuePool)); 6909 p->defaultPrefix.name = NULL; 6910 p->defaultPrefix.binding = NULL; 6911 6912 p->in_eldecl = XML_FALSE; 6913 6914 ms->free_fcn(p->scaffIndex); 6915 p->scaffIndex = NULL; 6916 ms->free_fcn(p->scaffold); 6917 p->scaffold = NULL; 6918 6919 p->scaffLevel = 0; 6920 p->scaffSize = 0; 6921 p->scaffCount = 0; 6922 p->contentStringLen = 0; 6923 6924 p->keepProcessing = XML_TRUE; 6925 p->hasParamEntityRefs = XML_FALSE; 6926 p->standalone = XML_FALSE; 6927 } 6928 6929 static void 6930 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { 6931 HASH_TABLE_ITER iter; 6932 hashTableIterInit(&iter, &(p->elementTypes)); 6933 for (;;) { 6934 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6935 if (! 
        e)
      break;
    /* The array exists only if defineAttribute()/dtdCopy() allocated it. */
    if (e->allocDefaultAtts != 0)
      ms->free_fcn(e->defaultAtts);
  }
  hashTableDestroy(&(p->generalEntities));
#ifdef XML_DTD
  hashTableDestroy(&(p->paramEntities));
#endif /* XML_DTD */
  hashTableDestroy(&(p->elementTypes));
  hashTableDestroy(&(p->attributeIds));
  hashTableDestroy(&(p->prefixes));
  poolDestroy(&(p->pool));
  poolDestroy(&(p->entityValuePool));
  /* NOTE(review): dtdCopy() copies the scaffold pointers without
   * duplicating the arrays, so presumably only the document entity's DTD
   * owns them -- confirm against the callers. */
  if (isDocEntity) {
    ms->free_fcn(p->scaffIndex);
    ms->free_fcn(p->scaffold);
  }
  ms->free_fcn(p);
}

/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
   The new DTD has already been initialized.

   Names and values are copied into newDtd->pool, and the new tables are
   populated through lookup().  Cross references (attribute -> prefix,
   element -> prefix / id attribute / default attributes) are re-resolved by
   name in the new tables, which is why the prefix table is copied first,
   then the attribute ids, then the element types.  The scaffolding is
   shared by pointer, not duplicated.
*/
static int
dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
        const XML_Memory_Handling_Suite *ms) {
  HASH_TABLE_ITER iter;

  /* Copy the prefix table. */

  hashTableIterInit(&iter, &(oldDtd->prefixes));
  for (;;) {
    const XML_Char *name;
    const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
    if (! oldP)
      break;
    name = poolCopyString(&(newDtd->pool), oldP->name);
    if (! name)
      return 0;
    if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
      return 0;
  }

  hashTableIterInit(&iter, &(oldDtd->attributeIds));

  /* Copy the attribute id table. */

  for (;;) {
    ATTRIBUTE_ID *newA;
    const XML_Char *name;
    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);

    if (! oldA)
      break;
    /* Remember to allocate the scratch byte before the name. */
    if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
      return 0;
    name = poolCopyString(&(newDtd->pool), oldA->name);
    if (! name)
      return 0;
    /* Step over the scratch byte so that it ends up at name[-1]. */
    ++name;
    newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
                                  sizeof(ATTRIBUTE_ID));
    if (! newA)
      return 0;
    newA->maybeTokenized = oldA->maybeTokenized;
    if (oldA->prefix) {
      newA->xmlns = oldA->xmlns;
      /* The default prefix lives inside the DTD struct itself, not in the
       * prefix table, so it is mapped by address rather than by name. */
      if (oldA->prefix == &oldDtd->defaultPrefix)
        newA->prefix = &newDtd->defaultPrefix;
      else
        newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                        oldA->prefix->name, 0);
    }
  }

  /* Copy the element type table. */

  hashTableIterInit(&iter, &(oldDtd->elementTypes));

  for (;;) {
    int i;
    ELEMENT_TYPE *newE;
    const XML_Char *name;
    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(&(newDtd->pool), oldE->name);
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts
          > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
        return 0;
      }
#endif
      /* Allocate exactly as many slots as are in use in the old DTD. */
      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
      newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
                                           oldE->idAtt->name, 0);
    /* Capacity and count are equal in the copy (see allocation above). */
    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
    if (oldE->prefix)
      newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
                                      oldE->prefix->name, 0);
    for (i = 0; i < newE->nDefaultAtts; i++) {
      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
          oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
      if (oldE->defaultAtts[i].value) {
        newE->defaultAtts[i].value
            = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
        if (! newE->defaultAtts[i].value)
          return 0;
      } else
        newE->defaultAtts[i].value = NULL;
    }
  }

  /* Copy the entity tables. */
  if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
                        &(oldDtd->generalEntities)))
    return 0;

#ifdef XML_DTD
  if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
                        &(oldDtd->paramEntities)))
    return 0;
  newDtd->paramEntityRead = oldDtd->paramEntityRead;
#endif /* XML_DTD */

  newDtd->keepProcessing = oldDtd->keepProcessing;
  newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
  newDtd->standalone = oldDtd->standalone;

  /* Don't want deep copying for scaffolding */
  newDtd->in_eldecl = oldDtd->in_eldecl;
  newDtd->scaffold = oldDtd->scaffold;
  newDtd->contentStringLen = oldDtd->contentStringLen;
  newDtd->scaffSize = oldDtd->scaffSize;
  newDtd->scaffLevel = oldDtd->scaffLevel;
  newDtd->scaffIndex = oldDtd->scaffIndex;

  return 1;
} /* End dtdCopy */

/* Copy every entity of oldTable into newTable, duplicating all strings
 * (name, system id, base, public id, replacement text, notation) into
 * newPool.  Returns 1 on success and 0 when a copy or a table insertion
 * fails.
 */
static int
copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
                STRING_POOL *newPool, const HASH_TABLE *oldTable) {
  HASH_TABLE_ITER iter;
  /* One-entry cache: the base string is copied again only when it differs
   * (by pointer) from the base of the previously copied entity. */
  const XML_Char *cachedOldBase = NULL;
  const XML_Char *cachedNewBase = NULL;

  hashTableIterInit(&iter, oldTable);

  for (;;) {
    ENTITY *newE;
    const XML_Char *name;
    const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
    if (! oldE)
      break;
    name = poolCopyString(newPool, oldE->name);
    if (! name)
      return 0;
    newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
    if (! newE)
      return 0;
    if (oldE->systemId) {
      const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
      if (! tem)
        return 0;
      newE->systemId = tem;
      if (oldE->base) {
        if (oldE->base == cachedOldBase)
          newE->base = cachedNewBase;
        else {
          cachedOldBase = oldE->base;
          tem = poolCopyString(newPool, cachedOldBase);
          if (! tem)
            return 0;
          cachedNewBase = newE->base = tem;
        }
      }
      if (oldE->publicId) {
        tem = poolCopyString(newPool, oldE->publicId);
        if (!
tem) 7135 return 0; 7136 newE->publicId = tem; 7137 } 7138 } else { 7139 const XML_Char *tem 7140 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 7141 if (! tem) 7142 return 0; 7143 newE->textPtr = tem; 7144 newE->textLen = oldE->textLen; 7145 } 7146 if (oldE->notation) { 7147 const XML_Char *tem = poolCopyString(newPool, oldE->notation); 7148 if (! tem) 7149 return 0; 7150 newE->notation = tem; 7151 } 7152 newE->is_param = oldE->is_param; 7153 newE->is_internal = oldE->is_internal; 7154 } 7155 return 1; 7156 } 7157 7158 #define INIT_POWER 6 7159 7160 static XML_Bool FASTCALL 7161 keyeq(KEY s1, KEY s2) { 7162 for (; *s1 == *s2; s1++, s2++) 7163 if (*s1 == 0) 7164 return XML_TRUE; 7165 return XML_FALSE; 7166 } 7167 7168 static size_t 7169 keylen(KEY s) { 7170 size_t len = 0; 7171 for (; *s; s++, len++) 7172 ; 7173 return len; 7174 } 7175 7176 static void 7177 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { 7178 key->k[0] = 0; 7179 key->k[1] = get_hash_secret_salt(parser); 7180 } 7181 7182 static unsigned long FASTCALL 7183 hash(XML_Parser parser, KEY s) { 7184 struct siphash state; 7185 struct sipkey key; 7186 (void)sip24_valid; 7187 copy_salt_to_sipkey(parser, &key); 7188 sip24_init(&state, &key); 7189 sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 7190 return (unsigned long)sip24_final(&state); 7191 } 7192 7193 static NAMED * 7194 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { 7195 size_t i; 7196 if (table->size == 0) { 7197 size_t tsize; 7198 if (! createSize) 7199 return NULL; 7200 table->power = INIT_POWER; 7201 /* table->size is a power of 2 */ 7202 table->size = (size_t)1 << INIT_POWER; 7203 tsize = table->size * sizeof(NAMED *); 7204 table->v = table->mem->malloc_fcn(tsize); 7205 if (! 
table->v) { 7206 table->size = 0; 7207 return NULL; 7208 } 7209 memset(table->v, 0, tsize); 7210 i = hash(parser, name) & ((unsigned long)table->size - 1); 7211 } else { 7212 unsigned long h = hash(parser, name); 7213 unsigned long mask = (unsigned long)table->size - 1; 7214 unsigned char step = 0; 7215 i = h & mask; 7216 while (table->v[i]) { 7217 if (keyeq(name, table->v[i]->name)) 7218 return table->v[i]; 7219 if (! step) 7220 step = PROBE_STEP(h, mask, table->power); 7221 i < step ? (i += table->size - step) : (i -= step); 7222 } 7223 if (! createSize) 7224 return NULL; 7225 7226 /* check for overflow (table is half full) */ 7227 if (table->used >> (table->power - 1)) { 7228 unsigned char newPower = table->power + 1; 7229 7230 /* Detect and prevent invalid shift */ 7231 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 7232 return NULL; 7233 } 7234 7235 size_t newSize = (size_t)1 << newPower; 7236 unsigned long newMask = (unsigned long)newSize - 1; 7237 7238 /* Detect and prevent integer overflow */ 7239 if (newSize > (size_t)(-1) / sizeof(NAMED *)) { 7240 return NULL; 7241 } 7242 7243 size_t tsize = newSize * sizeof(NAMED *); 7244 NAMED **newV = table->mem->malloc_fcn(tsize); 7245 if (! newV) 7246 return NULL; 7247 memset(newV, 0, tsize); 7248 for (i = 0; i < table->size; i++) 7249 if (table->v[i]) { 7250 unsigned long newHash = hash(parser, table->v[i]->name); 7251 size_t j = newHash & newMask; 7252 step = 0; 7253 while (newV[j]) { 7254 if (! step) 7255 step = PROBE_STEP(newHash, newMask, newPower); 7256 j < step ? (j += newSize - step) : (j -= step); 7257 } 7258 newV[j] = table->v[i]; 7259 } 7260 table->mem->free_fcn(table->v); 7261 table->v = newV; 7262 table->power = newPower; 7263 table->size = newSize; 7264 i = h & newMask; 7265 step = 0; 7266 while (table->v[i]) { 7267 if (! step) 7268 step = PROBE_STEP(h, newMask, newPower); 7269 i < step ? 
(i += newSize - step) : (i -= step); 7270 } 7271 } 7272 } 7273 table->v[i] = table->mem->malloc_fcn(createSize); 7274 if (! table->v[i]) 7275 return NULL; 7276 memset(table->v[i], 0, createSize); 7277 table->v[i]->name = name; 7278 (table->used)++; 7279 return table->v[i]; 7280 } 7281 7282 static void FASTCALL 7283 hashTableClear(HASH_TABLE *table) { 7284 size_t i; 7285 for (i = 0; i < table->size; i++) { 7286 table->mem->free_fcn(table->v[i]); 7287 table->v[i] = NULL; 7288 } 7289 table->used = 0; 7290 } 7291 7292 static void FASTCALL 7293 hashTableDestroy(HASH_TABLE *table) { 7294 size_t i; 7295 for (i = 0; i < table->size; i++) 7296 table->mem->free_fcn(table->v[i]); 7297 table->mem->free_fcn(table->v); 7298 } 7299 7300 static void FASTCALL 7301 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { 7302 p->power = 0; 7303 p->size = 0; 7304 p->used = 0; 7305 p->v = NULL; 7306 p->mem = ms; 7307 } 7308 7309 static void FASTCALL 7310 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { 7311 iter->p = table->v; 7312 iter->end = iter->p ? iter->p + table->size : NULL; 7313 } 7314 7315 static NAMED *FASTCALL 7316 hashTableIterNext(HASH_TABLE_ITER *iter) { 7317 while (iter->p != iter->end) { 7318 NAMED *tem = *(iter->p)++; 7319 if (tem) 7320 return tem; 7321 } 7322 return NULL; 7323 } 7324 7325 static void FASTCALL 7326 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { 7327 pool->blocks = NULL; 7328 pool->freeBlocks = NULL; 7329 pool->start = NULL; 7330 pool->ptr = NULL; 7331 pool->end = NULL; 7332 pool->mem = ms; 7333 } 7334 7335 static void FASTCALL 7336 poolClear(STRING_POOL *pool) { 7337 if (! 
pool->freeBlocks) 7338 pool->freeBlocks = pool->blocks; 7339 else { 7340 BLOCK *p = pool->blocks; 7341 while (p) { 7342 BLOCK *tem = p->next; 7343 p->next = pool->freeBlocks; 7344 pool->freeBlocks = p; 7345 p = tem; 7346 } 7347 } 7348 pool->blocks = NULL; 7349 pool->start = NULL; 7350 pool->ptr = NULL; 7351 pool->end = NULL; 7352 } 7353 7354 static void FASTCALL 7355 poolDestroy(STRING_POOL *pool) { 7356 BLOCK *p = pool->blocks; 7357 while (p) { 7358 BLOCK *tem = p->next; 7359 pool->mem->free_fcn(p); 7360 p = tem; 7361 } 7362 p = pool->freeBlocks; 7363 while (p) { 7364 BLOCK *tem = p->next; 7365 pool->mem->free_fcn(p); 7366 p = tem; 7367 } 7368 } 7369 7370 static XML_Char * 7371 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7372 const char *end) { 7373 if (! pool->ptr && ! poolGrow(pool)) 7374 return NULL; 7375 for (;;) { 7376 const enum XML_Convert_Result convert_res = XmlConvert( 7377 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); 7378 if ((convert_res == XML_CONVERT_COMPLETED) 7379 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 7380 break; 7381 if (! poolGrow(pool)) 7382 return NULL; 7383 } 7384 return pool->start; 7385 } 7386 7387 static const XML_Char *FASTCALL 7388 poolCopyString(STRING_POOL *pool, const XML_Char *s) { 7389 do { 7390 if (! poolAppendChar(pool, *s)) 7391 return NULL; 7392 } while (*s++); 7393 s = pool->start; 7394 poolFinish(pool); 7395 return s; 7396 } 7397 7398 static const XML_Char * 7399 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { 7400 if (! pool->ptr && ! poolGrow(pool)) { 7401 /* The following line is unreachable given the current usage of 7402 * poolCopyStringN(). Currently it is called from exactly one 7403 * place to copy the text of a simple general entity. By that 7404 * point, the name of the entity is already stored in the pool, so 7405 * pool->ptr cannot be NULL. 
7406 * 7407 * If poolCopyStringN() is used elsewhere as it well might be, 7408 * this line may well become executable again. Regardless, this 7409 * sort of check shouldn't be removed lightly, so we just exclude 7410 * it from the coverage statistics. 7411 */ 7412 return NULL; /* LCOV_EXCL_LINE */ 7413 } 7414 for (; n > 0; --n, s++) { 7415 if (! poolAppendChar(pool, *s)) 7416 return NULL; 7417 } 7418 s = pool->start; 7419 poolFinish(pool); 7420 return s; 7421 } 7422 7423 static const XML_Char *FASTCALL 7424 poolAppendString(STRING_POOL *pool, const XML_Char *s) { 7425 while (*s) { 7426 if (! poolAppendChar(pool, *s)) 7427 return NULL; 7428 s++; 7429 } 7430 return pool->start; 7431 } 7432 7433 static XML_Char * 7434 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7435 const char *end) { 7436 if (! poolAppend(pool, enc, ptr, end)) 7437 return NULL; 7438 if (pool->ptr == pool->end && ! poolGrow(pool)) 7439 return NULL; 7440 *(pool->ptr)++ = 0; 7441 return pool->start; 7442 } 7443 7444 static size_t 7445 poolBytesToAllocateFor(int blockSize) { 7446 /* Unprotected math would be: 7447 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 7448 ** 7449 ** Detect overflow, avoiding _signed_ overflow undefined behavior 7450 ** For a + b * c we check b * c in isolation first, so that addition of a 7451 ** on top has no chance of making us accept a small non-negative number 7452 */ 7453 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 7454 7455 if (blockSize <= 0) 7456 return 0; 7457 7458 if (blockSize > (int)(INT_MAX / stretch)) 7459 return 0; 7460 7461 { 7462 const int stretchedBlockSize = blockSize * (int)stretch; 7463 const int bytesToAllocate 7464 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 7465 if (bytesToAllocate < 0) 7466 return 0; 7467 7468 return (size_t)bytesToAllocate; 7469 } 7470 } 7471 7472 static XML_Bool FASTCALL 7473 poolGrow(STRING_POOL *pool) { 7474 if (pool->freeBlocks) { 7475 if (pool->start == 
0) { 7476 pool->blocks = pool->freeBlocks; 7477 pool->freeBlocks = pool->freeBlocks->next; 7478 pool->blocks->next = NULL; 7479 pool->start = pool->blocks->s; 7480 pool->end = pool->start + pool->blocks->size; 7481 pool->ptr = pool->start; 7482 return XML_TRUE; 7483 } 7484 if (pool->end - pool->start < pool->freeBlocks->size) { 7485 BLOCK *tem = pool->freeBlocks->next; 7486 pool->freeBlocks->next = pool->blocks; 7487 pool->blocks = pool->freeBlocks; 7488 pool->freeBlocks = tem; 7489 memcpy(pool->blocks->s, pool->start, 7490 (pool->end - pool->start) * sizeof(XML_Char)); 7491 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 7492 pool->start = pool->blocks->s; 7493 pool->end = pool->start + pool->blocks->size; 7494 return XML_TRUE; 7495 } 7496 } 7497 if (pool->blocks && pool->start == pool->blocks->s) { 7498 BLOCK *temp; 7499 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 7500 size_t bytesToAllocate; 7501 7502 /* NOTE: Needs to be calculated prior to calling `realloc` 7503 to avoid dangling pointers: */ 7504 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 7505 7506 if (blockSize < 0) { 7507 /* This condition traps a situation where either more than 7508 * INT_MAX/2 bytes have already been allocated. This isn't 7509 * readily testable, since it is unlikely that an average 7510 * machine will have that much memory, so we exclude it from the 7511 * coverage statistics. 
7512 */ 7513 return XML_FALSE; /* LCOV_EXCL_LINE */ 7514 } 7515 7516 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7517 if (bytesToAllocate == 0) 7518 return XML_FALSE; 7519 7520 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, 7521 (unsigned)bytesToAllocate); 7522 if (temp == NULL) 7523 return XML_FALSE; 7524 pool->blocks = temp; 7525 pool->blocks->size = blockSize; 7526 pool->ptr = pool->blocks->s + offsetInsideBlock; 7527 pool->start = pool->blocks->s; 7528 pool->end = pool->start + blockSize; 7529 } else { 7530 BLOCK *tem; 7531 int blockSize = (int)(pool->end - pool->start); 7532 size_t bytesToAllocate; 7533 7534 if (blockSize < 0) { 7535 /* This condition traps a situation where either more than 7536 * INT_MAX bytes have already been allocated (which is prevented 7537 * by various pieces of program logic, not least this one, never 7538 * mind the unlikelihood of actually having that much memory) or 7539 * the pool control fields have been corrupted (which could 7540 * conceivably happen in an extremely buggy user handler 7541 * function). Either way it isn't readily testable, so we 7542 * exclude it from the coverage statistics. 7543 */ 7544 return XML_FALSE; /* LCOV_EXCL_LINE */ 7545 } 7546 7547 if (blockSize < INIT_BLOCK_SIZE) 7548 blockSize = INIT_BLOCK_SIZE; 7549 else { 7550 /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 7551 if ((int)((unsigned)blockSize * 2U) < 0) { 7552 return XML_FALSE; 7553 } 7554 blockSize *= 2; 7555 } 7556 7557 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7558 if (bytesToAllocate == 0) 7559 return XML_FALSE; 7560 7561 tem = pool->mem->malloc_fcn(bytesToAllocate); 7562 if (! 
        tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Carry the string under construction over into the new block. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}

/* Reserve the next node of dtd->scaffold and link it as the last child of
 * the node at the current scaffold level (if dtd->scaffLevel is non-zero).
 * The scaffIndex and scaffold arrays are allocated or grown on demand.
 *
 * Returns the index of the new node, or -1 when an allocation fails or a
 * size computation would overflow.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
      return -1;
    }
#endif
    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* The scaffold is full (or not allocated yet): double or create it. */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = (CONTENT_SCAFFOLD *)REALLOC(
          parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize *= 2;
    } else {
      temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
                                                    * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    /* Append the new node to its parent's sibling chain. */
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}

/* Build the XML_Content tree for the scaffold of the current DTD.
 * Returns a single allocation holding the nodes followed by their name
 * strings, or NULL when the size would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must not overflow either. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation (of what was previously done
   * recursively in a dedicated function called "build_node".  The old recursive
   * build_node could be forced into stack exhaustion from input as small as a
   * few megabytes, and so that was a security issue.  Hence, a function call
   * stack is avoided now by resolving recursion.)
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one does
   *   the work, the other creates "jobs" for its colleague to do, and leads
   *   the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to do
   *   when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The name strings are stored directly behind the node array. */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree  */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, including its terminator, into the
       * string area; such a node has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}

static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
               const char *end) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
  ELEMENT_TYPE *ret;

  if (! name)
    return NULL;
  ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                               sizeof(ELEMENT_TYPE));
  if (! ret)
    return NULL;
  if (ret->name != name)
    poolDiscard(&dtd->pool);
  else {
    poolFinish(&dtd->pool);
    if (!
setElementTypePrefix(parser, ret)) 7791 return NULL; 7792 } 7793 return ret; 7794 } 7795 7796 static XML_Char * 7797 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { 7798 size_t charsRequired = 0; 7799 XML_Char *result; 7800 7801 /* First determine how long the string is */ 7802 while (s[charsRequired] != 0) { 7803 charsRequired++; 7804 } 7805 /* Include the terminator */ 7806 charsRequired++; 7807 7808 /* Now allocate space for the copy */ 7809 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); 7810 if (result == NULL) 7811 return NULL; 7812 /* Copy the original into place */ 7813 memcpy(result, s, charsRequired * sizeof(XML_Char)); 7814 return result; 7815 } 7816 7817 #if XML_GE == 1 7818 7819 static float 7820 accountingGetCurrentAmplification(XML_Parser rootParser) { 7821 // 1.........1.........12 => 22 7822 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; 7823 const XmlBigCount countBytesOutput 7824 = rootParser->m_accounting.countBytesDirect 7825 + rootParser->m_accounting.countBytesIndirect; 7826 const float amplificationFactor 7827 = rootParser->m_accounting.countBytesDirect 7828 ? (countBytesOutput 7829 / (float)(rootParser->m_accounting.countBytesDirect)) 7830 : ((lenOfShortestInclude 7831 + rootParser->m_accounting.countBytesIndirect) 7832 / (float)lenOfShortestInclude); 7833 assert(! rootParser->m_parentParser); 7834 return amplificationFactor; 7835 } 7836 7837 static void 7838 accountingReportStats(XML_Parser originParser, const char *epilog) { 7839 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7840 assert(! 
rootParser->m_parentParser); 7841 7842 if (rootParser->m_accounting.debugLevel == 0u) { 7843 return; 7844 } 7845 7846 const float amplificationFactor 7847 = accountingGetCurrentAmplification(rootParser); 7848 fprintf(stderr, 7849 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( 7850 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 7851 (void *)rootParser, rootParser->m_accounting.countBytesDirect, 7852 rootParser->m_accounting.countBytesIndirect, 7853 (double)amplificationFactor, epilog); 7854 } 7855 7856 static void 7857 accountingOnAbort(XML_Parser originParser) { 7858 accountingReportStats(originParser, " ABORTING\n"); 7859 } 7860 7861 static void 7862 accountingReportDiff(XML_Parser rootParser, 7863 unsigned int levelsAwayFromRootParser, const char *before, 7864 const char *after, ptrdiff_t bytesMore, int source_line, 7865 enum XML_Account account) { 7866 assert(! rootParser->m_parentParser); 7867 7868 fprintf(stderr, 7869 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"", 7870 bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", 7871 levelsAwayFromRootParser, source_line, 10, ""); 7872 7873 const char ellipis[] = "[..]"; 7874 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 7875 const unsigned int contextLength = 10; 7876 7877 /* Note: Performance is of no concern here */ 7878 const char *walker = before; 7879 if ((rootParser->m_accounting.debugLevel >= 3u) 7880 || (after - before) 7881 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 7882 for (; walker < after; walker++) { 7883 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7884 } 7885 } else { 7886 for (; walker < before + contextLength; walker++) { 7887 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7888 } 7889 fprintf(stderr, ellipis); 7890 walker = after - contextLength; 7891 for (; walker < after; walker++) { 7892 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7893 } 7894 } 7895 fprintf(stderr, "\"\n"); 7896 } 7897 7898 static XML_Bool 7899 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, 7900 const char *after, int source_line, 7901 enum XML_Account account) { 7902 /* Note: We need to check the token type *first* to be sure that 7903 * we can even access variable <after>, safely. 7904 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ 7905 switch (tok) { 7906 case XML_TOK_INVALID: 7907 case XML_TOK_PARTIAL: 7908 case XML_TOK_PARTIAL_CHAR: 7909 case XML_TOK_NONE: 7910 return XML_TRUE; 7911 } 7912 7913 if (account == XML_ACCOUNT_NONE) 7914 return XML_TRUE; /* because these bytes have been accounted for, already */ 7915 7916 unsigned int levelsAwayFromRootParser; 7917 const XML_Parser rootParser 7918 = getRootParserOf(originParser, &levelsAwayFromRootParser); 7919 assert(! 
rootParser->m_parentParser); 7920 7921 const int isDirect 7922 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 7923 const ptrdiff_t bytesMore = after - before; 7924 7925 XmlBigCount *const additionTarget 7926 = isDirect ? &rootParser->m_accounting.countBytesDirect 7927 : &rootParser->m_accounting.countBytesIndirect; 7928 7929 /* Detect and avoid integer overflow */ 7930 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) 7931 return XML_FALSE; 7932 *additionTarget += bytesMore; 7933 7934 const XmlBigCount countBytesOutput 7935 = rootParser->m_accounting.countBytesDirect 7936 + rootParser->m_accounting.countBytesIndirect; 7937 const float amplificationFactor 7938 = accountingGetCurrentAmplification(rootParser); 7939 const XML_Bool tolerated 7940 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) 7941 || (amplificationFactor 7942 <= rootParser->m_accounting.maximumAmplificationFactor); 7943 7944 if (rootParser->m_accounting.debugLevel >= 2u) { 7945 accountingReportStats(rootParser, ""); 7946 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, 7947 bytesMore, source_line, account); 7948 } 7949 7950 return tolerated; 7951 } 7952 7953 unsigned long long 7954 testingAccountingGetCountBytesDirect(XML_Parser parser) { 7955 if (! parser) 7956 return 0; 7957 return parser->m_accounting.countBytesDirect; 7958 } 7959 7960 unsigned long long 7961 testingAccountingGetCountBytesIndirect(XML_Parser parser) { 7962 if (! parser) 7963 return 0; 7964 return parser->m_accounting.countBytesIndirect; 7965 } 7966 7967 static void 7968 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, 7969 const char *action, int sourceLine) { 7970 assert(! 
rootParser->m_parentParser); 7971 if (rootParser->m_entity_stats.debugLevel == 0u) 7972 return; 7973 7974 # if defined(XML_UNICODE) 7975 const char *const entityName = "[..]"; 7976 # else 7977 const char *const entityName = entity->name; 7978 # endif 7979 7980 fprintf( 7981 stderr, 7982 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n", 7983 (void *)rootParser, rootParser->m_entity_stats.countEverOpened, 7984 rootParser->m_entity_stats.currentDepth, 7985 rootParser->m_entity_stats.maximumDepthSeen, 7986 (rootParser->m_entity_stats.currentDepth - 1) * 2, "", 7987 entity->is_param ? "%" : "&", entityName, action, entity->textLen, 7988 sourceLine); 7989 } 7990 7991 static void 7992 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { 7993 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7994 assert(! rootParser->m_parentParser); 7995 7996 rootParser->m_entity_stats.countEverOpened++; 7997 rootParser->m_entity_stats.currentDepth++; 7998 if (rootParser->m_entity_stats.currentDepth 7999 > rootParser->m_entity_stats.maximumDepthSeen) { 8000 rootParser->m_entity_stats.maximumDepthSeen++; 8001 } 8002 8003 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 8004 } 8005 8006 static void 8007 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { 8008 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8009 assert(! rootParser->m_parentParser); 8010 8011 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 8012 rootParser->m_entity_stats.currentDepth--; 8013 } 8014 8015 static XML_Parser 8016 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { 8017 XML_Parser rootParser = parser; 8018 unsigned int stepsTakenUpwards = 0; 8019 while (rootParser->m_parentParser) { 8020 rootParser = rootParser->m_parentParser; 8021 stepsTakenUpwards++; 8022 } 8023 assert(! 
rootParser->m_parentParser); 8024 if (outLevelDiff != NULL) { 8025 *outLevelDiff = stepsTakenUpwards; 8026 } 8027 return rootParser; 8028 } 8029 8030 const char * 8031 unsignedCharToPrintable(unsigned char c) { 8032 switch (c) { 8033 case 0: 8034 return "\\0"; 8035 case 1: 8036 return "\\x1"; 8037 case 2: 8038 return "\\x2"; 8039 case 3: 8040 return "\\x3"; 8041 case 4: 8042 return "\\x4"; 8043 case 5: 8044 return "\\x5"; 8045 case 6: 8046 return "\\x6"; 8047 case 7: 8048 return "\\x7"; 8049 case 8: 8050 return "\\x8"; 8051 case 9: 8052 return "\\t"; 8053 case 10: 8054 return "\\n"; 8055 case 11: 8056 return "\\xB"; 8057 case 12: 8058 return "\\xC"; 8059 case 13: 8060 return "\\r"; 8061 case 14: 8062 return "\\xE"; 8063 case 15: 8064 return "\\xF"; 8065 case 16: 8066 return "\\x10"; 8067 case 17: 8068 return "\\x11"; 8069 case 18: 8070 return "\\x12"; 8071 case 19: 8072 return "\\x13"; 8073 case 20: 8074 return "\\x14"; 8075 case 21: 8076 return "\\x15"; 8077 case 22: 8078 return "\\x16"; 8079 case 23: 8080 return "\\x17"; 8081 case 24: 8082 return "\\x18"; 8083 case 25: 8084 return "\\x19"; 8085 case 26: 8086 return "\\x1A"; 8087 case 27: 8088 return "\\x1B"; 8089 case 28: 8090 return "\\x1C"; 8091 case 29: 8092 return "\\x1D"; 8093 case 30: 8094 return "\\x1E"; 8095 case 31: 8096 return "\\x1F"; 8097 case 32: 8098 return " "; 8099 case 33: 8100 return "!"; 8101 case 34: 8102 return "\\\""; 8103 case 35: 8104 return "#"; 8105 case 36: 8106 return "$"; 8107 case 37: 8108 return "%"; 8109 case 38: 8110 return "&"; 8111 case 39: 8112 return "'"; 8113 case 40: 8114 return "("; 8115 case 41: 8116 return ")"; 8117 case 42: 8118 return "*"; 8119 case 43: 8120 return "+"; 8121 case 44: 8122 return ","; 8123 case 45: 8124 return "-"; 8125 case 46: 8126 return "."; 8127 case 47: 8128 return "/"; 8129 case 48: 8130 return "0"; 8131 case 49: 8132 return "1"; 8133 case 50: 8134 return "2"; 8135 case 51: 8136 return "3"; 8137 case 52: 8138 return "4"; 8139 case 53: 8140 return 
"5"; 8141 case 54: 8142 return "6"; 8143 case 55: 8144 return "7"; 8145 case 56: 8146 return "8"; 8147 case 57: 8148 return "9"; 8149 case 58: 8150 return ":"; 8151 case 59: 8152 return ";"; 8153 case 60: 8154 return "<"; 8155 case 61: 8156 return "="; 8157 case 62: 8158 return ">"; 8159 case 63: 8160 return "?"; 8161 case 64: 8162 return "@"; 8163 case 65: 8164 return "A"; 8165 case 66: 8166 return "B"; 8167 case 67: 8168 return "C"; 8169 case 68: 8170 return "D"; 8171 case 69: 8172 return "E"; 8173 case 70: 8174 return "F"; 8175 case 71: 8176 return "G"; 8177 case 72: 8178 return "H"; 8179 case 73: 8180 return "I"; 8181 case 74: 8182 return "J"; 8183 case 75: 8184 return "K"; 8185 case 76: 8186 return "L"; 8187 case 77: 8188 return "M"; 8189 case 78: 8190 return "N"; 8191 case 79: 8192 return "O"; 8193 case 80: 8194 return "P"; 8195 case 81: 8196 return "Q"; 8197 case 82: 8198 return "R"; 8199 case 83: 8200 return "S"; 8201 case 84: 8202 return "T"; 8203 case 85: 8204 return "U"; 8205 case 86: 8206 return "V"; 8207 case 87: 8208 return "W"; 8209 case 88: 8210 return "X"; 8211 case 89: 8212 return "Y"; 8213 case 90: 8214 return "Z"; 8215 case 91: 8216 return "["; 8217 case 92: 8218 return "\\\\"; 8219 case 93: 8220 return "]"; 8221 case 94: 8222 return "^"; 8223 case 95: 8224 return "_"; 8225 case 96: 8226 return "`"; 8227 case 97: 8228 return "a"; 8229 case 98: 8230 return "b"; 8231 case 99: 8232 return "c"; 8233 case 100: 8234 return "d"; 8235 case 101: 8236 return "e"; 8237 case 102: 8238 return "f"; 8239 case 103: 8240 return "g"; 8241 case 104: 8242 return "h"; 8243 case 105: 8244 return "i"; 8245 case 106: 8246 return "j"; 8247 case 107: 8248 return "k"; 8249 case 108: 8250 return "l"; 8251 case 109: 8252 return "m"; 8253 case 110: 8254 return "n"; 8255 case 111: 8256 return "o"; 8257 case 112: 8258 return "p"; 8259 case 113: 8260 return "q"; 8261 case 114: 8262 return "r"; 8263 case 115: 8264 return "s"; 8265 case 116: 8266 return "t"; 8267 case 117: 8268 
return "u"; 8269 case 118: 8270 return "v"; 8271 case 119: 8272 return "w"; 8273 case 120: 8274 return "x"; 8275 case 121: 8276 return "y"; 8277 case 122: 8278 return "z"; 8279 case 123: 8280 return "{"; 8281 case 124: 8282 return "|"; 8283 case 125: 8284 return "}"; 8285 case 126: 8286 return "~"; 8287 case 127: 8288 return "\\x7F"; 8289 case 128: 8290 return "\\x80"; 8291 case 129: 8292 return "\\x81"; 8293 case 130: 8294 return "\\x82"; 8295 case 131: 8296 return "\\x83"; 8297 case 132: 8298 return "\\x84"; 8299 case 133: 8300 return "\\x85"; 8301 case 134: 8302 return "\\x86"; 8303 case 135: 8304 return "\\x87"; 8305 case 136: 8306 return "\\x88"; 8307 case 137: 8308 return "\\x89"; 8309 case 138: 8310 return "\\x8A"; 8311 case 139: 8312 return "\\x8B"; 8313 case 140: 8314 return "\\x8C"; 8315 case 141: 8316 return "\\x8D"; 8317 case 142: 8318 return "\\x8E"; 8319 case 143: 8320 return "\\x8F"; 8321 case 144: 8322 return "\\x90"; 8323 case 145: 8324 return "\\x91"; 8325 case 146: 8326 return "\\x92"; 8327 case 147: 8328 return "\\x93"; 8329 case 148: 8330 return "\\x94"; 8331 case 149: 8332 return "\\x95"; 8333 case 150: 8334 return "\\x96"; 8335 case 151: 8336 return "\\x97"; 8337 case 152: 8338 return "\\x98"; 8339 case 153: 8340 return "\\x99"; 8341 case 154: 8342 return "\\x9A"; 8343 case 155: 8344 return "\\x9B"; 8345 case 156: 8346 return "\\x9C"; 8347 case 157: 8348 return "\\x9D"; 8349 case 158: 8350 return "\\x9E"; 8351 case 159: 8352 return "\\x9F"; 8353 case 160: 8354 return "\\xA0"; 8355 case 161: 8356 return "\\xA1"; 8357 case 162: 8358 return "\\xA2"; 8359 case 163: 8360 return "\\xA3"; 8361 case 164: 8362 return "\\xA4"; 8363 case 165: 8364 return "\\xA5"; 8365 case 166: 8366 return "\\xA6"; 8367 case 167: 8368 return "\\xA7"; 8369 case 168: 8370 return "\\xA8"; 8371 case 169: 8372 return "\\xA9"; 8373 case 170: 8374 return "\\xAA"; 8375 case 171: 8376 return "\\xAB"; 8377 case 172: 8378 return "\\xAC"; 8379 case 173: 8380 return "\\xAD"; 8381 
case 174: 8382 return "\\xAE"; 8383 case 175: 8384 return "\\xAF"; 8385 case 176: 8386 return "\\xB0"; 8387 case 177: 8388 return "\\xB1"; 8389 case 178: 8390 return "\\xB2"; 8391 case 179: 8392 return "\\xB3"; 8393 case 180: 8394 return "\\xB4"; 8395 case 181: 8396 return "\\xB5"; 8397 case 182: 8398 return "\\xB6"; 8399 case 183: 8400 return "\\xB7"; 8401 case 184: 8402 return "\\xB8"; 8403 case 185: 8404 return "\\xB9"; 8405 case 186: 8406 return "\\xBA"; 8407 case 187: 8408 return "\\xBB"; 8409 case 188: 8410 return "\\xBC"; 8411 case 189: 8412 return "\\xBD"; 8413 case 190: 8414 return "\\xBE"; 8415 case 191: 8416 return "\\xBF"; 8417 case 192: 8418 return "\\xC0"; 8419 case 193: 8420 return "\\xC1"; 8421 case 194: 8422 return "\\xC2"; 8423 case 195: 8424 return "\\xC3"; 8425 case 196: 8426 return "\\xC4"; 8427 case 197: 8428 return "\\xC5"; 8429 case 198: 8430 return "\\xC6"; 8431 case 199: 8432 return "\\xC7"; 8433 case 200: 8434 return "\\xC8"; 8435 case 201: 8436 return "\\xC9"; 8437 case 202: 8438 return "\\xCA"; 8439 case 203: 8440 return "\\xCB"; 8441 case 204: 8442 return "\\xCC"; 8443 case 205: 8444 return "\\xCD"; 8445 case 206: 8446 return "\\xCE"; 8447 case 207: 8448 return "\\xCF"; 8449 case 208: 8450 return "\\xD0"; 8451 case 209: 8452 return "\\xD1"; 8453 case 210: 8454 return "\\xD2"; 8455 case 211: 8456 return "\\xD3"; 8457 case 212: 8458 return "\\xD4"; 8459 case 213: 8460 return "\\xD5"; 8461 case 214: 8462 return "\\xD6"; 8463 case 215: 8464 return "\\xD7"; 8465 case 216: 8466 return "\\xD8"; 8467 case 217: 8468 return "\\xD9"; 8469 case 218: 8470 return "\\xDA"; 8471 case 219: 8472 return "\\xDB"; 8473 case 220: 8474 return "\\xDC"; 8475 case 221: 8476 return "\\xDD"; 8477 case 222: 8478 return "\\xDE"; 8479 case 223: 8480 return "\\xDF"; 8481 case 224: 8482 return "\\xE0"; 8483 case 225: 8484 return "\\xE1"; 8485 case 226: 8486 return "\\xE2"; 8487 case 227: 8488 return "\\xE3"; 8489 case 228: 8490 return "\\xE4"; 8491 case 229: 8492 
return "\\xE5"; 8493 case 230: 8494 return "\\xE6"; 8495 case 231: 8496 return "\\xE7"; 8497 case 232: 8498 return "\\xE8"; 8499 case 233: 8500 return "\\xE9"; 8501 case 234: 8502 return "\\xEA"; 8503 case 235: 8504 return "\\xEB"; 8505 case 236: 8506 return "\\xEC"; 8507 case 237: 8508 return "\\xED"; 8509 case 238: 8510 return "\\xEE"; 8511 case 239: 8512 return "\\xEF"; 8513 case 240: 8514 return "\\xF0"; 8515 case 241: 8516 return "\\xF1"; 8517 case 242: 8518 return "\\xF2"; 8519 case 243: 8520 return "\\xF3"; 8521 case 244: 8522 return "\\xF4"; 8523 case 245: 8524 return "\\xF5"; 8525 case 246: 8526 return "\\xF6"; 8527 case 247: 8528 return "\\xF7"; 8529 case 248: 8530 return "\\xF8"; 8531 case 249: 8532 return "\\xF9"; 8533 case 250: 8534 return "\\xFA"; 8535 case 251: 8536 return "\\xFB"; 8537 case 252: 8538 return "\\xFC"; 8539 case 253: 8540 return "\\xFD"; 8541 case 254: 8542 return "\\xFE"; 8543 case 255: 8544 return "\\xFF"; 8545 default: 8546 assert(0); /* never gets here */ 8547 return "dead code"; 8548 } 8549 assert(0); /* never gets here */ 8550 } 8551 8552 #endif /* XML_GE == 1 */ 8553 8554 static unsigned long 8555 getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { 8556 const char *const valueOrNull = getenv(variableName); 8557 if (valueOrNull == NULL) { 8558 return defaultDebugLevel; 8559 } 8560 const char *const value = valueOrNull; 8561 8562 errno = 0; 8563 char *afterValue = NULL; 8564 unsigned long debugLevel = strtoul(value, &afterValue, 10); 8565 if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 8566 errno = 0; 8567 return defaultDebugLevel; 8568 } 8569 8570 return debugLevel; 8571 } 8572