1 /* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> 17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> 38 Copyright (c) 2022 Jann Horn 
<jannh@google.com> 39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> 43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 44 Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> 45 Licensed under the MIT license: 46 47 Permission is hereby granted, free of charge, to any person obtaining 48 a copy of this software and associated documentation files (the 49 "Software"), to deal in the Software without restriction, including 50 without limitation the rights to use, copy, modify, merge, publish, 51 distribute, sublicense, and/or sell copies of the Software, and to permit 52 persons to whom the Software is furnished to do so, subject to the 53 following conditions: 54 55 The above copyright notice and this permission notice shall be included 56 in all copies or substantial portions of the Software. 57 58 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 59 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 60 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 61 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 62 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 63 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 64 USE OR OTHER DEALINGS IN THE SOFTWARE. 65 */ 66 67 #define XML_BUILDING_EXPAT 1 68 69 #include "expat_config.h" 70 71 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 72 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 73 #endif 74 75 #if defined(XML_DTD) && XML_GE == 0 76 # error Either undefine XML_DTD or define XML_GE to 1. 77 #endif 78 79 #if ! 
defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ 80 || (XML_CONTEXT_BYTES + 0 < 0) 81 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 82 #endif 83 84 #if defined(HAVE_SYSCALL_GETRANDOM) 85 # if ! defined(_GNU_SOURCE) 86 # define _GNU_SOURCE 1 /* syscall prototype */ 87 # endif 88 #endif 89 90 #ifdef _WIN32 91 /* force stdlib to define rand_s() */ 92 # if ! defined(_CRT_RAND_S) 93 # define _CRT_RAND_S 94 # endif 95 #endif 96 97 #include <stdbool.h> 98 #include <stddef.h> 99 #include <string.h> /* memset(), memcpy() */ 100 #include <assert.h> 101 #include <limits.h> /* INT_MAX, UINT_MAX */ 102 #include <stdio.h> /* fprintf */ 103 #include <stdlib.h> /* getenv, rand_s */ 104 #include <stdint.h> /* uintptr_t */ 105 #include <math.h> /* isnan */ 106 107 #ifdef _WIN32 108 # define getpid GetCurrentProcessId 109 #else 110 # include <sys/time.h> /* gettimeofday() */ 111 # include <sys/types.h> /* getpid() */ 112 # include <unistd.h> /* getpid() */ 113 # include <fcntl.h> /* O_RDONLY */ 114 # include <errno.h> 115 #endif 116 117 #ifdef _WIN32 118 # include "winconfig.h" 119 #endif 120 121 #include "ascii.h" 122 #include "expat.h" 123 #include "siphash.h" 124 125 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 126 # if defined(HAVE_GETRANDOM) 127 # include <sys/random.h> /* getrandom */ 128 # else 129 # include <unistd.h> /* syscall */ 130 # include <sys/syscall.h> /* SYS_getrandom */ 131 # endif 132 # if ! defined(GRND_NONBLOCK) 133 # define GRND_NONBLOCK 0x0001 134 # endif /* defined(GRND_NONBLOCK) */ 135 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 136 137 #if defined(HAVE_LIBBSD) \ 138 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) 139 # include <bsd/stdlib.h> 140 #endif 141 142 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 143 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 144 #endif 145 146 #if ! 
defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ 147 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ 148 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ 149 && ! defined(XML_POOR_ENTROPY) 150 # error You do not have support for any sources of high quality entropy \ 151 enabled. For end user security, that is probably not what you want. \ 152 \ 153 Your options include: \ 154 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 155 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 156 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 157 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 158 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ 159 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ 160 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 161 * Windows >=Vista (rand_s): _WIN32. \ 162 \ 163 If insist on not using any of these, bypass this error by defining \ 164 XML_POOR_ENTROPY; you have been warned. \ 165 \ 166 If you have reasons to patch this detection code away or need changes \ 167 to the build system, please open a bug. Thank you! 168 #endif 169 170 #ifdef XML_UNICODE 171 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 172 # define XmlConvert XmlUtf16Convert 173 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 174 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 175 # define XmlEncode XmlUtf16Encode 176 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) 177 typedef unsigned short ICHAR; 178 #else 179 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 180 # define XmlConvert XmlUtf8Convert 181 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 182 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS 183 # define XmlEncode XmlUtf8Encode 184 # define MUST_CONVERT(enc, s) (! 
(enc)->isUtf8) 185 typedef char ICHAR; 186 #endif 187 188 #ifndef XML_NS 189 190 # define XmlInitEncodingNS XmlInitEncoding 191 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding 192 # undef XmlGetInternalEncodingNS 193 # define XmlGetInternalEncodingNS XmlGetInternalEncoding 194 # define XmlParseXmlDeclNS XmlParseXmlDecl 195 196 #endif 197 198 #ifdef XML_UNICODE 199 200 # ifdef XML_UNICODE_WCHAR_T 201 # define XML_T(x) (const wchar_t) x 202 # define XML_L(x) L##x 203 # else 204 # define XML_T(x) (const unsigned short)x 205 # define XML_L(x) x 206 # endif 207 208 #else 209 210 # define XML_T(x) x 211 # define XML_L(x) x 212 213 #endif 214 215 /* Round up n to be a multiple of sz, where sz is a power of 2. */ 216 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) 217 218 /* Do safe (NULL-aware) pointer arithmetic */ 219 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) 220 221 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) 222 223 #include "internal.h" 224 #include "xmltok.h" 225 #include "xmlrole.h" 226 227 typedef const XML_Char *KEY; 228 229 typedef struct { 230 KEY name; 231 } NAMED; 232 233 typedef struct { 234 NAMED **v; 235 unsigned char power; 236 size_t size; 237 size_t used; 238 XML_Parser parser; 239 } HASH_TABLE; 240 241 static size_t keylen(KEY s); 242 243 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); 244 245 /* For probing (after a collision) we need a step size relative prime 246 to the hash table size, which is a power of 2. We use double-hashing, 247 since we can calculate a second hash value cheaply by taking those bits 248 of the first hash value that were discarded (masked out) when the table 249 index was calculated: index = hash & mask, where mask = table->size - 1. 250 We limit the maximum step size to table->size / 4 (mask >> 2) and make 251 it odd, since odd numbers are always relative prime to a power of 2. 
252 */ 253 #define SECOND_HASH(hash, mask, power) \ 254 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) 255 #define PROBE_STEP(hash, mask, power) \ 256 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) 257 258 typedef struct { 259 NAMED **p; 260 NAMED **end; 261 } HASH_TABLE_ITER; 262 263 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 264 #define INIT_DATA_BUF_SIZE 1024 265 #define INIT_ATTS_SIZE 16 266 #define INIT_ATTS_VERSION 0xFFFFFFFF 267 #define INIT_BLOCK_SIZE 1024 268 #define INIT_BUFFER_SIZE 1024 269 270 #define EXPAND_SPARE 24 271 272 typedef struct binding { 273 struct prefix *prefix; 274 struct binding *nextTagBinding; 275 struct binding *prevPrefixBinding; 276 const struct attribute_id *attId; 277 XML_Char *uri; 278 int uriLen; 279 int uriAlloc; 280 } BINDING; 281 282 typedef struct prefix { 283 const XML_Char *name; 284 BINDING *binding; 285 } PREFIX; 286 287 typedef struct { 288 const XML_Char *str; 289 const XML_Char *localPart; 290 const XML_Char *prefix; 291 int strLen; 292 int uriLen; 293 int prefixLen; 294 } TAG_NAME; 295 296 /* TAG represents an open element. 297 The name of the element is stored in both the document and API 298 encodings. The memory buffer 'buf' is a separately-allocated 299 memory area which stores the name. During the XML_Parse()/ 300 XML_ParseBuffer() when the element is open, the memory for the 'raw' 301 version of the name (in the document encoding) is shared with the 302 document buffer. If the element is open across calls to 303 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 304 contain the 'raw' name as well. 305 306 A parser reuses these structures, maintaining a list of allocated 307 TAG objects in a free list. 
308 */ 309 typedef struct tag { 310 struct tag *parent; /* parent of this element */ 311 const char *rawName; /* tagName in the original encoding */ 312 int rawNameLength; 313 TAG_NAME name; /* tagName in the API encoding */ 314 char *buf; /* buffer for name components */ 315 char *bufEnd; /* end of the buffer */ 316 BINDING *bindings; 317 } TAG; 318 319 typedef struct { 320 const XML_Char *name; 321 const XML_Char *textPtr; 322 int textLen; /* length in XML_Chars */ 323 int processed; /* # of processed bytes - when suspended */ 324 const XML_Char *systemId; 325 const XML_Char *base; 326 const XML_Char *publicId; 327 const XML_Char *notation; 328 XML_Bool open; 329 XML_Bool hasMore; /* true if entity has not been completely processed */ 330 /* An entity can be open while being already completely processed (hasMore == 331 XML_FALSE). The reason is the delayed closing of entities until their inner 332 entities are processed and closed */ 333 XML_Bool is_param; 334 XML_Bool is_internal; /* true if declared in internal subset outside PE */ 335 } ENTITY; 336 337 typedef struct { 338 enum XML_Content_Type type; 339 enum XML_Content_Quant quant; 340 const XML_Char *name; 341 int firstchild; 342 int lastchild; 343 int childcnt; 344 int nextsib; 345 } CONTENT_SCAFFOLD; 346 347 #define INIT_SCAFFOLD_ELEMENTS 32 348 349 typedef struct block { 350 struct block *next; 351 int size; 352 XML_Char s[1]; 353 } BLOCK; 354 355 typedef struct { 356 BLOCK *blocks; 357 BLOCK *freeBlocks; 358 const XML_Char *end; 359 XML_Char *ptr; 360 XML_Char *start; 361 XML_Parser parser; 362 } STRING_POOL; 363 364 /* The XML_Char before the name is used to determine whether 365 an attribute has been specified. 
*/ 366 typedef struct attribute_id { 367 XML_Char *name; 368 PREFIX *prefix; 369 XML_Bool maybeTokenized; 370 XML_Bool xmlns; 371 } ATTRIBUTE_ID; 372 373 typedef struct { 374 const ATTRIBUTE_ID *id; 375 XML_Bool isCdata; 376 const XML_Char *value; 377 } DEFAULT_ATTRIBUTE; 378 379 typedef struct { 380 unsigned long version; 381 unsigned long hash; 382 const XML_Char *uriName; 383 } NS_ATT; 384 385 typedef struct { 386 const XML_Char *name; 387 PREFIX *prefix; 388 const ATTRIBUTE_ID *idAtt; 389 int nDefaultAtts; 390 int allocDefaultAtts; 391 DEFAULT_ATTRIBUTE *defaultAtts; 392 } ELEMENT_TYPE; 393 394 typedef struct { 395 HASH_TABLE generalEntities; 396 HASH_TABLE elementTypes; 397 HASH_TABLE attributeIds; 398 HASH_TABLE prefixes; 399 STRING_POOL pool; 400 STRING_POOL entityValuePool; 401 /* false once a parameter entity reference has been skipped */ 402 XML_Bool keepProcessing; 403 /* true once an internal or external PE reference has been encountered; 404 this includes the reference to an external subset */ 405 XML_Bool hasParamEntityRefs; 406 XML_Bool standalone; 407 #ifdef XML_DTD 408 /* indicates if external PE has been read */ 409 XML_Bool paramEntityRead; 410 HASH_TABLE paramEntities; 411 #endif /* XML_DTD */ 412 PREFIX defaultPrefix; 413 /* === scaffolding for building content model === */ 414 XML_Bool in_eldecl; 415 CONTENT_SCAFFOLD *scaffold; 416 unsigned contentStringLen; 417 unsigned scaffSize; 418 unsigned scaffCount; 419 int scaffLevel; 420 int *scaffIndex; 421 } DTD; 422 423 enum EntityType { 424 ENTITY_INTERNAL, 425 ENTITY_ATTRIBUTE, 426 ENTITY_VALUE, 427 }; 428 429 typedef struct open_internal_entity { 430 const char *internalEventPtr; 431 const char *internalEventEndPtr; 432 struct open_internal_entity *next; 433 ENTITY *entity; 434 int startTagLevel; 435 XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 436 enum EntityType type; 437 } OPEN_INTERNAL_ENTITY; 438 439 enum XML_Account { 440 XML_ACCOUNT_DIRECT, /* bytes directly passed to the 
Expat parser */ 441 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 442 expansion */ 443 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 444 }; 445 446 #if XML_GE == 1 447 typedef unsigned long long XmlBigCount; 448 typedef struct accounting { 449 XmlBigCount countBytesDirect; 450 XmlBigCount countBytesIndirect; 451 unsigned long debugLevel; 452 float maximumAmplificationFactor; // >=1.0 453 unsigned long long activationThresholdBytes; 454 } ACCOUNTING; 455 456 typedef struct MALLOC_TRACKER { 457 XmlBigCount bytesAllocated; 458 XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 459 unsigned long debugLevel; 460 float maximumAmplificationFactor; // >=1.0 461 XmlBigCount activationThresholdBytes; 462 } MALLOC_TRACKER; 463 464 typedef struct entity_stats { 465 unsigned int countEverOpened; 466 unsigned int currentDepth; 467 unsigned int maximumDepthSeen; 468 unsigned long debugLevel; 469 } ENTITY_STATS; 470 #endif /* XML_GE == 1 */ 471 472 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, 473 const char *end, const char **endPtr); 474 475 static Processor prologProcessor; 476 static Processor prologInitProcessor; 477 static Processor contentProcessor; 478 static Processor cdataSectionProcessor; 479 #ifdef XML_DTD 480 static Processor ignoreSectionProcessor; 481 static Processor externalParEntProcessor; 482 static Processor externalParEntInitProcessor; 483 static Processor entityValueProcessor; 484 static Processor entityValueInitProcessor; 485 #endif /* XML_DTD */ 486 static Processor epilogProcessor; 487 static Processor errorProcessor; 488 static Processor externalEntityInitProcessor; 489 static Processor externalEntityInitProcessor2; 490 static Processor externalEntityInitProcessor3; 491 static Processor externalEntityContentProcessor; 492 static Processor internalEntityProcessor; 493 494 static enum XML_Error handleUnknownEncoding(XML_Parser parser, 495 const XML_Char 
*encodingName); 496 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 497 const char *s, const char *next); 498 static enum XML_Error initializeEncoding(XML_Parser parser); 499 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, 500 const char *s, const char *end, int tok, 501 const char *next, const char **nextPtr, 502 XML_Bool haveMore, XML_Bool allowClosingDoctype, 503 enum XML_Account account); 504 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, 505 XML_Bool betweenDecl, enum EntityType type); 506 static enum XML_Error doContent(XML_Parser parser, int startTagLevel, 507 const ENCODING *enc, const char *start, 508 const char *end, const char **endPtr, 509 XML_Bool haveMore, enum XML_Account account); 510 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, 511 const char **startPtr, const char *end, 512 const char **nextPtr, XML_Bool haveMore, 513 enum XML_Account account); 514 #ifdef XML_DTD 515 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, 516 const char **startPtr, const char *end, 517 const char **nextPtr, XML_Bool haveMore); 518 #endif /* XML_DTD */ 519 520 static void freeBindings(XML_Parser parser, BINDING *bindings); 521 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 522 const char *attStr, TAG_NAME *tagNamePtr, 523 BINDING **bindingsPtr, 524 enum XML_Account account); 525 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, 526 const ATTRIBUTE_ID *attId, const XML_Char *uri, 527 BINDING **bindingsPtr); 528 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, 529 XML_Bool isCdata, XML_Bool isId, 530 const XML_Char *value, XML_Parser parser); 531 static enum XML_Error storeAttributeValue(XML_Parser parser, 532 const ENCODING *enc, XML_Bool isCdata, 533 const char *ptr, const char *end, 534 STRING_POOL *pool, 535 enum XML_Account account); 536 static enum XML_Error 537 
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 538 const char *ptr, const char *end, STRING_POOL *pool, 539 enum XML_Account account, const char **nextPtr); 540 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, 541 const char *start, const char *end); 542 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); 543 #if XML_GE == 1 544 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, 545 const char *start, const char *end, 546 enum XML_Account account, 547 const char **nextPtr); 548 static enum XML_Error callStoreEntityValue(XML_Parser parser, 549 const ENCODING *enc, 550 const char *start, const char *end, 551 enum XML_Account account); 552 #else 553 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); 554 #endif 555 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 556 const char *start, const char *end); 557 static int reportComment(XML_Parser parser, const ENCODING *enc, 558 const char *start, const char *end); 559 static void reportDefault(XML_Parser parser, const ENCODING *enc, 560 const char *start, const char *end); 561 562 static const XML_Char *getContext(XML_Parser parser); 563 static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 564 565 static void FASTCALL normalizePublicId(XML_Char *s); 566 567 static DTD *dtdCreate(XML_Parser parser); 568 /* do not call if m_parentParser != NULL */ 569 static void dtdReset(DTD *p, XML_Parser parser); 570 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser); 571 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 572 XML_Parser parser); 573 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 574 STRING_POOL *newPool, const HASH_TABLE *oldTable); 575 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, 576 size_t createSize); 577 static void FASTCALL 
hashTableInit(HASH_TABLE *table, XML_Parser parser); 578 static void FASTCALL hashTableClear(HASH_TABLE *table); 579 static void FASTCALL hashTableDestroy(HASH_TABLE *table); 580 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, 581 const HASH_TABLE *table); 582 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); 583 584 static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser); 585 static void FASTCALL poolClear(STRING_POOL *pool); 586 static void FASTCALL poolDestroy(STRING_POOL *pool); 587 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 588 const char *ptr, const char *end); 589 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 590 const char *ptr, const char *end); 591 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); 592 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, 593 const XML_Char *s); 594 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, 595 int n); 596 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, 597 const XML_Char *s); 598 599 static int FASTCALL nextScaffoldPart(XML_Parser parser); 600 static XML_Content *build_model(XML_Parser parser); 601 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, 602 const char *ptr, const char *end); 603 604 static XML_Char *copyString(const XML_Char *s, XML_Parser parser); 605 606 static unsigned long generate_hash_secret_salt(XML_Parser parser); 607 static XML_Bool startParsing(XML_Parser parser); 608 609 static XML_Parser parserCreate(const XML_Char *encodingName, 610 const XML_Memory_Handling_Suite *memsuite, 611 const XML_Char *nameSep, DTD *dtd, 612 XML_Parser parentParser); 613 614 static void parserInit(XML_Parser parser, const XML_Char *encodingName); 615 616 #if XML_GE == 1 617 static float accountingGetCurrentAmplification(XML_Parser rootParser); 618 static void accountingReportStats(XML_Parser originParser, const char *epilog); 619 static 
void accountingOnAbort(XML_Parser originParser); 620 static void accountingReportDiff(XML_Parser rootParser, 621 unsigned int levelsAwayFromRootParser, 622 const char *before, const char *after, 623 ptrdiff_t bytesMore, int source_line, 624 enum XML_Account account); 625 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, 626 const char *before, const char *after, 627 int source_line, 628 enum XML_Account account); 629 630 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, 631 const char *action, int sourceLine); 632 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, 633 int sourceLine); 634 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, 635 int sourceLine); 636 #endif /* XML_GE == 1 */ 637 638 static XML_Parser getRootParserOf(XML_Parser parser, 639 unsigned int *outLevelDiff); 640 641 static unsigned long getDebugLevel(const char *variableName, 642 unsigned long defaultDebugLevel); 643 644 #define poolStart(pool) ((pool)->start) 645 #define poolLength(pool) ((pool)->ptr - (pool)->start) 646 #define poolChop(pool) ((void)--(pool->ptr)) 647 #define poolLastChar(pool) (((pool)->ptr)[-1]) 648 #define poolDiscard(pool) ((pool)->ptr = (pool)->start) 649 #define poolFinish(pool) ((pool)->start = (pool)->ptr) 650 #define poolAppendChar(pool, c) \ 651 (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ 652 ? 0 \ 653 : ((*((pool)->ptr)++ = c), 1)) 654 655 #if ! defined(XML_TESTING) 656 const 657 #endif 658 XML_Bool g_reparseDeferralEnabledDefault 659 = XML_TRUE; // write ONLY in runtests.c 660 #if defined(XML_TESTING) 661 unsigned int g_bytesScanned = 0; // used for testing only 662 #endif 663 664 struct XML_ParserStruct { 665 /* The first member must be m_userData so that the XML_GetUserData 666 macro works. 
*/ 667 void *m_userData; 668 void *m_handlerArg; 669 670 // How the four parse buffer pointers below relate in time and space: 671 // 672 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim 673 // | | | | 674 // <--parsed-->| | | 675 // <---parsing--->| | 676 // <--unoccupied-->| 677 // <---------total-malloced/realloced-------->| 678 679 char *m_buffer; // malloc/realloc base pointer of parse buffer 680 const XML_Memory_Handling_Suite m_mem; 681 const char *m_bufferPtr; // first character to be parsed 682 char *m_bufferEnd; // past last character to be parsed 683 const char *m_bufferLim; // allocated end of m_buffer 684 685 XML_Index m_parseEndByteIndex; 686 const char *m_parseEndPtr; 687 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ 688 XML_Bool m_reparseDeferralEnabled; 689 int m_lastBufferRequestSize; 690 XML_Char *m_dataBuf; 691 XML_Char *m_dataBufEnd; 692 XML_StartElementHandler m_startElementHandler; 693 XML_EndElementHandler m_endElementHandler; 694 XML_CharacterDataHandler m_characterDataHandler; 695 XML_ProcessingInstructionHandler m_processingInstructionHandler; 696 XML_CommentHandler m_commentHandler; 697 XML_StartCdataSectionHandler m_startCdataSectionHandler; 698 XML_EndCdataSectionHandler m_endCdataSectionHandler; 699 XML_DefaultHandler m_defaultHandler; 700 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 701 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 702 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 703 XML_NotationDeclHandler m_notationDeclHandler; 704 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 705 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 706 XML_NotStandaloneHandler m_notStandaloneHandler; 707 XML_ExternalEntityRefHandler m_externalEntityRefHandler; 708 XML_Parser m_externalEntityRefHandlerArg; 709 XML_SkippedEntityHandler m_skippedEntityHandler; 710 XML_UnknownEncodingHandler m_unknownEncodingHandler; 711 XML_ElementDeclHandler m_elementDeclHandler; 712 
XML_AttlistDeclHandler m_attlistDeclHandler; 713 XML_EntityDeclHandler m_entityDeclHandler; 714 XML_XmlDeclHandler m_xmlDeclHandler; 715 const ENCODING *m_encoding; 716 INIT_ENCODING m_initEncoding; 717 const ENCODING *m_internalEncoding; 718 const XML_Char *m_protocolEncodingName; 719 XML_Bool m_ns; 720 XML_Bool m_ns_triplets; 721 void *m_unknownEncodingMem; 722 void *m_unknownEncodingData; 723 void *m_unknownEncodingHandlerData; 724 void(XMLCALL *m_unknownEncodingRelease)(void *); 725 PROLOG_STATE m_prologState; 726 Processor *m_processor; 727 enum XML_Error m_errorCode; 728 const char *m_eventPtr; 729 const char *m_eventEndPtr; 730 const char *m_positionPtr; 731 OPEN_INTERNAL_ENTITY *m_openInternalEntities; 732 OPEN_INTERNAL_ENTITY *m_freeInternalEntities; 733 OPEN_INTERNAL_ENTITY *m_openAttributeEntities; 734 OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; 735 OPEN_INTERNAL_ENTITY *m_openValueEntities; 736 OPEN_INTERNAL_ENTITY *m_freeValueEntities; 737 XML_Bool m_defaultExpandInternalEntities; 738 int m_tagLevel; 739 ENTITY *m_declEntity; 740 const XML_Char *m_doctypeName; 741 const XML_Char *m_doctypeSysid; 742 const XML_Char *m_doctypePubid; 743 const XML_Char *m_declAttributeType; 744 const XML_Char *m_declNotationName; 745 const XML_Char *m_declNotationPublicId; 746 ELEMENT_TYPE *m_declElementType; 747 ATTRIBUTE_ID *m_declAttributeId; 748 XML_Bool m_declAttributeIsCdata; 749 XML_Bool m_declAttributeIsId; 750 DTD *m_dtd; 751 const XML_Char *m_curBase; 752 TAG *m_tagStack; 753 TAG *m_freeTagList; 754 BINDING *m_inheritedBindings; 755 BINDING *m_freeBindingList; 756 int m_attsSize; 757 int m_nSpecifiedAtts; 758 int m_idAttIndex; 759 ATTRIBUTE *m_atts; 760 NS_ATT *m_nsAtts; 761 unsigned long m_nsAttsVersion; 762 unsigned char m_nsAttsPower; 763 #ifdef XML_ATTR_INFO 764 XML_AttrInfo *m_attInfo; 765 #endif 766 POSITION m_position; 767 STRING_POOL m_tempPool; 768 STRING_POOL m_temp2Pool; 769 char *m_groupConnector; 770 unsigned int m_groupSize; 771 XML_Char 
m_namespaceSeparator; 772 XML_Parser m_parentParser; 773 XML_ParsingStatus m_parsingStatus; 774 #ifdef XML_DTD 775 XML_Bool m_isParamEntity; 776 XML_Bool m_useForeignDTD; 777 enum XML_ParamEntityParsing m_paramEntityParsing; 778 #endif 779 unsigned long m_hash_secret_salt; 780 #if XML_GE == 1 781 ACCOUNTING m_accounting; 782 MALLOC_TRACKER m_alloc_tracker; 783 ENTITY_STATS m_entity_stats; 784 #endif 785 XML_Bool m_reenter; 786 }; 787 788 #if XML_GE == 1 789 # define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) 790 # define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) 791 # define FREE(parser, p) (expat_free((parser), (p), __LINE__)) 792 #else 793 # define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 794 # define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 795 # define FREE(parser, p) (parser->m_mem.free_fcn((p))) 796 #endif 797 798 #if XML_GE == 1 799 static void 800 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, 801 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { 802 // NOTE: This can be +infinity or -nan 803 const float amplification 804 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; 805 fprintf( 806 stderr, 807 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( 808 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", 809 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator, 810 absDiff, newTotal, peakTotal, (double)amplification, sourceLine); 811 } 812 813 static bool 814 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, 815 int sourceLine) { 816 assert(rootParser != NULL); 817 assert(increase > 0); 818 819 XmlBigCount newTotal = 0; 820 bool tolerable = true; 821 822 // Detect integer overflow 823 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { 824 tolerable = false; 825 } else { 
826 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; 827 828 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { 829 assert(newTotal > 0); 830 // NOTE: This can be +infinity when dividing by zero but not -nan 831 const float amplification 832 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; 833 if (amplification 834 > rootParser->m_alloc_tracker.maximumAmplificationFactor) { 835 tolerable = false; 836 } 837 } 838 } 839 840 if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { 841 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); 842 } 843 844 return tolerable; 845 } 846 847 # if defined(XML_TESTING) 848 void * 849 # else 850 static void * 851 # endif 852 expat_malloc(XML_Parser parser, size_t size, int sourceLine) { 853 // Detect integer overflow 854 if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { 855 return NULL; 856 } 857 858 const XML_Parser rootParser = getRootParserOf(parser, NULL); 859 assert(rootParser->m_parentParser == NULL); 860 861 const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; 862 863 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated 864 < bytesToAllocate) { 865 return NULL; // i.e. signal integer overflow as out-of-memory 866 } 867 868 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, 869 sourceLine)) { 870 return NULL; // i.e. 
signal violation as out-of-memory 871 } 872 873 // Actually allocate 874 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); 875 876 if (mallocedPtr == NULL) { 877 return NULL; 878 } 879 880 // Update in-block recorded size 881 *(size_t *)mallocedPtr = size; 882 883 // Update accounting 884 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; 885 886 // Report as needed 887 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 888 if (rootParser->m_alloc_tracker.bytesAllocated 889 > rootParser->m_alloc_tracker.peakBytesAllocated) { 890 rootParser->m_alloc_tracker.peakBytesAllocated 891 = rootParser->m_alloc_tracker.bytesAllocated; 892 } 893 expat_heap_stat(rootParser, '+', bytesToAllocate, 894 rootParser->m_alloc_tracker.bytesAllocated, 895 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 896 } 897 898 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 899 } 900 901 # if defined(XML_TESTING) 902 void 903 # else 904 static void 905 # endif 906 expat_free(XML_Parser parser, void *ptr, int sourceLine) { 907 assert(parser != NULL); 908 909 if (ptr == NULL) { 910 return; 911 } 912 913 const XML_Parser rootParser = getRootParserOf(parser, NULL); 914 assert(rootParser->m_parentParser == NULL); 915 916 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and 917 // the original pointer returned by malloc/realloc 918 void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 919 const size_t bytesAllocated 920 = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr; 921 922 // Update accounting 923 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); 924 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; 925 926 // Report as needed 927 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 928 expat_heap_stat(rootParser, '-', bytesAllocated, 929 rootParser->m_alloc_tracker.bytesAllocated, 930 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 931 } 932 933 
// NOTE: This may be freeing rootParser, so freeing has to come last 934 parser->m_mem.free_fcn(mallocedPtr); 935 } 936 937 # if defined(XML_TESTING) 938 void * 939 # else 940 static void * 941 # endif 942 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { 943 assert(parser != NULL); 944 945 if (ptr == NULL) { 946 return expat_malloc(parser, size, sourceLine); 947 } 948 949 if (size == 0) { 950 expat_free(parser, ptr, sourceLine); 951 return NULL; 952 } 953 954 const XML_Parser rootParser = getRootParserOf(parser, NULL); 955 assert(rootParser->m_parentParser == NULL); 956 957 // Extract original size (to the eyes of the caller) and the original 958 // pointer returned by malloc/realloc 959 void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 960 const size_t prevSize = *(size_t *)mallocedPtr; 961 962 // Classify upcoming change 963 const bool isIncrease = (size > prevSize); 964 const size_t absDiff 965 = (size > prevSize) ? (size - prevSize) : (prevSize - size); 966 967 // Ask for permission from accounting 968 if (isIncrease) { 969 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { 970 return NULL; // i.e. signal violation as out-of-memory 971 } 972 } 973 974 // NOTE: Integer overflow detection has already been done for us 975 // by expat_heap_increase_tolerable(..) above 976 assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); 977 978 // Actually allocate 979 mallocedPtr = parser->m_mem.realloc_fcn( 980 mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); 981 982 if (mallocedPtr == NULL) { 983 return NULL; 984 } 985 986 // Update accounting 987 if (isIncrease) { 988 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated 989 >= absDiff); 990 rootParser->m_alloc_tracker.bytesAllocated += absDiff; 991 } else { // i.e. 
decrease 992 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); 993 rootParser->m_alloc_tracker.bytesAllocated -= absDiff; 994 } 995 996 // Report as needed 997 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 998 if (rootParser->m_alloc_tracker.bytesAllocated 999 > rootParser->m_alloc_tracker.peakBytesAllocated) { 1000 rootParser->m_alloc_tracker.peakBytesAllocated 1001 = rootParser->m_alloc_tracker.bytesAllocated; 1002 } 1003 expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff, 1004 rootParser->m_alloc_tracker.bytesAllocated, 1005 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 1006 } 1007 1008 // Update in-block recorded size 1009 *(size_t *)mallocedPtr = size; 1010 1011 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 1012 } 1013 #endif // XML_GE == 1 1014 1015 XML_Parser XMLCALL 1016 XML_ParserCreate(const XML_Char *encodingName) { 1017 return XML_ParserCreate_MM(encodingName, NULL, NULL); 1018 } 1019 1020 XML_Parser XMLCALL 1021 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { 1022 XML_Char tmp[2] = {nsSep, 0}; 1023 return XML_ParserCreate_MM(encodingName, NULL, tmp); 1024 } 1025 1026 // "xml=http://www.w3.org/XML/1998/namespace" 1027 static const XML_Char implicitContext[] 1028 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, 1029 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 1030 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, 1031 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 1032 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, 1033 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, 1034 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, 1035 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, 1036 '\0'}; 1037 1038 /* To avoid warnings about unused functions: */ 1039 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) 1040 1041 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 1042 1043 /* Obtain entropy on Linux 3.17+ */ 1044 static int 1045 writeRandomBytes_getrandom_nonblock(void *target, size_t count) { 1046 int success = 0; /* full count bytes written? */ 1047 size_t bytesWrittenTotal = 0; 1048 const unsigned int getrandomFlags = GRND_NONBLOCK; 1049 1050 do { 1051 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 1052 const size_t bytesToWrite = count - bytesWrittenTotal; 1053 1054 assert(bytesToWrite <= INT_MAX); 1055 1056 const int bytesWrittenMore = 1057 # if defined(HAVE_GETRANDOM) 1058 (int)getrandom(currentTarget, bytesToWrite, getrandomFlags); 1059 # else 1060 (int)syscall(SYS_getrandom, currentTarget, bytesToWrite, 1061 getrandomFlags); 1062 # endif 1063 1064 if (bytesWrittenMore > 0) { 1065 bytesWrittenTotal += bytesWrittenMore; 1066 if (bytesWrittenTotal >= count) 1067 success = 1; 1068 } 1069 } while (! success && (errno == EINTR)); 1070 1071 return success; 1072 } 1073 1074 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 1075 1076 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) 1077 1078 /* Extract entropy from /dev/urandom */ 1079 static int 1080 writeRandomBytes_dev_urandom(void *target, size_t count) { 1081 int success = 0; /* full count bytes written? */ 1082 size_t bytesWrittenTotal = 0; 1083 1084 const int fd = open("/dev/urandom", O_RDONLY); 1085 if (fd < 0) { 1086 return 0; 1087 } 1088 1089 do { 1090 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 1091 const size_t bytesToWrite = count - bytesWrittenTotal; 1092 1093 const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); 1094 1095 if (bytesWrittenMore > 0) { 1096 bytesWrittenTotal += bytesWrittenMore; 1097 if (bytesWrittenTotal >= count) 1098 success = 1; 1099 } 1100 } while (! 
success && (errno == EINTR)); 1101 1102 close(fd); 1103 return success; 1104 } 1105 1106 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 1107 1108 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 1109 1110 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) 1111 1112 static void 1113 writeRandomBytes_arc4random(void *target, size_t count) { 1114 size_t bytesWrittenTotal = 0; 1115 1116 while (bytesWrittenTotal < count) { 1117 const uint32_t random32 = arc4random(); 1118 size_t i = 0; 1119 1120 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 1121 i++, bytesWrittenTotal++) { 1122 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 1123 ((uint8_t *)target)[bytesWrittenTotal] = random8; 1124 } 1125 } 1126 } 1127 1128 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ 1129 1130 #ifdef _WIN32 1131 1132 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), 1133 as it didn't declare it in its header prior to version 5.3.0 of its 1134 runtime package (mingwrt, containing stdlib.h). The upstream fix 1135 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ 1136 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ 1137 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) 1138 __declspec(dllimport) int rand_s(unsigned int *); 1139 # endif 1140 1141 /* Obtain entropy on Windows using the rand_s() function which 1142 * generates cryptographically secure random numbers. Internally it 1143 * uses RtlGenRandom API which is present in Windows XP and later. 
1144 */ 1145 static int 1146 writeRandomBytes_rand_s(void *target, size_t count) { 1147 size_t bytesWrittenTotal = 0; 1148 1149 while (bytesWrittenTotal < count) { 1150 unsigned int random32 = 0; 1151 size_t i = 0; 1152 1153 if (rand_s(&random32)) 1154 return 0; /* failure */ 1155 1156 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 1157 i++, bytesWrittenTotal++) { 1158 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 1159 ((uint8_t *)target)[bytesWrittenTotal] = random8; 1160 } 1161 } 1162 return 1; /* success */ 1163 } 1164 1165 #endif /* _WIN32 */ 1166 1167 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) 1168 1169 static unsigned long 1170 gather_time_entropy(void) { 1171 # ifdef _WIN32 1172 FILETIME ft; 1173 GetSystemTimeAsFileTime(&ft); /* never fails */ 1174 return ft.dwHighDateTime ^ ft.dwLowDateTime; 1175 # else 1176 struct timeval tv; 1177 int gettimeofday_res; 1178 1179 gettimeofday_res = gettimeofday(&tv, NULL); 1180 1181 # if defined(NDEBUG) 1182 (void)gettimeofday_res; 1183 # else 1184 assert(gettimeofday_res == 0); 1185 # endif /* defined(NDEBUG) */ 1186 1187 /* Microseconds time is <20 bits entropy */ 1188 return tv.tv_usec; 1189 # endif 1190 } 1191 1192 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! 
/* Returns entropy unchanged.  When getDebugLevel("EXPAT_ENTROPY_DEBUG", 0)
   yields at least 1, the value and the label naming its source are first
   printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
    fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
            (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy));
  }
  return entropy;
}

/* Produces a fresh salt for the hash tables.

   Which provider is used is decided at compile time: arc4random_buf or
   arc4random when available; otherwise rand_s (Windows) or getrandom, then
   /dev/urandom when XML_DEV_URANDOM is defined, and finally a value mixed
   from gather_time_entropy() and getpid().  The parser argument is unused. */
static unsigned long
generate_hash_secret_salt(XML_Parser parser) {
  unsigned long entropy;
  (void)parser;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Process ID is 0 bits entropy if attacker has local access */
  entropy = gather_time_entropy() ^ getpid();

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
  if (sizeof(unsigned long) == 4) {
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
                         entropy * (unsigned long)2305843009213693951ULL);
  }
#endif
}

/* Returns the hash salt stored on the root parser of the given parser. */
static unsigned long
get_hash_secret_salt(XML_Parser parser) {
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(! rootParser->m_parentParser);

  return rootParser->m_hash_secret_salt;
}

/* Runs parser->m_processor over [start, end) and stores in *endPtr where
   processing stopped.

   When reparse deferral is enabled and this is not the final buffer, the
   call may be skipped entirely (returning XML_ERROR_NONE with
   *endPtr == start) unless the amount of data has at least doubled since the
   last attempt that consumed nothing, or the last buffer request exceeds the
   space still available in the buffer.

   The processor is re-invoked in a loop for as long as it leaves m_reenter
   set, parsing status is still XML_PARSING and no error was returned. */
static enum XML_Error
callProcessor(XML_Parser parser, const char *start, const char *end,
              const char **endPtr) {
  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);

  if (parser->m_reparseDeferralEnabled
      && ! parser->m_parsingStatus.finalBuffer) {
    // Heuristic: don't try to parse a partial token again until the amount of
    // available data has increased significantly.
    const size_t had_before = parser->m_partialTokenBytesBefore;
    // ...but *do* try anyway if we're close to causing a reallocation.
    size_t available_buffer
        = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
#if XML_CONTEXT_BYTES > 0
    available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
#endif
    available_buffer
        += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
    const bool enough
        = (have_now >= 2 * had_before)
          || ((size_t)parser->m_lastBufferRequestSize > available_buffer);

    if (! enough) {
      *endPtr = start; // callers may expect this to be set
      return XML_ERROR_NONE;
    }
  }
#if defined(XML_TESTING)
  g_bytesScanned += (unsigned)have_now;
#endif
  // Run in a loop to eliminate dangerous recursion depths
  enum XML_Error ret;
  *endPtr = start;
  while (1) {
    // Use endPtr as the new start in each iteration, since it will
    // be set to the next start point by m_processor.
    ret = parser->m_processor(parser, *endPtr, end, endPtr);

    // Make parsing status (and in particular XML_SUSPENDED) take
    // precedence over re-enter flag when they disagree
    if (parser->m_parsingStatus.parsing != XML_PARSING) {
      parser->m_reenter = XML_FALSE;
    }

    if (! parser->m_reenter) {
      break;
    }

    parser->m_reenter = XML_FALSE;
    if (ret != XML_ERROR_NONE)
      return ret;
  }

  if (ret == XML_ERROR_NONE) {
    // if we consumed nothing, remember what we had on this parse attempt.
    if (*endPtr == start) {
      parser->m_partialTokenBytesBefore = have_now;
    } else {
      parser->m_partialTokenBytesBefore = 0;
    }
  }
  return ret;
}

/* Prepares a parser for its first parse: generates the hash salt if it is
   still 0 and, for namespace-aware parsers, installs implicitContext.
   Returns the result of setContext() in the namespace case, XML_TRUE
   otherwise. */
static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser) {
  /* hash functions must be initialized before setContext() is called */
  if (parser->m_hash_secret_salt == 0)
    parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
  if (parser->m_ns) {
    /* implicit context only set for root parser, since child
       parsers (i.e. external entity parsers) will inherit it
    */
    return setContext(parser, implicitContext);
  }
  return XML_TRUE;
}

/* Creates a root parser (no shared DTD, no parent parser) by delegating to
   parserCreate; memsuite and nameSep are passed through unchanged. */
XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char *encodingName,
                    const XML_Memory_Handling_Suite *memsuite,
                    const XML_Char *nameSep) {
  return parserCreate(encodingName, memsuite, nameSep, NULL, NULL);
}

/* Allocates and initializes one XML_ParserStruct.

   encodingName - copied by parserInit when non-NULL; if that copy fails the
                  half-built parser is destroyed and NULL is returned
   memsuite     - when non-NULL its three functions are used for the
                  allocation and stored in parser->m_mem; otherwise
                  malloc/realloc/free are stored
   nameSep      - when non-NULL, namespace processing is switched on with
                  *nameSep as the separator
   dtd          - when non-NULL it becomes parser->m_dtd (shared with the
                  caller); otherwise a new one comes from dtdCreate
   parentParser - NULL for a root parser

   With XML_GE == 1 the struct is preceded by a size_t size record plus
   EXPAT_MALLOC_PADDING (the layout expat_free expects) and the allocation is
   recorded in the root parser's allocation tracker; for child parsers
   expat_heap_increase_tolerable is consulted before allocating.

   Returns the new parser, or NULL if any allocation fails or is refused. */
static XML_Parser
parserCreate(const XML_Char *encodingName,
             const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
             DTD *dtd, XML_Parser parentParser) {
  XML_Parser parser = NULL;

#if XML_GE == 1
  const size_t increase
      = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);

  if (parentParser != NULL) {
    const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
    if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
      return NULL;
    }
  }
#else
  UNUSED_P(parentParser);
#endif

  /* NOTE: the opening "if (... != NULL) {" of each branch below lives inside
     the #if/#else; the shared closing brace follows the #endif */
  if (memsuite) {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser
        = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
                               + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = memsuite->malloc_fcn;
      mtemp->realloc_fcn = memsuite->realloc_fcn;
      mtemp->free_fcn = memsuite->free_fcn;
    }
  } else {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
                                       + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = malloc(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = malloc;
      mtemp->realloc_fcn = realloc;
      mtemp->free_fcn = free;
    }
  } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0

  if (! parser)
    return parser;

#if XML_GE == 1
  // Initialize .m_alloc_tracker
  memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
  if (parentParser == NULL) {
    parser->m_alloc_tracker.debugLevel
        = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
    parser->m_alloc_tracker.maximumAmplificationFactor
        = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
    parser->m_alloc_tracker.activationThresholdBytes
        = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;

    // NOTE: This initialization needs to come this early because these fields
    //       are read by allocation tracking code
    parser->m_parentParser = NULL;
    parser->m_accounting.countBytesDirect = 0;
  } else {
    parser->m_parentParser = parentParser;
  }

  // Record XML_ParserStruct allocation we did a few lines up before
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(rootParser->m_parentParser == NULL);
  assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
  rootParser->m_alloc_tracker.bytesAllocated += increase;

  // Report on allocation
  if (rootParser->m_alloc_tracker.debugLevel >= 2) {
    if (rootParser->m_alloc_tracker.bytesAllocated
        > rootParser->m_alloc_tracker.peakBytesAllocated) {
      rootParser->m_alloc_tracker.peakBytesAllocated
          = rootParser->m_alloc_tracker.bytesAllocated;
    }

    expat_heap_stat(rootParser, '+', increase,
                    rootParser->m_alloc_tracker.bytesAllocated,
                    rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
  }
#else
  parser->m_parentParser = NULL;
#endif // XML_GE == 1

  parser->m_buffer = NULL;
  parser->m_bufferLim = NULL;

  /* From here on every failure path releases what was allocated so far and
     finally the parser struct itself */
  parser->m_attsSize = INIT_ATTS_SIZE;
  parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
  if (parser->m_atts == NULL) {
    FREE(parser, parser);
    return NULL;
  }
#ifdef XML_ATTR_INFO
  parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
  if (parser->m_attInfo == NULL) {
    FREE(parser, parser->m_atts);
    FREE(parser, parser);
    return NULL;
  }
#endif
  parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
  if (parser->m_dataBuf == NULL) {
    FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
    FREE(parser, parser->m_attInfo);
#endif
    FREE(parser, parser);
    return NULL;
  }
  parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;

  if (dtd)
    parser->m_dtd = dtd;
  else {
    parser->m_dtd = dtdCreate(parser);
    if (parser->m_dtd == NULL) {
      FREE(parser, parser->m_dataBuf);
      FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
      FREE(parser, parser->m_attInfo);
#endif
      FREE(parser, parser);
      return NULL;
    }
  }

  parser->m_freeBindingList = NULL;
  parser->m_freeTagList = NULL;
  parser->m_freeInternalEntities = NULL;
  parser->m_freeAttributeEntities = NULL;
  parser->m_freeValueEntities = NULL;

  parser->m_groupSize = 0;
  parser->m_groupConnector = NULL;

  parser->m_unknownEncodingHandler = NULL;
  parser->m_unknownEncodingHandlerData = NULL;

  parser->m_namespaceSeparator = ASCII_EXCL;
  parser->m_ns = XML_FALSE;
  parser->m_ns_triplets = XML_FALSE;

  parser->m_nsAtts = NULL;
  parser->m_nsAttsVersion = 0;
  parser->m_nsAttsPower = 0;

  parser->m_protocolEncodingName = NULL;

  poolInit(&parser->m_tempPool, parser);
  poolInit(&parser->m_temp2Pool, parser);
  parserInit(parser, encodingName);

  /* parserInit leaves m_protocolEncodingName NULL when copying failed */
  if (encodingName && ! parser->m_protocolEncodingName) {
    if (dtd) {
      // We need to stop the upcoming call to XML_ParserFree from happily
      // destroying parser->m_dtd because the DTD is shared with the parent
      // parser and the only guard that keeps XML_ParserFree from destroying
      // parser->m_dtd is parser->m_isParamEntity but it will be set to
      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
      parser->m_dtd = NULL;
    }
    XML_ParserFree(parser);
    return NULL;
  }

  if (nameSep) {
    parser->m_ns = XML_TRUE;
    parser->m_internalEncoding = XmlGetInternalEncodingNS();
    parser->m_namespaceSeparator = *nameSep;
  } else {
    parser->m_internalEncoding = XmlGetInternalEncoding();
  }

  return parser;
}

/* Puts all per-document parser state into its initial values; used by both
   parserCreate and XML_ParserReset.

   When encodingName is non-NULL it is copied with copyString and stored in
   m_protocolEncodingName; the field is not touched otherwise, so callers
   clear it beforehand.  All handlers set here are cleared, and the hash salt
   is reset to 0 so that startParsing generates a new one. */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, parser);
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;
  parser->m_partialTokenBytesBefore = 0;
  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
  parser->m_lastBufferRequestSize = 0;
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;
  parser->m_openInternalEntities = NULL;
  parser->m_openAttributeEntities = NULL;
  parser->m_openValueEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;
  parser->m_parsingStatus.parsing = XML_INITIALIZED;
  // Reentry can only be triggered inside m_processor calls
  parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  parser->m_hash_secret_salt = 0;

#if XML_GE == 1
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}

/* moves list of bindings to m_freeBindingList */
/* Each binding is pushed onto the front of the free list, so the moved
   bindings end up there in reverse order. */
static void FASTCALL
moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
  while (bindings) {
    BINDING *b = bindings;
    bindings = bindings->nextTagBinding;
    b->nextTagBinding = parser->m_freeBindingList;
    parser->m_freeBindingList = b;
  }
}

/* Resets a root parser so that it can parse another document.

   Open tags, bindings and open entities are moved to the respective free
   lists for reuse, unknown-encoding resources are released, both temporary
   pools are cleared, and then parserInit and dtdReset re-initialize the
   state.  encodingName is handed to parserInit.

   Returns XML_FALSE (doing nothing) when parser is NULL or has a parent
   parser, XML_TRUE otherwise. */
XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
  TAG *tStk;
  OPEN_INTERNAL_ENTITY *openEntityList;

  if (parser == NULL)
    return XML_FALSE;

  if (parser->m_parentParser)
    return XML_FALSE;
  /* move m_tagStack to m_freeTagList */
  tStk = parser->m_tagStack;
  while (tStk) {
    TAG *tag = tStk;
    tStk = tStk->parent;
    tag->parent = parser->m_freeTagList;
    moveToFreeBindingList(parser, tag->bindings);
    tag->bindings = NULL;
    parser->m_freeTagList = tag;
  }
  /* move m_openInternalEntities to m_freeInternalEntities */
  openEntityList = parser->m_openInternalEntities;
  while (openEntityList) {
    OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
    openEntityList = openEntity->next;
    openEntity->next = parser->m_freeInternalEntities;
    parser->m_freeInternalEntities = openEntity;
  }
  /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
   * for attributes) */
  openEntityList = parser->m_openAttributeEntities;
  while (openEntityList) {
    OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
    openEntityList = openEntity->next;
    openEntity->next = parser->m_freeAttributeEntities;
    parser->m_freeAttributeEntities = openEntity;
  }
  /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
   * for value entities) */
  openEntityList = parser->m_openValueEntities;
  while (openEntityList) {
    OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
    openEntityList = openEntity->next;
    openEntity->next = parser->m_freeValueEntities;
    parser->m_freeValueEntities = openEntity;
  }
  moveToFreeBindingList(parser, parser->m_inheritedBindings);
  FREE(parser, parser->m_unknownEncodingMem);
  if (parser->m_unknownEncodingRelease)
    parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
  poolClear(&parser->m_tempPool);
  poolClear(&parser->m_temp2Pool);
  FREE(parser, (void *)parser->m_protocolEncodingName);
  /* parserInit only assigns the field for a non-NULL encodingName, so clear
     the freed pointer here */
  parser->m_protocolEncodingName = NULL;
  parserInit(parser, encodingName);
  dtdReset(parser->m_dtd, parser);
  return XML_TRUE;
}

/* Reports whether the parser is in the middle of a parse: XML_TRUE for the
   states XML_PARSING and XML_SUSPENDED, XML_FALSE for every other state. */
static XML_Bool
parserBusy(XML_Parser parser) {
  switch (parser->m_parsingStatus.parsing) {
  case XML_PARSING:
  case XML_SUSPENDED:
    return XML_TRUE;
  case XML_INITIALIZED:
  case XML_FINISHED:
  default:
    return XML_FALSE;
  }
}

/* Replaces the protocol encoding name of the parser.

   The previous name is freed in any case.  A NULL encodingName clears the
   setting; otherwise a copy made with copyString is stored.

   Returns XML_STATUS_ERROR when parser is NULL, when the parser is busy
   (see parserBusy) or when copying the name fails; XML_STATUS_OK otherwise. */
enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
  if (parser == NULL)
    return XML_STATUS_ERROR;
  /* Block after XML_Parse()/XML_ParseBuffer() has been called.
     XXX There's no way for the caller to determine which of the
     XXX possible error cases caused the XML_STATUS_ERROR return.
  */
  if (parserBusy(parser))
    return XML_STATUS_ERROR;

  /* Get rid of any previous encoding name */
  FREE(parser, (void *)parser->m_protocolEncodingName);

  if (encodingName == NULL)
    /* No new encoding name */
    parser->m_protocolEncodingName = NULL;
  else {
    /* Copy the new encoding name into allocated memory */
    parser->m_protocolEncodingName = copyString(encodingName, parser);
    if (! parser->m_protocolEncodingName)
      return XML_STATUS_ERROR;
  }
  return XML_STATUS_OK;
}

/* Creates a child parser of oldParser for parsing an external entity.

   The child is created with oldParser's memory suite and namespace setting
   and inherits its handlers, user data, hash salt, reparse deferral setting
   and several flags.

   context non-NULL (or XML_DTD undefined): the child gets its own DTD filled
   by dtdCopy, the context is applied via setContext, and processing starts
   with externalEntityInitProcessor.
   context NULL with XML_DTD defined: the child shares oldParser's DTD, is
   marked as parameter entity parser and starts with
   externalParEntInitProcessor.

   Returns the new parser, or NULL when oldParser is NULL or creating or
   preparing the child fails. */
XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
                               const XML_Char *encodingName) {
  XML_Parser parser = oldParser;
  DTD *newDtd = NULL;
  DTD *oldDtd;
  XML_StartElementHandler oldStartElementHandler;
  XML_EndElementHandler oldEndElementHandler;
  XML_CharacterDataHandler oldCharacterDataHandler;
  XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
  XML_CommentHandler oldCommentHandler;
  XML_StartCdataSectionHandler oldStartCdataSectionHandler;
  XML_EndCdataSectionHandler oldEndCdataSectionHandler;
  XML_DefaultHandler oldDefaultHandler;
  XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
  XML_NotationDeclHandler oldNotationDeclHandler;
  XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
  XML_NotStandaloneHandler oldNotStandaloneHandler;
  XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
  XML_SkippedEntityHandler oldSkippedEntityHandler;
  XML_UnknownEncodingHandler oldUnknownEncodingHandler;
  XML_ElementDeclHandler oldElementDeclHandler;
  XML_AttlistDeclHandler oldAttlistDeclHandler;
  XML_EntityDeclHandler oldEntityDeclHandler;
  XML_XmlDeclHandler oldXmlDeclHandler;
  ELEMENT_TYPE *oldDeclElementType;

  void *oldUserData;
  void *oldHandlerArg;
  XML_Bool oldDefaultExpandInternalEntities;
  XML_Parser oldExternalEntityRefHandlerArg;
#ifdef XML_DTD
  enum XML_ParamEntityParsing oldParamEntityParsing;
  int oldInEntityValue;
#endif
  XML_Bool oldns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
  */
  unsigned long oldhash_secret_salt;
  XML_Bool oldReparseDeferralEnabled;

  /* Validate the oldParser parameter before we pull everything out of it */
  if (oldParser == NULL)
    return NULL;

  /* Stash the original parser contents on the stack */
  oldDtd = parser->m_dtd;
  oldStartElementHandler = parser->m_startElementHandler;
  oldEndElementHandler = parser->m_endElementHandler;
  oldCharacterDataHandler = parser->m_characterDataHandler;
  oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
  oldCommentHandler = parser->m_commentHandler;
  oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
  oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
  oldDefaultHandler = parser->m_defaultHandler;
  oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
  oldNotationDeclHandler = parser->m_notationDeclHandler;
  oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
  oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
  oldNotStandaloneHandler = parser->m_notStandaloneHandler;
  oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
  oldSkippedEntityHandler = parser->m_skippedEntityHandler;
  oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
  oldElementDeclHandler = parser->m_elementDeclHandler;
  oldAttlistDeclHandler = parser->m_attlistDeclHandler;
  oldEntityDeclHandler = parser->m_entityDeclHandler;
  oldXmlDeclHandler = parser->m_xmlDeclHandler;
  oldDeclElementType = parser->m_declElementType;

  oldUserData = parser->m_userData;
  oldHandlerArg = parser->m_handlerArg;
  oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
  oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
#ifdef XML_DTD
  oldParamEntityParsing = parser->m_paramEntityParsing;
  oldInEntityValue = parser->m_prologState.inEntityValue;
#endif
  oldns_triplets = parser->m_ns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
  */
  oldhash_secret_salt = parser->m_hash_secret_salt;
  oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;

#ifdef XML_DTD
  /* No context means parameter entity: the child then shares the old DTD */
  if (! context)
    newDtd = oldDtd;
#endif /* XML_DTD */

  /* Note that the magical uses of the pre-processor to make field
     access look more like C++ require that `parser' be overwritten
     here.  This makes this function more painful to follow than it
     would be otherwise.
  */
  if (parser->m_ns) {
    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
    parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser);
  } else {
    parser
        = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser);
  }

  if (! parser)
    return NULL;

  /* From here on `parser' is the new child parser */
  parser->m_startElementHandler = oldStartElementHandler;
  parser->m_endElementHandler = oldEndElementHandler;
  parser->m_characterDataHandler = oldCharacterDataHandler;
  parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
  parser->m_commentHandler = oldCommentHandler;
  parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
  parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
  parser->m_defaultHandler = oldDefaultHandler;
  parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
  parser->m_notationDeclHandler = oldNotationDeclHandler;
  parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
  parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
  parser->m_notStandaloneHandler = oldNotStandaloneHandler;
  parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
  parser->m_skippedEntityHandler = oldSkippedEntityHandler;
  parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
  parser->m_elementDeclHandler = oldElementDeclHandler;
  parser->m_attlistDeclHandler = oldAttlistDeclHandler;
  parser->m_entityDeclHandler = oldEntityDeclHandler;
  parser->m_xmlDeclHandler = oldXmlDeclHandler;
  parser->m_declElementType = oldDeclElementType;
  parser->m_userData = oldUserData;
  /* Handlers receive the user data if the old parser passed its user data,
     and the (new) parser itself otherwise */
  if (oldUserData == oldHandlerArg)
    parser->m_handlerArg = parser->m_userData;
  else
    parser->m_handlerArg = parser;
  /* Keep the default set by parserInit (the new parser itself) unless the
     old parser had an argument other than itself */
  if (oldExternalEntityRefHandlerArg != oldParser)
    parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
  parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
  parser->m_ns_triplets = oldns_triplets;
  parser->m_hash_secret_salt = oldhash_secret_salt;
  parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
  parser->m_parentParser = oldParser;
#ifdef XML_DTD
  parser->m_paramEntityParsing = oldParamEntityParsing;
  parser->m_prologState.inEntityValue = oldInEntityValue;
  if (context) {
#endif /* XML_DTD */
    if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser)
        || ! setContext(parser, context)) {
      XML_ParserFree(parser);
      return NULL;
    }
    parser->m_processor = externalEntityInitProcessor;
#ifdef XML_DTD
  } else {
    /* The DTD instance referenced by parser->m_dtd is shared between the
       document's root parser and external PE parsers, therefore one does not
       need to call setContext. In addition, one also *must* not call
       setContext, because this would overwrite existing prefix->binding
       pointers in parser->m_dtd with ones that get destroyed with the external
       PE parser. This would leave those prefixes with dangling pointers.
    */
    parser->m_isParamEntity = XML_TRUE;
    XmlPrologStateInitExternalEntity(&parser->m_prologState);
    parser->m_processor = externalParEntInitProcessor;
  }
#endif /* XML_DTD */
  return parser;
}

static void FASTCALL
destroyBindings(BINDING *bindings, XML_Parser parser) {
  for (;;) {
    BINDING *b = bindings;
    if (!
b) 1912 break; 1913 bindings = b->nextTagBinding; 1914 FREE(parser, b->uri); 1915 FREE(parser, b); 1916 } 1917 } 1918 1919 void XMLCALL 1920 XML_ParserFree(XML_Parser parser) { 1921 TAG *tagList; 1922 OPEN_INTERNAL_ENTITY *entityList; 1923 if (parser == NULL) 1924 return; 1925 /* free m_tagStack and m_freeTagList */ 1926 tagList = parser->m_tagStack; 1927 for (;;) { 1928 TAG *p; 1929 if (tagList == NULL) { 1930 if (parser->m_freeTagList == NULL) 1931 break; 1932 tagList = parser->m_freeTagList; 1933 parser->m_freeTagList = NULL; 1934 } 1935 p = tagList; 1936 tagList = tagList->parent; 1937 FREE(parser, p->buf); 1938 destroyBindings(p->bindings, parser); 1939 FREE(parser, p); 1940 } 1941 /* free m_openInternalEntities and m_freeInternalEntities */ 1942 entityList = parser->m_openInternalEntities; 1943 for (;;) { 1944 OPEN_INTERNAL_ENTITY *openEntity; 1945 if (entityList == NULL) { 1946 if (parser->m_freeInternalEntities == NULL) 1947 break; 1948 entityList = parser->m_freeInternalEntities; 1949 parser->m_freeInternalEntities = NULL; 1950 } 1951 openEntity = entityList; 1952 entityList = entityList->next; 1953 FREE(parser, openEntity); 1954 } 1955 /* free m_openAttributeEntities and m_freeAttributeEntities */ 1956 entityList = parser->m_openAttributeEntities; 1957 for (;;) { 1958 OPEN_INTERNAL_ENTITY *openEntity; 1959 if (entityList == NULL) { 1960 if (parser->m_freeAttributeEntities == NULL) 1961 break; 1962 entityList = parser->m_freeAttributeEntities; 1963 parser->m_freeAttributeEntities = NULL; 1964 } 1965 openEntity = entityList; 1966 entityList = entityList->next; 1967 FREE(parser, openEntity); 1968 } 1969 /* free m_openValueEntities and m_freeValueEntities */ 1970 entityList = parser->m_openValueEntities; 1971 for (;;) { 1972 OPEN_INTERNAL_ENTITY *openEntity; 1973 if (entityList == NULL) { 1974 if (parser->m_freeValueEntities == NULL) 1975 break; 1976 entityList = parser->m_freeValueEntities; 1977 parser->m_freeValueEntities = NULL; 1978 } 1979 openEntity = 
entityList; 1980 entityList = entityList->next; 1981 FREE(parser, openEntity); 1982 } 1983 destroyBindings(parser->m_freeBindingList, parser); 1984 destroyBindings(parser->m_inheritedBindings, parser); 1985 poolDestroy(&parser->m_tempPool); 1986 poolDestroy(&parser->m_temp2Pool); 1987 FREE(parser, (void *)parser->m_protocolEncodingName); 1988 #ifdef XML_DTD 1989 /* external parameter entity parsers share the DTD structure 1990 parser->m_dtd with the root parser, so we must not destroy it 1991 */ 1992 if (! parser->m_isParamEntity && parser->m_dtd) 1993 #else 1994 if (parser->m_dtd) 1995 #endif /* XML_DTD */ 1996 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser); 1997 FREE(parser, parser->m_atts); 1998 #ifdef XML_ATTR_INFO 1999 FREE(parser, parser->m_attInfo); 2000 #endif 2001 FREE(parser, parser->m_groupConnector); 2002 // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2003 // is not being allocated with MALLOC(..) but with plain 2004 // .malloc_fcn(..). 
2005 parser->m_mem.free_fcn(parser->m_buffer); 2006 FREE(parser, parser->m_dataBuf); 2007 FREE(parser, parser->m_nsAtts); 2008 FREE(parser, parser->m_unknownEncodingMem); 2009 if (parser->m_unknownEncodingRelease) 2010 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 2011 FREE(parser, parser); 2012 } 2013 2014 void XMLCALL 2015 XML_UseParserAsHandlerArg(XML_Parser parser) { 2016 if (parser != NULL) 2017 parser->m_handlerArg = parser; 2018 } 2019 2020 enum XML_Error XMLCALL 2021 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 2022 if (parser == NULL) 2023 return XML_ERROR_INVALID_ARGUMENT; 2024 #ifdef XML_DTD 2025 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2026 if (parserBusy(parser)) 2027 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 2028 parser->m_useForeignDTD = useDTD; 2029 return XML_ERROR_NONE; 2030 #else 2031 UNUSED_P(useDTD); 2032 return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 2033 #endif 2034 } 2035 2036 void XMLCALL 2037 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 2038 if (parser == NULL) 2039 return; 2040 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2041 if (parserBusy(parser)) 2042 return; 2043 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; 2044 } 2045 2046 void XMLCALL 2047 XML_SetUserData(XML_Parser parser, void *p) { 2048 if (parser == NULL) 2049 return; 2050 if (parser->m_handlerArg == parser->m_userData) 2051 parser->m_handlerArg = parser->m_userData = p; 2052 else 2053 parser->m_userData = p; 2054 } 2055 2056 enum XML_Status XMLCALL 2057 XML_SetBase(XML_Parser parser, const XML_Char *p) { 2058 if (parser == NULL) 2059 return XML_STATUS_ERROR; 2060 if (p) { 2061 p = poolCopyString(&parser->m_dtd->pool, p); 2062 if (! 
p) 2063 return XML_STATUS_ERROR; 2064 parser->m_curBase = p; 2065 } else 2066 parser->m_curBase = NULL; 2067 return XML_STATUS_OK; 2068 } 2069 2070 const XML_Char *XMLCALL 2071 XML_GetBase(XML_Parser parser) { 2072 if (parser == NULL) 2073 return NULL; 2074 return parser->m_curBase; 2075 } 2076 2077 int XMLCALL 2078 XML_GetSpecifiedAttributeCount(XML_Parser parser) { 2079 if (parser == NULL) 2080 return -1; 2081 return parser->m_nSpecifiedAtts; 2082 } 2083 2084 int XMLCALL 2085 XML_GetIdAttributeIndex(XML_Parser parser) { 2086 if (parser == NULL) 2087 return -1; 2088 return parser->m_idAttIndex; 2089 } 2090 2091 #ifdef XML_ATTR_INFO 2092 const XML_AttrInfo *XMLCALL 2093 XML_GetAttributeInfo(XML_Parser parser) { 2094 if (parser == NULL) 2095 return NULL; 2096 return parser->m_attInfo; 2097 } 2098 #endif 2099 2100 void XMLCALL 2101 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 2102 XML_EndElementHandler end) { 2103 if (parser == NULL) 2104 return; 2105 parser->m_startElementHandler = start; 2106 parser->m_endElementHandler = end; 2107 } 2108 2109 void XMLCALL 2110 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 2111 if (parser != NULL) 2112 parser->m_startElementHandler = start; 2113 } 2114 2115 void XMLCALL 2116 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 2117 if (parser != NULL) 2118 parser->m_endElementHandler = end; 2119 } 2120 2121 void XMLCALL 2122 XML_SetCharacterDataHandler(XML_Parser parser, 2123 XML_CharacterDataHandler handler) { 2124 if (parser != NULL) 2125 parser->m_characterDataHandler = handler; 2126 } 2127 2128 void XMLCALL 2129 XML_SetProcessingInstructionHandler(XML_Parser parser, 2130 XML_ProcessingInstructionHandler handler) { 2131 if (parser != NULL) 2132 parser->m_processingInstructionHandler = handler; 2133 } 2134 2135 void XMLCALL 2136 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 2137 if (parser != NULL) 2138 
parser->m_commentHandler = handler; 2139 } 2140 2141 void XMLCALL 2142 XML_SetCdataSectionHandler(XML_Parser parser, 2143 XML_StartCdataSectionHandler start, 2144 XML_EndCdataSectionHandler end) { 2145 if (parser == NULL) 2146 return; 2147 parser->m_startCdataSectionHandler = start; 2148 parser->m_endCdataSectionHandler = end; 2149 } 2150 2151 void XMLCALL 2152 XML_SetStartCdataSectionHandler(XML_Parser parser, 2153 XML_StartCdataSectionHandler start) { 2154 if (parser != NULL) 2155 parser->m_startCdataSectionHandler = start; 2156 } 2157 2158 void XMLCALL 2159 XML_SetEndCdataSectionHandler(XML_Parser parser, 2160 XML_EndCdataSectionHandler end) { 2161 if (parser != NULL) 2162 parser->m_endCdataSectionHandler = end; 2163 } 2164 2165 void XMLCALL 2166 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 2167 if (parser == NULL) 2168 return; 2169 parser->m_defaultHandler = handler; 2170 parser->m_defaultExpandInternalEntities = XML_FALSE; 2171 } 2172 2173 void XMLCALL 2174 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 2175 if (parser == NULL) 2176 return; 2177 parser->m_defaultHandler = handler; 2178 parser->m_defaultExpandInternalEntities = XML_TRUE; 2179 } 2180 2181 void XMLCALL 2182 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 2183 XML_EndDoctypeDeclHandler end) { 2184 if (parser == NULL) 2185 return; 2186 parser->m_startDoctypeDeclHandler = start; 2187 parser->m_endDoctypeDeclHandler = end; 2188 } 2189 2190 void XMLCALL 2191 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 2192 XML_StartDoctypeDeclHandler start) { 2193 if (parser != NULL) 2194 parser->m_startDoctypeDeclHandler = start; 2195 } 2196 2197 void XMLCALL 2198 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 2199 if (parser != NULL) 2200 parser->m_endDoctypeDeclHandler = end; 2201 } 2202 2203 void XMLCALL 2204 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 2205 
XML_UnparsedEntityDeclHandler handler) { 2206 if (parser != NULL) 2207 parser->m_unparsedEntityDeclHandler = handler; 2208 } 2209 2210 void XMLCALL 2211 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 2212 if (parser != NULL) 2213 parser->m_notationDeclHandler = handler; 2214 } 2215 2216 void XMLCALL 2217 XML_SetNamespaceDeclHandler(XML_Parser parser, 2218 XML_StartNamespaceDeclHandler start, 2219 XML_EndNamespaceDeclHandler end) { 2220 if (parser == NULL) 2221 return; 2222 parser->m_startNamespaceDeclHandler = start; 2223 parser->m_endNamespaceDeclHandler = end; 2224 } 2225 2226 void XMLCALL 2227 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 2228 XML_StartNamespaceDeclHandler start) { 2229 if (parser != NULL) 2230 parser->m_startNamespaceDeclHandler = start; 2231 } 2232 2233 void XMLCALL 2234 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 2235 XML_EndNamespaceDeclHandler end) { 2236 if (parser != NULL) 2237 parser->m_endNamespaceDeclHandler = end; 2238 } 2239 2240 void XMLCALL 2241 XML_SetNotStandaloneHandler(XML_Parser parser, 2242 XML_NotStandaloneHandler handler) { 2243 if (parser != NULL) 2244 parser->m_notStandaloneHandler = handler; 2245 } 2246 2247 void XMLCALL 2248 XML_SetExternalEntityRefHandler(XML_Parser parser, 2249 XML_ExternalEntityRefHandler handler) { 2250 if (parser != NULL) 2251 parser->m_externalEntityRefHandler = handler; 2252 } 2253 2254 void XMLCALL 2255 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { 2256 if (parser == NULL) 2257 return; 2258 if (arg) 2259 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 2260 else 2261 parser->m_externalEntityRefHandlerArg = parser; 2262 } 2263 2264 void XMLCALL 2265 XML_SetSkippedEntityHandler(XML_Parser parser, 2266 XML_SkippedEntityHandler handler) { 2267 if (parser != NULL) 2268 parser->m_skippedEntityHandler = handler; 2269 } 2270 2271 void XMLCALL 2272 XML_SetUnknownEncodingHandler(XML_Parser parser, 2273 XML_UnknownEncodingHandler 
handler, void *data) { 2274 if (parser == NULL) 2275 return; 2276 parser->m_unknownEncodingHandler = handler; 2277 parser->m_unknownEncodingHandlerData = data; 2278 } 2279 2280 void XMLCALL 2281 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 2282 if (parser != NULL) 2283 parser->m_elementDeclHandler = eldecl; 2284 } 2285 2286 void XMLCALL 2287 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 2288 if (parser != NULL) 2289 parser->m_attlistDeclHandler = attdecl; 2290 } 2291 2292 void XMLCALL 2293 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 2294 if (parser != NULL) 2295 parser->m_entityDeclHandler = handler; 2296 } 2297 2298 void XMLCALL 2299 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 2300 if (parser != NULL) 2301 parser->m_xmlDeclHandler = handler; 2302 } 2303 2304 int XMLCALL 2305 XML_SetParamEntityParsing(XML_Parser parser, 2306 enum XML_ParamEntityParsing peParsing) { 2307 if (parser == NULL) 2308 return 0; 2309 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2310 if (parserBusy(parser)) 2311 return 0; 2312 #ifdef XML_DTD 2313 parser->m_paramEntityParsing = peParsing; 2314 return 1; 2315 #else 2316 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 2317 #endif 2318 } 2319 2320 int XMLCALL 2321 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 2322 if (parser == NULL) 2323 return 0; 2324 2325 const XML_Parser rootParser = getRootParserOf(parser, NULL); 2326 assert(! 
/* Feed `len` bytes starting at `s` to the parser; a non-zero `isFinal`
   marks this as the last piece of input.

   Returns XML_STATUS_ERROR (with m_errorCode set whenever `parser` is not
   NULL) for invalid arguments, for a parser that is suspended or already
   finished, and for any failure reported by the processor or by the buffer
   management.  Otherwise returns XML_STATUS_OK or XML_STATUS_SUSPENDED.

   When XML_CONTEXT_BYTES is 0 and the internal buffer holds no pending
   data, the caller's memory is parsed in place and only the unconsumed
   tail is copied into the internal buffer.  In every other case the input
   is copied into the buffer obtained from XML_GetBuffer() and parsing is
   delegated to XML_ParseBuffer().
*/
enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
  /* Reject NULL parser, negative length, and NULL data with non-zero
     length; s == NULL with len == 0 is accepted. */
  if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
    if (parser != NULL)
      parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
  case XML_INITIALIZED:
    /* startParsing() is only invoked for parsers without a parent */
    if (parser->m_parentParser == NULL && ! startParsing(parser)) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return XML_STATUS_ERROR;
    }
    /* fall through */
  default:
    parser->m_parsingStatus.parsing = XML_PARSING;
  }

#if XML_CONTEXT_BYTES == 0
  /* Fast path: nothing pending in the internal buffer, so parse straight
     out of the caller's memory. */
  if (parser->m_bufferPtr == parser->m_bufferEnd) {
    const char *end;
    int nLeftOver;
    enum XML_Status result;
    /* Detect overflow (a+b > MAX <==> b > MAX-a) */
    if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      parser->m_eventPtr = parser->m_eventEndPtr = NULL;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    }
    // though this isn't a buffer request, we assume that `len` is the app's
    // preferred buffer fill size, and therefore save it here.
    parser->m_lastBufferRequestSize = len;
    parser->m_parseEndByteIndex += len;
    parser->m_positionPtr = s;
    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;

    /* m_parseEndPtr is assigned inside the argument list; `end` receives
       the position up to which the processor consumed the input. */
    parser->m_errorCode
        = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);

    if (parser->m_errorCode != XML_ERROR_NONE) {
      parser->m_eventEndPtr = parser->m_eventPtr;
      parser->m_processor = errorProcessor;
      return XML_STATUS_ERROR;
    } else {
      switch (parser->m_parsingStatus.parsing) {
      case XML_SUSPENDED:
        result = XML_STATUS_SUSPENDED;
        break;
      case XML_INITIALIZED:
      case XML_PARSING:
        if (isFinal) {
          /* Final chunk processed without error: mark the parser finished
             and return before the position/left-over bookkeeping below. */
          parser->m_parsingStatus.parsing = XML_FINISHED;
          return XML_STATUS_OK;
        }
      /* fall through */
      default:
        result = XML_STATUS_OK;
      }
    }

    XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
                      &parser->m_position);
    /* Bytes the processor did not consume; they are stashed in the
       internal buffer below. */
    nLeftOver = s + len - end;
    if (nLeftOver) {
      // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
      // (and XML_ERROR_FINISHED) from XML_GetBuffer.
      const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
      parser->m_parsingStatus.parsing = XML_PARSING;
      void *const temp = XML_GetBuffer(parser, nLeftOver);
      parser->m_parsingStatus.parsing = originalStatus;
      // GetBuffer may have overwritten this, but we want to remember what the
      // app requested, not how many bytes were left over after parsing.
      parser->m_lastBufferRequestSize = len;
      if (temp == NULL) {
        // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
        parser->m_eventPtr = parser->m_eventEndPtr = NULL;
        parser->m_processor = errorProcessor;
        return XML_STATUS_ERROR;
      }
      // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
      // don't have any data to preserve, and can copy straight into the start
      // of the buffer rather than the GetBuffer return pointer (which may be
      // pointing further into the allocated buffer).
      memcpy(parser->m_buffer, end, nLeftOver);
    }
    /* Re-point all buffer-relative cursors at the internal buffer, which
       now holds exactly the nLeftOver unconsumed bytes (possibly none). */
    parser->m_bufferPtr = parser->m_buffer;
    parser->m_bufferEnd = parser->m_buffer + nLeftOver;
    parser->m_positionPtr = parser->m_bufferPtr;
    parser->m_parseEndPtr = parser->m_bufferEnd;
    parser->m_eventPtr = parser->m_bufferPtr;
    parser->m_eventEndPtr = parser->m_bufferPtr;
    return result;
  }
#endif /* XML_CONTEXT_BYTES == 0 */
  /* General path: copy the input behind any pending data in the internal
     buffer and let XML_ParseBuffer() do the parsing. */
  void *buff = XML_GetBuffer(parser, len);
  if (buff == NULL)
    return XML_STATUS_ERROR;
  if (len > 0) {
    assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
    memcpy(buff, s, len);
  }
  return XML_ParseBuffer(parser, len, isFinal);
}
/* Return a pointer (the current m_bufferEnd) at which the caller may
   store up to `len` bytes of input for a following XML_ParseBuffer() call.

   Returns NULL on failure.  Except for a NULL `parser`, m_errorCode is set:
   XML_ERROR_NO_MEMORY for a negative `len`, for size computations that
   overflow `int`, and for allocation failure; XML_ERROR_SUSPENDED or
   XML_ERROR_FINISHED when the parser is in the corresponding state.

   If the free space behind m_bufferEnd is too small (or no buffer exists
   yet), the pending data is either moved to the front of the existing
   buffer or copied into a newly allocated, larger one.  With
   XML_CONTEXT_BYTES > 0, up to XML_CONTEXT_BYTES already-parsed bytes in
   front of m_bufferPtr are preserved as well.
*/
void *XMLCALL
XML_GetBuffer(XML_Parser parser, int len) {
  if (parser == NULL)
    return NULL;
  if (len < 0) {
    parser->m_errorCode = XML_ERROR_NO_MEMORY;
    return NULL;
  }
  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return NULL;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return NULL;
  default:;
  }

  // whether or not the request succeeds, `len` seems to be the app's preferred
  // buffer fill size; remember it.
  parser->m_lastBufferRequestSize = len;
  /* Only touch the buffer layout if the request does not fit behind
     m_bufferEnd, or if there is no buffer at all yet. */
  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
      || parser->m_buffer == NULL) {
#if XML_CONTEXT_BYTES > 0
    int keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* neededSize = requested bytes + not-yet-parsed bytes
       (m_bufferEnd - m_bufferPtr). */
    /* Do not invoke signed arithmetic overflow: */
    int neededSize = (int)((unsigned)len
                           + (unsigned)EXPAT_SAFE_PTR_DIFF(
                               parser->m_bufferEnd, parser->m_bufferPtr));
    if (neededSize < 0) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
#if XML_CONTEXT_BYTES > 0
    /* keep = number of bytes in front of m_bufferPtr to retain, capped at
       XML_CONTEXT_BYTES. */
    keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
    if (keep > XML_CONTEXT_BYTES)
      keep = XML_CONTEXT_BYTES;
    /* Detect and prevent integer overflow */
    if (keep > INT_MAX - neededSize) {
      parser->m_errorCode = XML_ERROR_NO_MEMORY;
      return NULL;
    }
    neededSize += keep;
#endif /* XML_CONTEXT_BYTES > 0 */
    /* Case 1: the existing allocation is large enough; just slide the
       retained data towards the start of the buffer. */
    if (parser->m_buffer && parser->m_bufferPtr
        && neededSize
               <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
#if XML_CONTEXT_BYTES > 0
      if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
        /* offset = how far everything is shifted towards the front */
        int offset
            = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
              - keep;
        /* The buffer pointers cannot be NULL here; we have at least some bytes
         * in the buffer */
        memmove(parser->m_buffer, &parser->m_buffer[offset],
                parser->m_bufferEnd - parser->m_bufferPtr + keep);
        parser->m_bufferEnd -= offset;
        parser->m_bufferPtr -= offset;
      }
#else
      memmove(parser->m_buffer, parser->m_bufferPtr,
              EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
      parser->m_bufferEnd
          = parser->m_buffer
            + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      parser->m_bufferPtr = parser->m_buffer;
#endif /* XML_CONTEXT_BYTES > 0 */
    } else {
      /* Case 2: allocate a new buffer.  Starting from the current capacity
         (or INIT_BUFFER_SIZE if there is none), the size is doubled at
         least once and until it reaches neededSize; a wrap to a
         non-positive value is treated as out of memory. */
      char *newBuf;
      int bufferSize
          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      do {
        /* Do not invoke signed arithmetic overflow: */
        bufferSize = (int)(2U * (unsigned)bufferSize);
      } while (bufferSize < neededSize && bufferSize > 0);
      if (bufferSize <= 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      // NOTE: We are avoiding MALLOC(..) here to leave limiting
      //       the input size to the application using Expat.
      newBuf = parser->m_mem.malloc_fcn(bufferSize);
      if (newBuf == 0) {
        parser->m_errorCode = XML_ERROR_NO_MEMORY;
        return NULL;
      }
      parser->m_bufferLim = newBuf + bufferSize;
#if XML_CONTEXT_BYTES > 0
      if (parser->m_bufferPtr) {
        /* Copy the `keep` context bytes plus the pending data, then
           release the old buffer.  m_bufferEnd is recomputed while
           m_bufferPtr still holds its old value. */
        memcpy(newBuf, &parser->m_bufferPtr[-keep],
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
                   + keep);
        // NOTE: We are avoiding FREE(..) here because parser->m_buffer
        //       is not being allocated with MALLOC(..) but with plain
        //       .malloc_fcn(..).
        parser->m_mem.free_fcn(parser->m_buffer);
        parser->m_buffer = newBuf;
        parser->m_bufferEnd
            = parser->m_buffer
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
              + keep;
        parser->m_bufferPtr = parser->m_buffer + keep;
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
        parser->m_bufferPtr = parser->m_buffer = newBuf;
      }
#else
      if (parser->m_bufferPtr) {
        /* Copy the pending data and release the old buffer.  m_bufferEnd
           is recomputed before m_bufferPtr is re-pointed below. */
        memcpy(newBuf, parser->m_bufferPtr,
               EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
        // NOTE: We are avoiding FREE(..) here because parser->m_buffer
        //       is not being allocated with MALLOC(..) but with plain
        //       .malloc_fcn(..).
        parser->m_mem.free_fcn(parser->m_buffer);
        parser->m_bufferEnd
            = newBuf
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
      }
      parser->m_bufferPtr = parser->m_buffer = newBuf;
#endif /* XML_CONTEXT_BYTES > 0 */
    }
    /* The event and position cursors are cleared on this path
       (presumably because they referred to the previous buffer layout). */
    parser->m_eventPtr = parser->m_eventEndPtr = NULL;
    parser->m_positionPtr = NULL;
  }
  return parser->m_bufferEnd;
}
XML_PARSING; 2703 2704 parser->m_errorCode = callProcessor( 2705 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); 2706 2707 if (parser->m_errorCode != XML_ERROR_NONE) { 2708 parser->m_eventEndPtr = parser->m_eventPtr; 2709 parser->m_processor = errorProcessor; 2710 return XML_STATUS_ERROR; 2711 } else { 2712 switch (parser->m_parsingStatus.parsing) { 2713 case XML_SUSPENDED: 2714 result = XML_STATUS_SUSPENDED; 2715 break; 2716 case XML_INITIALIZED: 2717 case XML_PARSING: 2718 if (parser->m_parsingStatus.finalBuffer) { 2719 parser->m_parsingStatus.parsing = XML_FINISHED; 2720 return result; 2721 } 2722 default:; 2723 } 2724 } 2725 2726 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2727 parser->m_bufferPtr, &parser->m_position); 2728 parser->m_positionPtr = parser->m_bufferPtr; 2729 return result; 2730 } 2731 2732 void XMLCALL 2733 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { 2734 if (parser == NULL) 2735 return; 2736 assert(status != NULL); 2737 *status = parser->m_parsingStatus; 2738 } 2739 2740 enum XML_Error XMLCALL 2741 XML_GetErrorCode(XML_Parser parser) { 2742 if (parser == NULL) 2743 return XML_ERROR_INVALID_ARGUMENT; 2744 return parser->m_errorCode; 2745 } 2746 2747 XML_Index XMLCALL 2748 XML_GetCurrentByteIndex(XML_Parser parser) { 2749 if (parser == NULL) 2750 return -1; 2751 if (parser->m_eventPtr) 2752 return (XML_Index)(parser->m_parseEndByteIndex 2753 - (parser->m_parseEndPtr - parser->m_eventPtr)); 2754 return -1; 2755 } 2756 2757 int XMLCALL 2758 XML_GetCurrentByteCount(XML_Parser parser) { 2759 if (parser == NULL) 2760 return 0; 2761 if (parser->m_eventEndPtr && parser->m_eventPtr) 2762 return (int)(parser->m_eventEndPtr - parser->m_eventPtr); 2763 return 0; 2764 } 2765 2766 const char *XMLCALL 2767 XML_GetInputContext(XML_Parser parser, int *offset, int *size) { 2768 #if XML_CONTEXT_BYTES > 0 2769 if (parser == NULL) 2770 return NULL; 2771 if (parser->m_eventPtr && parser->m_buffer) { 
2772 if (offset != NULL) 2773 *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2774 if (size != NULL) 2775 *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2776 return parser->m_buffer; 2777 } 2778 #else 2779 (void)parser; 2780 (void)offset; 2781 (void)size; 2782 #endif /* XML_CONTEXT_BYTES > 0 */ 2783 return (const char *)0; 2784 } 2785 2786 XML_Size XMLCALL 2787 XML_GetCurrentLineNumber(XML_Parser parser) { 2788 if (parser == NULL) 2789 return 0; 2790 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2791 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2792 parser->m_eventPtr, &parser->m_position); 2793 parser->m_positionPtr = parser->m_eventPtr; 2794 } 2795 return parser->m_position.lineNumber + 1; 2796 } 2797 2798 XML_Size XMLCALL 2799 XML_GetCurrentColumnNumber(XML_Parser parser) { 2800 if (parser == NULL) 2801 return 0; 2802 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2803 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2804 parser->m_eventPtr, &parser->m_position); 2805 parser->m_positionPtr = parser->m_eventPtr; 2806 } 2807 return parser->m_position.columnNumber; 2808 } 2809 2810 void XMLCALL 2811 XML_FreeContentModel(XML_Parser parser, XML_Content *model) { 2812 if (parser == NULL) 2813 return; 2814 2815 // NOTE: We are avoiding FREE(..) here because the content model 2816 // has been created using plain .malloc_fcn(..) rather than MALLOC(..). 2817 parser->m_mem.free_fcn(model); 2818 } 2819 2820 void *XMLCALL 2821 XML_MemMalloc(XML_Parser parser, size_t size) { 2822 if (parser == NULL) 2823 return NULL; 2824 2825 // NOTE: We are avoiding MALLOC(..) here to not include 2826 // user allocations with allocation tracking and limiting. 2827 return parser->m_mem.malloc_fcn(size); 2828 } 2829 2830 void *XMLCALL 2831 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { 2832 if (parser == NULL) 2833 return NULL; 2834 2835 // NOTE: We are avoiding REALLOC(..) 
here to not include 2836 // user allocations with allocation tracking and limiting. 2837 return parser->m_mem.realloc_fcn(ptr, size); 2838 } 2839 2840 void XMLCALL 2841 XML_MemFree(XML_Parser parser, void *ptr) { 2842 if (parser == NULL) 2843 return; 2844 2845 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and 2846 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) 2847 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. 2848 parser->m_mem.free_fcn(ptr); 2849 } 2850 2851 void XMLCALL 2852 XML_DefaultCurrent(XML_Parser parser) { 2853 if (parser == NULL) 2854 return; 2855 if (parser->m_defaultHandler) { 2856 if (parser->m_openInternalEntities) 2857 reportDefault(parser, parser->m_internalEncoding, 2858 parser->m_openInternalEntities->internalEventPtr, 2859 parser->m_openInternalEntities->internalEventEndPtr); 2860 else 2861 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, 2862 parser->m_eventEndPtr); 2863 } 2864 } 2865 2866 const XML_LChar *XMLCALL 2867 XML_ErrorString(enum XML_Error code) { 2868 switch (code) { 2869 case XML_ERROR_NONE: 2870 return NULL; 2871 case XML_ERROR_NO_MEMORY: 2872 return XML_L("out of memory"); 2873 case XML_ERROR_SYNTAX: 2874 return XML_L("syntax error"); 2875 case XML_ERROR_NO_ELEMENTS: 2876 return XML_L("no element found"); 2877 case XML_ERROR_INVALID_TOKEN: 2878 return XML_L("not well-formed (invalid token)"); 2879 case XML_ERROR_UNCLOSED_TOKEN: 2880 return XML_L("unclosed token"); 2881 case XML_ERROR_PARTIAL_CHAR: 2882 return XML_L("partial character"); 2883 case XML_ERROR_TAG_MISMATCH: 2884 return XML_L("mismatched tag"); 2885 case XML_ERROR_DUPLICATE_ATTRIBUTE: 2886 return XML_L("duplicate attribute"); 2887 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2888 return XML_L("junk after document element"); 2889 case XML_ERROR_PARAM_ENTITY_REF: 2890 return XML_L("illegal parameter entity reference"); 2891 case XML_ERROR_UNDEFINED_ENTITY: 2892 return XML_L("undefined entity"); 2893 case 
XML_ERROR_RECURSIVE_ENTITY_REF: 2894 return XML_L("recursive entity reference"); 2895 case XML_ERROR_ASYNC_ENTITY: 2896 return XML_L("asynchronous entity"); 2897 case XML_ERROR_BAD_CHAR_REF: 2898 return XML_L("reference to invalid character number"); 2899 case XML_ERROR_BINARY_ENTITY_REF: 2900 return XML_L("reference to binary entity"); 2901 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: 2902 return XML_L("reference to external entity in attribute"); 2903 case XML_ERROR_MISPLACED_XML_PI: 2904 return XML_L("XML or text declaration not at start of entity"); 2905 case XML_ERROR_UNKNOWN_ENCODING: 2906 return XML_L("unknown encoding"); 2907 case XML_ERROR_INCORRECT_ENCODING: 2908 return XML_L("encoding specified in XML declaration is incorrect"); 2909 case XML_ERROR_UNCLOSED_CDATA_SECTION: 2910 return XML_L("unclosed CDATA section"); 2911 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: 2912 return XML_L("error in processing external entity reference"); 2913 case XML_ERROR_NOT_STANDALONE: 2914 return XML_L("document is not standalone"); 2915 case XML_ERROR_UNEXPECTED_STATE: 2916 return XML_L("unexpected parser state - please send a bug report"); 2917 case XML_ERROR_ENTITY_DECLARED_IN_PE: 2918 return XML_L("entity declared in parameter entity"); 2919 case XML_ERROR_FEATURE_REQUIRES_XML_DTD: 2920 return XML_L("requested feature requires XML_DTD support in Expat"); 2921 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: 2922 return XML_L("cannot change setting once parsing has begun"); 2923 /* Added in 1.95.7. */ 2924 case XML_ERROR_UNBOUND_PREFIX: 2925 return XML_L("unbound prefix"); 2926 /* Added in 1.95.8. 
*/ 2927 case XML_ERROR_UNDECLARING_PREFIX: 2928 return XML_L("must not undeclare prefix"); 2929 case XML_ERROR_INCOMPLETE_PE: 2930 return XML_L("incomplete markup in parameter entity"); 2931 case XML_ERROR_XML_DECL: 2932 return XML_L("XML declaration not well-formed"); 2933 case XML_ERROR_TEXT_DECL: 2934 return XML_L("text declaration not well-formed"); 2935 case XML_ERROR_PUBLICID: 2936 return XML_L("illegal character(s) in public id"); 2937 case XML_ERROR_SUSPENDED: 2938 return XML_L("parser suspended"); 2939 case XML_ERROR_NOT_SUSPENDED: 2940 return XML_L("parser not suspended"); 2941 case XML_ERROR_ABORTED: 2942 return XML_L("parsing aborted"); 2943 case XML_ERROR_FINISHED: 2944 return XML_L("parsing finished"); 2945 case XML_ERROR_SUSPEND_PE: 2946 return XML_L("cannot suspend in external parameter entity"); 2947 /* Added in 2.0.0. */ 2948 case XML_ERROR_RESERVED_PREFIX_XML: 2949 return XML_L( 2950 "reserved prefix (xml) must not be undeclared or bound to another namespace name"); 2951 case XML_ERROR_RESERVED_PREFIX_XMLNS: 2952 return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); 2953 case XML_ERROR_RESERVED_NAMESPACE_URI: 2954 return XML_L( 2955 "prefix must not be bound to one of the reserved namespace names"); 2956 /* Added in 2.2.5. */ 2957 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ 2958 return XML_L("invalid argument"); 2959 /* Added in 2.3.0. */ 2960 case XML_ERROR_NO_BUFFER: 2961 return XML_L( 2962 "a successful prior call to function XML_GetBuffer is required"); 2963 /* Added in 2.4.0. */ 2964 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: 2965 return XML_L( 2966 "limit on input amplification factor (from DTD and entities) breached"); 2967 /* Added in 2.6.4. */ 2968 case XML_ERROR_NOT_STARTED: 2969 return XML_L("parser not started"); 2970 } 2971 return NULL; 2972 } 2973 2974 const XML_LChar *XMLCALL 2975 XML_ExpatVersion(void) { 2976 /* V1 is used to string-ize the version number. 
However, it would 2977 string-ize the actual version macro *names* unless we get them 2978 substituted before being passed to V1. CPP is defined to expand 2979 a macro, then rescan for more expansions. Thus, we use V2 to expand 2980 the version macros, then CPP will expand the resulting V1() macro 2981 with the correct numerals. */ 2982 /* ### I'm assuming cpp is portable in this respect... */ 2983 2984 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) 2985 #define V2(a, b, c) XML_L("expat_") V1(a, b, c) 2986 2987 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); 2988 2989 #undef V1 2990 #undef V2 2991 } 2992 2993 XML_Expat_Version XMLCALL 2994 XML_ExpatVersionInfo(void) { 2995 XML_Expat_Version version; 2996 2997 version.major = XML_MAJOR_VERSION; 2998 version.minor = XML_MINOR_VERSION; 2999 version.micro = XML_MICRO_VERSION; 3000 3001 return version; 3002 } 3003 3004 const XML_Feature *XMLCALL 3005 XML_GetFeatureList(void) { 3006 static const XML_Feature features[] = { 3007 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), 3008 sizeof(XML_Char)}, 3009 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), 3010 sizeof(XML_LChar)}, 3011 #ifdef XML_UNICODE 3012 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, 3013 #endif 3014 #ifdef XML_UNICODE_WCHAR_T 3015 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, 3016 #endif 3017 #ifdef XML_DTD 3018 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, 3019 #endif 3020 #if XML_CONTEXT_BYTES > 0 3021 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), 3022 XML_CONTEXT_BYTES}, 3023 #endif 3024 #ifdef XML_MIN_SIZE 3025 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, 3026 #endif 3027 #ifdef XML_NS 3028 {XML_FEATURE_NS, XML_L("XML_NS"), 0}, 3029 #endif 3030 #ifdef XML_LARGE_SIZE 3031 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, 3032 #endif 3033 #ifdef XML_ATTR_INFO 3034 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, 3035 #endif 3036 #if XML_GE == 1 3037 /* 
#if XML_GE == 1
/* Sets the maximum amplification factor used by the billion-laughs
   accounting.  Rejected (XML_FALSE) when the parser is NULL, is not a root
   parser, or the factor is NaN or below 1.0; otherwise the factor is stored
   and XML_TRUE is returned. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
    XML_Parser parser, float maximumAmplificationFactor) {
  if (parser == NULL || parser->m_parentParser != NULL)
    return XML_FALSE;
  if (isnan(maximumAmplificationFactor) || maximumAmplificationFactor < 1.0f)
    return XML_FALSE;

  parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
  return XML_TRUE;
}

/* Sets the activation threshold (in bytes) used by the billion-laughs
   accounting.  Rejected (XML_FALSE) when the parser is NULL or is not a
   root parser. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(
    XML_Parser parser, unsigned long long activationThresholdBytes) {
  if (parser == NULL || parser->m_parentParser != NULL)
    return XML_FALSE;

  parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
  return XML_TRUE;
}

/* Sets the maximum amplification factor used by the allocation tracker.
   Rejected (XML_FALSE) when the parser is NULL, is not a root parser, or
   the factor is NaN or below 1.0. */
XML_Bool XMLCALL
XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
                                        float maximumAmplificationFactor) {
  if (parser == NULL || parser->m_parentParser != NULL)
    return XML_FALSE;
  if (isnan(maximumAmplificationFactor) || maximumAmplificationFactor < 1.0f)
    return XML_FALSE;

  parser->m_alloc_tracker.maximumAmplificationFactor
      = maximumAmplificationFactor;
  return XML_TRUE;
}
#endif /* XML_GE == 1 */
(parser->m_parentParser != NULL) 3088 || isnan(maximumAmplificationFactor) 3089 || (maximumAmplificationFactor < 1.0f)) { 3090 return XML_FALSE; 3091 } 3092 parser->m_alloc_tracker.maximumAmplificationFactor 3093 = maximumAmplificationFactor; 3094 return XML_TRUE; 3095 } 3096 3097 XML_Bool XMLCALL 3098 XML_SetAllocTrackerActivationThreshold( 3099 XML_Parser parser, unsigned long long activationThresholdBytes) { 3100 if ((parser == NULL) || (parser->m_parentParser != NULL)) { 3101 return XML_FALSE; 3102 } 3103 parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; 3104 return XML_TRUE; 3105 } 3106 #endif /* XML_GE == 1 */ 3107 3108 XML_Bool XMLCALL 3109 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { 3110 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { 3111 parser->m_reparseDeferralEnabled = enabled; 3112 return XML_TRUE; 3113 } 3114 return XML_FALSE; 3115 } 3116 3117 /* Initially tag->rawName always points into the parse buffer; 3118 for those TAG instances opened while the current parse buffer was 3119 processed, and not yet closed, we need to store tag->rawName in a more 3120 permanent location, since the parse buffer is about to be discarded. 3121 */ 3122 static XML_Bool 3123 storeRawNames(XML_Parser parser) { 3124 TAG *tag = parser->m_tagStack; 3125 while (tag) { 3126 size_t bufSize; 3127 size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); 3128 size_t rawNameLen; 3129 char *rawNameBuf = tag->buf + nameLen; 3130 /* Stop if already stored. Since m_tagStack is a stack, we can stop 3131 at the first entry that has already been copied; everything 3132 below it in the stack is already been accounted for in a 3133 previous call to this function. 3134 */ 3135 if (tag->rawName == rawNameBuf) 3136 break; 3137 /* For reuse purposes we need to ensure that the 3138 size of tag->buf is a multiple of sizeof(XML_Char). 
3139 */ 3140 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); 3141 /* Detect and prevent integer overflow. */ 3142 if (rawNameLen > (size_t)INT_MAX - nameLen) 3143 return XML_FALSE; 3144 bufSize = nameLen + rawNameLen; 3145 if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { 3146 char *temp = REALLOC(parser, tag->buf, bufSize); 3147 if (temp == NULL) 3148 return XML_FALSE; 3149 /* if tag->name.str points to tag->buf (only when namespace 3150 processing is off) then we have to update it 3151 */ 3152 if (tag->name.str == (XML_Char *)tag->buf) 3153 tag->name.str = (XML_Char *)temp; 3154 /* if tag->name.localPart is set (when namespace processing is on) 3155 then update it as well, since it will always point into tag->buf 3156 */ 3157 if (tag->name.localPart) 3158 tag->name.localPart 3159 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); 3160 tag->buf = temp; 3161 tag->bufEnd = temp + bufSize; 3162 rawNameBuf = temp + nameLen; 3163 } 3164 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); 3165 tag->rawName = rawNameBuf; 3166 tag = tag->parent; 3167 } 3168 return XML_TRUE; 3169 } 3170 3171 static enum XML_Error PTRCALL 3172 contentProcessor(XML_Parser parser, const char *start, const char *end, 3173 const char **endPtr) { 3174 enum XML_Error result = doContent( 3175 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end, 3176 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, 3177 XML_ACCOUNT_DIRECT); 3178 if (result == XML_ERROR_NONE) { 3179 if (! 
storeRawNames(parser)) 3180 return XML_ERROR_NO_MEMORY; 3181 } 3182 return result; 3183 } 3184 3185 static enum XML_Error PTRCALL 3186 externalEntityInitProcessor(XML_Parser parser, const char *start, 3187 const char *end, const char **endPtr) { 3188 enum XML_Error result = initializeEncoding(parser); 3189 if (result != XML_ERROR_NONE) 3190 return result; 3191 parser->m_processor = externalEntityInitProcessor2; 3192 return externalEntityInitProcessor2(parser, start, end, endPtr); 3193 } 3194 3195 static enum XML_Error PTRCALL 3196 externalEntityInitProcessor2(XML_Parser parser, const char *start, 3197 const char *end, const char **endPtr) { 3198 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 3199 int tok = XmlContentTok(parser->m_encoding, start, end, &next); 3200 switch (tok) { 3201 case XML_TOK_BOM: 3202 #if XML_GE == 1 3203 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, 3204 XML_ACCOUNT_DIRECT)) { 3205 accountingOnAbort(parser); 3206 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 3207 } 3208 #endif /* XML_GE == 1 */ 3209 3210 /* If we are at the end of the buffer, this would cause the next stage, 3211 i.e. externalEntityInitProcessor3, to pass control directly to 3212 doContent (by detecting XML_TOK_NONE) without processing any xml text 3213 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. 3214 */ 3215 if (next == end && ! parser->m_parsingStatus.finalBuffer) { 3216 *endPtr = next; 3217 return XML_ERROR_NONE; 3218 } 3219 start = next; 3220 break; 3221 case XML_TOK_PARTIAL: 3222 if (! parser->m_parsingStatus.finalBuffer) { 3223 *endPtr = start; 3224 return XML_ERROR_NONE; 3225 } 3226 parser->m_eventPtr = start; 3227 return XML_ERROR_UNCLOSED_TOKEN; 3228 case XML_TOK_PARTIAL_CHAR: 3229 if (! 
parser->m_parsingStatus.finalBuffer) { 3230 *endPtr = start; 3231 return XML_ERROR_NONE; 3232 } 3233 parser->m_eventPtr = start; 3234 return XML_ERROR_PARTIAL_CHAR; 3235 } 3236 parser->m_processor = externalEntityInitProcessor3; 3237 return externalEntityInitProcessor3(parser, start, end, endPtr); 3238 } 3239 3240 static enum XML_Error PTRCALL 3241 externalEntityInitProcessor3(XML_Parser parser, const char *start, 3242 const char *end, const char **endPtr) { 3243 int tok; 3244 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 3245 parser->m_eventPtr = start; 3246 tok = XmlContentTok(parser->m_encoding, start, end, &next); 3247 /* Note: These bytes are accounted later in: 3248 - processXmlDecl 3249 - externalEntityContentProcessor 3250 */ 3251 parser->m_eventEndPtr = next; 3252 3253 switch (tok) { 3254 case XML_TOK_XML_DECL: { 3255 enum XML_Error result; 3256 result = processXmlDecl(parser, 1, start, next); 3257 if (result != XML_ERROR_NONE) 3258 return result; 3259 switch (parser->m_parsingStatus.parsing) { 3260 case XML_SUSPENDED: 3261 *endPtr = next; 3262 return XML_ERROR_NONE; 3263 case XML_FINISHED: 3264 return XML_ERROR_ABORTED; 3265 case XML_PARSING: 3266 if (parser->m_reenter) { 3267 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 3268 } 3269 /* Fall through */ 3270 default: 3271 start = next; 3272 } 3273 } break; 3274 case XML_TOK_PARTIAL: 3275 if (! parser->m_parsingStatus.finalBuffer) { 3276 *endPtr = start; 3277 return XML_ERROR_NONE; 3278 } 3279 return XML_ERROR_UNCLOSED_TOKEN; 3280 case XML_TOK_PARTIAL_CHAR: 3281 if (! 
parser->m_parsingStatus.finalBuffer) { 3282 *endPtr = start; 3283 return XML_ERROR_NONE; 3284 } 3285 return XML_ERROR_PARTIAL_CHAR; 3286 } 3287 parser->m_processor = externalEntityContentProcessor; 3288 parser->m_tagLevel = 1; 3289 return externalEntityContentProcessor(parser, start, end, endPtr); 3290 } 3291 3292 static enum XML_Error PTRCALL 3293 externalEntityContentProcessor(XML_Parser parser, const char *start, 3294 const char *end, const char **endPtr) { 3295 enum XML_Error result 3296 = doContent(parser, 1, parser->m_encoding, start, end, endPtr, 3297 (XML_Bool)! parser->m_parsingStatus.finalBuffer, 3298 XML_ACCOUNT_ENTITY_EXPANSION); 3299 if (result == XML_ERROR_NONE) { 3300 if (! storeRawNames(parser)) 3301 return XML_ERROR_NO_MEMORY; 3302 } 3303 return result; 3304 } 3305 3306 static enum XML_Error 3307 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, 3308 const char *s, const char *end, const char **nextPtr, 3309 XML_Bool haveMore, enum XML_Account account) { 3310 /* save one level of indirection */ 3311 DTD *const dtd = parser->m_dtd; 3312 3313 const char **eventPP; 3314 const char **eventEndPP; 3315 if (enc == parser->m_encoding) { 3316 eventPP = &parser->m_eventPtr; 3317 eventEndPP = &parser->m_eventEndPtr; 3318 } else { 3319 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 3320 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 3321 } 3322 *eventPP = s; 3323 3324 for (;;) { 3325 const char *next = s; /* XmlContentTok doesn't always set the last arg */ 3326 int tok = XmlContentTok(enc, s, end, &next); 3327 #if XML_GE == 1 3328 const char *accountAfter 3329 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) 3330 ? (haveMore ? s /* i.e. 0 bytes */ : end) 3331 : next; 3332 if (! 
/* Main content loop: tokenizes [s, end) with XmlContentTok and dispatches
   each token to the registered handlers (falling back to the default
   handler via reportDefault), until the input is used up, an error is
   found, or the parsing status asks the loop to stop.

   Parameters:
   - parser:        the parser whose state and handlers are used.
   - startTagLevel: tag level at which this content started; compared with
                    parser->m_tagLevel to detect XML_ERROR_ASYNC_ENTITY.
                    A value of 0 makes running out of input without
                    handing over to the epilog an XML_ERROR_NO_ELEMENTS.
   - enc:           encoding of the input; when it differs from
                    parser->m_encoding, the event pointers of
                    parser->m_openInternalEntities are maintained instead
                    of the parser's own.
   - s, end:        start and end of the input to process.
   - nextPtr:       on a successful return, receives the position up to
                    which input was consumed.
   - haveMore:      when true, an incomplete trailing token is left
                    unconsumed (*nextPtr = s, XML_ERROR_NONE) instead of
                    being reported as an error.
   - account:       accounting category passed on to
                    accountingDiffTolerated, storeAtts and doCdataSection.

   Returns XML_ERROR_NONE on success or the error code describing why
   processing stopped.
*/
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  /* Select which pair of event pointers this invocation maintains. */
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#if XML_GE == 1
    /* For the two "trailing" tokens the accounted range is either empty
       (more input follows, token will be re-read) or runs to the end of
       the buffer; for all other tokens it is the token itself. */
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_TRAILING_CR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      *eventEndPP = end;
      /* The trailing carriage return is reported as a single line feed. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0)
        return XML_ERROR_NO_ELEMENTS;
      if (parser->m_tagLevel != startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      if (startTagLevel > 0) {
        if (parser->m_tagLevel != startTagLevel)
          return XML_ERROR_ASYNC_ENTITY;
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_NO_ELEMENTS;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      /* A non-zero result means the reference names a predefined entity;
         it is delivered as that single character. */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (parser->m_characterDataHandler)
          parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      /* The name is stored in the DTD pool only for the lookup and
         discarded again right after it. */
      name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&dtd->pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal,
         otherwise call the skipped entity or default handler.
      */
      if (! dtd->hasParamEntityRefs || dtd->standalone) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      if (entity->open)
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      if (entity->notation)
        return XML_ERROR_BINARY_ENTITY_REF;
      if (entity->textPtr) {
        /* Entity with replacement text: expand it, unless expansion of
           internal entities is switched off, in which case it is reported
           as skipped. */
        enum XML_Error result;
        if (! parser->m_defaultExpandInternalEntities) {
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
                                           0);
          else if (parser->m_defaultHandler)
            reportDefault(parser, enc, s, next);
          break;
        }
        result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
        if (result != XML_ERROR_NONE)
          return result;
      } else if (parser->m_externalEntityRefHandler) {
        /* Entity without replacement text: hand it to the external entity
           reference handler.  The entity is flagged open only while the
           context string is built. */
        const XML_Char *context;
        entity->open = XML_TRUE;
        context = getContext(parser);
        entity->open = XML_FALSE;
        if (! context)
          return XML_ERROR_NO_MEMORY;
        if (! parser->m_externalEntityRefHandler(
                parser->m_externalEntityRefHandlerArg, context, entity->base,
                entity->systemId, entity->publicId))
          return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        poolDiscard(&parser->m_tempPool);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    }
    case XML_TOK_START_TAG_NO_ATTS:
      /* fall through */
    case XML_TOK_START_TAG_WITH_ATTS: {
      TAG *tag;
      enum XML_Error result;
      XML_Char *toPtr;
      /* Reuse a TAG from the free list when available, otherwise allocate
         a new one together with its initial name buffer. */
      if (parser->m_freeTagList) {
        tag = parser->m_freeTagList;
        parser->m_freeTagList = parser->m_freeTagList->parent;
      } else {
        tag = MALLOC(parser, sizeof(TAG));
        if (! tag)
          return XML_ERROR_NO_MEMORY;
        tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE);
        if (! tag->buf) {
          FREE(parser, tag);
          return XML_ERROR_NO_MEMORY;
        }
        tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
      }
      /* Push the tag onto the tag stack; rawName still points into the
         input at this point. */
      tag->bindings = NULL;
      tag->parent = parser->m_tagStack;
      parser->m_tagStack = tag;
      tag->name.localPart = NULL;
      tag->name.prefix = NULL;
      tag->rawName = s + enc->minBytesPerChar;
      tag->rawNameLength = XmlNameLength(enc, tag->rawName);
      ++parser->m_tagLevel;
      {
        /* Convert the raw name into tag->buf, doubling the buffer until
           the whole name fits; one XML_Char is kept free for the
           terminator written below. */
        const char *rawNameEnd = tag->rawName + tag->rawNameLength;
        const char *fromPtr = tag->rawName;
        toPtr = (XML_Char *)tag->buf;
        for (;;) {
          int bufSize;
          int convLen;
          const enum XML_Convert_Result convert_res
              = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
                           (ICHAR *)tag->bufEnd - 1);
          convLen = (int)(toPtr - (XML_Char *)tag->buf);
          if ((fromPtr >= rawNameEnd)
              || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
            tag->name.strLen = convLen;
            break;
          }
          /* NOTE(review): the doubling is done in `int`; presumably name
             lengths keep this far from INT_MAX, but no check is visible
             here -- confirm. */
          bufSize = (int)(tag->bufEnd - tag->buf) << 1;
          {
            char *temp = REALLOC(parser, tag->buf, bufSize);
            if (temp == NULL)
              return XML_ERROR_NO_MEMORY;
            tag->buf = temp;
            tag->bufEnd = temp + bufSize;
            toPtr = (XML_Char *)temp + convLen;
          }
        }
      }
      tag->name.str = (XML_Char *)tag->buf;
      *toPtr = XML_T('\0');
      result
          = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
      if (result)
        return result;
      if (parser->m_startElementHandler)
        parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
                                      (const XML_Char **)parser->m_atts);
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      break;
    }
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
      /* An empty element does not go onto the tag stack; its name lives
         in the temporary pool for the duration of this case. */
      const char *rawName = s + enc->minBytesPerChar;
      enum XML_Error result;
      BINDING *bindings = NULL;
      XML_Bool noElmHandlers = XML_TRUE;
      TAG_NAME name;
      name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
                                 rawName + XmlNameLength(enc, rawName));
      if (! name.str)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
      result = storeAtts(parser, enc, s, &name, &bindings,
                         XML_ACCOUNT_NONE /* token spans whole start tag */);
      if (result != XML_ERROR_NONE) {
        freeBindings(parser, bindings);
        return result;
      }
      poolFinish(&parser->m_tempPool);
      if (parser->m_startElementHandler) {
        parser->m_startElementHandler(parser->m_handlerArg, name.str,
                                      (const XML_Char **)parser->m_atts);
        noElmHandlers = XML_FALSE;
      }
      if (parser->m_endElementHandler) {
        /* When a start handler has already been called for this token,
           the end event is positioned at the end of the token. */
        if (parser->m_startElementHandler)
          *eventPP = *eventEndPP;
        parser->m_endElementHandler(parser->m_handlerArg, name.str);
        noElmHandlers = XML_FALSE;
      }
      if (noElmHandlers && parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      freeBindings(parser, bindings);
    }
      /* Tag level 0 after an empty element: continue with the epilog,
         either directly or, when suspended or re-entering, by only
         installing the epilog processor. */
      if ((parser->m_tagLevel == 0)
          && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
        if (parser->m_parsingStatus.parsing == XML_SUSPENDED
            || (parser->m_parsingStatus.parsing == XML_PARSING
                && parser->m_reenter))
          parser->m_processor = epilogProcessor;
        else
          return epilogProcessor(parser, next, end, nextPtr);
      }
      break;
    case XML_TOK_END_TAG:
      if (parser->m_tagLevel == startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      else {
        int len;
        const char *rawName;
        TAG *tag = parser->m_tagStack;
        rawName = s + enc->minBytesPerChar * 2;
        len = XmlNameLength(enc, rawName);
        /* The end tag's raw name must match the raw name of the tag on
           top of the stack byte for byte. */
        if (len != tag->rawNameLength
            || memcmp(tag->rawName, rawName, len) != 0) {
          *eventPP = rawName;
          return XML_ERROR_TAG_MISMATCH;
        }
        /* Pop the tag and move it to the free list for reuse. */
        parser->m_tagStack = tag->parent;
        tag->parent = parser->m_freeTagList;
        parser->m_freeTagList = tag;
        --parser->m_tagLevel;
        if (parser->m_endElementHandler) {
          const XML_Char *localPart;
          const XML_Char *prefix;
          XML_Char *uri;
          localPart = tag->name.localPart;
          if (parser->m_ns && localPart) {
            /* localPart and prefix may have been overwritten in
               tag->name.str, since this points to the binding->uri
               buffer which gets reused; so we have to add them again
            */
            uri = (XML_Char *)tag->name.str + tag->name.uriLen;
            /* don't need to check for space - already done in storeAtts() */
            while (*localPart)
              *uri++ = *localPart++;
            prefix = tag->name.prefix;
            if (parser->m_ns_triplets && prefix) {
              *uri++ = parser->m_namespaceSeparator;
              while (*prefix)
                *uri++ = *prefix++;
            }
            *uri = XML_T('\0');
          }
          parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
        } else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        /* Report the end of every namespace binding made by this tag,
           move the binding to the free list and restore the prefix's
           previous binding. */
        while (tag->bindings) {
          BINDING *b = tag->bindings;
          if (parser->m_endNamespaceDeclHandler)
            parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
                                              b->prefix->name);
          tag->bindings = tag->bindings->nextTagBinding;
          b->nextTagBinding = parser->m_freeBindingList;
          parser->m_freeBindingList = b;
          b->prefix->binding = b->prevPrefixBinding;
        }
        /* Tag level 0 after an end tag: continue with the epilog, either
           directly or, when suspended or re-entering, by only installing
           the epilog processor. */
        if ((parser->m_tagLevel == 0)
            && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
          if (parser->m_parsingStatus.parsing == XML_SUSPENDED
              || (parser->m_parsingStatus.parsing == XML_PARSING
                  && parser->m_reenter))
            parser->m_processor = epilogProcessor;
          else
            return epilogProcessor(parser, next, end, nextPtr);
        }
      }
      break;
    case XML_TOK_CHAR_REF: {
      int n = XmlCharRefNumber(enc, s);
      if (n < 0)
        return XML_ERROR_BAD_CHAR_REF;
      if (parser->m_characterDataHandler) {
        /* Encode the referenced character number into a local buffer and
           deliver it as character data. */
        XML_Char buf[XML_ENCODE_MAX];
        parser->m_characterDataHandler(parser->m_handlerArg, buf,
                                       XmlEncode(n, (ICHAR *)buf));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_XML_DECL:
      return XML_ERROR_MISPLACED_XML_PI;
    case XML_TOK_DATA_NEWLINE:
      /* A newline token is reported as a single line feed. */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_CDATA_SECT_OPEN: {
      enum XML_Error result;
      if (parser->m_startCdataSectionHandler)
        parser->m_startCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
         changing only the character data.  You set up a defaultHandler
         and a characterDataHandler.  The defaultHandler simply copies
         characters through.  The characterDataHandler does the
         transformation and writes the characters out escaping them as
         necessary.  This case will fail to work if we leave out the
         following two lines (because & and < inside CDATA sections will
         be incorrectly escaped).

         However, now we have a start/endCdataSectionHandler, so it seems
         easier to let the user deal with this.
      */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      result
          = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doCdataSection cleared next: switch to the CDATA section
           processor and return. */
        parser->m_processor = cdataSectionProcessor;
        return result;
      }
    } break;
    case XML_TOK_TRAILING_RSQB:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* Final buffer: deliver everything from s to end as character data,
         converting it first when the encoding requires that. */
      if (parser->m_characterDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
          XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
          parser->m_characterDataHandler(
              parser->m_handlerArg, parser->m_dataBuf,
              (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
        } else
          parser->m_characterDataHandler(
              parser->m_handlerArg, (const XML_Char *)s,
              (int)((const XML_Char *)end - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0) {
        *eventPP = end;
        return XML_ERROR_NO_ELEMENTS;
      }
      if (parser->m_tagLevel != startTagLevel) {
        *eventPP = end;
        return XML_ERROR_ASYNC_ENTITY;
      }
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_CHARS: {
      /* The handler is read once here and the same pointer is used for
         all chunks of this token. */
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert and deliver in chunks of at most the data buffer's
             size, advancing the event pointers chunk by chunk. */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = s;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    default:
      /* All of the tokens produced by XmlContentTok() have their own
       * explicit cases, so this default is not strictly necessary.
       * However it is a useful safety net, so we retain the code and
       * simply exclude it from the coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
      /* LCOV_EXCL_STOP */
    }
    /* After each handled token, honour the parsing status before moving
       on to the next token. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *eventPP = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      *eventPP = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        *nextPtr = next;
        return XML_ERROR_NONE;
      }
      /* Fall through */
    default:;
      *eventPP = s = next;
    }
  }
  /* not reached */
}
3745 } break; 3746 case XML_TOK_PI: 3747 if (! reportProcessingInstruction(parser, enc, s, next)) 3748 return XML_ERROR_NO_MEMORY; 3749 break; 3750 case XML_TOK_COMMENT: 3751 if (! reportComment(parser, enc, s, next)) 3752 return XML_ERROR_NO_MEMORY; 3753 break; 3754 default: 3755 /* All of the tokens produced by XmlContentTok() have their own 3756 * explicit cases, so this default is not strictly necessary. 3757 * However it is a useful safety net, so we retain the code and 3758 * simply exclude it from the coverage tests. 3759 * 3760 * LCOV_EXCL_START 3761 */ 3762 if (parser->m_defaultHandler) 3763 reportDefault(parser, enc, s, next); 3764 break; 3765 /* LCOV_EXCL_STOP */ 3766 } 3767 switch (parser->m_parsingStatus.parsing) { 3768 case XML_SUSPENDED: 3769 *eventPP = next; 3770 *nextPtr = next; 3771 return XML_ERROR_NONE; 3772 case XML_FINISHED: 3773 *eventPP = next; 3774 return XML_ERROR_ABORTED; 3775 case XML_PARSING: 3776 if (parser->m_reenter) { 3777 *nextPtr = next; 3778 return XML_ERROR_NONE; 3779 } 3780 /* Fall through */ 3781 default:; 3782 *eventPP = s = next; 3783 } 3784 } 3785 /* not reached */ 3786 } 3787 3788 /* This function does not call free() on the allocated memory, merely 3789 * moving it to the parser's m_freeBindingList where it can be freed or 3790 * reused as appropriate. 3791 */ 3792 static void 3793 freeBindings(XML_Parser parser, BINDING *bindings) { 3794 while (bindings) { 3795 BINDING *b = bindings; 3796 3797 /* m_startNamespaceDeclHandler will have been called for this 3798 * binding in addBindings(), so call the end handler now. 
/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts / m_attInfo / m_nsAtts arrays and
                  m_tempPool are used (and grown) here
   - enc:         encoding used to tokenize attStr
   - attStr:      start of the tag text handed to XmlGetAttributes()
   - tagNamePtr:  in: ->str is the element name used for the lookup;
                  out (namespace mode only): localPart, uriLen, prefix,
                  prefixLen and str are rewritten for the expanded name
   - bindingsPtr: head of the list that addBinding() prepends the
                  namespace bindings declared on this tag to
   - account:     forwarded unchanged to storeAttributeValue()

   Returns XML_ERROR_NONE on success.  Failures visible in this function:
   XML_ERROR_NO_MEMORY, XML_ERROR_DUPLICATE_ATTRIBUTE,
   XML_ERROR_UNBOUND_PREFIX, or any error returned by
   storeAttributeValue() / addBinding().

   Bookkeeping convention used throughout: the XML_Char stored directly
   in front of an attribute name, (id->name)[-1], is a per-tag flag:
   0 = not seen on this tag, 1 = seen, 2 = seen and carries a non-xmlns
   prefix that still has to be expanded.  On the success paths all flags
   are reset to 0 before returning.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;         /* next free slot in appAtts (name, value, ...) */
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0; /* number of flag-2 (prefixed, non-xmlns) attributes */
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    /* Not known yet: copy the name into the DTD pool and create an entry
       for it (second lookup() call with a non-zero creation size). */
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* Grow m_atts (and m_attInfo, when compiled in) so that specified plus
     defaulted attributes fit.  On every failure path m_attsSize is put
     back to its previous value. */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = REALLOC(parser, parser->m_atts,
                   parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = REALLOC(parser, parser->m_attInfo,
                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* The first tokenizer pass was limited to oldAttsSize entries, so
       run it again now that all n attributes fit. */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* The application's name/value pointer list is written over the same
     storage as m_atts. */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* Offsets are computed relative to the end of the parse buffer:
       m_parseEndByteIndex minus the distance from m_parseEndPtr. */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
    */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1; /* mark as seen on this tag */
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* Give the name slot back: the declaration is not kept in the
           application's attribute list. */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    /* The ID attribute was flagged above; find its name slot. */
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* Only attributes not flagged above and having a default value. */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0; /* terminate the name/value list */

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    unsigned int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* A slot counts as occupied for this tag only if its version equals
       the value stored here. */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* Build the expanded name in m_tempPool: namespace URI first ... */
        for (j = 0; j < (unsigned int)b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* ... skip the "prefix:" part of the qualified name ... */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        /* ... then the local part. */
        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            /* Step backwards through the table, wrapping around. */
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* Last prefixed name done: leave the rest to the plain
           flag-clearing loop below. */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* Namespace declarations are not in appAtts; clear their flags through
     the binding list. */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE;
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    while (binding->prefix->name[prefixLen++])
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* The expanded name is assembled in place after the URI inside
     binding->uri, so that buffer must hold URI + local part + prefix. */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* Re-point every open tag whose name still refers to the old
       buffer before that buffer is freed. */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}
uri) 4266 return XML_ERROR_NO_MEMORY; 4267 binding->uriAlloc = n + EXPAND_SPARE; 4268 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); 4269 for (p = parser->m_tagStack; p; p = p->parent) 4270 if (p->name.str == binding->uri) 4271 p->name.str = uri; 4272 FREE(parser, binding->uri); 4273 binding->uri = uri; 4274 } 4275 /* if m_namespaceSeparator != '\0' then uri includes it already */ 4276 uri = binding->uri + binding->uriLen; 4277 memcpy(uri, localPart, i * sizeof(XML_Char)); 4278 /* we always have a namespace separator between localPart and prefix */ 4279 if (prefixLen) { 4280 uri += i - 1; 4281 *uri = parser->m_namespaceSeparator; /* replace null terminator */ 4282 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); 4283 } 4284 tagNamePtr->str = binding->uri; 4285 return XML_ERROR_NONE; 4286 } 4287 4288 static XML_Bool 4289 is_rfc3986_uri_char(XML_Char candidate) { 4290 // For the RFC 3986 ANBF grammar see 4291 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A 4292 4293 switch (candidate) { 4294 // From rule "ALPHA" (uppercase half) 4295 case 'A': 4296 case 'B': 4297 case 'C': 4298 case 'D': 4299 case 'E': 4300 case 'F': 4301 case 'G': 4302 case 'H': 4303 case 'I': 4304 case 'J': 4305 case 'K': 4306 case 'L': 4307 case 'M': 4308 case 'N': 4309 case 'O': 4310 case 'P': 4311 case 'Q': 4312 case 'R': 4313 case 'S': 4314 case 'T': 4315 case 'U': 4316 case 'V': 4317 case 'W': 4318 case 'X': 4319 case 'Y': 4320 case 'Z': 4321 4322 // From rule "ALPHA" (lowercase half) 4323 case 'a': 4324 case 'b': 4325 case 'c': 4326 case 'd': 4327 case 'e': 4328 case 'f': 4329 case 'g': 4330 case 'h': 4331 case 'i': 4332 case 'j': 4333 case 'k': 4334 case 'l': 4335 case 'm': 4336 case 'n': 4337 case 'o': 4338 case 'p': 4339 case 'q': 4340 case 'r': 4341 case 's': 4342 case 't': 4343 case 'u': 4344 case 'v': 4345 case 'w': 4346 case 'x': 4347 case 'y': 4348 case 'z': 4349 4350 // From rule "DIGIT" 4351 case '0': 4352 case '1': 4353 case '2': 
4354 case '3': 4355 case '4': 4356 case '5': 4357 case '6': 4358 case '7': 4359 case '8': 4360 case '9': 4361 4362 // From rule "pct-encoded" 4363 case '%': 4364 4365 // From rule "unreserved" 4366 case '-': 4367 case '.': 4368 case '_': 4369 case '~': 4370 4371 // From rule "gen-delims" 4372 case ':': 4373 case '/': 4374 case '?': 4375 case '#': 4376 case '[': 4377 case ']': 4378 case '@': 4379 4380 // From rule "sub-delims" 4381 case '!': 4382 case '$': 4383 case '&': 4384 case '\'': 4385 case '(': 4386 case ')': 4387 case '*': 4388 case '+': 4389 case ',': 4390 case ';': 4391 case '=': 4392 return XML_TRUE; 4393 4394 default: 4395 return XML_FALSE; 4396 } 4397 } 4398 4399 /* addBinding() overwrites the value of prefix->binding without checking. 4400 Therefore one must keep track of the old value outside of addBinding(). 4401 */ 4402 static enum XML_Error 4403 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, 4404 const XML_Char *uri, BINDING **bindingsPtr) { 4405 // "http://www.w3.org/XML/1998/namespace" 4406 static const XML_Char xmlNamespace[] 4407 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 4408 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, 4409 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, 4410 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, 4411 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, 4412 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, 4413 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, 4414 ASCII_e, '\0'}; 4415 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 4416 // "http://www.w3.org/2000/xmlns/" 4417 static const XML_Char xmlnsNamespace[] 4418 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 4419 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 4420 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 4421 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 4422 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 4423 
static const int xmlnsLen 4424 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 4425 4426 XML_Bool mustBeXML = XML_FALSE; 4427 XML_Bool isXML = XML_TRUE; 4428 XML_Bool isXMLNS = XML_TRUE; 4429 4430 BINDING *b; 4431 int len; 4432 4433 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 4434 if (*uri == XML_T('\0') && prefix->name) 4435 return XML_ERROR_UNDECLARING_PREFIX; 4436 4437 if (prefix->name && prefix->name[0] == XML_T(ASCII_x) 4438 && prefix->name[1] == XML_T(ASCII_m) 4439 && prefix->name[2] == XML_T(ASCII_l)) { 4440 /* Not allowed to bind xmlns */ 4441 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) 4442 && prefix->name[5] == XML_T('\0')) 4443 return XML_ERROR_RESERVED_PREFIX_XMLNS; 4444 4445 if (prefix->name[3] == XML_T('\0')) 4446 mustBeXML = XML_TRUE; 4447 } 4448 4449 for (len = 0; uri[len]; len++) { 4450 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) 4451 isXML = XML_FALSE; 4452 4453 if (! mustBeXML && isXMLNS 4454 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) 4455 isXMLNS = XML_FALSE; 4456 4457 // NOTE: While Expat does not validate namespace URIs against RFC 3986 4458 // today (and is not REQUIRED to do so with regard to the XML 1.0 4459 // namespaces specification) we have to at least make sure, that 4460 // the application on top of Expat (that is likely splitting expanded 4461 // element names ("qualified names") of form 4462 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 4463 // in its element handler code) cannot be confused by an attacker 4464 // putting additional namespace separator characters into namespace 4465 // declarations. That would be ambiguous and not to be expected. 
4466 // 4467 // While the HTML API docs of function XML_ParserCreateNS have been 4468 // advising against use of a namespace separator character that can 4469 // appear in a URI for >20 years now, some widespread applications 4470 // are using URI characters (':' (colon) in particular) for a 4471 // namespace separator, in practice. To keep these applications 4472 // functional, we only reject namespaces URIs containing the 4473 // application-chosen namespace separator if the chosen separator 4474 // is a non-URI character with regard to RFC 3986. 4475 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) 4476 && ! is_rfc3986_uri_char(uri[len])) { 4477 return XML_ERROR_SYNTAX; 4478 } 4479 } 4480 isXML = isXML && len == xmlLen; 4481 isXMLNS = isXMLNS && len == xmlnsLen; 4482 4483 if (mustBeXML != isXML) 4484 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML 4485 : XML_ERROR_RESERVED_NAMESPACE_URI; 4486 4487 if (isXMLNS) 4488 return XML_ERROR_RESERVED_NAMESPACE_URI; 4489 4490 if (parser->m_namespaceSeparator) 4491 len++; 4492 if (parser->m_freeBindingList) { 4493 b = parser->m_freeBindingList; 4494 if (len > b->uriAlloc) { 4495 /* Detect and prevent integer overflow */ 4496 if (len > INT_MAX - EXPAND_SPARE) { 4497 return XML_ERROR_NO_MEMORY; 4498 } 4499 4500 /* Detect and prevent integer overflow. 4501 * The preprocessor guard addresses the "always false" warning 4502 * from -Wtype-limits on platforms where 4503 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 4504 #if UINT_MAX >= SIZE_MAX 4505 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4506 return XML_ERROR_NO_MEMORY; 4507 } 4508 #endif 4509 4510 XML_Char *temp 4511 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4512 if (temp == NULL) 4513 return XML_ERROR_NO_MEMORY; 4514 b->uri = temp; 4515 b->uriAlloc = len + EXPAND_SPARE; 4516 } 4517 parser->m_freeBindingList = b->nextTagBinding; 4518 } else { 4519 b = MALLOC(parser, sizeof(BINDING)); 4520 if (! b) 4521 return XML_ERROR_NO_MEMORY; 4522 4523 /* Detect and prevent integer overflow */ 4524 if (len > INT_MAX - EXPAND_SPARE) { 4525 return XML_ERROR_NO_MEMORY; 4526 } 4527 /* Detect and prevent integer overflow. 4528 * The preprocessor guard addresses the "always false" warning 4529 * from -Wtype-limits on platforms where 4530 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4531 #if UINT_MAX >= SIZE_MAX 4532 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4533 return XML_ERROR_NO_MEMORY; 4534 } 4535 #endif 4536 4537 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4538 if (! b->uri) { 4539 FREE(parser, b); 4540 return XML_ERROR_NO_MEMORY; 4541 } 4542 b->uriAlloc = len + EXPAND_SPARE; 4543 } 4544 b->uriLen = len; 4545 memcpy(b->uri, uri, len * sizeof(XML_Char)); 4546 if (parser->m_namespaceSeparator) 4547 b->uri[len - 1] = parser->m_namespaceSeparator; 4548 b->prefix = prefix; 4549 b->attId = attId; 4550 b->prevPrefixBinding = prefix->binding; 4551 /* NULL binding when default namespace undeclared */ 4552 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4553 prefix->binding = NULL; 4554 else 4555 prefix->binding = b; 4556 b->nextTagBinding = *bindingsPtr; 4557 *bindingsPtr = b; 4558 /* if attId == NULL then we are not starting a namespace scope */ 4559 if (attId && parser->m_startNamespaceDeclHandler) 4560 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, 4561 prefix->binding ? 
uri : 0); 4562 return XML_ERROR_NONE; 4563 } 4564 4565 /* The idea here is to avoid using stack for each CDATA section when 4566 the whole file is parsed with one call. 4567 */ 4568 static enum XML_Error PTRCALL 4569 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, 4570 const char **endPtr) { 4571 enum XML_Error result = doCdataSection( 4572 parser, parser->m_encoding, &start, end, endPtr, 4573 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4574 if (result != XML_ERROR_NONE) 4575 return result; 4576 if (start) { 4577 if (parser->m_parentParser) { /* we are parsing an external entity */ 4578 parser->m_processor = externalEntityContentProcessor; 4579 return externalEntityContentProcessor(parser, start, end, endPtr); 4580 } else { 4581 parser->m_processor = contentProcessor; 4582 return contentProcessor(parser, start, end, endPtr); 4583 } 4584 } 4585 return result; 4586 } 4587 4588 /* startPtr gets set to non-null if the section is closed, and to null if 4589 the section is not yet closed. 4590 */ 4591 static enum XML_Error 4592 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4593 const char *end, const char **nextPtr, XML_Bool haveMore, 4594 enum XML_Account account) { 4595 const char *s = *startPtr; 4596 const char **eventPP; 4597 const char **eventEndPP; 4598 if (enc == parser->m_encoding) { 4599 eventPP = &parser->m_eventPtr; 4600 *eventPP = s; 4601 eventEndPP = &parser->m_eventEndPtr; 4602 } else { 4603 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4604 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4605 } 4606 *eventPP = s; 4607 *startPtr = NULL; 4608 4609 for (;;) { 4610 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4611 int tok = XmlCdataSectionTok(enc, s, end, &next); 4612 #if XML_GE == 1 4613 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4614 accountingOnAbort(parser); 4615 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4616 } 4617 #else 4618 UNUSED_P(account); 4619 #endif 4620 *eventEndPP = next; 4621 switch (tok) { 4622 case XML_TOK_CDATA_SECT_CLOSE: 4623 if (parser->m_endCdataSectionHandler) 4624 parser->m_endCdataSectionHandler(parser->m_handlerArg); 4625 /* BEGIN disabled code */ 4626 /* see comment under XML_TOK_CDATA_SECT_OPEN */ 4627 else if ((0) && parser->m_characterDataHandler) 4628 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4629 0); 4630 /* END disabled code */ 4631 else if (parser->m_defaultHandler) 4632 reportDefault(parser, enc, s, next); 4633 *startPtr = next; 4634 *nextPtr = next; 4635 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4636 return XML_ERROR_ABORTED; 4637 else 4638 return XML_ERROR_NONE; 4639 case XML_TOK_DATA_NEWLINE: 4640 if (parser->m_characterDataHandler) { 4641 XML_Char c = 0xA; 4642 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 4643 } else if (parser->m_defaultHandler) 4644 reportDefault(parser, enc, s, next); 4645 break; 4646 case XML_TOK_DATA_CHARS: { 4647 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 4648 if (charDataHandler) { 4649 if (MUST_CONVERT(enc, s)) { 4650 for (;;) { 4651 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 4652 const enum XML_Convert_Result convert_res = XmlConvert( 4653 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 4654 *eventEndPP = next; 4655 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4656 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 4657 if ((convert_res == XML_CONVERT_COMPLETED) 4658 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 4659 break; 4660 *eventPP = s; 4661 } 4662 } else 4663 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 4664 (int)((const XML_Char *)next - (const XML_Char *)s)); 4665 } else if (parser->m_defaultHandler) 4666 reportDefault(parser, 
enc, s, next); 4667 } break; 4668 case XML_TOK_INVALID: 4669 *eventPP = next; 4670 return XML_ERROR_INVALID_TOKEN; 4671 case XML_TOK_PARTIAL_CHAR: 4672 if (haveMore) { 4673 *nextPtr = s; 4674 return XML_ERROR_NONE; 4675 } 4676 return XML_ERROR_PARTIAL_CHAR; 4677 case XML_TOK_PARTIAL: 4678 case XML_TOK_NONE: 4679 if (haveMore) { 4680 *nextPtr = s; 4681 return XML_ERROR_NONE; 4682 } 4683 return XML_ERROR_UNCLOSED_CDATA_SECTION; 4684 default: 4685 /* Every token returned by XmlCdataSectionTok() has its own 4686 * explicit case, so this default case will never be executed. 4687 * We retain it as a safety net and exclude it from the coverage 4688 * statistics. 4689 * 4690 * LCOV_EXCL_START 4691 */ 4692 *eventPP = next; 4693 return XML_ERROR_UNEXPECTED_STATE; 4694 /* LCOV_EXCL_STOP */ 4695 } 4696 4697 switch (parser->m_parsingStatus.parsing) { 4698 case XML_SUSPENDED: 4699 *eventPP = next; 4700 *nextPtr = next; 4701 return XML_ERROR_NONE; 4702 case XML_FINISHED: 4703 *eventPP = next; 4704 return XML_ERROR_ABORTED; 4705 case XML_PARSING: 4706 if (parser->m_reenter) { 4707 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 4708 } 4709 /* Fall through */ 4710 default:; 4711 *eventPP = s = next; 4712 } 4713 } 4714 /* not reached */ 4715 } 4716 4717 #ifdef XML_DTD 4718 4719 /* The idea here is to avoid using stack for each IGNORE section when 4720 the whole file is parsed with one call. 4721 */ 4722 static enum XML_Error PTRCALL 4723 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, 4724 const char **endPtr) { 4725 enum XML_Error result 4726 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, 4727 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer); 4728 if (result != XML_ERROR_NONE) 4729 return result; 4730 if (start) { 4731 parser->m_processor = prologProcessor; 4732 return prologProcessor(parser, start, end, endPtr); 4733 } 4734 return result; 4735 } 4736 4737 /* startPtr gets set to non-null is the section is closed, and to null 4738 if the section is not yet closed. 4739 */ 4740 static enum XML_Error 4741 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4742 const char *end, const char **nextPtr, XML_Bool haveMore) { 4743 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4744 int tok; 4745 const char *s = *startPtr; 4746 const char **eventPP; 4747 const char **eventEndPP; 4748 if (enc == parser->m_encoding) { 4749 eventPP = &parser->m_eventPtr; 4750 *eventPP = s; 4751 eventEndPP = &parser->m_eventEndPtr; 4752 } else { 4753 /* It's not entirely clear, but it seems the following two lines 4754 * of code cannot be executed. The only occasions on which 'enc' 4755 * is not 'encoding' are when this function is called 4756 * from the internal entity processing, and IGNORE sections are an 4757 * error in internal entities. 4758 * 4759 * Since it really isn't clear that this is true, we keep the code 4760 * and just remove it from our coverage tests. 4761 * 4762 * LCOV_EXCL_START 4763 */ 4764 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4765 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4766 /* LCOV_EXCL_STOP */ 4767 } 4768 *eventPP = s; 4769 *startPtr = NULL; 4770 tok = XmlIgnoreSectionTok(enc, s, end, &next); 4771 # if XML_GE == 1 4772 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4773 XML_ACCOUNT_DIRECT)) { 4774 accountingOnAbort(parser); 4775 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4776 } 4777 # endif 4778 *eventEndPP = next; 4779 switch (tok) { 4780 case XML_TOK_IGNORE_SECT: 4781 if (parser->m_defaultHandler) 4782 reportDefault(parser, enc, s, next); 4783 *startPtr = next; 4784 *nextPtr = next; 4785 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4786 return XML_ERROR_ABORTED; 4787 else 4788 return XML_ERROR_NONE; 4789 case XML_TOK_INVALID: 4790 *eventPP = next; 4791 return XML_ERROR_INVALID_TOKEN; 4792 case XML_TOK_PARTIAL_CHAR: 4793 if (haveMore) { 4794 *nextPtr = s; 4795 return XML_ERROR_NONE; 4796 } 4797 return XML_ERROR_PARTIAL_CHAR; 4798 case XML_TOK_PARTIAL: 4799 case XML_TOK_NONE: 4800 if (haveMore) { 4801 *nextPtr = s; 4802 return XML_ERROR_NONE; 4803 } 4804 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ 4805 default: 4806 /* All of the tokens that XmlIgnoreSectionTok() returns have 4807 * explicit cases to handle them, so this default case is never 4808 * executed. We keep it as a safety net anyway, and remove it 4809 * from our test coverage statistics. 4810 * 4811 * LCOV_EXCL_START 4812 */ 4813 *eventPP = next; 4814 return XML_ERROR_UNEXPECTED_STATE; 4815 /* LCOV_EXCL_STOP */ 4816 } 4817 /* not reached */ 4818 } 4819 4820 #endif /* XML_DTD */ 4821 4822 static enum XML_Error 4823 initializeEncoding(XML_Parser parser) { 4824 const char *s; 4825 #ifdef XML_UNICODE 4826 char encodingBuf[128]; 4827 /* See comments about `protocolEncodingName` in parserInit() */ 4828 if (! 
parser->m_protocolEncodingName) 4829 s = NULL; 4830 else { 4831 int i; 4832 for (i = 0; parser->m_protocolEncodingName[i]; i++) { 4833 if (i == sizeof(encodingBuf) - 1 4834 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { 4835 encodingBuf[0] = '\0'; 4836 break; 4837 } 4838 encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; 4839 } 4840 encodingBuf[i] = '\0'; 4841 s = encodingBuf; 4842 } 4843 #else 4844 s = parser->m_protocolEncodingName; 4845 #endif 4846 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( 4847 &parser->m_initEncoding, &parser->m_encoding, s)) 4848 return XML_ERROR_NONE; 4849 return handleUnknownEncoding(parser, parser->m_protocolEncodingName); 4850 } 4851 4852 static enum XML_Error 4853 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, 4854 const char *next) { 4855 const char *encodingName = NULL; 4856 const XML_Char *storedEncName = NULL; 4857 const ENCODING *newEncoding = NULL; 4858 const char *version = NULL; 4859 const char *versionend = NULL; 4860 const XML_Char *storedversion = NULL; 4861 int standalone = -1; 4862 4863 #if XML_GE == 1 4864 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, 4865 XML_ACCOUNT_DIRECT)) { 4866 accountingOnAbort(parser); 4867 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4868 } 4869 #endif 4870 4871 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( 4872 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, 4873 &version, &versionend, &encodingName, &newEncoding, &standalone)) { 4874 if (isGeneralTextEntity) 4875 return XML_ERROR_TEXT_DECL; 4876 else 4877 return XML_ERROR_XML_DECL; 4878 } 4879 if (! 
isGeneralTextEntity && standalone == 1) { 4880 parser->m_dtd->standalone = XML_TRUE; 4881 #ifdef XML_DTD 4882 if (parser->m_paramEntityParsing 4883 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) 4884 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 4885 #endif /* XML_DTD */ 4886 } 4887 if (parser->m_xmlDeclHandler) { 4888 if (encodingName != NULL) { 4889 storedEncName = poolStoreString( 4890 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4891 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4892 if (! storedEncName) 4893 return XML_ERROR_NO_MEMORY; 4894 poolFinish(&parser->m_temp2Pool); 4895 } 4896 if (version) { 4897 storedversion 4898 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, 4899 versionend - parser->m_encoding->minBytesPerChar); 4900 if (! storedversion) 4901 return XML_ERROR_NO_MEMORY; 4902 } 4903 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, 4904 standalone); 4905 } else if (parser->m_defaultHandler) 4906 reportDefault(parser, parser->m_encoding, s, next); 4907 if (parser->m_protocolEncodingName == NULL) { 4908 if (newEncoding) { 4909 /* Check that the specified encoding does not conflict with what 4910 * the parser has already deduced. Do we have the same number 4911 * of bytes in the smallest representation of a character? If 4912 * this is UTF-16, is it the same endianness? 4913 */ 4914 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar 4915 || (newEncoding->minBytesPerChar == 2 4916 && newEncoding != parser->m_encoding)) { 4917 parser->m_eventPtr = encodingName; 4918 return XML_ERROR_INCORRECT_ENCODING; 4919 } 4920 parser->m_encoding = newEncoding; 4921 } else if (encodingName) { 4922 enum XML_Error result; 4923 if (! storedEncName) { 4924 storedEncName = poolStoreString( 4925 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4926 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4927 if (! 
storedEncName) 4928 return XML_ERROR_NO_MEMORY; 4929 } 4930 result = handleUnknownEncoding(parser, storedEncName); 4931 poolClear(&parser->m_temp2Pool); 4932 if (result == XML_ERROR_UNKNOWN_ENCODING) 4933 parser->m_eventPtr = encodingName; 4934 return result; 4935 } 4936 } 4937 4938 if (storedEncName || storedversion) 4939 poolClear(&parser->m_temp2Pool); 4940 4941 return XML_ERROR_NONE; 4942 } 4943 4944 static enum XML_Error 4945 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { 4946 if (parser->m_unknownEncodingHandler) { 4947 XML_Encoding info; 4948 int i; 4949 for (i = 0; i < 256; i++) 4950 info.map[i] = -1; 4951 info.convert = NULL; 4952 info.data = NULL; 4953 info.release = NULL; 4954 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, 4955 encodingName, &info)) { 4956 ENCODING *enc; 4957 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); 4958 if (! parser->m_unknownEncodingMem) { 4959 if (info.release) 4960 info.release(info.data); 4961 return XML_ERROR_NO_MEMORY; 4962 } 4963 enc = (parser->m_ns ? 
XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( 4964 parser->m_unknownEncodingMem, info.map, info.convert, info.data); 4965 if (enc) { 4966 parser->m_unknownEncodingData = info.data; 4967 parser->m_unknownEncodingRelease = info.release; 4968 parser->m_encoding = enc; 4969 return XML_ERROR_NONE; 4970 } 4971 } 4972 if (info.release != NULL) 4973 info.release(info.data); 4974 } 4975 return XML_ERROR_UNKNOWN_ENCODING; 4976 } 4977 4978 static enum XML_Error PTRCALL 4979 prologInitProcessor(XML_Parser parser, const char *s, const char *end, 4980 const char **nextPtr) { 4981 enum XML_Error result = initializeEncoding(parser); 4982 if (result != XML_ERROR_NONE) 4983 return result; 4984 parser->m_processor = prologProcessor; 4985 return prologProcessor(parser, s, end, nextPtr); 4986 } 4987 4988 #ifdef XML_DTD 4989 4990 static enum XML_Error PTRCALL 4991 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, 4992 const char **nextPtr) { 4993 enum XML_Error result = initializeEncoding(parser); 4994 if (result != XML_ERROR_NONE) 4995 return result; 4996 4997 /* we know now that XML_Parse(Buffer) has been called, 4998 so we consider the external parameter entity read */ 4999 parser->m_dtd->paramEntityRead = XML_TRUE; 5000 5001 if (parser->m_prologState.inEntityValue) { 5002 parser->m_processor = entityValueInitProcessor; 5003 return entityValueInitProcessor(parser, s, end, nextPtr); 5004 } else { 5005 parser->m_processor = externalParEntProcessor; 5006 return externalParEntProcessor(parser, s, end, nextPtr); 5007 } 5008 } 5009 5010 static enum XML_Error PTRCALL 5011 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, 5012 const char **nextPtr) { 5013 int tok; 5014 const char *start = s; 5015 const char *next = start; 5016 parser->m_eventPtr = start; 5017 5018 for (;;) { 5019 tok = XmlPrologTok(parser->m_encoding, start, end, &next); 5020 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: 5021 - 
storeEntityValue 5022 - processXmlDecl 5023 */ 5024 parser->m_eventEndPtr = next; 5025 if (tok <= 0) { 5026 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5027 *nextPtr = s; 5028 return XML_ERROR_NONE; 5029 } 5030 switch (tok) { 5031 case XML_TOK_INVALID: 5032 return XML_ERROR_INVALID_TOKEN; 5033 case XML_TOK_PARTIAL: 5034 return XML_ERROR_UNCLOSED_TOKEN; 5035 case XML_TOK_PARTIAL_CHAR: 5036 return XML_ERROR_PARTIAL_CHAR; 5037 case XML_TOK_NONE: /* start == end */ 5038 default: 5039 break; 5040 } 5041 /* found end of entity value - can store it now */ 5042 return storeEntityValue(parser, parser->m_encoding, s, end, 5043 XML_ACCOUNT_DIRECT, NULL); 5044 } else if (tok == XML_TOK_XML_DECL) { 5045 enum XML_Error result; 5046 result = processXmlDecl(parser, 0, start, next); 5047 if (result != XML_ERROR_NONE) 5048 return result; 5049 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For 5050 * that to happen, a parameter entity parsing handler must have attempted 5051 * to suspend the parser, which fails and raises an error. The parser can 5052 * be aborted, but can't be suspended. 5053 */ 5054 if (parser->m_parsingStatus.parsing == XML_FINISHED) 5055 return XML_ERROR_ABORTED; 5056 *nextPtr = next; 5057 /* stop scanning for text declaration - we found one */ 5058 parser->m_processor = entityValueProcessor; 5059 return entityValueProcessor(parser, next, end, nextPtr); 5060 } 5061 /* XmlPrologTok has now set the encoding based on the BOM it found, and we 5062 must move s and nextPtr forward to consume the BOM. 5063 5064 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we 5065 would leave the BOM in the buffer and return. On the next call to this 5066 function, our XmlPrologTok call would return XML_TOK_INVALID, since it 5067 is not valid to have multiple BOMs. 5068 */ 5069 else if (tok == XML_TOK_BOM) { 5070 # if XML_GE == 1 5071 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5072 XML_ACCOUNT_DIRECT)) { 5073 accountingOnAbort(parser); 5074 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5075 } 5076 # endif 5077 5078 *nextPtr = next; 5079 s = next; 5080 } 5081 /* If we get this token, we have the start of what might be a 5082 normal tag, but not a declaration (i.e. it doesn't begin with 5083 "<!"). In a DTD context, that isn't legal. 5084 */ 5085 else if (tok == XML_TOK_INSTANCE_START) { 5086 *nextPtr = next; 5087 return XML_ERROR_SYNTAX; 5088 } 5089 start = next; 5090 parser->m_eventPtr = start; 5091 } 5092 } 5093 5094 static enum XML_Error PTRCALL 5095 externalParEntProcessor(XML_Parser parser, const char *s, const char *end, 5096 const char **nextPtr) { 5097 const char *next = s; 5098 int tok; 5099 5100 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5101 if (tok <= 0) { 5102 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5103 *nextPtr = s; 5104 return XML_ERROR_NONE; 5105 } 5106 switch (tok) { 5107 case XML_TOK_INVALID: 5108 return XML_ERROR_INVALID_TOKEN; 5109 case XML_TOK_PARTIAL: 5110 return XML_ERROR_UNCLOSED_TOKEN; 5111 case XML_TOK_PARTIAL_CHAR: 5112 return XML_ERROR_PARTIAL_CHAR; 5113 case XML_TOK_NONE: /* start == end */ 5114 default: 5115 break; 5116 } 5117 } 5118 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. 5119 However, when parsing an external subset, doProlog will not accept a BOM 5120 as valid, and report a syntax error, so we have to skip the BOM, and 5121 account for the BOM bytes. 5122 */ 5123 else if (tok == XML_TOK_BOM) { 5124 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5125 XML_ACCOUNT_DIRECT)) { 5126 accountingOnAbort(parser); 5127 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5128 } 5129 5130 s = next; 5131 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5132 } 5133 5134 parser->m_processor = prologProcessor; 5135 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5136 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 5137 XML_ACCOUNT_DIRECT); 5138 } 5139 5140 static enum XML_Error PTRCALL 5141 entityValueProcessor(XML_Parser parser, const char *s, const char *end, 5142 const char **nextPtr) { 5143 const char *start = s; 5144 const char *next = s; 5145 const ENCODING *enc = parser->m_encoding; 5146 int tok; 5147 5148 for (;;) { 5149 tok = XmlPrologTok(enc, start, end, &next); 5150 /* Note: These bytes are accounted later in: 5151 - storeEntityValue 5152 */ 5153 if (tok <= 0) { 5154 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5155 *nextPtr = s; 5156 return XML_ERROR_NONE; 5157 } 5158 switch (tok) { 5159 case XML_TOK_INVALID: 5160 return XML_ERROR_INVALID_TOKEN; 5161 case XML_TOK_PARTIAL: 5162 return XML_ERROR_UNCLOSED_TOKEN; 5163 case XML_TOK_PARTIAL_CHAR: 5164 return XML_ERROR_PARTIAL_CHAR; 5165 case XML_TOK_NONE: /* start == end */ 5166 default: 5167 break; 5168 } 5169 /* found end of entity value - can store it now */ 5170 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); 5171 } 5172 start = next; 5173 } 5174 } 5175 5176 #endif /* XML_DTD */ 5177 5178 static enum XML_Error PTRCALL 5179 prologProcessor(XML_Parser parser, const char *s, const char *end, 5180 const char **nextPtr) { 5181 const char *next = s; 5182 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5183 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5184 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, 5185 XML_ACCOUNT_DIRECT); 5186 } 5187 5188 static enum XML_Error 5189 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, 5190 int tok, const char *next, const char **nextPtr, XML_Bool haveMore, 5191 XML_Bool allowClosingDoctype, enum XML_Account account) { 5192 #ifdef XML_DTD 5193 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; 5194 #endif /* XML_DTD */ 5195 static const XML_Char atypeCDATA[] 5196 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 5197 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; 5198 static const XML_Char atypeIDREF[] 5199 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; 5200 static const XML_Char atypeIDREFS[] 5201 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; 5202 static const XML_Char atypeENTITY[] 5203 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; 5204 static const XML_Char atypeENTITIES[] 5205 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, 5206 ASCII_I, ASCII_E, ASCII_S, '\0'}; 5207 static const XML_Char atypeNMTOKEN[] 5208 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; 5209 static const XML_Char atypeNMTOKENS[] 5210 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, 5211 ASCII_E, ASCII_N, ASCII_S, '\0'}; 5212 static const XML_Char notationPrefix[] 5213 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, 5214 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; 5215 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; 5216 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; 5217 5218 #ifndef XML_DTD 5219 UNUSED_P(account); 5220 #endif 5221 5222 /* save one level of indirection */ 5223 DTD *const dtd = parser->m_dtd; 5224 5225 const char **eventPP; 5226 const char **eventEndPP; 5227 enum XML_Content_Quant quant; 5228 5229 if (enc == parser->m_encoding) { 5230 eventPP = &parser->m_eventPtr; 5231 eventEndPP = &parser->m_eventEndPtr; 5232 } else { 5233 eventPP = 
&(parser->m_openInternalEntities->internalEventPtr); 5234 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 5235 } 5236 5237 for (;;) { 5238 int role; 5239 XML_Bool handleDefault = XML_TRUE; 5240 *eventPP = s; 5241 *eventEndPP = next; 5242 if (tok <= 0) { 5243 if (haveMore && tok != XML_TOK_INVALID) { 5244 *nextPtr = s; 5245 return XML_ERROR_NONE; 5246 } 5247 switch (tok) { 5248 case XML_TOK_INVALID: 5249 *eventPP = next; 5250 return XML_ERROR_INVALID_TOKEN; 5251 case XML_TOK_PARTIAL: 5252 return XML_ERROR_UNCLOSED_TOKEN; 5253 case XML_TOK_PARTIAL_CHAR: 5254 return XML_ERROR_PARTIAL_CHAR; 5255 case -XML_TOK_PROLOG_S: 5256 tok = -tok; 5257 break; 5258 case XML_TOK_NONE: 5259 #ifdef XML_DTD 5260 /* for internal PE NOT referenced between declarations */ 5261 if (enc != parser->m_encoding 5262 && ! parser->m_openInternalEntities->betweenDecl) { 5263 *nextPtr = s; 5264 return XML_ERROR_NONE; 5265 } 5266 /* WFC: PE Between Declarations - must check that PE contains 5267 complete markup, not only for external PEs, but also for 5268 internal PEs if the reference occurs between declarations. 5269 */ 5270 if (parser->m_isParamEntity || enc != parser->m_encoding) { 5271 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) 5272 == XML_ROLE_ERROR) 5273 return XML_ERROR_INCOMPLETE_PE; 5274 *nextPtr = s; 5275 return XML_ERROR_NONE; 5276 } 5277 #endif /* XML_DTD */ 5278 return XML_ERROR_NO_ELEMENTS; 5279 default: 5280 tok = -tok; 5281 next = end; 5282 break; 5283 } 5284 } 5285 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); 5286 #if XML_GE == 1 5287 switch (role) { 5288 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor 5289 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl 5290 # ifdef XML_DTD 5291 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl 5292 # endif 5293 break; 5294 default: 5295 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 5296 accountingOnAbort(parser); 5297 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5298 } 5299 } 5300 #endif 5301 switch (role) { 5302 case XML_ROLE_XML_DECL: { 5303 enum XML_Error result = processXmlDecl(parser, 0, s, next); 5304 if (result != XML_ERROR_NONE) 5305 return result; 5306 enc = parser->m_encoding; 5307 handleDefault = XML_FALSE; 5308 } break; 5309 case XML_ROLE_DOCTYPE_NAME: 5310 if (parser->m_startDoctypeDeclHandler) { 5311 parser->m_doctypeName 5312 = poolStoreString(&parser->m_tempPool, enc, s, next); 5313 if (! parser->m_doctypeName) 5314 return XML_ERROR_NO_MEMORY; 5315 poolFinish(&parser->m_tempPool); 5316 parser->m_doctypePubid = NULL; 5317 handleDefault = XML_FALSE; 5318 } 5319 parser->m_doctypeSysid = NULL; /* always initialize to NULL */ 5320 break; 5321 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: 5322 if (parser->m_startDoctypeDeclHandler) { 5323 parser->m_startDoctypeDeclHandler( 5324 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 5325 parser->m_doctypePubid, 1); 5326 parser->m_doctypeName = NULL; 5327 poolClear(&parser->m_tempPool); 5328 handleDefault = XML_FALSE; 5329 } 5330 break; 5331 #ifdef XML_DTD 5332 case XML_ROLE_TEXT_DECL: { 5333 enum XML_Error result = processXmlDecl(parser, 1, s, next); 5334 if (result != XML_ERROR_NONE) 5335 return result; 5336 enc = parser->m_encoding; 5337 handleDefault = XML_FALSE; 5338 } break; 5339 #endif /* XML_DTD */ 5340 case XML_ROLE_DOCTYPE_PUBLIC_ID: 5341 #ifdef XML_DTD 5342 parser->m_useForeignDTD = XML_FALSE; 5343 parser->m_declEntity = (ENTITY *)lookup( 5344 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5345 if (! parser->m_declEntity) 5346 return XML_ERROR_NO_MEMORY; 5347 #endif /* XML_DTD */ 5348 dtd->hasParamEntityRefs = XML_TRUE; 5349 if (parser->m_startDoctypeDeclHandler) { 5350 XML_Char *pubId; 5351 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 5352 return XML_ERROR_PUBLICID; 5353 pubId = poolStoreString(&parser->m_tempPool, enc, 5354 s + enc->minBytesPerChar, 5355 next - enc->minBytesPerChar); 5356 if (! pubId) 5357 return XML_ERROR_NO_MEMORY; 5358 normalizePublicId(pubId); 5359 poolFinish(&parser->m_tempPool); 5360 parser->m_doctypePubid = pubId; 5361 handleDefault = XML_FALSE; 5362 goto alreadyChecked; 5363 } 5364 /* fall through */ 5365 case XML_ROLE_ENTITY_PUBLIC_ID: 5366 if (! XmlIsPublicId(enc, s, next, eventPP)) 5367 return XML_ERROR_PUBLICID; 5368 alreadyChecked: 5369 if (dtd->keepProcessing && parser->m_declEntity) { 5370 XML_Char *tem 5371 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5372 next - enc->minBytesPerChar); 5373 if (! tem) 5374 return XML_ERROR_NO_MEMORY; 5375 normalizePublicId(tem); 5376 parser->m_declEntity->publicId = tem; 5377 poolFinish(&dtd->pool); 5378 /* Don't suppress the default handler if we fell through from 5379 * the XML_ROLE_DOCTYPE_PUBLIC_ID case. 
5380 */ 5381 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) 5382 handleDefault = XML_FALSE; 5383 } 5384 break; 5385 case XML_ROLE_DOCTYPE_CLOSE: 5386 if (allowClosingDoctype != XML_TRUE) { 5387 /* Must not close doctype from within expanded parameter entities */ 5388 return XML_ERROR_INVALID_TOKEN; 5389 } 5390 5391 if (parser->m_doctypeName) { 5392 parser->m_startDoctypeDeclHandler( 5393 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 5394 parser->m_doctypePubid, 0); 5395 poolClear(&parser->m_tempPool); 5396 handleDefault = XML_FALSE; 5397 } 5398 /* parser->m_doctypeSysid will be non-NULL in the case of a previous 5399 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler 5400 was not set, indicating an external subset 5401 */ 5402 #ifdef XML_DTD 5403 if (parser->m_doctypeSysid || parser->m_useForeignDTD) { 5404 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 5405 dtd->hasParamEntityRefs = XML_TRUE; 5406 if (parser->m_paramEntityParsing 5407 && parser->m_externalEntityRefHandler) { 5408 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5409 externalSubsetName, sizeof(ENTITY)); 5410 if (! entity) { 5411 /* The external subset name "#" will have already been 5412 * inserted into the hash table at the start of the 5413 * external entity parsing, so no allocation will happen 5414 * and lookup() cannot fail. 5415 */ 5416 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 5417 } 5418 if (parser->m_useForeignDTD) 5419 entity->base = parser->m_curBase; 5420 dtd->paramEntityRead = XML_FALSE; 5421 if (! parser->m_externalEntityRefHandler( 5422 parser->m_externalEntityRefHandlerArg, 0, entity->base, 5423 entity->systemId, entity->publicId)) 5424 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5425 if (dtd->paramEntityRead) { 5426 if (! dtd->standalone && parser->m_notStandaloneHandler 5427 && ! 
parser->m_notStandaloneHandler(parser->m_handlerArg)) 5428 return XML_ERROR_NOT_STANDALONE; 5429 } 5430 /* if we didn't read the foreign DTD then this means that there 5431 is no external subset and we must reset dtd->hasParamEntityRefs 5432 */ 5433 else if (! parser->m_doctypeSysid) 5434 dtd->hasParamEntityRefs = hadParamEntityRefs; 5435 /* end of DTD - no need to update dtd->keepProcessing */ 5436 } 5437 parser->m_useForeignDTD = XML_FALSE; 5438 } 5439 #endif /* XML_DTD */ 5440 if (parser->m_endDoctypeDeclHandler) { 5441 parser->m_endDoctypeDeclHandler(parser->m_handlerArg); 5442 handleDefault = XML_FALSE; 5443 } 5444 break; 5445 case XML_ROLE_INSTANCE_START: 5446 #ifdef XML_DTD 5447 /* if there is no DOCTYPE declaration then now is the 5448 last chance to read the foreign DTD 5449 */ 5450 if (parser->m_useForeignDTD) { 5451 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 5452 dtd->hasParamEntityRefs = XML_TRUE; 5453 if (parser->m_paramEntityParsing 5454 && parser->m_externalEntityRefHandler) { 5455 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5456 externalSubsetName, sizeof(ENTITY)); 5457 if (! entity) 5458 return XML_ERROR_NO_MEMORY; 5459 entity->base = parser->m_curBase; 5460 dtd->paramEntityRead = XML_FALSE; 5461 if (! parser->m_externalEntityRefHandler( 5462 parser->m_externalEntityRefHandlerArg, 0, entity->base, 5463 entity->systemId, entity->publicId)) 5464 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5465 if (dtd->paramEntityRead) { 5466 if (! dtd->standalone && parser->m_notStandaloneHandler 5467 && ! 
parser->m_notStandaloneHandler(parser->m_handlerArg)) 5468 return XML_ERROR_NOT_STANDALONE; 5469 } 5470 /* if we didn't read the foreign DTD then this means that there 5471 is no external subset and we must reset dtd->hasParamEntityRefs 5472 */ 5473 else 5474 dtd->hasParamEntityRefs = hadParamEntityRefs; 5475 /* end of DTD - no need to update dtd->keepProcessing */ 5476 } 5477 } 5478 #endif /* XML_DTD */ 5479 parser->m_processor = contentProcessor; 5480 return contentProcessor(parser, s, end, nextPtr); 5481 case XML_ROLE_ATTLIST_ELEMENT_NAME: 5482 parser->m_declElementType = getElementType(parser, enc, s, next); 5483 if (! parser->m_declElementType) 5484 return XML_ERROR_NO_MEMORY; 5485 goto checkAttListDeclHandler; 5486 case XML_ROLE_ATTRIBUTE_NAME: 5487 parser->m_declAttributeId = getAttributeId(parser, enc, s, next); 5488 if (! parser->m_declAttributeId) 5489 return XML_ERROR_NO_MEMORY; 5490 parser->m_declAttributeIsCdata = XML_FALSE; 5491 parser->m_declAttributeType = NULL; 5492 parser->m_declAttributeIsId = XML_FALSE; 5493 goto checkAttListDeclHandler; 5494 case XML_ROLE_ATTRIBUTE_TYPE_CDATA: 5495 parser->m_declAttributeIsCdata = XML_TRUE; 5496 parser->m_declAttributeType = atypeCDATA; 5497 goto checkAttListDeclHandler; 5498 case XML_ROLE_ATTRIBUTE_TYPE_ID: 5499 parser->m_declAttributeIsId = XML_TRUE; 5500 parser->m_declAttributeType = atypeID; 5501 goto checkAttListDeclHandler; 5502 case XML_ROLE_ATTRIBUTE_TYPE_IDREF: 5503 parser->m_declAttributeType = atypeIDREF; 5504 goto checkAttListDeclHandler; 5505 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: 5506 parser->m_declAttributeType = atypeIDREFS; 5507 goto checkAttListDeclHandler; 5508 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: 5509 parser->m_declAttributeType = atypeENTITY; 5510 goto checkAttListDeclHandler; 5511 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: 5512 parser->m_declAttributeType = atypeENTITIES; 5513 goto checkAttListDeclHandler; 5514 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: 5515 parser->m_declAttributeType = 
atypeNMTOKEN; 5516 goto checkAttListDeclHandler; 5517 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: 5518 parser->m_declAttributeType = atypeNMTOKENS; 5519 checkAttListDeclHandler: 5520 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 5521 handleDefault = XML_FALSE; 5522 break; 5523 case XML_ROLE_ATTRIBUTE_ENUM_VALUE: 5524 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: 5525 if (dtd->keepProcessing && parser->m_attlistDeclHandler) { 5526 const XML_Char *prefix; 5527 if (parser->m_declAttributeType) { 5528 prefix = enumValueSep; 5529 } else { 5530 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix 5531 : enumValueStart); 5532 } 5533 if (! poolAppendString(&parser->m_tempPool, prefix)) 5534 return XML_ERROR_NO_MEMORY; 5535 if (! poolAppend(&parser->m_tempPool, enc, s, next)) 5536 return XML_ERROR_NO_MEMORY; 5537 parser->m_declAttributeType = parser->m_tempPool.start; 5538 handleDefault = XML_FALSE; 5539 } 5540 break; 5541 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: 5542 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: 5543 if (dtd->keepProcessing) { 5544 if (! defineAttribute(parser->m_declElementType, 5545 parser->m_declAttributeId, 5546 parser->m_declAttributeIsCdata, 5547 parser->m_declAttributeIsId, 0, parser)) 5548 return XML_ERROR_NO_MEMORY; 5549 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5550 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5551 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5552 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5553 /* Enumerated or Notation type */ 5554 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5555 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5556 return XML_ERROR_NO_MEMORY; 5557 parser->m_declAttributeType = parser->m_tempPool.start; 5558 poolFinish(&parser->m_tempPool); 5559 } 5560 *eventEndPP = s; 5561 parser->m_attlistDeclHandler( 5562 parser->m_handlerArg, parser->m_declElementType->name, 5563 parser->m_declAttributeId->name, parser->m_declAttributeType, 0, 5564 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); 5565 handleDefault = XML_FALSE; 5566 } 5567 } 5568 poolClear(&parser->m_tempPool); 5569 break; 5570 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: 5571 case XML_ROLE_FIXED_ATTRIBUTE_VALUE: 5572 if (dtd->keepProcessing) { 5573 const XML_Char *attVal; 5574 enum XML_Error result = storeAttributeValue( 5575 parser, enc, parser->m_declAttributeIsCdata, 5576 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, 5577 XML_ACCOUNT_NONE); 5578 if (result) 5579 return result; 5580 attVal = poolStart(&dtd->pool); 5581 poolFinish(&dtd->pool); 5582 /* ID attributes aren't allowed to have a default */ 5583 if (! defineAttribute( 5584 parser->m_declElementType, parser->m_declAttributeId, 5585 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) 5586 return XML_ERROR_NO_MEMORY; 5587 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5588 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5589 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5590 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5591 /* Enumerated or Notation type */ 5592 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5593 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5594 return XML_ERROR_NO_MEMORY; 5595 parser->m_declAttributeType = parser->m_tempPool.start; 5596 poolFinish(&parser->m_tempPool); 5597 } 5598 *eventEndPP = s; 5599 parser->m_attlistDeclHandler( 5600 parser->m_handlerArg, parser->m_declElementType->name, 5601 parser->m_declAttributeId->name, parser->m_declAttributeType, 5602 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); 5603 poolClear(&parser->m_tempPool); 5604 handleDefault = XML_FALSE; 5605 } 5606 } 5607 break; 5608 case XML_ROLE_ENTITY_VALUE: 5609 if (dtd->keepProcessing) { 5610 #if XML_GE == 1 5611 // This will store the given replacement text in 5612 // parser->m_declEntity->textPtr. 5613 enum XML_Error result = callStoreEntityValue( 5614 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, 5615 XML_ACCOUNT_NONE); 5616 if (parser->m_declEntity) { 5617 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); 5618 parser->m_declEntity->textLen 5619 = (int)(poolLength(&dtd->entityValuePool)); 5620 poolFinish(&dtd->entityValuePool); 5621 if (parser->m_entityDeclHandler) { 5622 *eventEndPP = s; 5623 parser->m_entityDeclHandler( 5624 parser->m_handlerArg, parser->m_declEntity->name, 5625 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5626 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5627 handleDefault = XML_FALSE; 5628 } 5629 } else 5630 poolDiscard(&dtd->entityValuePool); 5631 if (result != XML_ERROR_NONE) 5632 return result; 5633 #else 5634 // This will store "&entity123;" in parser->m_declEntity->textPtr 5635 // to end up as "&entity123;" in the handler. 
5636 if (parser->m_declEntity != NULL) { 5637 const enum XML_Error result 5638 = storeSelfEntityValue(parser, parser->m_declEntity); 5639 if (result != XML_ERROR_NONE) 5640 return result; 5641 5642 if (parser->m_entityDeclHandler) { 5643 *eventEndPP = s; 5644 parser->m_entityDeclHandler( 5645 parser->m_handlerArg, parser->m_declEntity->name, 5646 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5647 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5648 handleDefault = XML_FALSE; 5649 } 5650 } 5651 #endif 5652 } 5653 break; 5654 case XML_ROLE_DOCTYPE_SYSTEM_ID: 5655 #ifdef XML_DTD 5656 parser->m_useForeignDTD = XML_FALSE; 5657 #endif /* XML_DTD */ 5658 dtd->hasParamEntityRefs = XML_TRUE; 5659 if (parser->m_startDoctypeDeclHandler) { 5660 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, 5661 s + enc->minBytesPerChar, 5662 next - enc->minBytesPerChar); 5663 if (parser->m_doctypeSysid == NULL) 5664 return XML_ERROR_NO_MEMORY; 5665 poolFinish(&parser->m_tempPool); 5666 handleDefault = XML_FALSE; 5667 } 5668 #ifdef XML_DTD 5669 else 5670 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL 5671 for the case where no parser->m_startDoctypeDeclHandler is set */ 5672 parser->m_doctypeSysid = externalSubsetName; 5673 #endif /* XML_DTD */ 5674 if (! dtd->standalone 5675 #ifdef XML_DTD 5676 && ! parser->m_paramEntityParsing 5677 #endif /* XML_DTD */ 5678 && parser->m_notStandaloneHandler 5679 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5680 return XML_ERROR_NOT_STANDALONE; 5681 #ifndef XML_DTD 5682 break; 5683 #else /* XML_DTD */ 5684 if (! parser->m_declEntity) { 5685 parser->m_declEntity = (ENTITY *)lookup( 5686 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5687 if (! 
parser->m_declEntity) 5688 return XML_ERROR_NO_MEMORY; 5689 parser->m_declEntity->publicId = NULL; 5690 } 5691 #endif /* XML_DTD */ 5692 /* fall through */ 5693 case XML_ROLE_ENTITY_SYSTEM_ID: 5694 if (dtd->keepProcessing && parser->m_declEntity) { 5695 parser->m_declEntity->systemId 5696 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5697 next - enc->minBytesPerChar); 5698 if (! parser->m_declEntity->systemId) 5699 return XML_ERROR_NO_MEMORY; 5700 parser->m_declEntity->base = parser->m_curBase; 5701 poolFinish(&dtd->pool); 5702 /* Don't suppress the default handler if we fell through from 5703 * the XML_ROLE_DOCTYPE_SYSTEM_ID case. 5704 */ 5705 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) 5706 handleDefault = XML_FALSE; 5707 } 5708 break; 5709 case XML_ROLE_ENTITY_COMPLETE: 5710 #if XML_GE == 0 5711 // This will store "&entity123;" in entity->textPtr 5712 // to end up as "&entity123;" in the handler. 5713 if (parser->m_declEntity != NULL) { 5714 const enum XML_Error result 5715 = storeSelfEntityValue(parser, parser->m_declEntity); 5716 if (result != XML_ERROR_NONE) 5717 return result; 5718 } 5719 #endif 5720 if (dtd->keepProcessing && parser->m_declEntity 5721 && parser->m_entityDeclHandler) { 5722 *eventEndPP = s; 5723 parser->m_entityDeclHandler( 5724 parser->m_handlerArg, parser->m_declEntity->name, 5725 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, 5726 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); 5727 handleDefault = XML_FALSE; 5728 } 5729 break; 5730 case XML_ROLE_ENTITY_NOTATION_NAME: 5731 if (dtd->keepProcessing && parser->m_declEntity) { 5732 parser->m_declEntity->notation 5733 = poolStoreString(&dtd->pool, enc, s, next); 5734 if (! 
parser->m_declEntity->notation) 5735 return XML_ERROR_NO_MEMORY; 5736 poolFinish(&dtd->pool); 5737 if (parser->m_unparsedEntityDeclHandler) { 5738 *eventEndPP = s; 5739 parser->m_unparsedEntityDeclHandler( 5740 parser->m_handlerArg, parser->m_declEntity->name, 5741 parser->m_declEntity->base, parser->m_declEntity->systemId, 5742 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5743 handleDefault = XML_FALSE; 5744 } else if (parser->m_entityDeclHandler) { 5745 *eventEndPP = s; 5746 parser->m_entityDeclHandler( 5747 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, 5748 parser->m_declEntity->base, parser->m_declEntity->systemId, 5749 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5750 handleDefault = XML_FALSE; 5751 } 5752 } 5753 break; 5754 case XML_ROLE_GENERAL_ENTITY_NAME: { 5755 if (XmlPredefinedEntityName(enc, s, next)) { 5756 parser->m_declEntity = NULL; 5757 break; 5758 } 5759 if (dtd->keepProcessing) { 5760 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5761 if (! name) 5762 return XML_ERROR_NO_MEMORY; 5763 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, 5764 name, sizeof(ENTITY)); 5765 if (! parser->m_declEntity) 5766 return XML_ERROR_NO_MEMORY; 5767 if (parser->m_declEntity->name != name) { 5768 poolDiscard(&dtd->pool); 5769 parser->m_declEntity = NULL; 5770 } else { 5771 poolFinish(&dtd->pool); 5772 parser->m_declEntity->publicId = NULL; 5773 parser->m_declEntity->is_param = XML_FALSE; 5774 /* if we have a parent parser or are reading an internal parameter 5775 entity, then the entity declaration is not considered "internal" 5776 */ 5777 parser->m_declEntity->is_internal 5778 = ! 
(parser->m_parentParser || parser->m_openInternalEntities); 5779 if (parser->m_entityDeclHandler) 5780 handleDefault = XML_FALSE; 5781 } 5782 } else { 5783 poolDiscard(&dtd->pool); 5784 parser->m_declEntity = NULL; 5785 } 5786 } break; 5787 case XML_ROLE_PARAM_ENTITY_NAME: 5788 #ifdef XML_DTD 5789 if (dtd->keepProcessing) { 5790 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5791 if (! name) 5792 return XML_ERROR_NO_MEMORY; 5793 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5794 name, sizeof(ENTITY)); 5795 if (! parser->m_declEntity) 5796 return XML_ERROR_NO_MEMORY; 5797 if (parser->m_declEntity->name != name) { 5798 poolDiscard(&dtd->pool); 5799 parser->m_declEntity = NULL; 5800 } else { 5801 poolFinish(&dtd->pool); 5802 parser->m_declEntity->publicId = NULL; 5803 parser->m_declEntity->is_param = XML_TRUE; 5804 /* if we have a parent parser or are reading an internal parameter 5805 entity, then the entity declaration is not considered "internal" 5806 */ 5807 parser->m_declEntity->is_internal 5808 = ! (parser->m_parentParser || parser->m_openInternalEntities); 5809 if (parser->m_entityDeclHandler) 5810 handleDefault = XML_FALSE; 5811 } 5812 } else { 5813 poolDiscard(&dtd->pool); 5814 parser->m_declEntity = NULL; 5815 } 5816 #else /* not XML_DTD */ 5817 parser->m_declEntity = NULL; 5818 #endif /* XML_DTD */ 5819 break; 5820 case XML_ROLE_NOTATION_NAME: 5821 parser->m_declNotationPublicId = NULL; 5822 parser->m_declNotationName = NULL; 5823 if (parser->m_notationDeclHandler) { 5824 parser->m_declNotationName 5825 = poolStoreString(&parser->m_tempPool, enc, s, next); 5826 if (! parser->m_declNotationName) 5827 return XML_ERROR_NO_MEMORY; 5828 poolFinish(&parser->m_tempPool); 5829 handleDefault = XML_FALSE; 5830 } 5831 break; 5832 case XML_ROLE_NOTATION_PUBLIC_ID: 5833 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 5834 return XML_ERROR_PUBLICID; 5835 if (parser 5836 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ 5837 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, 5838 s + enc->minBytesPerChar, 5839 next - enc->minBytesPerChar); 5840 if (! tem) 5841 return XML_ERROR_NO_MEMORY; 5842 normalizePublicId(tem); 5843 parser->m_declNotationPublicId = tem; 5844 poolFinish(&parser->m_tempPool); 5845 handleDefault = XML_FALSE; 5846 } 5847 break; 5848 case XML_ROLE_NOTATION_SYSTEM_ID: 5849 if (parser->m_declNotationName && parser->m_notationDeclHandler) { 5850 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, 5851 s + enc->minBytesPerChar, 5852 next - enc->minBytesPerChar); 5853 if (! systemId) 5854 return XML_ERROR_NO_MEMORY; 5855 *eventEndPP = s; 5856 parser->m_notationDeclHandler( 5857 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5858 systemId, parser->m_declNotationPublicId); 5859 handleDefault = XML_FALSE; 5860 } 5861 poolClear(&parser->m_tempPool); 5862 break; 5863 case XML_ROLE_NOTATION_NO_SYSTEM_ID: 5864 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { 5865 *eventEndPP = s; 5866 parser->m_notationDeclHandler( 5867 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5868 0, parser->m_declNotationPublicId); 5869 handleDefault = XML_FALSE; 5870 } 5871 poolClear(&parser->m_tempPool); 5872 break; 5873 case XML_ROLE_ERROR: 5874 switch (tok) { 5875 case XML_TOK_PARAM_ENTITY_REF: 5876 /* PE references in internal subset are 5877 not allowed within declarations. 
*/ 5878 return XML_ERROR_PARAM_ENTITY_REF; 5879 case XML_TOK_XML_DECL: 5880 return XML_ERROR_MISPLACED_XML_PI; 5881 default: 5882 return XML_ERROR_SYNTAX; 5883 } 5884 #ifdef XML_DTD 5885 case XML_ROLE_IGNORE_SECT: { 5886 enum XML_Error result; 5887 if (parser->m_defaultHandler) 5888 reportDefault(parser, enc, s, next); 5889 handleDefault = XML_FALSE; 5890 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); 5891 if (result != XML_ERROR_NONE) 5892 return result; 5893 else if (! next) { 5894 parser->m_processor = ignoreSectionProcessor; 5895 return result; 5896 } 5897 } break; 5898 #endif /* XML_DTD */ 5899 case XML_ROLE_GROUP_OPEN: 5900 if (parser->m_prologState.level >= parser->m_groupSize) { 5901 if (parser->m_groupSize) { 5902 { 5903 /* Detect and prevent integer overflow */ 5904 if (parser->m_groupSize > (unsigned int)(-1) / 2u) { 5905 return XML_ERROR_NO_MEMORY; 5906 } 5907 5908 char *const new_connector = REALLOC( 5909 parser, parser->m_groupConnector, parser->m_groupSize *= 2); 5910 if (new_connector == NULL) { 5911 parser->m_groupSize /= 2; 5912 return XML_ERROR_NO_MEMORY; 5913 } 5914 parser->m_groupConnector = new_connector; 5915 } 5916 5917 if (dtd->scaffIndex) { 5918 /* Detect and prevent integer overflow. 5919 * The preprocessor guard addresses the "always false" warning 5920 * from -Wtype-limits on platforms where 5921 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 5922 #if UINT_MAX >= SIZE_MAX 5923 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { 5924 return XML_ERROR_NO_MEMORY; 5925 } 5926 #endif 5927 5928 int *const new_scaff_index = REALLOC( 5929 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); 5930 if (new_scaff_index == NULL) 5931 return XML_ERROR_NO_MEMORY; 5932 dtd->scaffIndex = new_scaff_index; 5933 } 5934 } else { 5935 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32); 5936 if (! 
parser->m_groupConnector) { 5937 parser->m_groupSize = 0; 5938 return XML_ERROR_NO_MEMORY; 5939 } 5940 } 5941 } 5942 parser->m_groupConnector[parser->m_prologState.level] = 0; 5943 if (dtd->in_eldecl) { 5944 int myindex = nextScaffoldPart(parser); 5945 if (myindex < 0) 5946 return XML_ERROR_NO_MEMORY; 5947 assert(dtd->scaffIndex != NULL); 5948 dtd->scaffIndex[dtd->scaffLevel] = myindex; 5949 dtd->scaffLevel++; 5950 dtd->scaffold[myindex].type = XML_CTYPE_SEQ; 5951 if (parser->m_elementDeclHandler) 5952 handleDefault = XML_FALSE; 5953 } 5954 break; 5955 case XML_ROLE_GROUP_SEQUENCE: 5956 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) 5957 return XML_ERROR_SYNTAX; 5958 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; 5959 if (dtd->in_eldecl && parser->m_elementDeclHandler) 5960 handleDefault = XML_FALSE; 5961 break; 5962 case XML_ROLE_GROUP_CHOICE: 5963 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) 5964 return XML_ERROR_SYNTAX; 5965 if (dtd->in_eldecl 5966 && ! parser->m_groupConnector[parser->m_prologState.level] 5967 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5968 != XML_CTYPE_MIXED)) { 5969 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5970 = XML_CTYPE_CHOICE; 5971 if (parser->m_elementDeclHandler) 5972 handleDefault = XML_FALSE; 5973 } 5974 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; 5975 break; 5976 case XML_ROLE_PARAM_ENTITY_REF: 5977 #ifdef XML_DTD 5978 case XML_ROLE_INNER_PARAM_ENTITY_REF: 5979 dtd->hasParamEntityRefs = XML_TRUE; 5980 if (! parser->m_paramEntityParsing) 5981 dtd->keepProcessing = dtd->standalone; 5982 else { 5983 const XML_Char *name; 5984 ENTITY *entity; 5985 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5986 next - enc->minBytesPerChar); 5987 if (! 
name) 5988 return XML_ERROR_NO_MEMORY; 5989 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 5990 poolDiscard(&dtd->pool); 5991 /* first, determine if a check for an existing declaration is needed; 5992 if yes, check that the entity exists, and that it is internal, 5993 otherwise call the skipped entity handler 5994 */ 5995 if (parser->m_prologState.documentEntity 5996 && (dtd->standalone ? ! parser->m_openInternalEntities 5997 : ! dtd->hasParamEntityRefs)) { 5998 if (! entity) 5999 return XML_ERROR_UNDEFINED_ENTITY; 6000 else if (! entity->is_internal) { 6001 /* It's hard to exhaustively search the code to be sure, 6002 * but there doesn't seem to be a way of executing the 6003 * following line. There are two cases: 6004 * 6005 * If 'standalone' is false, the DTD must have no 6006 * parameter entities or we wouldn't have passed the outer 6007 * 'if' statement. That means the only entity in the hash 6008 * table is the external subset name "#" which cannot be 6009 * given as a parameter entity name in XML syntax, so the 6010 * lookup must have returned NULL and we don't even reach 6011 * the test for an internal entity. 6012 * 6013 * If 'standalone' is true, it does not seem to be 6014 * possible to create entities taking this code path that 6015 * are not internal entities, so fail the test above. 6016 * 6017 * Because this analysis is very uncertain, the code is 6018 * being left in place and merely removed from the 6019 * coverage test statistics. 6020 */ 6021 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ 6022 } 6023 } else if (! 
entity) { 6024 dtd->keepProcessing = dtd->standalone; 6025 /* cannot report skipped entities in declarations */ 6026 if ((role == XML_ROLE_PARAM_ENTITY_REF) 6027 && parser->m_skippedEntityHandler) { 6028 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); 6029 handleDefault = XML_FALSE; 6030 } 6031 break; 6032 } 6033 if (entity->open) 6034 return XML_ERROR_RECURSIVE_ENTITY_REF; 6035 if (entity->textPtr) { 6036 enum XML_Error result; 6037 XML_Bool betweenDecl 6038 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); 6039 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); 6040 if (result != XML_ERROR_NONE) 6041 return result; 6042 handleDefault = XML_FALSE; 6043 break; 6044 } 6045 if (parser->m_externalEntityRefHandler) { 6046 dtd->paramEntityRead = XML_FALSE; 6047 entity->open = XML_TRUE; 6048 entityTrackingOnOpen(parser, entity, __LINE__); 6049 if (! parser->m_externalEntityRefHandler( 6050 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6051 entity->systemId, entity->publicId)) { 6052 entityTrackingOnClose(parser, entity, __LINE__); 6053 entity->open = XML_FALSE; 6054 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6055 } 6056 entityTrackingOnClose(parser, entity, __LINE__); 6057 entity->open = XML_FALSE; 6058 handleDefault = XML_FALSE; 6059 if (! dtd->paramEntityRead) { 6060 dtd->keepProcessing = dtd->standalone; 6061 break; 6062 } 6063 } else { 6064 dtd->keepProcessing = dtd->standalone; 6065 break; 6066 } 6067 } 6068 #endif /* XML_DTD */ 6069 if (! dtd->standalone && parser->m_notStandaloneHandler 6070 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 6071 return XML_ERROR_NOT_STANDALONE; 6072 break; 6073 6074 /* Element declaration stuff */ 6075 6076 case XML_ROLE_ELEMENT_NAME: 6077 if (parser->m_elementDeclHandler) { 6078 parser->m_declElementType = getElementType(parser, enc, s, next); 6079 if (! 
parser->m_declElementType) 6080 return XML_ERROR_NO_MEMORY; 6081 dtd->scaffLevel = 0; 6082 dtd->scaffCount = 0; 6083 dtd->in_eldecl = XML_TRUE; 6084 handleDefault = XML_FALSE; 6085 } 6086 break; 6087 6088 case XML_ROLE_CONTENT_ANY: 6089 case XML_ROLE_CONTENT_EMPTY: 6090 if (dtd->in_eldecl) { 6091 if (parser->m_elementDeclHandler) { 6092 // NOTE: We are avoiding MALLOC(..) here to so that 6093 // applications that are not using XML_FreeContentModel but 6094 // plain free(..) or .free_fcn() to free the content model's 6095 // memory are safe. 6096 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content)); 6097 if (! content) 6098 return XML_ERROR_NO_MEMORY; 6099 content->quant = XML_CQUANT_NONE; 6100 content->name = NULL; 6101 content->numchildren = 0; 6102 content->children = NULL; 6103 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY 6104 : XML_CTYPE_EMPTY); 6105 *eventEndPP = s; 6106 parser->m_elementDeclHandler( 6107 parser->m_handlerArg, parser->m_declElementType->name, content); 6108 handleDefault = XML_FALSE; 6109 } 6110 dtd->in_eldecl = XML_FALSE; 6111 } 6112 break; 6113 6114 case XML_ROLE_CONTENT_PCDATA: 6115 if (dtd->in_eldecl) { 6116 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 6117 = XML_CTYPE_MIXED; 6118 if (parser->m_elementDeclHandler) 6119 handleDefault = XML_FALSE; 6120 } 6121 break; 6122 6123 case XML_ROLE_CONTENT_ELEMENT: 6124 quant = XML_CQUANT_NONE; 6125 goto elementContent; 6126 case XML_ROLE_CONTENT_ELEMENT_OPT: 6127 quant = XML_CQUANT_OPT; 6128 goto elementContent; 6129 case XML_ROLE_CONTENT_ELEMENT_REP: 6130 quant = XML_CQUANT_REP; 6131 goto elementContent; 6132 case XML_ROLE_CONTENT_ELEMENT_PLUS: 6133 quant = XML_CQUANT_PLUS; 6134 elementContent: 6135 if (dtd->in_eldecl) { 6136 ELEMENT_TYPE *el; 6137 const XML_Char *name; 6138 size_t nameLen; 6139 const char *nxt 6140 = (quant == XML_CQUANT_NONE ? 
next : next - enc->minBytesPerChar); 6141 int myindex = nextScaffoldPart(parser); 6142 if (myindex < 0) 6143 return XML_ERROR_NO_MEMORY; 6144 dtd->scaffold[myindex].type = XML_CTYPE_NAME; 6145 dtd->scaffold[myindex].quant = quant; 6146 el = getElementType(parser, enc, s, nxt); 6147 if (! el) 6148 return XML_ERROR_NO_MEMORY; 6149 name = el->name; 6150 dtd->scaffold[myindex].name = name; 6151 nameLen = 0; 6152 while (name[nameLen++]) 6153 ; 6154 6155 /* Detect and prevent integer overflow */ 6156 if (nameLen > UINT_MAX - dtd->contentStringLen) { 6157 return XML_ERROR_NO_MEMORY; 6158 } 6159 6160 dtd->contentStringLen += (unsigned)nameLen; 6161 if (parser->m_elementDeclHandler) 6162 handleDefault = XML_FALSE; 6163 } 6164 break; 6165 6166 case XML_ROLE_GROUP_CLOSE: 6167 quant = XML_CQUANT_NONE; 6168 goto closeGroup; 6169 case XML_ROLE_GROUP_CLOSE_OPT: 6170 quant = XML_CQUANT_OPT; 6171 goto closeGroup; 6172 case XML_ROLE_GROUP_CLOSE_REP: 6173 quant = XML_CQUANT_REP; 6174 goto closeGroup; 6175 case XML_ROLE_GROUP_CLOSE_PLUS: 6176 quant = XML_CQUANT_PLUS; 6177 closeGroup: 6178 if (dtd->in_eldecl) { 6179 if (parser->m_elementDeclHandler) 6180 handleDefault = XML_FALSE; 6181 dtd->scaffLevel--; 6182 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; 6183 if (dtd->scaffLevel == 0) { 6184 if (! handleDefault) { 6185 XML_Content *model = build_model(parser); 6186 if (! model) 6187 return XML_ERROR_NO_MEMORY; 6188 *eventEndPP = s; 6189 parser->m_elementDeclHandler( 6190 parser->m_handlerArg, parser->m_declElementType->name, model); 6191 } 6192 dtd->in_eldecl = XML_FALSE; 6193 dtd->contentStringLen = 0; 6194 } 6195 } 6196 break; 6197 /* End element declaration stuff */ 6198 6199 case XML_ROLE_PI: 6200 if (! reportProcessingInstruction(parser, enc, s, next)) 6201 return XML_ERROR_NO_MEMORY; 6202 handleDefault = XML_FALSE; 6203 break; 6204 case XML_ROLE_COMMENT: 6205 if (! 
reportComment(parser, enc, s, next)) 6206 return XML_ERROR_NO_MEMORY; 6207 handleDefault = XML_FALSE; 6208 break; 6209 case XML_ROLE_NONE: 6210 switch (tok) { 6211 case XML_TOK_BOM: 6212 handleDefault = XML_FALSE; 6213 break; 6214 } 6215 break; 6216 case XML_ROLE_DOCTYPE_NONE: 6217 if (parser->m_startDoctypeDeclHandler) 6218 handleDefault = XML_FALSE; 6219 break; 6220 case XML_ROLE_ENTITY_NONE: 6221 if (dtd->keepProcessing && parser->m_entityDeclHandler) 6222 handleDefault = XML_FALSE; 6223 break; 6224 case XML_ROLE_NOTATION_NONE: 6225 if (parser->m_notationDeclHandler) 6226 handleDefault = XML_FALSE; 6227 break; 6228 case XML_ROLE_ATTLIST_NONE: 6229 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 6230 handleDefault = XML_FALSE; 6231 break; 6232 case XML_ROLE_ELEMENT_NONE: 6233 if (parser->m_elementDeclHandler) 6234 handleDefault = XML_FALSE; 6235 break; 6236 } /* end of big switch */ 6237 6238 if (handleDefault && parser->m_defaultHandler) 6239 reportDefault(parser, enc, s, next); 6240 6241 switch (parser->m_parsingStatus.parsing) { 6242 case XML_SUSPENDED: 6243 *nextPtr = next; 6244 return XML_ERROR_NONE; 6245 case XML_FINISHED: 6246 return XML_ERROR_ABORTED; 6247 case XML_PARSING: 6248 if (parser->m_reenter) { 6249 *nextPtr = next; 6250 return XML_ERROR_NONE; 6251 } 6252 /* Fall through */ 6253 default: 6254 s = next; 6255 tok = XmlPrologTok(enc, s, end, &next); 6256 } 6257 } 6258 /* not reached */ 6259 }

/* Processor that tokenizes [s, end) with XmlPrologTok and accepts only
 * whitespace, processing instructions and comments; any other complete
 * token is rejected with XML_ERROR_JUNK_AFTER_DOC_ELEMENT.
 *
 * parser  - parser whose state (processor, event pointers) is updated
 * s, end  - input range to scan
 * nextPtr - receives the position from which scanning should resume
 *           whenever XML_ERROR_NONE is returned
 *
 * Returns XML_ERROR_NONE when the input is exhausted, when a partial token
 * is seen in a non-final buffer, or when the parser was suspended; an error
 * code otherwise.
 */
static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser, const char *s, const char *end,
                const char **nextPtr) {
  parser->m_processor = epilogProcessor;
  parser->m_eventPtr = s;
  for (;;) {
    const char *next = NULL;
    int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
                                  XML_ACCOUNT_DIRECT)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    parser->m_eventEndPtr = next;
    switch (tok) {
    /* report partial linebreak - it might be the last token */
    case -XML_TOK_PROLOG_S:
      if (parser->m_defaultHandler) {
        reportDefault(parser, parser->m_encoding, s, next);
        /* The default handler may have stopped the parser. */
        if (parser->m_parsingStatus.parsing == XML_FINISHED)
          return XML_ERROR_ABORTED;
      }
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      *nextPtr = s;
      return XML_ERROR_NONE;
    case XML_TOK_PROLOG_S:
      if (parser->m_defaultHandler)
        reportDefault(parser, parser->m_encoding, s, next);
      break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, parser->m_encoding, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_INVALID:
      parser->m_eventPtr = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      /* An incomplete token is only an error in the final buffer;
       * otherwise wait for more input. */
      if (! parser->m_parsingStatus.finalBuffer) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (! parser->m_parsingStatus.finalBuffer) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    default:
      return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
    }
    /* A handler invoked above may have changed the parsing status. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      parser->m_eventPtr = next;
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      parser->m_eventPtr = next;
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
      }
    /* Fall through */
    default:;
      parser->m_eventPtr = s = next;
    }
  }
}

/* Opens 'entity' by pushing an OPEN_INTERNAL_ENTITY record onto the open
 * list that matches 'type' (internal, attribute or value entities).
 *
 * The record is taken from the matching free list when one is available and
 * allocated with MALLOC otherwise.  The entity is marked open with nothing
 * processed yet.  For ENTITY_INTERNAL the parser's processor is switched to
 * internalEntityProcessor and a reenter is triggered.
 *
 * parser      - parser owning the open/free entity lists
 * entity      - entity to open
 * betweenDecl - stored verbatim in the open-entity record
 * type        - selects which pair of open/free lists is used
 *
 * Returns XML_ERROR_NONE on success, XML_ERROR_NO_MEMORY if the record
 * cannot be allocated, and XML_ERROR_UNEXPECTED_STATE for an unknown 'type'.
 */
static enum XML_Error
processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
              enum EntityType type) {
  OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
  switch (type) {
  case ENTITY_INTERNAL:
    parser->m_processor = internalEntityProcessor;
    openEntityList = &parser->m_openInternalEntities;
    freeEntityList = &parser->m_freeInternalEntities;
    break;
  case ENTITY_ATTRIBUTE:
    openEntityList = &parser->m_openAttributeEntities;
    freeEntityList = &parser->m_freeAttributeEntities;
    break;
  case ENTITY_VALUE:
    openEntityList = &parser->m_openValueEntities;
    freeEntityList = &parser->m_freeValueEntities;
    break;
    /* default case serves merely as a safety net in case of a
     * wrong entityType. Therefore we exclude the following lines
     * from the test coverage.
     *
     * LCOV_EXCL_START
     */
  default:
    // Should not reach here
    assert(0);
    /* With NDEBUG the assert above compiles to nothing; bail out instead of
     * continuing with uninitialized list pointers. */
    return XML_ERROR_UNEXPECTED_STATE;
    /* LCOV_EXCL_STOP */
  }

  /* Reuse a recycled record when possible, allocate otherwise. */
  if (*freeEntityList) {
    openEntity = *freeEntityList;
    *freeEntityList = openEntity->next;
  } else {
    openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
    if (! openEntity)
      return XML_ERROR_NO_MEMORY;
  }
  entity->open = XML_TRUE;
  entity->hasMore = XML_TRUE;
#if XML_GE == 1
  entityTrackingOnOpen(parser, entity, __LINE__);
#endif
  entity->processed = 0;
  /* Push the record onto the head of the selected open list. */
  openEntity->next = *openEntityList;
  *openEntityList = openEntity;
  openEntity->entity = entity;
  openEntity->type = type;
  openEntity->startTagLevel = parser->m_tagLevel;
  openEntity->betweenDecl = betweenDecl;
  openEntity->internalEventPtr = NULL;
  openEntity->internalEventEndPtr = NULL;

  // Only internal entities make use of the reenter flag
  // therefore no need to set it for other entity types
  if (type == ENTITY_INTERNAL) {
    triggerReenter(parser);
  }
  return XML_ERROR_NONE;
}

/* Processor that advances the entity at the head of
 * parser->m_openInternalEntities by one step per call.
 *
 * While the entity still has text to process (hasMore), its remaining
 * replacement text is run through doProlog (parameter entities) or
 * doContent (all other entities), and the function returns.  On a later
 * call, once hasMore is false, the entity is closed: it is popped from the
 * open list, its record is recycled onto the free list and, if no open
 * internal entity remains, the processor is reset to prologProcessor or
 * contentProcessor.
 *
 * The s, end and nextPtr arguments are unused; the input is the entity's
 * own stored text.
 *
 * Returns XML_ERROR_UNEXPECTED_STATE if no internal entity is open,
 * XML_ERROR_ASYNC_ENTITY if a non-parameter entity ends at a different tag
 * level than it started, any error from doProlog/doContent, and
 * XML_ERROR_NONE otherwise.
 */
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
                        const char **nextPtr) {
  UNUSED_P(s);
  UNUSED_P(end);
  UNUSED_P(nextPtr);
  ENTITY *entity;
  const char *textStart, *textEnd;
  const char *next;
  enum XML_Error result;
  OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
  if (! openEntity)
    return XML_ERROR_UNEXPECTED_STATE;

  entity = openEntity->entity;

  // This will return early
  if (entity->hasMore) {
    /* Resume after the part of the text that was already consumed. */
    textStart = ((const char *)entity->textPtr) + entity->processed;
    textEnd = (const char *)(entity->textPtr + entity->textLen);
    /* Set a safe default value in case 'next' does not get set */
    next = textStart;

    if (entity->is_param) {
      int tok
          = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
      result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                        tok, next, &next, XML_FALSE, XML_FALSE,
                        XML_ACCOUNT_ENTITY_EXPANSION);
    } else {
      result = doContent(parser, openEntity->startTagLevel,
                         parser->m_internalEncoding, textStart, textEnd, &next,
                         XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
    }

    if (result != XML_ERROR_NONE)
      return result;
    // Check if entity is complete, if not, mark down how much of it is
    // processed
    if (textEnd != next
        && (parser->m_parsingStatus.parsing == XML_SUSPENDED
            || (parser->m_parsingStatus.parsing == XML_PARSING
                && parser->m_reenter))) {
      entity->processed = (int)(next - (const char *)entity->textPtr);
      return result;
    }

    // Entity is complete. We cannot close it here since we need to first
    // process its possible inner entities (which are added to the
    // m_openInternalEntities during doProlog or doContent calls above)
    entity->hasMore = XML_FALSE;
    if (! entity->is_param
        && (openEntity->startTagLevel != parser->m_tagLevel)) {
      return XML_ERROR_ASYNC_ENTITY;
    }
    triggerReenter(parser);
    return result;
  } // End of entity processing, "if" block will return here

  // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
  entityTrackingOnClose(parser, entity, __LINE__);
#endif
  // openEntity is m_openInternalEntities' head, as we set it at the start of
  // this function and we skipped doProlog and doContent calls with hasMore set
  // to false. This means we can directly remove the head of
  // m_openInternalEntities
  assert(parser->m_openInternalEntities == openEntity);
  entity->open = XML_FALSE;
  parser->m_openInternalEntities = parser->m_openInternalEntities->next;

  /* put openEntity back in list of free instances */
  openEntity->next = parser->m_freeInternalEntities;
  parser->m_freeInternalEntities = openEntity;

  if (parser->m_openInternalEntities == NULL) {
    parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
  }
  triggerReenter(parser);
  return XML_ERROR_NONE;
}

/* Processor that consumes no input and simply reports the error code
 * already stored in the parser.  All input arguments are ignored.
 */
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser, const char *s, const char *end,
               const char **nextPtr) {
  UNUSED_P(s);
  UNUSED_P(end);
  UNUSED_P(nextPtr);
  return parser->m_errorCode;
}

/* Stores the attribute value found in [ptr, end) into 'pool', followed by a
 * terminating NUL character.
 *
 * The loop alternates between two sources: the raw input (while no
 * attribute entity is open) and the text of the entity at the head of
 * parser->m_openAttributeEntities.  A finished entity is closed and its
 * record recycled on a later iteration, after any entities it opened have
 * been handled.  For non-CDATA attributes a trailing 0x20 is chopped before
 * the terminator is appended.
 *
 * parser  - parser owning the open attribute entity list
 * enc     - encoding of the raw input range
 * isCdata - whether the attribute is of CDATA type
 * account - accounting category passed on for the raw input
 *
 * Returns XML_ERROR_NONE on success, XML_ERROR_NO_MEMORY if the terminator
 * cannot be appended, or the first error from appendAttributeValue.
 */
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                    const char *ptr, const char *end, STRING_POOL *pool,
                    enum XML_Account account) {
  const char *next = ptr;
  enum XML_Error result = XML_ERROR_NONE;

  while (1) {
    if (! parser->m_openAttributeEntities) {
      result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
                                    account, &next);
    } else {
      OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
      if (! openEntity)
        return XML_ERROR_UNEXPECTED_STATE;

      ENTITY *const entity = openEntity->entity;
      const char *const textStart
          = ((const char *)entity->textPtr) + entity->processed;
      const char *const textEnd
          = (const char *)(entity->textPtr + entity->textLen);
      /* Set a safe default value in case 'next' does not get set */
      const char *nextInEntity = textStart;
      if (entity->hasMore) {
        result = appendAttributeValue(
            parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
            pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
        if (result != XML_ERROR_NONE)
          break;
        // Check if entity is complete, if not, mark down how much of it is
        // processed. A XML_SUSPENDED check here is not required as
        // appendAttributeValue will never suspend the parser.
        if (textEnd != nextInEntity) {
          entity->processed
              = (int)(nextInEntity - (const char *)entity->textPtr);
          continue;
        }

        // Entity is complete. We cannot close it here since we need to first
        // process its possible inner entities (which are added to the
        // m_openAttributeEntities during appendAttributeValue)
        entity->hasMore = XML_FALSE;
        continue;
      } // End of entity processing, "if" block skips the rest

      // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
      entityTrackingOnClose(parser, entity, __LINE__);
#endif
      // openEntity is m_openAttributeEntities' head, since we set it at the
      // start of this function and because we skipped appendAttributeValue call
      // with hasMore set to false. This means we can directly remove the head
      // of m_openAttributeEntities
      assert(parser->m_openAttributeEntities == openEntity);
      entity->open = XML_FALSE;
      parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;

      /* put openEntity back in list of free instances */
      openEntity->next = parser->m_freeAttributeEntities;
      parser->m_freeAttributeEntities = openEntity;
    }

    // Break if an error occurred or there is nothing left to process
    if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
      break;
    }
  }

  if (result)
    return result;
  if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
    poolChop(pool);
  if (! poolAppendChar(pool, XML_T('\0')))
    return XML_ERROR_NO_MEMORY;
  return XML_ERROR_NONE;
}

6568 static enum XML_Error 6569 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 6570 const char *ptr, const char *end, STRING_POOL *pool, 6571 enum XML_Account account, const char **nextPtr) { 6572 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6573 #ifndef XML_DTD 6574 UNUSED_P(account); 6575 #endif 6576 6577 for (;;) { 6578 const char *next 6579 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ 6580 int tok = XmlAttributeValueTok(enc, ptr, end, &next); 6581 #if XML_GE == 1 6582 if (!
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { 6583 accountingOnAbort(parser); 6584 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6585 } 6586 #endif 6587 switch (tok) { 6588 case XML_TOK_NONE: 6589 if (nextPtr) { 6590 *nextPtr = next; 6591 } 6592 return XML_ERROR_NONE; 6593 case XML_TOK_INVALID: 6594 if (enc == parser->m_encoding) 6595 parser->m_eventPtr = next; 6596 return XML_ERROR_INVALID_TOKEN; 6597 case XML_TOK_PARTIAL: 6598 if (enc == parser->m_encoding) 6599 parser->m_eventPtr = ptr; 6600 return XML_ERROR_INVALID_TOKEN; 6601 case XML_TOK_CHAR_REF: { 6602 XML_Char buf[XML_ENCODE_MAX]; 6603 int i; 6604 int n = XmlCharRefNumber(enc, ptr); 6605 if (n < 0) { 6606 if (enc == parser->m_encoding) 6607 parser->m_eventPtr = ptr; 6608 return XML_ERROR_BAD_CHAR_REF; 6609 } 6610 if (! isCdata && n == 0x20 /* space */ 6611 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6612 break; 6613 n = XmlEncode(n, (ICHAR *)buf); 6614 /* The XmlEncode() functions can never return 0 here. That 6615 * error return happens if the code point passed in is either 6616 * negative or greater than or equal to 0x110000. The 6617 * XmlCharRefNumber() functions will all return a number 6618 * strictly less than 0x110000 or a negative value if an error 6619 * occurred. The negative value is intercepted above, so 6620 * XmlEncode() is never passed a value it might return an 6621 * error for. 6622 */ 6623 for (i = 0; i < n; i++) { 6624 if (! poolAppendChar(pool, buf[i])) 6625 return XML_ERROR_NO_MEMORY; 6626 } 6627 } break; 6628 case XML_TOK_DATA_CHARS: 6629 if (! poolAppend(pool, enc, ptr, next)) 6630 return XML_ERROR_NO_MEMORY; 6631 break; 6632 case XML_TOK_TRAILING_CR: 6633 next = ptr + enc->minBytesPerChar; 6634 /* fall through */ 6635 case XML_TOK_ATTRIBUTE_VALUE_S: 6636 case XML_TOK_DATA_NEWLINE: 6637 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6638 break; 6639 if (! 
poolAppendChar(pool, 0x20)) 6640 return XML_ERROR_NO_MEMORY; 6641 break; 6642 case XML_TOK_ENTITY_REF: { 6643 const XML_Char *name; 6644 ENTITY *entity; 6645 bool checkEntityDecl; 6646 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 6647 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 6648 if (ch) { 6649 #if XML_GE == 1 6650 /* NOTE: We are replacing 4-6 characters original input for 1 character 6651 * so there is no amplification and hence recording without 6652 * protection. */ 6653 accountingDiffTolerated(parser, tok, (char *)&ch, 6654 ((char *)&ch) + sizeof(XML_Char), __LINE__, 6655 XML_ACCOUNT_ENTITY_EXPANSION); 6656 #endif /* XML_GE == 1 */ 6657 if (! poolAppendChar(pool, ch)) 6658 return XML_ERROR_NO_MEMORY; 6659 break; 6660 } 6661 name = poolStoreString(&parser->m_temp2Pool, enc, 6662 ptr + enc->minBytesPerChar, 6663 next - enc->minBytesPerChar); 6664 if (! name) 6665 return XML_ERROR_NO_MEMORY; 6666 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 6667 poolDiscard(&parser->m_temp2Pool); 6668 /* First, determine if a check for an existing declaration is needed; 6669 if yes, check that the entity exists, and that it is internal. 6670 */ 6671 if (pool == &dtd->pool) /* are we called from prolog? */ 6672 checkEntityDecl = 6673 #ifdef XML_DTD 6674 parser->m_prologState.documentEntity && 6675 #endif /* XML_DTD */ 6676 (dtd->standalone ? ! parser->m_openInternalEntities 6677 : ! dtd->hasParamEntityRefs); 6678 else /* if (pool == &parser->m_tempPool): we are called from content */ 6679 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; 6680 if (checkEntityDecl) { 6681 if (! entity) 6682 return XML_ERROR_UNDEFINED_ENTITY; 6683 else if (! entity->is_internal) 6684 return XML_ERROR_ENTITY_DECLARED_IN_PE; 6685 } else if (! entity) { 6686 /* Cannot report skipped entity here - see comments on 6687 parser->m_skippedEntityHandler. 
6688 if (parser->m_skippedEntityHandler) 6689 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6690 */ 6691 /* Cannot call the default handler because this would be 6692 out of sync with the call to the startElementHandler. 6693 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) 6694 reportDefault(parser, enc, ptr, next); 6695 */ 6696 break; 6697 } 6698 if (entity->open) { 6699 if (enc == parser->m_encoding) { 6700 /* It does not appear that this line can be executed. 6701 * 6702 * The "if (entity->open)" check catches recursive entity 6703 * definitions. In order to be called with an open 6704 * entity, it must have gone through this code before and 6705 * been through the recursive call to 6706 * appendAttributeValue() some lines below. That call 6707 * sets the local encoding ("enc") to the parser's 6708 * internal encoding (internal_utf8 or internal_utf16), 6709 * which can never be the same as the principle encoding. 6710 * It doesn't appear there is another code path that gets 6711 * here with entity->open being TRUE. 6712 * 6713 * Since it is not certain that this logic is watertight, 6714 * we keep the line and merely exclude it from coverage 6715 * tests. 6716 */ 6717 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ 6718 } 6719 return XML_ERROR_RECURSIVE_ENTITY_REF; 6720 } 6721 if (entity->notation) { 6722 if (enc == parser->m_encoding) 6723 parser->m_eventPtr = ptr; 6724 return XML_ERROR_BINARY_ENTITY_REF; 6725 } 6726 if (! 
entity->textPtr) { 6727 if (enc == parser->m_encoding) 6728 parser->m_eventPtr = ptr; 6729 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 6730 } else { 6731 enum XML_Error result; 6732 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); 6733 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { 6734 *nextPtr = next; 6735 } 6736 return result; 6737 } 6738 } break; 6739 default: 6740 /* The only token returned by XmlAttributeValueTok() that does 6741 * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6742 * Getting that would require an entity name to contain an 6743 * incomplete XML character (e.g. \xE2\x82); however previous 6744 * tokenisers will have already recognised and rejected such 6745 * names before XmlAttributeValueTok() gets a look-in. This 6746 * default case should be retained as a safety net, but the code 6747 * excluded from coverage tests. 6748 * 6749 * LCOV_EXCL_START 6750 */ 6751 if (enc == parser->m_encoding) 6752 parser->m_eventPtr = ptr; 6753 return XML_ERROR_UNEXPECTED_STATE; 6754 /* LCOV_EXCL_STOP */ 6755 } 6756 ptr = next; 6757 } 6758 /* not reached */ 6759 } 6760 6761 #if XML_GE == 1 6762 static enum XML_Error 6763 storeEntityValue(XML_Parser parser, const ENCODING *enc, 6764 const char *entityTextPtr, const char *entityTextEnd, 6765 enum XML_Account account, const char **nextPtr) { 6766 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6767 STRING_POOL *pool = &(dtd->entityValuePool); 6768 enum XML_Error result = XML_ERROR_NONE; 6769 # ifdef XML_DTD 6770 int oldInEntityValue = parser->m_prologState.inEntityValue; 6771 parser->m_prologState.inEntityValue = 1; 6772 # else 6773 UNUSED_P(account); 6774 # endif /* XML_DTD */ 6775 /* never return Null for the value argument in EntityDeclHandler, 6776 since this would indicate an external entity; therefore we 6777 have to make sure that entityValuePool.start is not null */ 6778 if (! pool->blocks) { 6779 if (! 
poolGrow(pool))
      /* NOTE(review): this early return does not pass through the
         endEntityValue label, so under XML_DTD the inEntityValue flag
         set earlier in this function is not restored on this path, and
         *nextPtr is left untouched. */
      return XML_ERROR_NO_MEMORY;
  }

  /* Declared outside the loop because the endEntityValue epilogue reads it. */
  const char *next;
  for (;;) {
    next
        = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
    int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);

    /* Charge the bytes of this token before acting on it. */
    if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      goto endEntityValue;
    }

    switch (tok) {
    case XML_TOK_PARAM_ENTITY_REF:
#  ifdef XML_DTD
      if (parser->m_isParamEntity || enc != parser->m_encoding) {
        const XML_Char *name;
        ENTITY *entity;
        /* The name lies between the leading and trailing delimiter
           character of the reference token; it is only needed for the
           lookup and discarded right afterwards. */
        name = poolStoreString(&parser->m_tempPool, enc,
                               entityTextPtr + enc->minBytesPerChar,
                               next - enc->minBytesPerChar);
        if (! name) {
          result = XML_ERROR_NO_MEMORY;
          goto endEntityValue;
        }
        entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
        poolDiscard(&parser->m_tempPool);
        if (! entity) {
          /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
          /* cannot report skipped entity here - see comments on
             parser->m_skippedEntityHandler
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
          */
          dtd->keepProcessing = dtd->standalone;
          goto endEntityValue;
        }
        /* A reference to an entity that is already open, or to the one
           currently being declared, is recursive. */
        if (entity->open || (entity == parser->m_declEntity)) {
          if (enc == parser->m_encoding)
            parser->m_eventPtr = entityTextPtr;
          result = XML_ERROR_RECURSIVE_ENTITY_REF;
          goto endEntityValue;
        }
        if (entity->systemId) {
          /* External parameter entity: delegate to the application's
             handler, keeping the entity marked open for the duration of
             the call. */
          if (parser->m_externalEntityRefHandler) {
            dtd->paramEntityRead = XML_FALSE;
            entity->open = XML_TRUE;
            entityTrackingOnOpen(parser, entity, __LINE__);
            if (! parser->m_externalEntityRefHandler(
                    parser->m_externalEntityRefHandlerArg, 0, entity->base,
                    entity->systemId, entity->publicId)) {
              entityTrackingOnClose(parser, entity, __LINE__);
              entity->open = XML_FALSE;
              result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
              goto endEntityValue;
            }
            entityTrackingOnClose(parser, entity, __LINE__);
            entity->open = XML_FALSE;
            if (! dtd->paramEntityRead)
              dtd->keepProcessing = dtd->standalone;
          } else
            dtd->keepProcessing = dtd->standalone;
        } else {
          /* Internal parameter entity: hand it to processEntity() and
             return to the caller, which resumes from 'next'. */
          result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
          goto endEntityValue;
        }
        break;
      }
#  endif /* XML_DTD */
      /* In the internal subset, PE references are not legal
         within markup declarations, e.g entity values in this case. */
      parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_PARAM_ENTITY_REF;
      goto endEntityValue;
    case XML_TOK_NONE:
      result = XML_ERROR_NONE;
      goto endEntityValue;
    case XML_TOK_ENTITY_REF:
    case XML_TOK_DATA_CHARS:
      /* General entity references are stored unexpanded, exactly like
         plain character data. */
      if (! poolAppend(pool, enc, entityTextPtr, next)) {
        result = XML_ERROR_NO_MEMORY;
        goto endEntityValue;
      }
      break;
    case XML_TOK_TRAILING_CR:
      /* Consume exactly one character for the lone CR. */
      next = entityTextPtr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_DATA_NEWLINE:
      if (pool->end == pool->ptr && ! poolGrow(pool)) {
        result = XML_ERROR_NO_MEMORY;
        goto endEntityValue;
      }
      *(pool->ptr)++ = 0xA;
      break;
    case XML_TOK_CHAR_REF: {
      XML_Char buf[XML_ENCODE_MAX];
      int i;
      int n = XmlCharRefNumber(enc, entityTextPtr);
      if (n < 0) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = entityTextPtr;
        result = XML_ERROR_BAD_CHAR_REF;
        goto endEntityValue;
      }
      n = XmlEncode(n, (ICHAR *)buf);
      /* The XmlEncode() functions can never return 0 here.  That
       * error return happens if the code point passed in is either
       * negative or greater than or equal to 0x110000.  The
       * XmlCharRefNumber() functions will all return a number
       * strictly less than 0x110000 or a negative value if an error
       * occurred.  The negative value is intercepted above, so
       * XmlEncode() is never passed a value it might return an
       * error for.
       */
      for (i = 0; i < n; i++) {
        if (pool->end == pool->ptr && ! poolGrow(pool)) {
          result = XML_ERROR_NO_MEMORY;
          goto endEntityValue;
        }
        *(pool->ptr)++ = buf[i];
      }
    } break;
    case XML_TOK_PARTIAL:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_INVALID_TOKEN;
      goto endEntityValue;
    case XML_TOK_INVALID:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = next;
      result = XML_ERROR_INVALID_TOKEN;
      goto endEntityValue;
    default:
      /* This default case should be unnecessary -- all the tokens
       * that XmlEntityValueTok() can return have their own explicit
       * cases -- but should be retained for safety.  We do however
       * exclude it from the coverage statistics.
       *
       * LCOV_EXCL_START
       */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = entityTextPtr;
      result = XML_ERROR_UNEXPECTED_STATE;
      goto endEntityValue;
      /* LCOV_EXCL_STOP */
    }
    entityTextPtr = next;
  }
endEntityValue:
  /* Common epilogue for every exit taken from inside the loop. */
#  ifdef XML_DTD
  parser->m_prologState.inEntityValue = oldInEntityValue;
#  endif /* XML_DTD */
  // If 'nextPtr' is given, it should be updated during the processing
  if (nextPtr != NULL) {
    *nextPtr = next;
  }
  return result;
}

/* Driver around storeEntityValue().

   Each iteration either continues scanning the caller's text
   [next, entityTextEnd) when parser->m_openValueEntities is empty, or
   works on the head of that list: an entity with 'hasMore' set gets its
   remaining text scanned with the internal encoding, and an entity
   without it is closed and its list node moved to m_freeValueEntities.
   The loop stops on the first error, or once the list is empty and the
   caller's text has been fully consumed.

   Returns the last XML_Error produced (XML_ERROR_NONE on success). */
static enum XML_Error
callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
                     const char *entityTextPtr, const char *entityTextEnd,
                     enum XML_Account account) {
  const char *next = entityTextPtr;
  enum XML_Error result = XML_ERROR_NONE;
  while (1) {
    if (! parser->m_openValueEntities) {
      result
          = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
    } else {
      OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
      /* Defensive only: this branch is entered because the list head was
         just tested to be non-NULL. */
      if (! openEntity)
        return XML_ERROR_UNEXPECTED_STATE;

      ENTITY *const entity = openEntity->entity;
      /* Resume where the previous pass over this entity stopped. */
      const char *const textStart
          = ((const char *)entity->textPtr) + entity->processed;
      const char *const textEnd
          = (const char *)(entity->textPtr + entity->textLen);
      /* Set a safe default value in case 'next' does not get set */
      const char *nextInEntity = textStart;
      if (entity->hasMore) {
        result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
                                  textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
                                  &nextInEntity);
        if (result != XML_ERROR_NONE)
          break;
        // Check if entity is complete, if not, mark down how much of it is
        // processed. No XML_SUSPENDED check is made here.
        // NOTE(review): this comment used to name appendAttributeValue, but
        // the callee here is storeEntityValue; presumably it likewise never
        // suspends the parser -- confirm.
if (textEnd != nextInEntity) {
          /* Stopped early (an inner entity was opened): remember the
             offset so the next pass resumes from there. */
          entity->processed
              = (int)(nextInEntity - (const char *)entity->textPtr);
          continue;
        }

        // Entity is complete. We cannot close it here since we need to first
        // process its possible inner entities (which are added to the
        // m_openValueEntities during storeEntityValue)
        entity->hasMore = XML_FALSE;
        continue;
      } // End of entity processing, "if" block skips the rest

      // Remove fully processed openEntity from open entity list.
      /* The guard below sits inside a region that is itself compiled only
         when XML_GE == 1. */
#  if XML_GE == 1
      entityTrackingOnClose(parser, entity, __LINE__);
#  endif
      // openEntity is m_openValueEntities' head, since we set it at the
      // start of this function and because we skipped storeEntityValue call
      // with hasMore set to false. This means we can directly remove the head
      // of m_openValueEntities
      assert(parser->m_openValueEntities == openEntity);
      entity->open = XML_FALSE;
      parser->m_openValueEntities = parser->m_openValueEntities->next;

      /* put openEntity back in list of free instances */
      openEntity->next = parser->m_freeValueEntities;
      parser->m_freeValueEntities = openEntity;
    }

    // Break if an error occurred or there is nothing left to process
    if (result
        || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
      break;
    }
  }

  return result;
}

#else /* XML_GE == 0 */

/* Build the text start-marker + entity->name + end-marker in
   dtd->entityValuePool and store it as the entity's own value
   (entity->textPtr / entity->textLen).

   Returns XML_ERROR_NONE on success.  If any append fails the partial
   string is discarded and XML_ERROR_NO_MEMORY is returned. */
static enum XML_Error
storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
  // This will store "&entity123;" in entity->textPtr
  // to end up as "&entity123;" in the handler.
  const char *const entity_start = "&";
  const char *const entity_end = ";";

  STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
  if (! poolAppendString(pool, entity_start)
      || ! poolAppendString(pool, entity->name)
      || !
poolAppendString(pool, entity_end)) { 7027 poolDiscard(pool); 7028 return XML_ERROR_NO_MEMORY; 7029 } 7030 7031 entity->textPtr = poolStart(pool); 7032 entity->textLen = (int)(poolLength(pool)); 7033 poolFinish(pool); 7034 7035 return XML_ERROR_NONE; 7036 } 7037 7038 #endif /* XML_GE == 0 */ 7039 7040 static void FASTCALL 7041 normalizeLines(XML_Char *s) { 7042 XML_Char *p; 7043 for (;; s++) { 7044 if (*s == XML_T('\0')) 7045 return; 7046 if (*s == 0xD) 7047 break; 7048 } 7049 p = s; 7050 do { 7051 if (*s == 0xD) { 7052 *p++ = 0xA; 7053 if (*++s == 0xA) 7054 s++; 7055 } else 7056 *p++ = *s++; 7057 } while (*s); 7058 *p = XML_T('\0'); 7059 } 7060 7061 static int 7062 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 7063 const char *start, const char *end) { 7064 const XML_Char *target; 7065 XML_Char *data; 7066 const char *tem; 7067 if (! parser->m_processingInstructionHandler) { 7068 if (parser->m_defaultHandler) 7069 reportDefault(parser, enc, start, end); 7070 return 1; 7071 } 7072 start += enc->minBytesPerChar * 2; 7073 tem = start + XmlNameLength(enc, start); 7074 target = poolStoreString(&parser->m_tempPool, enc, start, tem); 7075 if (! target) 7076 return 0; 7077 poolFinish(&parser->m_tempPool); 7078 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), 7079 end - enc->minBytesPerChar * 2); 7080 if (! data) 7081 return 0; 7082 normalizeLines(data); 7083 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 7084 poolClear(&parser->m_tempPool); 7085 return 1; 7086 } 7087 7088 static int 7089 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, 7090 const char *end) { 7091 XML_Char *data; 7092 if (! parser->m_commentHandler) { 7093 if (parser->m_defaultHandler) 7094 reportDefault(parser, enc, start, end); 7095 return 1; 7096 } 7097 data = poolStoreString(&parser->m_tempPool, enc, 7098 start + enc->minBytesPerChar * 4, 7099 end - enc->minBytesPerChar * 3); 7100 if (! 
data) 7101 return 0; 7102 normalizeLines(data); 7103 parser->m_commentHandler(parser->m_handlerArg, data); 7104 poolClear(&parser->m_tempPool); 7105 return 1; 7106 } 7107 7108 static void 7109 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, 7110 const char *end) { 7111 if (MUST_CONVERT(enc, s)) { 7112 enum XML_Convert_Result convert_res; 7113 const char **eventPP; 7114 const char **eventEndPP; 7115 if (enc == parser->m_encoding) { 7116 eventPP = &parser->m_eventPtr; 7117 eventEndPP = &parser->m_eventEndPtr; 7118 } else { 7119 /* To get here, two things must be true; the parser must be 7120 * using a character encoding that is not the same as the 7121 * encoding passed in, and the encoding passed in must need 7122 * conversion to the internal format (UTF-8 unless XML_UNICODE 7123 * is defined). The only occasions on which the encoding passed 7124 * in is not the same as the parser's encoding are when it is 7125 * the internal encoding (e.g. a previously defined parameter 7126 * entity, already converted to internal format). This by 7127 * definition doesn't need conversion, so the whole branch never 7128 * gets executed. 7129 * 7130 * For safety's sake we don't delete these lines and merely 7131 * exclude them from coverage statistics. 
7132 * 7133 * LCOV_EXCL_START 7134 */ 7135 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 7136 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 7137 /* LCOV_EXCL_STOP */ 7138 } 7139 do { 7140 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 7141 convert_res 7142 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 7143 *eventEndPP = s; 7144 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, 7145 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 7146 *eventPP = s; 7147 } while ((convert_res != XML_CONVERT_COMPLETED) 7148 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); 7149 } else 7150 parser->m_defaultHandler( 7151 parser->m_handlerArg, (const XML_Char *)s, 7152 (int)((const XML_Char *)end - (const XML_Char *)s)); 7153 } 7154 7155 static int 7156 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, 7157 XML_Bool isId, const XML_Char *value, XML_Parser parser) { 7158 DEFAULT_ATTRIBUTE *att; 7159 if (value || isId) { 7160 /* The handling of default attributes gets messed up if we have 7161 a default which duplicates a non-default. */ 7162 int i; 7163 for (i = 0; i < type->nDefaultAtts; i++) 7164 if (attId == type->defaultAtts[i].id) 7165 return 1; 7166 if (isId && ! type->idAtt && ! attId->xmlns) 7167 type->idAtt = attId; 7168 } 7169 if (type->nDefaultAtts == type->allocDefaultAtts) { 7170 if (type->allocDefaultAtts == 0) { 7171 type->allocDefaultAtts = 8; 7172 type->defaultAtts 7173 = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 7174 if (! type->defaultAtts) { 7175 type->allocDefaultAtts = 0; 7176 return 0; 7177 } 7178 } else { 7179 DEFAULT_ATTRIBUTE *temp; 7180 7181 /* Detect and prevent integer overflow */ 7182 if (type->allocDefaultAtts > INT_MAX / 2) { 7183 return 0; 7184 } 7185 7186 int count = type->allocDefaultAtts * 2; 7187 7188 /* Detect and prevent integer overflow. 
7189 * The preprocessor guard addresses the "always false" warning 7190 * from -Wtype-limits on platforms where 7191 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 7192 #if UINT_MAX >= SIZE_MAX 7193 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { 7194 return 0; 7195 } 7196 #endif 7197 7198 temp = REALLOC(parser, type->defaultAtts, 7199 (count * sizeof(DEFAULT_ATTRIBUTE))); 7200 if (temp == NULL) 7201 return 0; 7202 type->allocDefaultAtts = count; 7203 type->defaultAtts = temp; 7204 } 7205 } 7206 att = type->defaultAtts + type->nDefaultAtts; 7207 att->id = attId; 7208 att->value = value; 7209 att->isCdata = isCdata; 7210 if (! isCdata) 7211 attId->maybeTokenized = XML_TRUE; 7212 type->nDefaultAtts += 1; 7213 return 1; 7214 } 7215 7216 static int 7217 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { 7218 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7219 const XML_Char *name; 7220 for (name = elementType->name; *name; name++) { 7221 if (*name == XML_T(ASCII_COLON)) { 7222 PREFIX *prefix; 7223 const XML_Char *s; 7224 for (s = elementType->name; s != name; s++) { 7225 if (! poolAppendChar(&dtd->pool, *s)) 7226 return 0; 7227 } 7228 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 7229 return 0; 7230 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), 7231 sizeof(PREFIX)); 7232 if (! prefix) 7233 return 0; 7234 if (prefix->name == poolStart(&dtd->pool)) 7235 poolFinish(&dtd->pool); 7236 else 7237 poolDiscard(&dtd->pool); 7238 elementType->prefix = prefix; 7239 break; 7240 } 7241 } 7242 return 1; 7243 } 7244 7245 static ATTRIBUTE_ID * 7246 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, 7247 const char *end) { 7248 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7249 ATTRIBUTE_ID *id; 7250 const XML_Char *name; 7251 if (! 
poolAppendChar(&dtd->pool, XML_T('\0'))) 7252 return NULL; 7253 name = poolStoreString(&dtd->pool, enc, start, end); 7254 if (! name) 7255 return NULL; 7256 /* skip quotation mark - its storage will be reused (like in name[-1]) */ 7257 ++name; 7258 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, 7259 sizeof(ATTRIBUTE_ID)); 7260 if (! id) 7261 return NULL; 7262 if (id->name != name) 7263 poolDiscard(&dtd->pool); 7264 else { 7265 poolFinish(&dtd->pool); 7266 if (! parser->m_ns) 7267 ; 7268 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) 7269 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) 7270 && name[4] == XML_T(ASCII_s) 7271 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 7272 if (name[5] == XML_T('\0')) 7273 id->prefix = &dtd->defaultPrefix; 7274 else 7275 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, 7276 sizeof(PREFIX)); 7277 id->xmlns = XML_TRUE; 7278 } else { 7279 int i; 7280 for (i = 0; name[i]; i++) { 7281 /* attributes without prefix are *not* in the default namespace */ 7282 if (name[i] == XML_T(ASCII_COLON)) { 7283 int j; 7284 for (j = 0; j < i; j++) { 7285 if (! poolAppendChar(&dtd->pool, name[j])) 7286 return NULL; 7287 } 7288 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 7289 return NULL; 7290 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, 7291 poolStart(&dtd->pool), sizeof(PREFIX)); 7292 if (! id->prefix) 7293 return NULL; 7294 if (id->prefix->name == poolStart(&dtd->pool)) 7295 poolFinish(&dtd->pool); 7296 else 7297 poolDiscard(&dtd->pool); 7298 break; 7299 } 7300 } 7301 } 7302 } 7303 return id; 7304 } 7305 7306 #define CONTEXT_SEP XML_T(ASCII_FF) 7307 7308 static const XML_Char * 7309 getContext(XML_Parser parser) { 7310 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7311 HASH_TABLE_ITER iter; 7312 XML_Bool needSep = XML_FALSE; 7313 7314 if (dtd->defaultPrefix.binding) { 7315 int i; 7316 int len; 7317 if (! 
poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 7318 return NULL; 7319 len = dtd->defaultPrefix.binding->uriLen; 7320 if (parser->m_namespaceSeparator) 7321 len--; 7322 for (i = 0; i < len; i++) { 7323 if (! poolAppendChar(&parser->m_tempPool, 7324 dtd->defaultPrefix.binding->uri[i])) { 7325 /* Because of memory caching, I don't believe this line can be 7326 * executed. 7327 * 7328 * This is part of a loop copying the default prefix binding 7329 * URI into the parser's temporary string pool. Previously, 7330 * that URI was copied into the same string pool, with a 7331 * terminating NUL character, as part of setContext(). When 7332 * the pool was cleared, that leaves a block definitely big 7333 * enough to hold the URI on the free block list of the pool. 7334 * The URI copy in getContext() therefore cannot run out of 7335 * memory. 7336 * 7337 * If the pool is used between the setContext() and 7338 * getContext() calls, the worst it can do is leave a bigger 7339 * block on the front of the free list. Given that this is 7340 * all somewhat inobvious and program logic can be changed, we 7341 * don't delete the line but we do exclude it from the test 7342 * coverage statistics. 7343 */ 7344 return NULL; /* LCOV_EXCL_LINE */ 7345 } 7346 } 7347 needSep = XML_TRUE; 7348 } 7349 7350 hashTableIterInit(&iter, &(dtd->prefixes)); 7351 for (;;) { 7352 int i; 7353 int len; 7354 const XML_Char *s; 7355 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); 7356 if (! prefix) 7357 break; 7358 if (! prefix->binding) { 7359 /* This test appears to be (justifiable) paranoia. There does 7360 * not seem to be a way of injecting a prefix without a binding 7361 * that doesn't get errored long before this function is called. 7362 * The test should remain for safety's sake, so we instead 7363 * exclude the following line from the coverage statistics. 7364 */ 7365 continue; /* LCOV_EXCL_LINE */ 7366 } 7367 if (needSep && ! 
poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 7368 return NULL; 7369 for (s = prefix->name; *s; s++) 7370 if (! poolAppendChar(&parser->m_tempPool, *s)) 7371 return NULL; 7372 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 7373 return NULL; 7374 len = prefix->binding->uriLen; 7375 if (parser->m_namespaceSeparator) 7376 len--; 7377 for (i = 0; i < len; i++) 7378 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) 7379 return NULL; 7380 needSep = XML_TRUE; 7381 } 7382 7383 hashTableIterInit(&iter, &(dtd->generalEntities)); 7384 for (;;) { 7385 const XML_Char *s; 7386 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 7387 if (! e) 7388 break; 7389 if (! e->open) 7390 continue; 7391 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 7392 return NULL; 7393 for (s = e->name; *s; s++) 7394 if (! poolAppendChar(&parser->m_tempPool, *s)) 7395 return 0; 7396 needSep = XML_TRUE; 7397 } 7398 7399 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7400 return NULL; 7401 return parser->m_tempPool.start; 7402 } 7403 7404 static XML_Bool 7405 setContext(XML_Parser parser, const XML_Char *context) { 7406 if (context == NULL) { 7407 return XML_FALSE; 7408 } 7409 7410 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7411 const XML_Char *s = context; 7412 7413 while (*context != XML_T('\0')) { 7414 if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 7415 ENTITY *e; 7416 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7417 return XML_FALSE; 7418 e = (ENTITY *)lookup(parser, &dtd->generalEntities, 7419 poolStart(&parser->m_tempPool), 0); 7420 if (e) 7421 e->open = XML_TRUE; 7422 if (*s != XML_T('\0')) 7423 s++; 7424 context = s; 7425 poolDiscard(&parser->m_tempPool); 7426 } else if (*s == XML_T(ASCII_EQUALS)) { 7427 PREFIX *prefix; 7428 if (poolLength(&parser->m_tempPool) == 0) 7429 prefix = &dtd->defaultPrefix; 7430 else { 7431 if (! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7432 return XML_FALSE; 7433 prefix 7434 = (PREFIX *)lookup(parser, &dtd->prefixes, 7435 poolStart(&parser->m_tempPool), sizeof(PREFIX)); 7436 if (! prefix) 7437 return XML_FALSE; 7438 if (prefix->name == poolStart(&parser->m_tempPool)) { 7439 prefix->name = poolCopyString(&dtd->pool, prefix->name); 7440 if (! prefix->name) 7441 return XML_FALSE; 7442 } 7443 poolDiscard(&parser->m_tempPool); 7444 } 7445 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); 7446 context++) 7447 if (! poolAppendChar(&parser->m_tempPool, *context)) 7448 return XML_FALSE; 7449 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7450 return XML_FALSE; 7451 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), 7452 &parser->m_inheritedBindings) 7453 != XML_ERROR_NONE) 7454 return XML_FALSE; 7455 poolDiscard(&parser->m_tempPool); 7456 if (*context != XML_T('\0')) 7457 ++context; 7458 s = context; 7459 } else { 7460 if (! poolAppendChar(&parser->m_tempPool, *s)) 7461 return XML_FALSE; 7462 s++; 7463 } 7464 } 7465 return XML_TRUE; 7466 } 7467 7468 static void FASTCALL 7469 normalizePublicId(XML_Char *publicId) { 7470 XML_Char *p = publicId; 7471 XML_Char *s; 7472 for (s = publicId; *s; s++) { 7473 switch (*s) { 7474 case 0x20: 7475 case 0xD: 7476 case 0xA: 7477 if (p != publicId && p[-1] != 0x20) 7478 *p++ = 0x20; 7479 break; 7480 default: 7481 *p++ = *s; 7482 } 7483 } 7484 if (p != publicId && p[-1] == 0x20) 7485 --p; 7486 *p = XML_T('\0'); 7487 } 7488 7489 static DTD * 7490 dtdCreate(XML_Parser parser) { 7491 DTD *p = MALLOC(parser, sizeof(DTD)); 7492 if (p == NULL) 7493 return p; 7494 poolInit(&(p->pool), parser); 7495 poolInit(&(p->entityValuePool), parser); 7496 hashTableInit(&(p->generalEntities), parser); 7497 hashTableInit(&(p->elementTypes), parser); 7498 hashTableInit(&(p->attributeIds), parser); 7499 hashTableInit(&(p->prefixes), parser); 7500 #ifdef XML_DTD 7501 p->paramEntityRead = 
XML_FALSE; 7502 hashTableInit(&(p->paramEntities), parser); 7503 #endif /* XML_DTD */ 7504 p->defaultPrefix.name = NULL; 7505 p->defaultPrefix.binding = NULL; 7506 7507 p->in_eldecl = XML_FALSE; 7508 p->scaffIndex = NULL; 7509 p->scaffold = NULL; 7510 p->scaffLevel = 0; 7511 p->scaffSize = 0; 7512 p->scaffCount = 0; 7513 p->contentStringLen = 0; 7514 7515 p->keepProcessing = XML_TRUE; 7516 p->hasParamEntityRefs = XML_FALSE; 7517 p->standalone = XML_FALSE; 7518 return p; 7519 } 7520 7521 static void 7522 dtdReset(DTD *p, XML_Parser parser) { 7523 HASH_TABLE_ITER iter; 7524 hashTableIterInit(&iter, &(p->elementTypes)); 7525 for (;;) { 7526 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7527 if (! e) 7528 break; 7529 if (e->allocDefaultAtts != 0) 7530 FREE(parser, e->defaultAtts); 7531 } 7532 hashTableClear(&(p->generalEntities)); 7533 #ifdef XML_DTD 7534 p->paramEntityRead = XML_FALSE; 7535 hashTableClear(&(p->paramEntities)); 7536 #endif /* XML_DTD */ 7537 hashTableClear(&(p->elementTypes)); 7538 hashTableClear(&(p->attributeIds)); 7539 hashTableClear(&(p->prefixes)); 7540 poolClear(&(p->pool)); 7541 poolClear(&(p->entityValuePool)); 7542 p->defaultPrefix.name = NULL; 7543 p->defaultPrefix.binding = NULL; 7544 7545 p->in_eldecl = XML_FALSE; 7546 7547 FREE(parser, p->scaffIndex); 7548 p->scaffIndex = NULL; 7549 FREE(parser, p->scaffold); 7550 p->scaffold = NULL; 7551 7552 p->scaffLevel = 0; 7553 p->scaffSize = 0; 7554 p->scaffCount = 0; 7555 p->contentStringLen = 0; 7556 7557 p->keepProcessing = XML_TRUE; 7558 p->hasParamEntityRefs = XML_FALSE; 7559 p->standalone = XML_FALSE; 7560 } 7561 7562 static void 7563 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { 7564 HASH_TABLE_ITER iter; 7565 hashTableIterInit(&iter, &(p->elementTypes)); 7566 for (;;) { 7567 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7568 if (! 
e) 7569 break; 7570 if (e->allocDefaultAtts != 0) 7571 FREE(parser, e->defaultAtts); 7572 } 7573 hashTableDestroy(&(p->generalEntities)); 7574 #ifdef XML_DTD 7575 hashTableDestroy(&(p->paramEntities)); 7576 #endif /* XML_DTD */ 7577 hashTableDestroy(&(p->elementTypes)); 7578 hashTableDestroy(&(p->attributeIds)); 7579 hashTableDestroy(&(p->prefixes)); 7580 poolDestroy(&(p->pool)); 7581 poolDestroy(&(p->entityValuePool)); 7582 if (isDocEntity) { 7583 FREE(parser, p->scaffIndex); 7584 FREE(parser, p->scaffold); 7585 } 7586 FREE(parser, p); 7587 } 7588 7589 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. 7590 The new DTD has already been initialized. 7591 */ 7592 static int 7593 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 7594 XML_Parser parser) { 7595 HASH_TABLE_ITER iter; 7596 7597 /* Copy the prefix table. */ 7598 7599 hashTableIterInit(&iter, &(oldDtd->prefixes)); 7600 for (;;) { 7601 const XML_Char *name; 7602 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); 7603 if (! oldP) 7604 break; 7605 name = poolCopyString(&(newDtd->pool), oldP->name); 7606 if (! name) 7607 return 0; 7608 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) 7609 return 0; 7610 } 7611 7612 hashTableIterInit(&iter, &(oldDtd->attributeIds)); 7613 7614 /* Copy the attribute id table. */ 7615 7616 for (;;) { 7617 ATTRIBUTE_ID *newA; 7618 const XML_Char *name; 7619 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); 7620 7621 if (! oldA) 7622 break; 7623 /* Remember to allocate the scratch byte before the name. */ 7624 if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) 7625 return 0; 7626 name = poolCopyString(&(newDtd->pool), oldA->name); 7627 if (! name) 7628 return 0; 7629 ++name; 7630 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, 7631 sizeof(ATTRIBUTE_ID)); 7632 if (! 
newA) 7633 return 0; 7634 newA->maybeTokenized = oldA->maybeTokenized; 7635 if (oldA->prefix) { 7636 newA->xmlns = oldA->xmlns; 7637 if (oldA->prefix == &oldDtd->defaultPrefix) 7638 newA->prefix = &newDtd->defaultPrefix; 7639 else 7640 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7641 oldA->prefix->name, 0); 7642 } 7643 } 7644 7645 /* Copy the element type table. */ 7646 7647 hashTableIterInit(&iter, &(oldDtd->elementTypes)); 7648 7649 for (;;) { 7650 int i; 7651 ELEMENT_TYPE *newE; 7652 const XML_Char *name; 7653 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7654 if (! oldE) 7655 break; 7656 name = poolCopyString(&(newDtd->pool), oldE->name); 7657 if (! name) 7658 return 0; 7659 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, 7660 sizeof(ELEMENT_TYPE)); 7661 if (! newE) 7662 return 0; 7663 if (oldE->nDefaultAtts) { 7664 /* Detect and prevent integer overflow. 7665 * The preprocessor guard addresses the "always false" warning 7666 * from -Wtype-limits on platforms where 7667 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ 7668 #if UINT_MAX >= SIZE_MAX 7669 if ((size_t)oldE->nDefaultAtts 7670 > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { 7671 return 0; 7672 } 7673 #endif 7674 newE->defaultAtts 7675 = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 7676 if (! 
newE->defaultAtts) { 7677 return 0; 7678 } 7679 } 7680 if (oldE->idAtt) 7681 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), 7682 oldE->idAtt->name, 0); 7683 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 7684 if (oldE->prefix) 7685 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7686 oldE->prefix->name, 0); 7687 for (i = 0; i < newE->nDefaultAtts; i++) { 7688 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( 7689 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 7690 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 7691 if (oldE->defaultAtts[i].value) { 7692 newE->defaultAtts[i].value 7693 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 7694 if (! newE->defaultAtts[i].value) 7695 return 0; 7696 } else 7697 newE->defaultAtts[i].value = NULL; 7698 } 7699 } 7700 7701 /* Copy the entity tables. */ 7702 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), 7703 &(oldDtd->generalEntities))) 7704 return 0; 7705 7706 #ifdef XML_DTD 7707 if (! 
copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), 7708 &(oldDtd->paramEntities))) 7709 return 0; 7710 newDtd->paramEntityRead = oldDtd->paramEntityRead; 7711 #endif /* XML_DTD */ 7712 7713 newDtd->keepProcessing = oldDtd->keepProcessing; 7714 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; 7715 newDtd->standalone = oldDtd->standalone; 7716 7717 /* Don't want deep copying for scaffolding */ 7718 newDtd->in_eldecl = oldDtd->in_eldecl; 7719 newDtd->scaffold = oldDtd->scaffold; 7720 newDtd->contentStringLen = oldDtd->contentStringLen; 7721 newDtd->scaffSize = oldDtd->scaffSize; 7722 newDtd->scaffLevel = oldDtd->scaffLevel; 7723 newDtd->scaffIndex = oldDtd->scaffIndex; 7724 7725 return 1; 7726 } /* End dtdCopy */ 7727 7728 static int 7729 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 7730 STRING_POOL *newPool, const HASH_TABLE *oldTable) { 7731 HASH_TABLE_ITER iter; 7732 const XML_Char *cachedOldBase = NULL; 7733 const XML_Char *cachedNewBase = NULL; 7734 7735 hashTableIterInit(&iter, oldTable); 7736 7737 for (;;) { 7738 ENTITY *newE; 7739 const XML_Char *name; 7740 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); 7741 if (! oldE) 7742 break; 7743 name = poolCopyString(newPool, oldE->name); 7744 if (! name) 7745 return 0; 7746 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); 7747 if (! newE) 7748 return 0; 7749 if (oldE->systemId) { 7750 const XML_Char *tem = poolCopyString(newPool, oldE->systemId); 7751 if (! tem) 7752 return 0; 7753 newE->systemId = tem; 7754 if (oldE->base) { 7755 if (oldE->base == cachedOldBase) 7756 newE->base = cachedNewBase; 7757 else { 7758 cachedOldBase = oldE->base; 7759 tem = poolCopyString(newPool, cachedOldBase); 7760 if (! tem) 7761 return 0; 7762 cachedNewBase = newE->base = tem; 7763 } 7764 } 7765 if (oldE->publicId) { 7766 tem = poolCopyString(newPool, oldE->publicId); 7767 if (! 
tem) 7768 return 0; 7769 newE->publicId = tem; 7770 } 7771 } else { 7772 const XML_Char *tem 7773 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 7774 if (! tem) 7775 return 0; 7776 newE->textPtr = tem; 7777 newE->textLen = oldE->textLen; 7778 } 7779 if (oldE->notation) { 7780 const XML_Char *tem = poolCopyString(newPool, oldE->notation); 7781 if (! tem) 7782 return 0; 7783 newE->notation = tem; 7784 } 7785 newE->is_param = oldE->is_param; 7786 newE->is_internal = oldE->is_internal; 7787 } 7788 return 1; 7789 } 7790 7791 #define INIT_POWER 6 7792 7793 static XML_Bool FASTCALL 7794 keyeq(KEY s1, KEY s2) { 7795 for (; *s1 == *s2; s1++, s2++) 7796 if (*s1 == 0) 7797 return XML_TRUE; 7798 return XML_FALSE; 7799 } 7800 7801 static size_t 7802 keylen(KEY s) { 7803 size_t len = 0; 7804 for (; *s; s++, len++) 7805 ; 7806 return len; 7807 } 7808 7809 static void 7810 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { 7811 key->k[0] = 0; 7812 key->k[1] = get_hash_secret_salt(parser); 7813 } 7814 7815 static unsigned long FASTCALL 7816 hash(XML_Parser parser, KEY s) { 7817 struct siphash state; 7818 struct sipkey key; 7819 (void)sip24_valid; 7820 copy_salt_to_sipkey(parser, &key); 7821 sip24_init(&state, &key); 7822 sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 7823 return (unsigned long)sip24_final(&state); 7824 } 7825 7826 static NAMED * 7827 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { 7828 size_t i; 7829 if (table->size == 0) { 7830 size_t tsize; 7831 if (! createSize) 7832 return NULL; 7833 table->power = INIT_POWER; 7834 /* table->size is a power of 2 */ 7835 table->size = (size_t)1 << INIT_POWER; 7836 tsize = table->size * sizeof(NAMED *); 7837 table->v = MALLOC(table->parser, tsize); 7838 if (! 
table->v) { 7839 table->size = 0; 7840 return NULL; 7841 } 7842 memset(table->v, 0, tsize); 7843 i = hash(parser, name) & ((unsigned long)table->size - 1); 7844 } else { 7845 unsigned long h = hash(parser, name); 7846 unsigned long mask = (unsigned long)table->size - 1; 7847 unsigned char step = 0; 7848 i = h & mask; 7849 while (table->v[i]) { 7850 if (keyeq(name, table->v[i]->name)) 7851 return table->v[i]; 7852 if (! step) 7853 step = PROBE_STEP(h, mask, table->power); 7854 i < step ? (i += table->size - step) : (i -= step); 7855 } 7856 if (! createSize) 7857 return NULL; 7858 7859 /* check for overflow (table is half full) */ 7860 if (table->used >> (table->power - 1)) { 7861 unsigned char newPower = table->power + 1; 7862 7863 /* Detect and prevent invalid shift */ 7864 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 7865 return NULL; 7866 } 7867 7868 size_t newSize = (size_t)1 << newPower; 7869 unsigned long newMask = (unsigned long)newSize - 1; 7870 7871 /* Detect and prevent integer overflow */ 7872 if (newSize > (size_t)(-1) / sizeof(NAMED *)) { 7873 return NULL; 7874 } 7875 7876 size_t tsize = newSize * sizeof(NAMED *); 7877 NAMED **newV = MALLOC(table->parser, tsize); 7878 if (! newV) 7879 return NULL; 7880 memset(newV, 0, tsize); 7881 for (i = 0; i < table->size; i++) 7882 if (table->v[i]) { 7883 unsigned long newHash = hash(parser, table->v[i]->name); 7884 size_t j = newHash & newMask; 7885 step = 0; 7886 while (newV[j]) { 7887 if (! step) 7888 step = PROBE_STEP(newHash, newMask, newPower); 7889 j < step ? (j += newSize - step) : (j -= step); 7890 } 7891 newV[j] = table->v[i]; 7892 } 7893 FREE(table->parser, table->v); 7894 table->v = newV; 7895 table->power = newPower; 7896 table->size = newSize; 7897 i = h & newMask; 7898 step = 0; 7899 while (table->v[i]) { 7900 if (! step) 7901 step = PROBE_STEP(h, newMask, newPower); 7902 i < step ? 
(i += newSize - step) : (i -= step); 7903 } 7904 } 7905 } 7906 table->v[i] = MALLOC(table->parser, createSize); 7907 if (! table->v[i]) 7908 return NULL; 7909 memset(table->v[i], 0, createSize); 7910 table->v[i]->name = name; 7911 (table->used)++; 7912 return table->v[i]; 7913 } 7914 7915 static void FASTCALL 7916 hashTableClear(HASH_TABLE *table) { 7917 size_t i; 7918 for (i = 0; i < table->size; i++) { 7919 FREE(table->parser, table->v[i]); 7920 table->v[i] = NULL; 7921 } 7922 table->used = 0; 7923 } 7924 7925 static void FASTCALL 7926 hashTableDestroy(HASH_TABLE *table) { 7927 size_t i; 7928 for (i = 0; i < table->size; i++) 7929 FREE(table->parser, table->v[i]); 7930 FREE(table->parser, table->v); 7931 } 7932 7933 static void FASTCALL 7934 hashTableInit(HASH_TABLE *p, XML_Parser parser) { 7935 p->power = 0; 7936 p->size = 0; 7937 p->used = 0; 7938 p->v = NULL; 7939 p->parser = parser; 7940 } 7941 7942 static void FASTCALL 7943 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { 7944 iter->p = table->v; 7945 iter->end = iter->p ? iter->p + table->size : NULL; 7946 } 7947 7948 static NAMED *FASTCALL 7949 hashTableIterNext(HASH_TABLE_ITER *iter) { 7950 while (iter->p != iter->end) { 7951 NAMED *tem = *(iter->p)++; 7952 if (tem) 7953 return tem; 7954 } 7955 return NULL; 7956 } 7957 7958 static void FASTCALL 7959 poolInit(STRING_POOL *pool, XML_Parser parser) { 7960 pool->blocks = NULL; 7961 pool->freeBlocks = NULL; 7962 pool->start = NULL; 7963 pool->ptr = NULL; 7964 pool->end = NULL; 7965 pool->parser = parser; 7966 } 7967 7968 static void FASTCALL 7969 poolClear(STRING_POOL *pool) { 7970 if (! 
pool->freeBlocks) 7971 pool->freeBlocks = pool->blocks; 7972 else { 7973 BLOCK *p = pool->blocks; 7974 while (p) { 7975 BLOCK *tem = p->next; 7976 p->next = pool->freeBlocks; 7977 pool->freeBlocks = p; 7978 p = tem; 7979 } 7980 } 7981 pool->blocks = NULL; 7982 pool->start = NULL; 7983 pool->ptr = NULL; 7984 pool->end = NULL; 7985 } 7986 7987 static void FASTCALL 7988 poolDestroy(STRING_POOL *pool) { 7989 BLOCK *p = pool->blocks; 7990 while (p) { 7991 BLOCK *tem = p->next; 7992 FREE(pool->parser, p); 7993 p = tem; 7994 } 7995 p = pool->freeBlocks; 7996 while (p) { 7997 BLOCK *tem = p->next; 7998 FREE(pool->parser, p); 7999 p = tem; 8000 } 8001 } 8002 8003 static XML_Char * 8004 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 8005 const char *end) { 8006 if (! pool->ptr && ! poolGrow(pool)) 8007 return NULL; 8008 for (;;) { 8009 const enum XML_Convert_Result convert_res = XmlConvert( 8010 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); 8011 if ((convert_res == XML_CONVERT_COMPLETED) 8012 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 8013 break; 8014 if (! poolGrow(pool)) 8015 return NULL; 8016 } 8017 return pool->start; 8018 } 8019 8020 static const XML_Char *FASTCALL 8021 poolCopyString(STRING_POOL *pool, const XML_Char *s) { 8022 do { 8023 if (! poolAppendChar(pool, *s)) 8024 return NULL; 8025 } while (*s++); 8026 s = pool->start; 8027 poolFinish(pool); 8028 return s; 8029 } 8030 8031 static const XML_Char * 8032 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { 8033 if (! pool->ptr && ! poolGrow(pool)) { 8034 /* The following line is unreachable given the current usage of 8035 * poolCopyStringN(). Currently it is called from exactly one 8036 * place to copy the text of a simple general entity. By that 8037 * point, the name of the entity is already stored in the pool, so 8038 * pool->ptr cannot be NULL. 
8039 * 8040 * If poolCopyStringN() is used elsewhere as it well might be, 8041 * this line may well become executable again. Regardless, this 8042 * sort of check shouldn't be removed lightly, so we just exclude 8043 * it from the coverage statistics. 8044 */ 8045 return NULL; /* LCOV_EXCL_LINE */ 8046 } 8047 for (; n > 0; --n, s++) { 8048 if (! poolAppendChar(pool, *s)) 8049 return NULL; 8050 } 8051 s = pool->start; 8052 poolFinish(pool); 8053 return s; 8054 } 8055 8056 static const XML_Char *FASTCALL 8057 poolAppendString(STRING_POOL *pool, const XML_Char *s) { 8058 while (*s) { 8059 if (! poolAppendChar(pool, *s)) 8060 return NULL; 8061 s++; 8062 } 8063 return pool->start; 8064 } 8065 8066 static XML_Char * 8067 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 8068 const char *end) { 8069 if (! poolAppend(pool, enc, ptr, end)) 8070 return NULL; 8071 if (pool->ptr == pool->end && ! poolGrow(pool)) 8072 return NULL; 8073 *(pool->ptr)++ = 0; 8074 return pool->start; 8075 } 8076 8077 static size_t 8078 poolBytesToAllocateFor(int blockSize) { 8079 /* Unprotected math would be: 8080 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 8081 ** 8082 ** Detect overflow, avoiding _signed_ overflow undefined behavior 8083 ** For a + b * c we check b * c in isolation first, so that addition of a 8084 ** on top has no chance of making us accept a small non-negative number 8085 */ 8086 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 8087 8088 if (blockSize <= 0) 8089 return 0; 8090 8091 if (blockSize > (int)(INT_MAX / stretch)) 8092 return 0; 8093 8094 { 8095 const int stretchedBlockSize = blockSize * (int)stretch; 8096 const int bytesToAllocate 8097 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 8098 if (bytesToAllocate < 0) 8099 return 0; 8100 8101 return (size_t)bytesToAllocate; 8102 } 8103 } 8104 8105 static XML_Bool FASTCALL 8106 poolGrow(STRING_POOL *pool) { 8107 if (pool->freeBlocks) { 8108 if (pool->start == 
0) { 8109 pool->blocks = pool->freeBlocks; 8110 pool->freeBlocks = pool->freeBlocks->next; 8111 pool->blocks->next = NULL; 8112 pool->start = pool->blocks->s; 8113 pool->end = pool->start + pool->blocks->size; 8114 pool->ptr = pool->start; 8115 return XML_TRUE; 8116 } 8117 if (pool->end - pool->start < pool->freeBlocks->size) { 8118 BLOCK *tem = pool->freeBlocks->next; 8119 pool->freeBlocks->next = pool->blocks; 8120 pool->blocks = pool->freeBlocks; 8121 pool->freeBlocks = tem; 8122 memcpy(pool->blocks->s, pool->start, 8123 (pool->end - pool->start) * sizeof(XML_Char)); 8124 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 8125 pool->start = pool->blocks->s; 8126 pool->end = pool->start + pool->blocks->size; 8127 return XML_TRUE; 8128 } 8129 } 8130 if (pool->blocks && pool->start == pool->blocks->s) { 8131 BLOCK *temp; 8132 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 8133 size_t bytesToAllocate; 8134 8135 /* NOTE: Needs to be calculated prior to calling `realloc` 8136 to avoid dangling pointers: */ 8137 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 8138 8139 if (blockSize < 0) { 8140 /* This condition traps a situation where either more than 8141 * INT_MAX/2 bytes have already been allocated. This isn't 8142 * readily testable, since it is unlikely that an average 8143 * machine will have that much memory, so we exclude it from the 8144 * coverage statistics. 
8145 */ 8146 return XML_FALSE; /* LCOV_EXCL_LINE */ 8147 } 8148 8149 bytesToAllocate = poolBytesToAllocateFor(blockSize); 8150 if (bytesToAllocate == 0) 8151 return XML_FALSE; 8152 8153 temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate); 8154 if (temp == NULL) 8155 return XML_FALSE; 8156 pool->blocks = temp; 8157 pool->blocks->size = blockSize; 8158 pool->ptr = pool->blocks->s + offsetInsideBlock; 8159 pool->start = pool->blocks->s; 8160 pool->end = pool->start + blockSize; 8161 } else { 8162 BLOCK *tem; 8163 int blockSize = (int)(pool->end - pool->start); 8164 size_t bytesToAllocate; 8165 8166 if (blockSize < 0) { 8167 /* This condition traps a situation where either more than 8168 * INT_MAX bytes have already been allocated (which is prevented 8169 * by various pieces of program logic, not least this one, never 8170 * mind the unlikelihood of actually having that much memory) or 8171 * the pool control fields have been corrupted (which could 8172 * conceivably happen in an extremely buggy user handler 8173 * function). Either way it isn't readily testable, so we 8174 * exclude it from the coverage statistics. 8175 */ 8176 return XML_FALSE; /* LCOV_EXCL_LINE */ 8177 } 8178 8179 if (blockSize < INIT_BLOCK_SIZE) 8180 blockSize = INIT_BLOCK_SIZE; 8181 else { 8182 /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 8183 if ((int)((unsigned)blockSize * 2U) < 0) { 8184 return XML_FALSE; 8185 } 8186 blockSize *= 2; 8187 } 8188 8189 bytesToAllocate = poolBytesToAllocateFor(blockSize); 8190 if (bytesToAllocate == 0) 8191 return XML_FALSE; 8192 8193 tem = MALLOC(pool->parser, bytesToAllocate); 8194 if (! 
tem) 8195 return XML_FALSE; 8196 tem->size = blockSize; 8197 tem->next = pool->blocks; 8198 pool->blocks = tem; 8199 if (pool->ptr != pool->start) 8200 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 8201 pool->ptr = tem->s + (pool->ptr - pool->start); 8202 pool->start = tem->s; 8203 pool->end = tem->s + blockSize; 8204 } 8205 return XML_TRUE; 8206 } 8207 8208 static int FASTCALL 8209 nextScaffoldPart(XML_Parser parser) { 8210 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8211 CONTENT_SCAFFOLD *me; 8212 int next; 8213 8214 if (! dtd->scaffIndex) { 8215 /* Detect and prevent integer overflow. 8216 * The preprocessor guard addresses the "always false" warning 8217 * from -Wtype-limits on platforms where 8218 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 8219 #if UINT_MAX >= SIZE_MAX 8220 if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { 8221 return -1; 8222 } 8223 #endif 8224 dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int)); 8225 if (! dtd->scaffIndex) 8226 return -1; 8227 dtd->scaffIndex[0] = 0; 8228 } 8229 8230 // Will casting to int be safe further down? 8231 if (dtd->scaffCount > INT_MAX) { 8232 return -1; 8233 } 8234 8235 if (dtd->scaffCount >= dtd->scaffSize) { 8236 CONTENT_SCAFFOLD *temp; 8237 if (dtd->scaffold) { 8238 /* Detect and prevent integer overflow */ 8239 if (dtd->scaffSize > UINT_MAX / 2u) { 8240 return -1; 8241 } 8242 /* Detect and prevent integer overflow. 8243 * The preprocessor guard addresses the "always false" warning 8244 * from -Wtype-limits on platforms where 8245 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 8246 #if UINT_MAX >= SIZE_MAX 8247 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { 8248 return -1; 8249 } 8250 #endif 8251 8252 temp = REALLOC(parser, dtd->scaffold, 8253 dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); 8254 if (temp == NULL) 8255 return -1; 8256 dtd->scaffSize *= 2; 8257 } else { 8258 temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); 8259 if (temp == NULL) 8260 return -1; 8261 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; 8262 } 8263 dtd->scaffold = temp; 8264 } 8265 next = (int)dtd->scaffCount++; 8266 me = &dtd->scaffold[next]; 8267 if (dtd->scaffLevel) { 8268 CONTENT_SCAFFOLD *parent 8269 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; 8270 if (parent->lastchild) { 8271 dtd->scaffold[parent->lastchild].nextsib = next; 8272 } 8273 if (! parent->childcnt) 8274 parent->firstchild = next; 8275 parent->lastchild = next; 8276 parent->childcnt++; 8277 } 8278 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; 8279 return next; 8280 } 8281 8282 static XML_Content * 8283 build_model(XML_Parser parser) { 8284 /* Function build_model transforms the existing parser->m_dtd->scaffold 8285 * array of CONTENT_SCAFFOLD tree nodes into a new array of 8286 * XML_Content tree nodes followed by a gapless list of zero-terminated 8287 * strings. */ 8288 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8289 XML_Content *ret; 8290 XML_Char *str; /* the current string writing location */ 8291 8292 /* Detect and prevent integer overflow. 8293 * The preprocessor guard addresses the "always false" warning 8294 * from -Wtype-limits on platforms where 8295 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 8296 #if UINT_MAX >= SIZE_MAX 8297 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { 8298 return NULL; 8299 } 8300 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { 8301 return NULL; 8302 } 8303 #endif 8304 if (dtd->scaffCount * sizeof(XML_Content) 8305 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { 8306 return NULL; 8307 } 8308 8309 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) 8310 + (dtd->contentStringLen * sizeof(XML_Char))); 8311 8312 // NOTE: We are avoiding MALLOC(..) here to so that 8313 // applications that are not using XML_FreeContentModel but plain 8314 // free(..) or .free_fcn() to free the content model's memory are safe. 8315 ret = parser->m_mem.malloc_fcn(allocsize); 8316 if (! ret) 8317 return NULL; 8318 8319 /* What follows is an iterative implementation (of what was previously done 8320 * recursively in a dedicated function called "build_node". The old recursive 8321 * build_node could be forced into stack exhaustion from input as small as a 8322 * few megabyte, and so that was a security issue. Hence, a function call 8323 * stack is avoided now by resolving recursion.) 8324 * 8325 * The iterative approach works as follows: 8326 * 8327 * - We have two writing pointers, both walking up the result array; one does 8328 * the work, the other creates "jobs" for its colleague to do, and leads 8329 * the way: 8330 * 8331 * - The faster one, pointer jobDest, always leads and writes "what job 8332 * to do" by the other, once they reach that place in the 8333 * array: leader "jobDest" stores the source node array index (relative 8334 * to array dtd->scaffold) in field "numchildren". 8335 * 8336 * - The slower one, pointer dest, looks at the value stored in the 8337 * "numchildren" field (which actually holds a source node array index 8338 * at that time) and puts the real data from dtd->scaffold in. 
8339 * 8340 * - Before the loop starts, jobDest writes source array index 0 8341 * (where the root node is located) so that dest will have something to do 8342 * when it starts operation. 8343 * 8344 * - Whenever nodes with children are encountered, jobDest appends 8345 * them as new jobs, in order. As a result, tree node siblings are 8346 * adjacent in the resulting array, for example: 8347 * 8348 * [0] root, has two children 8349 * [1] first child of 0, has three children 8350 * [3] first child of 1, does not have children 8351 * [4] second child of 1, does not have children 8352 * [5] third child of 1, does not have children 8353 * [2] second child of 0, does not have children 8354 * 8355 * Or (the same data) presented in flat array view: 8356 * 8357 * [0] root, has two children 8358 * 8359 * [1] first child of 0, has three children 8360 * [2] second child of 0, does not have children 8361 * 8362 * [3] first child of 1, does not have children 8363 * [4] second child of 1, does not have children 8364 * [5] third child of 1, does not have children 8365 * 8366 * - The algorithm repeats until all target array indices have been processed. 
8367 */ 8368 XML_Content *dest = ret; /* tree node writing location, moves upwards */ 8369 XML_Content *const destLimit = &ret[dtd->scaffCount]; 8370 XML_Content *jobDest = ret; /* next free writing location in target array */ 8371 str = (XML_Char *)&ret[dtd->scaffCount]; 8372 8373 /* Add the starting job, the root node (index 0) of the source tree */ 8374 (jobDest++)->numchildren = 0; 8375 8376 for (; dest < destLimit; dest++) { 8377 /* Retrieve source tree array index from job storage */ 8378 const int src_node = (int)dest->numchildren; 8379 8380 /* Convert item */ 8381 dest->type = dtd->scaffold[src_node].type; 8382 dest->quant = dtd->scaffold[src_node].quant; 8383 if (dest->type == XML_CTYPE_NAME) { 8384 const XML_Char *src; 8385 dest->name = str; 8386 src = dtd->scaffold[src_node].name; 8387 for (;;) { 8388 *str++ = *src; 8389 if (! *src) 8390 break; 8391 src++; 8392 } 8393 dest->numchildren = 0; 8394 dest->children = NULL; 8395 } else { 8396 unsigned int i; 8397 int cn; 8398 dest->name = NULL; 8399 dest->numchildren = dtd->scaffold[src_node].childcnt; 8400 dest->children = jobDest; 8401 8402 /* Append scaffold indices of children to array */ 8403 for (i = 0, cn = dtd->scaffold[src_node].firstchild; 8404 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) 8405 (jobDest++)->numchildren = (unsigned int)cn; 8406 } 8407 } 8408 8409 return ret; 8410 } 8411 8412 static ELEMENT_TYPE * 8413 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, 8414 const char *end) { 8415 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8416 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); 8417 ELEMENT_TYPE *ret; 8418 8419 if (! name) 8420 return NULL; 8421 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 8422 sizeof(ELEMENT_TYPE)); 8423 if (! ret) 8424 return NULL; 8425 if (ret->name != name) 8426 poolDiscard(&dtd->pool); 8427 else { 8428 poolFinish(&dtd->pool); 8429 if (! 
setElementTypePrefix(parser, ret)) 8430 return NULL; 8431 } 8432 return ret; 8433 } 8434 8435 static XML_Char * 8436 copyString(const XML_Char *s, XML_Parser parser) { 8437 size_t charsRequired = 0; 8438 XML_Char *result; 8439 8440 /* First determine how long the string is */ 8441 while (s[charsRequired] != 0) { 8442 charsRequired++; 8443 } 8444 /* Include the terminator */ 8445 charsRequired++; 8446 8447 /* Now allocate space for the copy */ 8448 result = MALLOC(parser, charsRequired * sizeof(XML_Char)); 8449 if (result == NULL) 8450 return NULL; 8451 /* Copy the original into place */ 8452 memcpy(result, s, charsRequired * sizeof(XML_Char)); 8453 return result; 8454 } 8455 8456 #if XML_GE == 1 8457 8458 static float 8459 accountingGetCurrentAmplification(XML_Parser rootParser) { 8460 // 1.........1.........12 => 22 8461 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; 8462 const XmlBigCount countBytesOutput 8463 = rootParser->m_accounting.countBytesDirect 8464 + rootParser->m_accounting.countBytesIndirect; 8465 const float amplificationFactor 8466 = rootParser->m_accounting.countBytesDirect 8467 ? ((float)countBytesOutput 8468 / (float)(rootParser->m_accounting.countBytesDirect)) 8469 : ((float)(lenOfShortestInclude 8470 + rootParser->m_accounting.countBytesIndirect) 8471 / (float)lenOfShortestInclude); 8472 assert(! rootParser->m_parentParser); 8473 return amplificationFactor; 8474 } 8475 8476 static void 8477 accountingReportStats(XML_Parser originParser, const char *epilog) { 8478 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8479 assert(! 
rootParser->m_parentParser); 8480 8481 if (rootParser->m_accounting.debugLevel == 0u) { 8482 return; 8483 } 8484 8485 const float amplificationFactor 8486 = accountingGetCurrentAmplification(rootParser); 8487 fprintf(stderr, 8488 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( 8489 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 8490 (void *)rootParser, rootParser->m_accounting.countBytesDirect, 8491 rootParser->m_accounting.countBytesIndirect, 8492 (double)amplificationFactor, epilog); 8493 } 8494 8495 static void 8496 accountingOnAbort(XML_Parser originParser) { 8497 accountingReportStats(originParser, " ABORTING\n"); 8498 } 8499 8500 static void 8501 accountingReportDiff(XML_Parser rootParser, 8502 unsigned int levelsAwayFromRootParser, const char *before, 8503 const char *after, ptrdiff_t bytesMore, int source_line, 8504 enum XML_Account account) { 8505 assert(! rootParser->m_parentParser); 8506 8507 fprintf(stderr, 8508 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"", 8509 bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", 8510 levelsAwayFromRootParser, source_line, 10, ""); 8511 8512 const char ellipis[] = "[..]"; 8513 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 8514 const unsigned int contextLength = 10; 8515 8516 /* Note: Performance is of no concern here */ 8517 const char *walker = before; 8518 if ((rootParser->m_accounting.debugLevel >= 3u) 8519 || (after - before) 8520 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 8521 for (; walker < after; walker++) { 8522 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8523 } 8524 } else { 8525 for (; walker < before + contextLength; walker++) { 8526 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8527 } 8528 fprintf(stderr, ellipis); 8529 walker = after - contextLength; 8530 for (; walker < after; walker++) { 8531 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8532 } 8533 } 8534 fprintf(stderr, "\"\n"); 8535 } 8536 8537 static XML_Bool 8538 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, 8539 const char *after, int source_line, 8540 enum XML_Account account) { 8541 /* Note: We need to check the token type *first* to be sure that 8542 * we can even access variable <after>, safely. 8543 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ 8544 switch (tok) { 8545 case XML_TOK_INVALID: 8546 case XML_TOK_PARTIAL: 8547 case XML_TOK_PARTIAL_CHAR: 8548 case XML_TOK_NONE: 8549 return XML_TRUE; 8550 } 8551 8552 if (account == XML_ACCOUNT_NONE) 8553 return XML_TRUE; /* because these bytes have been accounted for, already */ 8554 8555 unsigned int levelsAwayFromRootParser; 8556 const XML_Parser rootParser 8557 = getRootParserOf(originParser, &levelsAwayFromRootParser); 8558 assert(! 
rootParser->m_parentParser); 8559 8560 const int isDirect 8561 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 8562 const ptrdiff_t bytesMore = after - before; 8563 8564 XmlBigCount *const additionTarget 8565 = isDirect ? &rootParser->m_accounting.countBytesDirect 8566 : &rootParser->m_accounting.countBytesIndirect; 8567 8568 /* Detect and avoid integer overflow */ 8569 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) 8570 return XML_FALSE; 8571 *additionTarget += bytesMore; 8572 8573 const XmlBigCount countBytesOutput 8574 = rootParser->m_accounting.countBytesDirect 8575 + rootParser->m_accounting.countBytesIndirect; 8576 const float amplificationFactor 8577 = accountingGetCurrentAmplification(rootParser); 8578 const XML_Bool tolerated 8579 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) 8580 || (amplificationFactor 8581 <= rootParser->m_accounting.maximumAmplificationFactor); 8582 8583 if (rootParser->m_accounting.debugLevel >= 2u) { 8584 accountingReportStats(rootParser, ""); 8585 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, 8586 bytesMore, source_line, account); 8587 } 8588 8589 return tolerated; 8590 } 8591 8592 unsigned long long 8593 testingAccountingGetCountBytesDirect(XML_Parser parser) { 8594 if (! parser) 8595 return 0; 8596 return parser->m_accounting.countBytesDirect; 8597 } 8598 8599 unsigned long long 8600 testingAccountingGetCountBytesIndirect(XML_Parser parser) { 8601 if (! parser) 8602 return 0; 8603 return parser->m_accounting.countBytesIndirect; 8604 } 8605 8606 static void 8607 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, 8608 const char *action, int sourceLine) { 8609 assert(! 
rootParser->m_parentParser); 8610 if (rootParser->m_entity_stats.debugLevel == 0u) 8611 return; 8612 8613 # if defined(XML_UNICODE) 8614 const char *const entityName = "[..]"; 8615 # else 8616 const char *const entityName = entity->name; 8617 # endif 8618 8619 fprintf( 8620 stderr, 8621 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n", 8622 (void *)rootParser, rootParser->m_entity_stats.countEverOpened, 8623 rootParser->m_entity_stats.currentDepth, 8624 rootParser->m_entity_stats.maximumDepthSeen, 8625 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "", 8626 entity->is_param ? "%" : "&", entityName, action, entity->textLen, 8627 sourceLine); 8628 } 8629 8630 static void 8631 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { 8632 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8633 assert(! rootParser->m_parentParser); 8634 8635 rootParser->m_entity_stats.countEverOpened++; 8636 rootParser->m_entity_stats.currentDepth++; 8637 if (rootParser->m_entity_stats.currentDepth 8638 > rootParser->m_entity_stats.maximumDepthSeen) { 8639 rootParser->m_entity_stats.maximumDepthSeen++; 8640 } 8641 8642 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 8643 } 8644 8645 static void 8646 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { 8647 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8648 assert(! rootParser->m_parentParser); 8649 8650 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 8651 rootParser->m_entity_stats.currentDepth--; 8652 } 8653 8654 #endif /* XML_GE == 1 */ 8655 8656 static XML_Parser 8657 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { 8658 XML_Parser rootParser = parser; 8659 unsigned int stepsTakenUpwards = 0; 8660 while (rootParser->m_parentParser) { 8661 rootParser = rootParser->m_parentParser; 8662 stepsTakenUpwards++; 8663 } 8664 assert(! 
rootParser->m_parentParser); 8665 if (outLevelDiff != NULL) { 8666 *outLevelDiff = stepsTakenUpwards; 8667 } 8668 return rootParser; 8669 } 8670 8671 #if XML_GE == 1 8672 8673 const char * 8674 unsignedCharToPrintable(unsigned char c) { 8675 switch (c) { 8676 case 0: 8677 return "\\0"; 8678 case 1: 8679 return "\\x1"; 8680 case 2: 8681 return "\\x2"; 8682 case 3: 8683 return "\\x3"; 8684 case 4: 8685 return "\\x4"; 8686 case 5: 8687 return "\\x5"; 8688 case 6: 8689 return "\\x6"; 8690 case 7: 8691 return "\\x7"; 8692 case 8: 8693 return "\\x8"; 8694 case 9: 8695 return "\\t"; 8696 case 10: 8697 return "\\n"; 8698 case 11: 8699 return "\\xB"; 8700 case 12: 8701 return "\\xC"; 8702 case 13: 8703 return "\\r"; 8704 case 14: 8705 return "\\xE"; 8706 case 15: 8707 return "\\xF"; 8708 case 16: 8709 return "\\x10"; 8710 case 17: 8711 return "\\x11"; 8712 case 18: 8713 return "\\x12"; 8714 case 19: 8715 return "\\x13"; 8716 case 20: 8717 return "\\x14"; 8718 case 21: 8719 return "\\x15"; 8720 case 22: 8721 return "\\x16"; 8722 case 23: 8723 return "\\x17"; 8724 case 24: 8725 return "\\x18"; 8726 case 25: 8727 return "\\x19"; 8728 case 26: 8729 return "\\x1A"; 8730 case 27: 8731 return "\\x1B"; 8732 case 28: 8733 return "\\x1C"; 8734 case 29: 8735 return "\\x1D"; 8736 case 30: 8737 return "\\x1E"; 8738 case 31: 8739 return "\\x1F"; 8740 case 32: 8741 return " "; 8742 case 33: 8743 return "!"; 8744 case 34: 8745 return "\\\""; 8746 case 35: 8747 return "#"; 8748 case 36: 8749 return "$"; 8750 case 37: 8751 return "%"; 8752 case 38: 8753 return "&"; 8754 case 39: 8755 return "'"; 8756 case 40: 8757 return "("; 8758 case 41: 8759 return ")"; 8760 case 42: 8761 return "*"; 8762 case 43: 8763 return "+"; 8764 case 44: 8765 return ","; 8766 case 45: 8767 return "-"; 8768 case 46: 8769 return "."; 8770 case 47: 8771 return "/"; 8772 case 48: 8773 return "0"; 8774 case 49: 8775 return "1"; 8776 case 50: 8777 return "2"; 8778 case 51: 8779 return "3"; 8780 case 52: 8781 return "4"; 
8782 case 53: 8783 return "5"; 8784 case 54: 8785 return "6"; 8786 case 55: 8787 return "7"; 8788 case 56: 8789 return "8"; 8790 case 57: 8791 return "9"; 8792 case 58: 8793 return ":"; 8794 case 59: 8795 return ";"; 8796 case 60: 8797 return "<"; 8798 case 61: 8799 return "="; 8800 case 62: 8801 return ">"; 8802 case 63: 8803 return "?"; 8804 case 64: 8805 return "@"; 8806 case 65: 8807 return "A"; 8808 case 66: 8809 return "B"; 8810 case 67: 8811 return "C"; 8812 case 68: 8813 return "D"; 8814 case 69: 8815 return "E"; 8816 case 70: 8817 return "F"; 8818 case 71: 8819 return "G"; 8820 case 72: 8821 return "H"; 8822 case 73: 8823 return "I"; 8824 case 74: 8825 return "J"; 8826 case 75: 8827 return "K"; 8828 case 76: 8829 return "L"; 8830 case 77: 8831 return "M"; 8832 case 78: 8833 return "N"; 8834 case 79: 8835 return "O"; 8836 case 80: 8837 return "P"; 8838 case 81: 8839 return "Q"; 8840 case 82: 8841 return "R"; 8842 case 83: 8843 return "S"; 8844 case 84: 8845 return "T"; 8846 case 85: 8847 return "U"; 8848 case 86: 8849 return "V"; 8850 case 87: 8851 return "W"; 8852 case 88: 8853 return "X"; 8854 case 89: 8855 return "Y"; 8856 case 90: 8857 return "Z"; 8858 case 91: 8859 return "["; 8860 case 92: 8861 return "\\\\"; 8862 case 93: 8863 return "]"; 8864 case 94: 8865 return "^"; 8866 case 95: 8867 return "_"; 8868 case 96: 8869 return "`"; 8870 case 97: 8871 return "a"; 8872 case 98: 8873 return "b"; 8874 case 99: 8875 return "c"; 8876 case 100: 8877 return "d"; 8878 case 101: 8879 return "e"; 8880 case 102: 8881 return "f"; 8882 case 103: 8883 return "g"; 8884 case 104: 8885 return "h"; 8886 case 105: 8887 return "i"; 8888 case 106: 8889 return "j"; 8890 case 107: 8891 return "k"; 8892 case 108: 8893 return "l"; 8894 case 109: 8895 return "m"; 8896 case 110: 8897 return "n"; 8898 case 111: 8899 return "o"; 8900 case 112: 8901 return "p"; 8902 case 113: 8903 return "q"; 8904 case 114: 8905 return "r"; 8906 case 115: 8907 return "s"; 8908 case 116: 8909 return 
"t"; 8910 case 117: 8911 return "u"; 8912 case 118: 8913 return "v"; 8914 case 119: 8915 return "w"; 8916 case 120: 8917 return "x"; 8918 case 121: 8919 return "y"; 8920 case 122: 8921 return "z"; 8922 case 123: 8923 return "{"; 8924 case 124: 8925 return "|"; 8926 case 125: 8927 return "}"; 8928 case 126: 8929 return "~"; 8930 case 127: 8931 return "\\x7F"; 8932 case 128: 8933 return "\\x80"; 8934 case 129: 8935 return "\\x81"; 8936 case 130: 8937 return "\\x82"; 8938 case 131: 8939 return "\\x83"; 8940 case 132: 8941 return "\\x84"; 8942 case 133: 8943 return "\\x85"; 8944 case 134: 8945 return "\\x86"; 8946 case 135: 8947 return "\\x87"; 8948 case 136: 8949 return "\\x88"; 8950 case 137: 8951 return "\\x89"; 8952 case 138: 8953 return "\\x8A"; 8954 case 139: 8955 return "\\x8B"; 8956 case 140: 8957 return "\\x8C"; 8958 case 141: 8959 return "\\x8D"; 8960 case 142: 8961 return "\\x8E"; 8962 case 143: 8963 return "\\x8F"; 8964 case 144: 8965 return "\\x90"; 8966 case 145: 8967 return "\\x91"; 8968 case 146: 8969 return "\\x92"; 8970 case 147: 8971 return "\\x93"; 8972 case 148: 8973 return "\\x94"; 8974 case 149: 8975 return "\\x95"; 8976 case 150: 8977 return "\\x96"; 8978 case 151: 8979 return "\\x97"; 8980 case 152: 8981 return "\\x98"; 8982 case 153: 8983 return "\\x99"; 8984 case 154: 8985 return "\\x9A"; 8986 case 155: 8987 return "\\x9B"; 8988 case 156: 8989 return "\\x9C"; 8990 case 157: 8991 return "\\x9D"; 8992 case 158: 8993 return "\\x9E"; 8994 case 159: 8995 return "\\x9F"; 8996 case 160: 8997 return "\\xA0"; 8998 case 161: 8999 return "\\xA1"; 9000 case 162: 9001 return "\\xA2"; 9002 case 163: 9003 return "\\xA3"; 9004 case 164: 9005 return "\\xA4"; 9006 case 165: 9007 return "\\xA5"; 9008 case 166: 9009 return "\\xA6"; 9010 case 167: 9011 return "\\xA7"; 9012 case 168: 9013 return "\\xA8"; 9014 case 169: 9015 return "\\xA9"; 9016 case 170: 9017 return "\\xAA"; 9018 case 171: 9019 return "\\xAB"; 9020 case 172: 9021 return "\\xAC"; 9022 case 173: 
9023 return "\\xAD"; 9024 case 174: 9025 return "\\xAE"; 9026 case 175: 9027 return "\\xAF"; 9028 case 176: 9029 return "\\xB0"; 9030 case 177: 9031 return "\\xB1"; 9032 case 178: 9033 return "\\xB2"; 9034 case 179: 9035 return "\\xB3"; 9036 case 180: 9037 return "\\xB4"; 9038 case 181: 9039 return "\\xB5"; 9040 case 182: 9041 return "\\xB6"; 9042 case 183: 9043 return "\\xB7"; 9044 case 184: 9045 return "\\xB8"; 9046 case 185: 9047 return "\\xB9"; 9048 case 186: 9049 return "\\xBA"; 9050 case 187: 9051 return "\\xBB"; 9052 case 188: 9053 return "\\xBC"; 9054 case 189: 9055 return "\\xBD"; 9056 case 190: 9057 return "\\xBE"; 9058 case 191: 9059 return "\\xBF"; 9060 case 192: 9061 return "\\xC0"; 9062 case 193: 9063 return "\\xC1"; 9064 case 194: 9065 return "\\xC2"; 9066 case 195: 9067 return "\\xC3"; 9068 case 196: 9069 return "\\xC4"; 9070 case 197: 9071 return "\\xC5"; 9072 case 198: 9073 return "\\xC6"; 9074 case 199: 9075 return "\\xC7"; 9076 case 200: 9077 return "\\xC8"; 9078 case 201: 9079 return "\\xC9"; 9080 case 202: 9081 return "\\xCA"; 9082 case 203: 9083 return "\\xCB"; 9084 case 204: 9085 return "\\xCC"; 9086 case 205: 9087 return "\\xCD"; 9088 case 206: 9089 return "\\xCE"; 9090 case 207: 9091 return "\\xCF"; 9092 case 208: 9093 return "\\xD0"; 9094 case 209: 9095 return "\\xD1"; 9096 case 210: 9097 return "\\xD2"; 9098 case 211: 9099 return "\\xD3"; 9100 case 212: 9101 return "\\xD4"; 9102 case 213: 9103 return "\\xD5"; 9104 case 214: 9105 return "\\xD6"; 9106 case 215: 9107 return "\\xD7"; 9108 case 216: 9109 return "\\xD8"; 9110 case 217: 9111 return "\\xD9"; 9112 case 218: 9113 return "\\xDA"; 9114 case 219: 9115 return "\\xDB"; 9116 case 220: 9117 return "\\xDC"; 9118 case 221: 9119 return "\\xDD"; 9120 case 222: 9121 return "\\xDE"; 9122 case 223: 9123 return "\\xDF"; 9124 case 224: 9125 return "\\xE0"; 9126 case 225: 9127 return "\\xE1"; 9128 case 226: 9129 return "\\xE2"; 9130 case 227: 9131 return "\\xE3"; 9132 case 228: 9133 return 
"\\xE4"; 9134 case 229: 9135 return "\\xE5"; 9136 case 230: 9137 return "\\xE6"; 9138 case 231: 9139 return "\\xE7"; 9140 case 232: 9141 return "\\xE8"; 9142 case 233: 9143 return "\\xE9"; 9144 case 234: 9145 return "\\xEA"; 9146 case 235: 9147 return "\\xEB"; 9148 case 236: 9149 return "\\xEC"; 9150 case 237: 9151 return "\\xED"; 9152 case 238: 9153 return "\\xEE"; 9154 case 239: 9155 return "\\xEF"; 9156 case 240: 9157 return "\\xF0"; 9158 case 241: 9159 return "\\xF1"; 9160 case 242: 9161 return "\\xF2"; 9162 case 243: 9163 return "\\xF3"; 9164 case 244: 9165 return "\\xF4"; 9166 case 245: 9167 return "\\xF5"; 9168 case 246: 9169 return "\\xF6"; 9170 case 247: 9171 return "\\xF7"; 9172 case 248: 9173 return "\\xF8"; 9174 case 249: 9175 return "\\xF9"; 9176 case 250: 9177 return "\\xFA"; 9178 case 251: 9179 return "\\xFB"; 9180 case 252: 9181 return "\\xFC"; 9182 case 253: 9183 return "\\xFD"; 9184 case 254: 9185 return "\\xFE"; 9186 case 255: 9187 return "\\xFF"; 9188 // LCOV_EXCL_START 9189 default: 9190 assert(0); /* never gets here */ 9191 return "dead code"; 9192 } 9193 assert(0); /* never gets here */ 9194 // LCOV_EXCL_STOP 9195 } 9196 9197 #endif /* XML_GE == 1 */ 9198 9199 static unsigned long 9200 getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { 9201 const char *const valueOrNull = getenv(variableName); 9202 if (valueOrNull == NULL) { 9203 return defaultDebugLevel; 9204 } 9205 const char *const value = valueOrNull; 9206 9207 errno = 0; 9208 char *afterValue = NULL; 9209 unsigned long debugLevel = strtoul(value, &afterValue, 10); 9210 if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 9211 errno = 0; 9212 return defaultDebugLevel; 9213 } 9214 9215 return debugLevel; 9216 } 9217