1 /* 93c1caa66e2b0310459482516af05505b57c5cb7b96df777105308fc585c85d1 (2.7.5+) 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> 17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> 38 Copyright (c) 2022 Jann Horn 
<jannh@google.com> 39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> 43 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 44 Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> 45 Copyright (c) 2025 Atrem Borovik <polzovatellllk@gmail.com> 46 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> 47 Copyright (c) 2026 Rosen Penev <rosenp@gmail.com> 48 Licensed under the MIT license: 49 50 Permission is hereby granted, free of charge, to any person obtaining 51 a copy of this software and associated documentation files (the 52 "Software"), to deal in the Software without restriction, including 53 without limitation the rights to use, copy, modify, merge, publish, 54 distribute, sublicense, and/or sell copies of the Software, and to permit 55 persons to whom the Software is furnished to do so, subject to the 56 following conditions: 57 58 The above copyright notice and this permission notice shall be included 59 in all copies or substantial portions of the Software. 60 61 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 62 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 63 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 64 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 65 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 66 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 67 USE OR OTHER DEALINGS IN THE SOFTWARE. 68 */ 69 70 #define XML_BUILDING_EXPAT 1 71 72 #include "expat_config.h" 73 74 #if ! 
defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 75 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 76 #endif 77 78 #if defined(XML_DTD) && XML_GE == 0 79 # error Either undefine XML_DTD or define XML_GE to 1. 80 #endif 81 82 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ 83 || (XML_CONTEXT_BYTES + 0 < 0) 84 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 85 #endif 86 87 #if defined(HAVE_SYSCALL_GETRANDOM) 88 # if ! defined(_GNU_SOURCE) 89 # define _GNU_SOURCE 1 /* syscall prototype */ 90 # endif 91 #endif 92 93 #ifdef _WIN32 94 /* force stdlib to define rand_s() */ 95 # if ! defined(_CRT_RAND_S) 96 # define _CRT_RAND_S 97 # endif 98 #endif 99 100 #include <stdbool.h> 101 #include <stddef.h> 102 #include <string.h> /* memset(), memcpy() */ 103 #include <assert.h> 104 #include <limits.h> /* INT_MAX, UINT_MAX */ 105 #include <stdio.h> /* fprintf */ 106 #include <stdlib.h> /* getenv, rand_s */ 107 #include <stdint.h> /* SIZE_MAX, uintptr_t */ 108 #include <math.h> /* isnan */ 109 110 #ifdef _WIN32 111 # define getpid GetCurrentProcessId 112 #else 113 # include <sys/time.h> /* gettimeofday() */ 114 # include <sys/types.h> /* getpid() */ 115 # include <unistd.h> /* getpid() */ 116 # include <fcntl.h> /* O_RDONLY */ 117 # include <errno.h> 118 #endif 119 120 #ifdef _WIN32 121 # include "winconfig.h" 122 #endif 123 124 #include "ascii.h" 125 #include "expat.h" 126 #include "siphash.h" 127 128 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 129 # if defined(HAVE_GETRANDOM) 130 # include <sys/random.h> /* getrandom */ 131 # else 132 # include <unistd.h> /* syscall */ 133 # include <sys/syscall.h> /* SYS_getrandom */ 134 # endif 135 # if ! 
defined(GRND_NONBLOCK) 136 # define GRND_NONBLOCK 0x0001 137 # endif /* defined(GRND_NONBLOCK) */ 138 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 139 140 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 141 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 142 #endif 143 144 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ 145 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ 146 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ 147 && ! defined(XML_POOR_ENTROPY) 148 # error You do not have support for any sources of high quality entropy \ 149 enabled. For end user security, that is probably not what you want. \ 150 \ 151 Your options include: \ 152 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 153 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 154 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 155 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 156 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 157 * Windows >=Vista (rand_s): _WIN32. \ 158 \ 159 If insist on not using any of these, bypass this error by defining \ 160 XML_POOR_ENTROPY; you have been warned. \ 161 \ 162 If you have reasons to patch this detection code away or need changes \ 163 to the build system, please open a bug. Thank you! 164 #endif 165 166 #ifdef XML_UNICODE 167 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 168 # define XmlConvert XmlUtf16Convert 169 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 170 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 171 # define XmlEncode XmlUtf16Encode 172 # define MUST_CONVERT(enc, s) (! 
/* Round n up to the next multiple of sz.
   The mask ~((sz) - 1) only clears exactly the low bits when sz is a power
   of 2, so no other sz may be passed.  sz is expanded twice, so it must be
   free of side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* NULL-aware pointer difference: yields (p) - (q) when both pointers are
   non-NULL and 0 otherwise, so no subtraction is ever performed on a NULL
   pointer.  Each argument may be evaluated twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)

/* Smaller of a and b; when the two compare equal, b is the operand that is
   selected.  The selected operand is evaluated twice, so avoid arguments
   with side effects. */
#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
/* For probing (after a collision) we need a step size that is relatively
   prime to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH extracts the raw second hash value from the discarded bits;
   on its own it may be even or zero.  PROBE_STEP ORs in 1 to make the step
   odd (and therefore non-zero) and narrows the result to unsigned char.
   Both macros expand mask more than once, so pass side-effect-free
   arguments.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
If the element is open across calls to 299 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 300 contain the 'raw' name as well. 301 302 A parser reuses these structures, maintaining a list of allocated 303 TAG objects in a free list. 304 */ 305 typedef struct tag { 306 struct tag *parent; /* parent of this element */ 307 const char *rawName; /* tagName in the original encoding */ 308 int rawNameLength; 309 TAG_NAME name; /* tagName in the API encoding */ 310 union { 311 char *raw; /* for byte-level access (rawName storage) */ 312 XML_Char *str; /* for character-level access (converted name) */ 313 } buf; /* buffer for name components */ 314 char *bufEnd; /* end of the buffer */ 315 BINDING *bindings; 316 } TAG; 317 318 typedef struct { 319 const XML_Char *name; 320 const XML_Char *textPtr; 321 int textLen; /* length in XML_Chars */ 322 int processed; /* # of processed bytes - when suspended */ 323 const XML_Char *systemId; 324 const XML_Char *base; 325 const XML_Char *publicId; 326 const XML_Char *notation; 327 XML_Bool open; 328 XML_Bool hasMore; /* true if entity has not been completely processed */ 329 /* An entity can be open while being already completely processed (hasMore == 330 XML_FALSE). 
The reason is the delayed closing of entities until their inner 331 entities are processed and closed */ 332 XML_Bool is_param; 333 XML_Bool is_internal; /* true if declared in internal subset outside PE */ 334 } ENTITY; 335 336 typedef struct { 337 enum XML_Content_Type type; 338 enum XML_Content_Quant quant; 339 const XML_Char *name; 340 int firstchild; 341 int lastchild; 342 int childcnt; 343 int nextsib; 344 } CONTENT_SCAFFOLD; 345 346 #define INIT_SCAFFOLD_ELEMENTS 32 347 348 typedef struct block { 349 struct block *next; 350 int size; 351 XML_Char s[]; 352 } BLOCK; 353 354 typedef struct { 355 BLOCK *blocks; 356 BLOCK *freeBlocks; 357 const XML_Char *end; 358 XML_Char *ptr; 359 XML_Char *start; 360 XML_Parser parser; 361 } STRING_POOL; 362 363 /* The XML_Char before the name is used to determine whether 364 an attribute has been specified. */ 365 typedef struct attribute_id { 366 XML_Char *name; 367 PREFIX *prefix; 368 XML_Bool maybeTokenized; 369 XML_Bool xmlns; 370 } ATTRIBUTE_ID; 371 372 typedef struct { 373 const ATTRIBUTE_ID *id; 374 XML_Bool isCdata; 375 const XML_Char *value; 376 } DEFAULT_ATTRIBUTE; 377 378 typedef struct { 379 unsigned long version; 380 unsigned long hash; 381 const XML_Char *uriName; 382 } NS_ATT; 383 384 typedef struct { 385 const XML_Char *name; 386 PREFIX *prefix; 387 const ATTRIBUTE_ID *idAtt; 388 int nDefaultAtts; 389 int allocDefaultAtts; 390 DEFAULT_ATTRIBUTE *defaultAtts; 391 } ELEMENT_TYPE; 392 393 typedef struct { 394 HASH_TABLE generalEntities; 395 HASH_TABLE elementTypes; 396 HASH_TABLE attributeIds; 397 HASH_TABLE prefixes; 398 STRING_POOL pool; 399 STRING_POOL entityValuePool; 400 /* false once a parameter entity reference has been skipped */ 401 XML_Bool keepProcessing; 402 /* true once an internal or external PE reference has been encountered; 403 this includes the reference to an external subset */ 404 XML_Bool hasParamEntityRefs; 405 XML_Bool standalone; 406 #ifdef XML_DTD 407 /* indicates if external PE has been 
read */ 408 XML_Bool paramEntityRead; 409 HASH_TABLE paramEntities; 410 #endif /* XML_DTD */ 411 PREFIX defaultPrefix; 412 /* === scaffolding for building content model === */ 413 XML_Bool in_eldecl; 414 CONTENT_SCAFFOLD *scaffold; 415 unsigned contentStringLen; 416 unsigned scaffSize; 417 unsigned scaffCount; 418 int scaffLevel; 419 int *scaffIndex; 420 } DTD; 421 422 enum EntityType { 423 ENTITY_INTERNAL, 424 ENTITY_ATTRIBUTE, 425 ENTITY_VALUE, 426 }; 427 428 typedef struct open_internal_entity { 429 const char *internalEventPtr; 430 const char *internalEventEndPtr; 431 struct open_internal_entity *next; 432 ENTITY *entity; 433 int startTagLevel; 434 XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 435 enum EntityType type; 436 } OPEN_INTERNAL_ENTITY; 437 438 enum XML_Account { 439 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ 440 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 441 expansion */ 442 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 443 }; 444 445 #if XML_GE == 1 446 typedef unsigned long long XmlBigCount; 447 typedef struct accounting { 448 XmlBigCount countBytesDirect; 449 XmlBigCount countBytesIndirect; 450 unsigned long debugLevel; 451 float maximumAmplificationFactor; // >=1.0 452 unsigned long long activationThresholdBytes; 453 } ACCOUNTING; 454 455 typedef struct MALLOC_TRACKER { 456 XmlBigCount bytesAllocated; 457 XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 458 unsigned long debugLevel; 459 float maximumAmplificationFactor; // >=1.0 460 XmlBigCount activationThresholdBytes; 461 } MALLOC_TRACKER; 462 463 typedef struct entity_stats { 464 unsigned int countEverOpened; 465 unsigned int currentDepth; 466 unsigned int maximumDepthSeen; 467 unsigned long debugLevel; 468 } ENTITY_STATS; 469 #endif /* XML_GE == 1 */ 470 471 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, 472 const char *end, const char **endPtr); 473 
474 static Processor prologProcessor; 475 static Processor prologInitProcessor; 476 static Processor contentProcessor; 477 static Processor cdataSectionProcessor; 478 #ifdef XML_DTD 479 static Processor ignoreSectionProcessor; 480 static Processor externalParEntProcessor; 481 static Processor externalParEntInitProcessor; 482 static Processor entityValueProcessor; 483 static Processor entityValueInitProcessor; 484 #endif /* XML_DTD */ 485 static Processor epilogProcessor; 486 static Processor errorProcessor; 487 static Processor externalEntityInitProcessor; 488 static Processor externalEntityInitProcessor2; 489 static Processor externalEntityInitProcessor3; 490 static Processor externalEntityContentProcessor; 491 static Processor internalEntityProcessor; 492 493 static enum XML_Error handleUnknownEncoding(XML_Parser parser, 494 const XML_Char *encodingName); 495 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 496 const char *s, const char *next); 497 static enum XML_Error initializeEncoding(XML_Parser parser); 498 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, 499 const char *s, const char *end, int tok, 500 const char *next, const char **nextPtr, 501 XML_Bool haveMore, XML_Bool allowClosingDoctype, 502 enum XML_Account account); 503 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, 504 XML_Bool betweenDecl, enum EntityType type); 505 static enum XML_Error doContent(XML_Parser parser, int startTagLevel, 506 const ENCODING *enc, const char *start, 507 const char *end, const char **endPtr, 508 XML_Bool haveMore, enum XML_Account account); 509 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, 510 const char **startPtr, const char *end, 511 const char **nextPtr, XML_Bool haveMore, 512 enum XML_Account account); 513 #ifdef XML_DTD 514 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, 515 const char **startPtr, const char *end, 516 const 
char **nextPtr, XML_Bool haveMore); 517 #endif /* XML_DTD */ 518 519 static void freeBindings(XML_Parser parser, BINDING *bindings); 520 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 521 const char *attStr, TAG_NAME *tagNamePtr, 522 BINDING **bindingsPtr, 523 enum XML_Account account); 524 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, 525 const ATTRIBUTE_ID *attId, const XML_Char *uri, 526 BINDING **bindingsPtr); 527 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, 528 XML_Bool isCdata, XML_Bool isId, 529 const XML_Char *value, XML_Parser parser); 530 static enum XML_Error storeAttributeValue(XML_Parser parser, 531 const ENCODING *enc, XML_Bool isCdata, 532 const char *ptr, const char *end, 533 STRING_POOL *pool, 534 enum XML_Account account); 535 static enum XML_Error 536 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 537 const char *ptr, const char *end, STRING_POOL *pool, 538 enum XML_Account account, const char **nextPtr); 539 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, 540 const char *start, const char *end); 541 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); 542 #if XML_GE == 1 543 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, 544 const char *start, const char *end, 545 enum XML_Account account, 546 const char **nextPtr); 547 static enum XML_Error callStoreEntityValue(XML_Parser parser, 548 const ENCODING *enc, 549 const char *start, const char *end, 550 enum XML_Account account); 551 #else 552 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); 553 #endif 554 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 555 const char *start, const char *end); 556 static int reportComment(XML_Parser parser, const ENCODING *enc, 557 const char *start, const char *end); 558 static void reportDefault(XML_Parser parser, const 
ENCODING *enc, 559 const char *start, const char *end); 560 561 static const XML_Char *getContext(XML_Parser parser); 562 static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 563 564 static void FASTCALL normalizePublicId(XML_Char *s); 565 566 static DTD *dtdCreate(XML_Parser parser); 567 /* do not call if m_parentParser != NULL */ 568 static void dtdReset(DTD *p, XML_Parser parser); 569 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser); 570 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 571 XML_Parser parser); 572 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 573 STRING_POOL *newPool, const HASH_TABLE *oldTable); 574 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, 575 size_t createSize); 576 static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser); 577 static void FASTCALL hashTableClear(HASH_TABLE *table); 578 static void FASTCALL hashTableDestroy(HASH_TABLE *table); 579 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, 580 const HASH_TABLE *table); 581 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); 582 583 static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser); 584 static void FASTCALL poolClear(STRING_POOL *pool); 585 static void FASTCALL poolDestroy(STRING_POOL *pool); 586 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 587 const char *ptr, const char *end); 588 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 589 const char *ptr, const char *end); 590 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); 591 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, 592 const XML_Char *s); 593 static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool, 594 const XML_Char *s); 595 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, 596 int n); 597 static const XML_Char *FASTCALL 
poolAppendString(STRING_POOL *pool, 598 const XML_Char *s); 599 600 static int FASTCALL nextScaffoldPart(XML_Parser parser); 601 static XML_Content *build_model(XML_Parser parser); 602 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, 603 const char *ptr, const char *end); 604 605 static XML_Char *copyString(const XML_Char *s, XML_Parser parser); 606 607 static unsigned long generate_hash_secret_salt(XML_Parser parser); 608 static XML_Bool startParsing(XML_Parser parser); 609 610 static XML_Parser parserCreate(const XML_Char *encodingName, 611 const XML_Memory_Handling_Suite *memsuite, 612 const XML_Char *nameSep, DTD *dtd, 613 XML_Parser parentParser); 614 615 static void parserInit(XML_Parser parser, const XML_Char *encodingName); 616 617 #if XML_GE == 1 618 static float accountingGetCurrentAmplification(XML_Parser rootParser); 619 static void accountingReportStats(XML_Parser originParser, const char *epilog); 620 static void accountingOnAbort(XML_Parser originParser); 621 static void accountingReportDiff(XML_Parser rootParser, 622 unsigned int levelsAwayFromRootParser, 623 const char *before, const char *after, 624 ptrdiff_t bytesMore, int source_line, 625 enum XML_Account account); 626 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, 627 const char *before, const char *after, 628 int source_line, 629 enum XML_Account account); 630 631 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, 632 const char *action, int sourceLine); 633 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, 634 int sourceLine); 635 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, 636 int sourceLine); 637 #endif /* XML_GE == 1 */ 638 639 static XML_Parser getRootParserOf(XML_Parser parser, 640 unsigned int *outLevelDiff); 641 642 static unsigned long getDebugLevel(const char *variableName, 643 unsigned long defaultDebugLevel); 644 645 #define poolStart(pool) ((pool)->start) 646 
/* Accessor macros for a STRING_POOL.  'start' is the beginning of the string
   currently under construction and 'ptr' is the position at which the next
   XML_Char will be written.  Every macro parenthesizes its pool argument so
   that any pointer expression (e.g. "pools + 1") can be passed; several of
   them expand the argument more than once, so it must not have side
   effects. */

/* Number of XML_Chars accumulated so far in the string under construction. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the most recently appended XML_Char by stepping ptr back by one. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The most recently appended XML_Char (the one just before ptr). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string under construction: rewind ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the string under construction: the next string starts at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c to the string under construction.  When no room is left
   (ptr == end) poolGrow() is called first; if it reports failure the macro
   evaluates to 0 and nothing is stored, otherwise c is stored, ptr is
   advanced and the macro evaluates to 1. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
m_startCdataSectionHandler; 699 XML_EndCdataSectionHandler m_endCdataSectionHandler; 700 XML_DefaultHandler m_defaultHandler; 701 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 702 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 703 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 704 XML_NotationDeclHandler m_notationDeclHandler; 705 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 706 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 707 XML_NotStandaloneHandler m_notStandaloneHandler; 708 XML_ExternalEntityRefHandler m_externalEntityRefHandler; 709 XML_Parser m_externalEntityRefHandlerArg; 710 XML_SkippedEntityHandler m_skippedEntityHandler; 711 XML_UnknownEncodingHandler m_unknownEncodingHandler; 712 XML_ElementDeclHandler m_elementDeclHandler; 713 XML_AttlistDeclHandler m_attlistDeclHandler; 714 XML_EntityDeclHandler m_entityDeclHandler; 715 XML_XmlDeclHandler m_xmlDeclHandler; 716 const ENCODING *m_encoding; 717 INIT_ENCODING m_initEncoding; 718 const ENCODING *m_internalEncoding; 719 const XML_Char *m_protocolEncodingName; 720 XML_Bool m_ns; 721 XML_Bool m_ns_triplets; 722 void *m_unknownEncodingMem; 723 void *m_unknownEncodingData; 724 void *m_unknownEncodingHandlerData; 725 void(XMLCALL *m_unknownEncodingRelease)(void *); 726 PROLOG_STATE m_prologState; 727 Processor *m_processor; 728 enum XML_Error m_errorCode; 729 const char *m_eventPtr; 730 const char *m_eventEndPtr; 731 const char *m_positionPtr; 732 OPEN_INTERNAL_ENTITY *m_openInternalEntities; 733 OPEN_INTERNAL_ENTITY *m_freeInternalEntities; 734 OPEN_INTERNAL_ENTITY *m_openAttributeEntities; 735 OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; 736 OPEN_INTERNAL_ENTITY *m_openValueEntities; 737 OPEN_INTERNAL_ENTITY *m_freeValueEntities; 738 XML_Bool m_defaultExpandInternalEntities; 739 int m_tagLevel; 740 ENTITY *m_declEntity; 741 const XML_Char *m_doctypeName; 742 const XML_Char *m_doctypeSysid; 743 const XML_Char *m_doctypePubid; 744 const XML_Char 
*m_declAttributeType; 745 const XML_Char *m_declNotationName; 746 const XML_Char *m_declNotationPublicId; 747 ELEMENT_TYPE *m_declElementType; 748 ATTRIBUTE_ID *m_declAttributeId; 749 XML_Bool m_declAttributeIsCdata; 750 XML_Bool m_declAttributeIsId; 751 DTD *m_dtd; 752 const XML_Char *m_curBase; 753 TAG *m_tagStack; 754 TAG *m_freeTagList; 755 BINDING *m_inheritedBindings; 756 BINDING *m_freeBindingList; 757 int m_attsSize; 758 int m_nSpecifiedAtts; 759 int m_idAttIndex; 760 ATTRIBUTE *m_atts; 761 NS_ATT *m_nsAtts; 762 unsigned long m_nsAttsVersion; 763 unsigned char m_nsAttsPower; 764 #ifdef XML_ATTR_INFO 765 XML_AttrInfo *m_attInfo; 766 #endif 767 POSITION m_position; 768 STRING_POOL m_tempPool; 769 STRING_POOL m_temp2Pool; 770 char *m_groupConnector; 771 unsigned int m_groupSize; 772 XML_Char m_namespaceSeparator; 773 XML_Parser m_parentParser; 774 XML_ParsingStatus m_parsingStatus; 775 #ifdef XML_DTD 776 XML_Bool m_isParamEntity; 777 XML_Bool m_useForeignDTD; 778 enum XML_ParamEntityParsing m_paramEntityParsing; 779 #endif 780 unsigned long m_hash_secret_salt; 781 #if XML_GE == 1 782 ACCOUNTING m_accounting; 783 MALLOC_TRACKER m_alloc_tracker; 784 ENTITY_STATS m_entity_stats; 785 #endif 786 XML_Bool m_reenter; 787 }; 788 789 #if XML_GE == 1 790 # define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) 791 # define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) 792 # define FREE(parser, p) (expat_free((parser), (p), __LINE__)) 793 #else 794 # define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 795 # define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 796 # define FREE(parser, p) (parser->m_mem.free_fcn((p))) 797 #endif 798 799 #if XML_GE == 1 800 static void 801 expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, 802 XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { 803 // NOTE: This can be +infinity or -nan 804 const float amplification 805 = (float)newTotal / 
(float)rootParser->m_accounting.countBytesDirect; 806 fprintf( 807 stderr, 808 "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( 809 "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", 810 (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator, 811 absDiff, newTotal, peakTotal, (double)amplification, sourceLine); 812 } 813 814 static bool 815 expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, 816 int sourceLine) { 817 assert(rootParser != NULL); 818 assert(increase > 0); 819 820 XmlBigCount newTotal = 0; 821 bool tolerable = true; 822 823 // Detect integer overflow 824 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { 825 tolerable = false; 826 } else { 827 newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; 828 829 if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { 830 assert(newTotal > 0); 831 // NOTE: This can be +infinity when dividing by zero but not -nan 832 const float amplification 833 = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; 834 if (amplification 835 > rootParser->m_alloc_tracker.maximumAmplificationFactor) { 836 tolerable = false; 837 } 838 } 839 } 840 841 if (! 
tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { 842 expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); 843 } 844 845 return tolerable; 846 } 847 848 # if defined(XML_TESTING) 849 void * 850 # else 851 static void * 852 # endif 853 expat_malloc(XML_Parser parser, size_t size, int sourceLine) { 854 // Detect integer overflow 855 if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { 856 return NULL; 857 } 858 859 const XML_Parser rootParser = getRootParserOf(parser, NULL); 860 assert(rootParser->m_parentParser == NULL); 861 862 const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; 863 864 if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated 865 < bytesToAllocate) { 866 return NULL; // i.e. signal integer overflow as out-of-memory 867 } 868 869 if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, 870 sourceLine)) { 871 return NULL; // i.e. signal violation as out-of-memory 872 } 873 874 // Actually allocate 875 void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); 876 877 if (mallocedPtr == NULL) { 878 return NULL; 879 } 880 881 // Update in-block recorded size 882 *(size_t *)mallocedPtr = size; 883 884 // Update accounting 885 rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; 886 887 // Report as needed 888 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 889 if (rootParser->m_alloc_tracker.bytesAllocated 890 > rootParser->m_alloc_tracker.peakBytesAllocated) { 891 rootParser->m_alloc_tracker.peakBytesAllocated 892 = rootParser->m_alloc_tracker.bytesAllocated; 893 } 894 expat_heap_stat(rootParser, '+', bytesToAllocate, 895 rootParser->m_alloc_tracker.bytesAllocated, 896 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 897 } 898 899 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 900 } 901 902 # if defined(XML_TESTING) 903 void 904 # else 905 static void 906 # endif 907 expat_free(XML_Parser parser, void *ptr, int 
sourceLine) { 908 assert(parser != NULL); 909 910 if (ptr == NULL) { 911 return; 912 } 913 914 const XML_Parser rootParser = getRootParserOf(parser, NULL); 915 assert(rootParser->m_parentParser == NULL); 916 917 // Extract size (to the eyes of malloc_fcn/realloc_fcn) and 918 // the original pointer returned by malloc/realloc 919 void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 920 const size_t bytesAllocated 921 = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr; 922 923 // Update accounting 924 assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); 925 rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; 926 927 // Report as needed 928 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 929 expat_heap_stat(rootParser, '-', bytesAllocated, 930 rootParser->m_alloc_tracker.bytesAllocated, 931 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 932 } 933 934 // NOTE: This may be freeing rootParser, so freeing has to come last 935 parser->m_mem.free_fcn(mallocedPtr); 936 } 937 938 # if defined(XML_TESTING) 939 void * 940 # else 941 static void * 942 # endif 943 expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { 944 assert(parser != NULL); 945 946 if (ptr == NULL) { 947 return expat_malloc(parser, size, sourceLine); 948 } 949 950 if (size == 0) { 951 expat_free(parser, ptr, sourceLine); 952 return NULL; 953 } 954 955 const XML_Parser rootParser = getRootParserOf(parser, NULL); 956 assert(rootParser->m_parentParser == NULL); 957 958 // Extract original size (to the eyes of the caller) and the original 959 // pointer returned by malloc/realloc 960 void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); 961 const size_t prevSize = *(size_t *)mallocedPtr; 962 963 // Classify upcoming change 964 const bool isIncrease = (size > prevSize); 965 const size_t absDiff 966 = (size > prevSize) ? 
(size - prevSize) : (prevSize - size); 967 968 // Ask for permission from accounting 969 if (isIncrease) { 970 if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { 971 return NULL; // i.e. signal violation as out-of-memory 972 } 973 } 974 975 // NOTE: Integer overflow detection has already been done for us 976 // by expat_heap_increase_tolerable(..) above 977 assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); 978 979 // Actually allocate 980 mallocedPtr = parser->m_mem.realloc_fcn( 981 mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); 982 983 if (mallocedPtr == NULL) { 984 return NULL; 985 } 986 987 // Update accounting 988 if (isIncrease) { 989 assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated 990 >= absDiff); 991 rootParser->m_alloc_tracker.bytesAllocated += absDiff; 992 } else { // i.e. decrease 993 assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); 994 rootParser->m_alloc_tracker.bytesAllocated -= absDiff; 995 } 996 997 // Report as needed 998 if (rootParser->m_alloc_tracker.debugLevel >= 2) { 999 if (rootParser->m_alloc_tracker.bytesAllocated 1000 > rootParser->m_alloc_tracker.peakBytesAllocated) { 1001 rootParser->m_alloc_tracker.peakBytesAllocated 1002 = rootParser->m_alloc_tracker.bytesAllocated; 1003 } 1004 expat_heap_stat(rootParser, isIncrease ? 
'+' : '-', absDiff, 1005 rootParser->m_alloc_tracker.bytesAllocated, 1006 rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); 1007 } 1008 1009 // Update in-block recorded size 1010 *(size_t *)mallocedPtr = size; 1011 1012 return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; 1013 } 1014 #endif // XML_GE == 1 1015 1016 XML_Parser XMLCALL 1017 XML_ParserCreate(const XML_Char *encodingName) { 1018 return XML_ParserCreate_MM(encodingName, NULL, NULL); 1019 } 1020 1021 XML_Parser XMLCALL 1022 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { 1023 XML_Char tmp[2] = {nsSep, 0}; 1024 return XML_ParserCreate_MM(encodingName, NULL, tmp); 1025 } 1026 1027 // "xml=http://www.w3.org/XML/1998/namespace" 1028 static const XML_Char implicitContext[] 1029 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, 1030 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 1031 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, 1032 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 1033 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, 1034 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, 1035 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, 1036 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, 1037 '\0'}; 1038 1039 /* To avoid warnings about unused functions: */ 1040 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) 1041 1042 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 1043 1044 /* Obtain entropy on Linux 3.17+ */ 1045 static int 1046 writeRandomBytes_getrandom_nonblock(void *target, size_t count) { 1047 int success = 0; /* full count bytes written? 
/* To avoid warnings about unused functions: */
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)

#  if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)

/* Obtain entropy on Linux 3.17+

   Fills `target` with `count` bytes from getrandom (called directly or via
   syscall, depending on the build configuration) using GRND_NONBLOCK.

   The call is repeated while it makes progress (a short count) or fails
   with EINTR.  errno is only consulted after a failed call, so a stale
   errno value from earlier code can neither end the loop early nor keep
   it spinning.  Any other failure, or a call that delivers nothing, stops
   the loop.

   Returns 1 if all `count` bytes were written (trivially so for
   count == 0), 0 otherwise. */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    assert(bytesToWrite <= INT_MAX);

    const int bytesWrittenMore =
#    if defined(HAVE_GETRANDOM)
        (int)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#    else
        (int)syscall(SYS_getrandom, currentTarget, bytesToWrite,
                     getrandomFlags);
#    endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      break; /* hard error or no progress: give up */
    }
  }

  return bytesWrittenTotal >= count;
}

#  endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */

#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)

/* Extract entropy from /dev/urandom

   Fills `target` with `count` bytes read from /dev/urandom.  Short reads
   are continued and reads failing with EINTR are retried; errno is only
   consulted after a failed read.  Any other failure, or a read returning
   0 bytes, stops the loop.

   Returns 1 if all `count` bytes were read (trivially so for count == 0),
   0 otherwise, including when the device cannot be opened. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      break; /* hard error or end of file: give up */
    }
  }

  close(fd);
  return bytesWrittenTotal >= count;
}

#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */

#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)

/* Fills `target` with `count` bytes taken from successive arc4random()
   results, least significant byte of each 32-bit value first. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t left = count - filled;
    const size_t chunk = (left < sizeof(word)) ? left : sizeof(word);

    for (size_t k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
}

#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */

#ifdef _WIN32

/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
   as it didn't declare it in its header prior to version 5.3.0 of its
   runtime package (mingwrt, containing stdlib.h).  The upstream fix
   was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
#  if defined(__MINGW32__) && defined(__MINGW32_VERSION)                       \
      && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
__declspec(dllimport) int rand_s(unsigned int *);
#  endif

/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills `target` with `count` bytes, least significant byte of each
 * rand_s() result first.  Returns 1 on success and 0 as soon as a
 * rand_s() call reports failure.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    const size_t left = count - filled;
    const size_t chunk = (left < sizeof(word)) ? left : sizeof(word);

    for (size_t k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
  return 1; /* success */
}

#endif /* _WIN32 */
defined(HAVE_ARC4RANDOM) 1169 1170 static unsigned long 1171 gather_time_entropy(void) { 1172 # ifdef _WIN32 1173 FILETIME ft; 1174 GetSystemTimeAsFileTime(&ft); /* never fails */ 1175 return ft.dwHighDateTime ^ ft.dwLowDateTime; 1176 # else 1177 struct timeval tv; 1178 int gettimeofday_res; 1179 1180 gettimeofday_res = gettimeofday(&tv, NULL); 1181 1182 # if defined(NDEBUG) 1183 (void)gettimeofday_res; 1184 # else 1185 assert(gettimeofday_res == 0); 1186 # endif /* defined(NDEBUG) */ 1187 1188 /* Microseconds time is <20 bits entropy */ 1189 return tv.tv_usec; 1190 # endif 1191 } 1192 1193 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 1194 1195 static unsigned long 1196 ENTROPY_DEBUG(const char *label, unsigned long entropy) { 1197 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { 1198 fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, 1199 (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); 1200 } 1201 return entropy; 1202 } 1203 1204 static unsigned long 1205 generate_hash_secret_salt(XML_Parser parser) { 1206 unsigned long entropy; 1207 (void)parser; 1208 1209 /* "Failproof" high quality providers: */ 1210 #if defined(HAVE_ARC4RANDOM_BUF) 1211 arc4random_buf(&entropy, sizeof(entropy)); 1212 return ENTROPY_DEBUG("arc4random_buf", entropy); 1213 #elif defined(HAVE_ARC4RANDOM) 1214 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); 1215 return ENTROPY_DEBUG("arc4random", entropy); 1216 #else 1217 /* Try high quality providers first .. */ 1218 # ifdef _WIN32 1219 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { 1220 return ENTROPY_DEBUG("rand_s", entropy); 1221 } 1222 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 1223 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { 1224 return ENTROPY_DEBUG("getrandom", entropy); 1225 } 1226 # endif 1227 # if ! 
/* Produces a fresh value for use as hash secret salt.

   The entropy source is chosen by the build configuration:
   - HAVE_ARC4RANDOM_BUF: arc4random_buf()
   - else HAVE_ARC4RANDOM: writeRandomBytes_arc4random()
   - else, tried in order until one succeeds: writeRandomBytes_rand_s()
     (on _WIN32) or writeRandomBytes_getrandom_nonblock() (with
     HAVE_GETRANDOM / HAVE_SYSCALL_GETRANDOM), then
     writeRandomBytes_dev_urandom() (non-Windows with XML_DEV_URANDOM),
     and finally a fallback mixing time and process ID.

   Every result is routed through ENTROPY_DEBUG() with a label naming the
   source.  `parser` is unused. */
static unsigned long
generate_hash_secret_salt(XML_Parser parser) {
  unsigned long entropy;
  (void)parser;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Reached only when none of the providers above delivered; whatever a
     failed provider may have left in `entropy` is overwritten here */
  entropy = gather_time_entropy();
#  if ! defined(__wasi__)
  /* Process ID is 0 bits entropy if attacker has local access */
  entropy ^= getpid();
#  endif

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
  /* The factor (and the debug label) is picked by the width of unsigned
     long; the multiplication is done in unsigned long */
  if (sizeof(unsigned long) == 4) {
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
                         entropy * (unsigned long)2305843009213693951ULL);
  }
#endif
}

/* Returns the hash secret salt stored on the root parser of `parser`, as
   located by getRootParserOf(). */
static unsigned long
get_hash_secret_salt(XML_Parser parser) {
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(! rootParser->m_parentParser);

  return rootParser->m_hash_secret_salt;
}
/* Runs parser->m_processor over the input [start, end) and reports through
   *endPtr where processing stopped.

   When reparse deferral is enabled and this is not the final buffer, the
   call may return XML_ERROR_NONE right away with *endPtr == start, without
   invoking the processor at all (see the heuristic below).

   Otherwise the processor is invoked, and invoked again from the position
   it reported, for as long as it leaves parser->m_reenter set while the
   parsing status is still XML_PARSING.

   Returns the error code of the last processor invocation.  On
   XML_ERROR_NONE, parser->m_partialTokenBytesBefore is updated to the
   number of bytes offered if nothing was consumed, and to 0 otherwise. */
static enum XML_Error
callProcessor(XML_Parser parser, const char *start, const char *end,
              const char **endPtr) {
  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);

  if (parser->m_reparseDeferralEnabled
      && ! parser->m_parsingStatus.finalBuffer) {
    // Heuristic: don't try to parse a partial token again until the amount of
    // available data has increased significantly.
    const size_t had_before = parser->m_partialTokenBytesBefore;
    // ...but *do* try anyway if we're close to causing a reallocation.
    // available_buffer is the space in front of m_bufferPtr (less up to
    // XML_CONTEXT_BYTES of it, if configured) plus the space behind
    // m_bufferEnd.
    size_t available_buffer
        = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
#if XML_CONTEXT_BYTES > 0
    available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
#endif
    available_buffer
        += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
    // Go ahead if the data at least doubled since the last fruitless attempt,
    // or if the last buffer request would no longer fit into that space.
    const bool enough
        = (have_now >= 2 * had_before)
          || ((size_t)parser->m_lastBufferRequestSize > available_buffer);

    if (! enough) {
      *endPtr = start; // callers may expect this to be set
      return XML_ERROR_NONE;
    }
  }
#if defined(XML_TESTING)
  // Test-only counter of bytes handed to the processor
  g_bytesScanned += (unsigned)have_now;
#endif
  // Run in a loop to eliminate dangerous recursion depths
  enum XML_Error ret;
  *endPtr = start;
  while (1) {
    // Use endPtr as the new start in each iteration, since it will
    // be set to the next start point by m_processor.
    ret = parser->m_processor(parser, *endPtr, end, endPtr);

    // Make parsing status (and in particular XML_SUSPENDED) take
    // precedence over re-enter flag when they disagree
    if (parser->m_parsingStatus.parsing != XML_PARSING) {
      parser->m_reenter = XML_FALSE;
    }

    if (! parser->m_reenter) {
      break;
    }

    // A re-entry was requested: clear the flag before going around again.
    // An error reported together with a re-entry request is returned as-is,
    // skipping the m_partialTokenBytesBefore bookkeeping below.
    parser->m_reenter = XML_FALSE;
    if (ret != XML_ERROR_NONE)
      return ret;
  }

  if (ret == XML_ERROR_NONE) {
    // if we consumed nothing, remember what we had on this parse attempt.
    if (*endPtr == start) {
      parser->m_partialTokenBytesBefore = have_now;
    } else {
      parser->m_partialTokenBytesBefore = 0;
    }
  }
  return ret;
}
1314 if (*endPtr == start) { 1315 parser->m_partialTokenBytesBefore = have_now; 1316 } else { 1317 parser->m_partialTokenBytesBefore = 0; 1318 } 1319 } 1320 return ret; 1321 } 1322 1323 static XML_Bool /* only valid for root parser */ 1324 startParsing(XML_Parser parser) { 1325 /* hash functions must be initialized before setContext() is called */ 1326 if (parser->m_hash_secret_salt == 0) 1327 parser->m_hash_secret_salt = generate_hash_secret_salt(parser); 1328 if (parser->m_ns) { 1329 /* implicit context only set for root parser, since child 1330 parsers (i.e. external entity parsers) will inherit it 1331 */ 1332 return setContext(parser, implicitContext); 1333 } 1334 return XML_TRUE; 1335 } 1336 1337 XML_Parser XMLCALL 1338 XML_ParserCreate_MM(const XML_Char *encodingName, 1339 const XML_Memory_Handling_Suite *memsuite, 1340 const XML_Char *nameSep) { 1341 return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); 1342 } 1343 1344 static XML_Parser 1345 parserCreate(const XML_Char *encodingName, 1346 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, 1347 DTD *dtd, XML_Parser parentParser) { 1348 XML_Parser parser = NULL; 1349 1350 #if XML_GE == 1 1351 const size_t increase 1352 = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct); 1353 1354 if (parentParser != NULL) { 1355 const XML_Parser rootParser = getRootParserOf(parentParser, NULL); 1356 if (! 
/* Allocates and initializes a parser object.

   encodingName  forwarded to parserInit(); if it is non-NULL and no copy of
                 it ends up in m_protocolEncodingName, creation fails.
   memsuite      if non-NULL, its malloc_fcn allocates the parser and its
                 three functions are copied into parser->m_mem; otherwise
                 malloc/realloc/free are used.
   nameSep       if non-NULL, turns on m_ns and its first character becomes
                 the namespace separator.
   dtd           if non-NULL, used as parser->m_dtd as-is; otherwise a new
                 one is made with dtdCreate().
   parentParser  if non-NULL (and XML_GE == 1), the new parser becomes its
                 child and the allocation of the parser struct must first be
                 tolerated by the root parser's allocation tracker.

   Returns the new parser, or NULL on any failure; everything allocated up
   to the point of failure is released again. */
static XML_Parser
parserCreate(const XML_Char *encodingName,
             const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
             DTD *dtd, XML_Parser parentParser) {
  XML_Parser parser = NULL;

#if XML_GE == 1
  // Size of the parser struct plus the same header that expat_malloc puts
  // in front of every block
  const size_t increase
      = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct);

  // Only child parsers are checked here; a root parser has no tracker yet
  if (parentParser != NULL) {
    const XML_Parser rootParser = getRootParserOf(parentParser, NULL);
    if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) {
      return NULL;
    }
  }
#else
  UNUSED_P(parentParser);
#endif

  // NOTE: The two branches below differ only in which allocator is used.
  //       With XML_GE == 1 the block gets the size header written by hand,
  //       because expat_malloc cannot be used before the parser exists.
  if (memsuite) {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser
        = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING
                               + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      // The cast gives write access to m_mem for this one-time setup
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = memsuite->malloc_fcn;
      mtemp->realloc_fcn = memsuite->realloc_fcn;
      mtemp->free_fcn = memsuite->free_fcn;
    }
  } else {
    XML_Memory_Handling_Suite *mtemp;
#if XML_GE == 1
    void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING
                                       + sizeof(struct XML_ParserStruct));
    if (sizeAndParser != NULL) {
      *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct);
      parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)
                            + EXPAT_MALLOC_PADDING);
#else
    parser = malloc(sizeof(struct XML_ParserStruct));
    if (parser != NULL) {
#endif
      mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
      mtemp->malloc_fcn = malloc;
      mtemp->realloc_fcn = realloc;
      mtemp->free_fcn = free;
    }
  } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0

  if (! parser)
    return parser;

#if XML_GE == 1
  // Initialize .m_alloc_tracker
  memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER));
  if (parentParser == NULL) {
    // Only a root parser gets tracker settings of its own
    parser->m_alloc_tracker.debugLevel
        = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u);
    parser->m_alloc_tracker.maximumAmplificationFactor
        = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT;
    parser->m_alloc_tracker.activationThresholdBytes
        = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT;

    // NOTE: This initialization needs to come this early because these fields
    //       are read by allocation tracking code
    parser->m_parentParser = NULL;
    parser->m_accounting.countBytesDirect = 0;
  } else {
    parser->m_parentParser = parentParser;
  }

  // Record XML_ParserStruct allocation we did a few lines up before
  const XML_Parser rootParser = getRootParserOf(parser, NULL);
  assert(rootParser->m_parentParser == NULL);
  // NOTE(review): this bound uses SIZE_MAX while the other tracker checks in
  //               this file use (XmlBigCount)-1 -- confirm that is intended
  assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase);
  rootParser->m_alloc_tracker.bytesAllocated += increase;

  // Report on allocation
  if (rootParser->m_alloc_tracker.debugLevel >= 2) {
    if (rootParser->m_alloc_tracker.bytesAllocated
        > rootParser->m_alloc_tracker.peakBytesAllocated) {
      rootParser->m_alloc_tracker.peakBytesAllocated
          = rootParser->m_alloc_tracker.bytesAllocated;
    }

    expat_heap_stat(rootParser, '+', increase,
                    rootParser->m_alloc_tracker.bytesAllocated,
                    rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__);
  }
#else
  parser->m_parentParser = NULL;
#endif // XML_GE == 1

  parser->m_buffer = NULL;
  parser->m_bufferLim = NULL;

  // From here on, each failure path frees what was allocated before it and
  // finally the parser itself
  parser->m_attsSize = INIT_ATTS_SIZE;
  parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
  if (parser->m_atts == NULL) {
    FREE(parser, parser);
    return NULL;
  }
#ifdef XML_ATTR_INFO
  parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
  if (parser->m_attInfo == NULL) {
    FREE(parser, parser->m_atts);
    FREE(parser, parser);
    return NULL;
  }
#endif
  parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
  if (parser->m_dataBuf == NULL) {
    FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
    FREE(parser, parser->m_attInfo);
#endif
    FREE(parser, parser);
    return NULL;
  }
  parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;

  // Use the DTD handed in, or create one owned by this parser
  if (dtd)
    parser->m_dtd = dtd;
  else {
    parser->m_dtd = dtdCreate(parser);
    if (parser->m_dtd == NULL) {
      FREE(parser, parser->m_dataBuf);
      FREE(parser, parser->m_atts);
#ifdef XML_ATTR_INFO
      FREE(parser, parser->m_attInfo);
#endif
      FREE(parser, parser);
      return NULL;
    }
  }

  // Fields that parserInit() does not touch
  parser->m_freeBindingList = NULL;
  parser->m_freeTagList = NULL;
  parser->m_freeInternalEntities = NULL;
  parser->m_freeAttributeEntities = NULL;
  parser->m_freeValueEntities = NULL;

  parser->m_groupSize = 0;
  parser->m_groupConnector = NULL;

  parser->m_unknownEncodingHandler = NULL;
  parser->m_unknownEncodingHandlerData = NULL;

  parser->m_namespaceSeparator = ASCII_EXCL;
  parser->m_ns = XML_FALSE;
  parser->m_ns_triplets = XML_FALSE;

  parser->m_nsAtts = NULL;
  parser->m_nsAttsVersion = 0;
  parser->m_nsAttsPower = 0;

  parser->m_protocolEncodingName = NULL;

  poolInit(&parser->m_tempPool, parser);
  poolInit(&parser->m_temp2Pool, parser);
  parserInit(parser, encodingName);

  // parserInit() could not store a copy of the requested encoding name
  if (encodingName && ! parser->m_protocolEncodingName) {
    if (dtd) {
      // We need to stop the upcoming call to XML_ParserFree from happily
      // destroying parser->m_dtd because the DTD is shared with the parent
      // parser and the only guard that keeps XML_ParserFree from destroying
      // parser->m_dtd is parser->m_isParamEntity but it will be set to
      // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
      parser->m_dtd = NULL;
    }
    XML_ParserFree(parser);
    return NULL;
  }

  if (nameSep) {
    parser->m_ns = XML_TRUE;
    parser->m_internalEncoding = XmlGetInternalEncodingNS();
    parser->m_namespaceSeparator = *nameSep;
  } else {
    parser->m_internalEncoding = XmlGetInternalEncoding();
  }

  return parser;
}
/* Puts the per-document state of `parser` into its initial condition.

   Called by parserCreate() on a new parser and by XML_ParserReset() on an
   existing one.  If `encodingName` is non-NULL, the result of copyString()
   is stored in m_protocolEncodingName; callers that care have to check
   that field afterwards, since this function cannot report failure.

   Fields not assigned here (buffers, free lists, pools, the DTD, the
   allocation tracker, ...) are left untouched. */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  // Processor, prolog state and encoding
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, parser);
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);

  // User data and handlers: all cleared
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  // The handler argument defaults to the parser itself
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;

  // Input buffer positions: back to the start of the existing buffer (the
  // buffer itself is kept)
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;
  parser->m_partialTokenBytesBefore = 0;
  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
  parser->m_lastBufferRequestSize = 0;

  // State of the declaration currently being parsed
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;

  // Position, error and event tracking
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;

  // Open entities, tag stack and bindings
  parser->m_openInternalEntities = NULL;
  parser->m_openAttributeEntities = NULL;
  parser->m_openValueEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;

  // Unknown-encoding support; any previous memory/data must already have
  // been released by the caller, these pointers are simply overwritten
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;

  parser->m_parsingStatus.parsing = XML_INITIALIZED;
  // Reentry can only be triggered inside m_processor calls
  parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  // 0 makes startParsing() generate a fresh salt
  parser->m_hash_secret_salt = 0;

#if XML_GE == 1
  // Accounting starts from zero with the default limits
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}
1614 #endif 1615 parser->m_hash_secret_salt = 0; 1616 1617 #if XML_GE == 1 1618 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); 1619 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); 1620 parser->m_accounting.maximumAmplificationFactor 1621 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; 1622 parser->m_accounting.activationThresholdBytes 1623 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; 1624 1625 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); 1626 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); 1627 #endif 1628 } 1629 1630 /* moves list of bindings to m_freeBindingList */ 1631 static void FASTCALL 1632 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { 1633 while (bindings) { 1634 BINDING *b = bindings; 1635 bindings = bindings->nextTagBinding; 1636 b->nextTagBinding = parser->m_freeBindingList; 1637 parser->m_freeBindingList = b; 1638 } 1639 } 1640 1641 XML_Bool XMLCALL 1642 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { 1643 TAG *tStk; 1644 OPEN_INTERNAL_ENTITY *openEntityList; 1645 1646 if (parser == NULL) 1647 return XML_FALSE; 1648 1649 if (parser->m_parentParser) 1650 return XML_FALSE; 1651 /* move m_tagStack to m_freeTagList */ 1652 tStk = parser->m_tagStack; 1653 while (tStk) { 1654 TAG *tag = tStk; 1655 tStk = tStk->parent; 1656 tag->parent = parser->m_freeTagList; 1657 moveToFreeBindingList(parser, tag->bindings); 1658 tag->bindings = NULL; 1659 parser->m_freeTagList = tag; 1660 } 1661 /* move m_openInternalEntities to m_freeInternalEntities */ 1662 openEntityList = parser->m_openInternalEntities; 1663 while (openEntityList) { 1664 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1665 openEntityList = openEntity->next; 1666 openEntity->next = parser->m_freeInternalEntities; 1667 parser->m_freeInternalEntities = openEntity; 1668 } 1669 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. 
same task but 1670 * for attributes) */ 1671 openEntityList = parser->m_openAttributeEntities; 1672 while (openEntityList) { 1673 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1674 openEntityList = openEntity->next; 1675 openEntity->next = parser->m_freeAttributeEntities; 1676 parser->m_freeAttributeEntities = openEntity; 1677 } 1678 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but 1679 * for value entities) */ 1680 openEntityList = parser->m_openValueEntities; 1681 while (openEntityList) { 1682 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1683 openEntityList = openEntity->next; 1684 openEntity->next = parser->m_freeValueEntities; 1685 parser->m_freeValueEntities = openEntity; 1686 } 1687 moveToFreeBindingList(parser, parser->m_inheritedBindings); 1688 FREE(parser, parser->m_unknownEncodingMem); 1689 if (parser->m_unknownEncodingRelease) 1690 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1691 poolClear(&parser->m_tempPool); 1692 poolClear(&parser->m_temp2Pool); 1693 FREE(parser, (void *)parser->m_protocolEncodingName); 1694 parser->m_protocolEncodingName = NULL; 1695 parserInit(parser, encodingName); 1696 dtdReset(parser->m_dtd, parser); 1697 return XML_TRUE; 1698 } 1699 1700 static XML_Bool 1701 parserBusy(XML_Parser parser) { 1702 switch (parser->m_parsingStatus.parsing) { 1703 case XML_PARSING: 1704 case XML_SUSPENDED: 1705 return XML_TRUE; 1706 case XML_INITIALIZED: 1707 case XML_FINISHED: 1708 default: 1709 return XML_FALSE; 1710 } 1711 } 1712 1713 enum XML_Status XMLCALL 1714 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { 1715 if (parser == NULL) 1716 return XML_STATUS_ERROR; 1717 /* Block after XML_Parse()/XML_ParseBuffer() has been called. 1718 XXX There's no way for the caller to determine which of the 1719 XXX possible error cases caused the XML_STATUS_ERROR return. 
1720 */ 1721 if (parserBusy(parser)) 1722 return XML_STATUS_ERROR; 1723 1724 /* Get rid of any previous encoding name */ 1725 FREE(parser, (void *)parser->m_protocolEncodingName); 1726 1727 if (encodingName == NULL) 1728 /* No new encoding name */ 1729 parser->m_protocolEncodingName = NULL; 1730 else { 1731 /* Copy the new encoding name into allocated memory */ 1732 parser->m_protocolEncodingName = copyString(encodingName, parser); 1733 if (! parser->m_protocolEncodingName) 1734 return XML_STATUS_ERROR; 1735 } 1736 return XML_STATUS_OK; 1737 } 1738 1739 XML_Parser XMLCALL 1740 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, 1741 const XML_Char *encodingName) { 1742 XML_Parser parser = oldParser; 1743 DTD *newDtd = NULL; 1744 DTD *oldDtd; 1745 XML_StartElementHandler oldStartElementHandler; 1746 XML_EndElementHandler oldEndElementHandler; 1747 XML_CharacterDataHandler oldCharacterDataHandler; 1748 XML_ProcessingInstructionHandler oldProcessingInstructionHandler; 1749 XML_CommentHandler oldCommentHandler; 1750 XML_StartCdataSectionHandler oldStartCdataSectionHandler; 1751 XML_EndCdataSectionHandler oldEndCdataSectionHandler; 1752 XML_DefaultHandler oldDefaultHandler; 1753 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; 1754 XML_NotationDeclHandler oldNotationDeclHandler; 1755 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; 1756 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; 1757 XML_NotStandaloneHandler oldNotStandaloneHandler; 1758 XML_ExternalEntityRefHandler oldExternalEntityRefHandler; 1759 XML_SkippedEntityHandler oldSkippedEntityHandler; 1760 XML_UnknownEncodingHandler oldUnknownEncodingHandler; 1761 void *oldUnknownEncodingHandlerData; 1762 XML_ElementDeclHandler oldElementDeclHandler; 1763 XML_AttlistDeclHandler oldAttlistDeclHandler; 1764 XML_EntityDeclHandler oldEntityDeclHandler; 1765 XML_XmlDeclHandler oldXmlDeclHandler; 1766 ELEMENT_TYPE *oldDeclElementType; 1767 1768 void *oldUserData; 1769 
void *oldHandlerArg; 1770 XML_Bool oldDefaultExpandInternalEntities; 1771 XML_Parser oldExternalEntityRefHandlerArg; 1772 #ifdef XML_DTD 1773 enum XML_ParamEntityParsing oldParamEntityParsing; 1774 int oldInEntityValue; 1775 #endif 1776 XML_Bool oldns_triplets; 1777 /* Note that the new parser shares the same hash secret as the old 1778 parser, so that dtdCopy and copyEntityTable can lookup values 1779 from hash tables associated with either parser without us having 1780 to worry which hash secrets each table has. 1781 */ 1782 unsigned long oldhash_secret_salt; 1783 XML_Bool oldReparseDeferralEnabled; 1784 1785 /* Validate the oldParser parameter before we pull everything out of it */ 1786 if (oldParser == NULL) 1787 return NULL; 1788 1789 /* Stash the original parser contents on the stack */ 1790 oldDtd = parser->m_dtd; 1791 oldStartElementHandler = parser->m_startElementHandler; 1792 oldEndElementHandler = parser->m_endElementHandler; 1793 oldCharacterDataHandler = parser->m_characterDataHandler; 1794 oldProcessingInstructionHandler = parser->m_processingInstructionHandler; 1795 oldCommentHandler = parser->m_commentHandler; 1796 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; 1797 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; 1798 oldDefaultHandler = parser->m_defaultHandler; 1799 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; 1800 oldNotationDeclHandler = parser->m_notationDeclHandler; 1801 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; 1802 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; 1803 oldNotStandaloneHandler = parser->m_notStandaloneHandler; 1804 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; 1805 oldSkippedEntityHandler = parser->m_skippedEntityHandler; 1806 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; 1807 oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData; 1808 oldElementDeclHandler = 
parser->m_elementDeclHandler; 1809 oldAttlistDeclHandler = parser->m_attlistDeclHandler; 1810 oldEntityDeclHandler = parser->m_entityDeclHandler; 1811 oldXmlDeclHandler = parser->m_xmlDeclHandler; 1812 oldDeclElementType = parser->m_declElementType; 1813 1814 oldUserData = parser->m_userData; 1815 oldHandlerArg = parser->m_handlerArg; 1816 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; 1817 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; 1818 #ifdef XML_DTD 1819 oldParamEntityParsing = parser->m_paramEntityParsing; 1820 oldInEntityValue = parser->m_prologState.inEntityValue; 1821 #endif 1822 oldns_triplets = parser->m_ns_triplets; 1823 /* Note that the new parser shares the same hash secret as the old 1824 parser, so that dtdCopy and copyEntityTable can lookup values 1825 from hash tables associated with either parser without us having 1826 to worry which hash secrets each table has. 1827 */ 1828 oldhash_secret_salt = parser->m_hash_secret_salt; 1829 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; 1830 1831 #ifdef XML_DTD 1832 if (! context) 1833 newDtd = oldDtd; 1834 #endif /* XML_DTD */ 1835 1836 /* Note that the magical uses of the pre-processor to make field 1837 access look more like C++ require that `parser' be overwritten 1838 here. This makes this function more painful to follow than it 1839 would be otherwise. 1840 */ 1841 if (parser->m_ns) { 1842 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; 1843 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); 1844 } else { 1845 parser 1846 = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); 1847 } 1848 1849 if (! 
parser) 1850 return NULL; 1851 1852 parser->m_startElementHandler = oldStartElementHandler; 1853 parser->m_endElementHandler = oldEndElementHandler; 1854 parser->m_characterDataHandler = oldCharacterDataHandler; 1855 parser->m_processingInstructionHandler = oldProcessingInstructionHandler; 1856 parser->m_commentHandler = oldCommentHandler; 1857 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; 1858 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; 1859 parser->m_defaultHandler = oldDefaultHandler; 1860 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; 1861 parser->m_notationDeclHandler = oldNotationDeclHandler; 1862 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; 1863 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; 1864 parser->m_notStandaloneHandler = oldNotStandaloneHandler; 1865 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; 1866 parser->m_skippedEntityHandler = oldSkippedEntityHandler; 1867 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; 1868 parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData; 1869 parser->m_elementDeclHandler = oldElementDeclHandler; 1870 parser->m_attlistDeclHandler = oldAttlistDeclHandler; 1871 parser->m_entityDeclHandler = oldEntityDeclHandler; 1872 parser->m_xmlDeclHandler = oldXmlDeclHandler; 1873 parser->m_declElementType = oldDeclElementType; 1874 parser->m_userData = oldUserData; 1875 if (oldUserData == oldHandlerArg) 1876 parser->m_handlerArg = parser->m_userData; 1877 else 1878 parser->m_handlerArg = parser; 1879 if (oldExternalEntityRefHandlerArg != oldParser) 1880 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; 1881 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; 1882 parser->m_ns_triplets = oldns_triplets; 1883 parser->m_hash_secret_salt = oldhash_secret_salt; 1884 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; 1885 
parser->m_parentParser = oldParser; 1886 #ifdef XML_DTD 1887 parser->m_paramEntityParsing = oldParamEntityParsing; 1888 parser->m_prologState.inEntityValue = oldInEntityValue; 1889 if (context) { 1890 #endif /* XML_DTD */ 1891 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser) 1892 || ! setContext(parser, context)) { 1893 XML_ParserFree(parser); 1894 return NULL; 1895 } 1896 parser->m_processor = externalEntityInitProcessor; 1897 #ifdef XML_DTD 1898 } else { 1899 /* The DTD instance referenced by parser->m_dtd is shared between the 1900 document's root parser and external PE parsers, therefore one does not 1901 need to call setContext. In addition, one also *must* not call 1902 setContext, because this would overwrite existing prefix->binding 1903 pointers in parser->m_dtd with ones that get destroyed with the external 1904 PE parser. This would leave those prefixes with dangling pointers. 1905 */ 1906 parser->m_isParamEntity = XML_TRUE; 1907 XmlPrologStateInitExternalEntity(&parser->m_prologState); 1908 parser->m_processor = externalParEntInitProcessor; 1909 } 1910 #endif /* XML_DTD */ 1911 return parser; 1912 } 1913 1914 static void FASTCALL 1915 destroyBindings(BINDING *bindings, XML_Parser parser) { 1916 for (;;) { 1917 BINDING *b = bindings; 1918 if (! 
b) 1919 break; 1920 bindings = b->nextTagBinding; 1921 FREE(parser, b->uri); 1922 FREE(parser, b); 1923 } 1924 } 1925 1926 void XMLCALL 1927 XML_ParserFree(XML_Parser parser) { 1928 TAG *tagList; 1929 OPEN_INTERNAL_ENTITY *entityList; 1930 if (parser == NULL) 1931 return; 1932 /* free m_tagStack and m_freeTagList */ 1933 tagList = parser->m_tagStack; 1934 for (;;) { 1935 TAG *p; 1936 if (tagList == NULL) { 1937 if (parser->m_freeTagList == NULL) 1938 break; 1939 tagList = parser->m_freeTagList; 1940 parser->m_freeTagList = NULL; 1941 } 1942 p = tagList; 1943 tagList = tagList->parent; 1944 FREE(parser, p->buf.raw); 1945 destroyBindings(p->bindings, parser); 1946 FREE(parser, p); 1947 } 1948 /* free m_openInternalEntities and m_freeInternalEntities */ 1949 entityList = parser->m_openInternalEntities; 1950 for (;;) { 1951 OPEN_INTERNAL_ENTITY *openEntity; 1952 if (entityList == NULL) { 1953 if (parser->m_freeInternalEntities == NULL) 1954 break; 1955 entityList = parser->m_freeInternalEntities; 1956 parser->m_freeInternalEntities = NULL; 1957 } 1958 openEntity = entityList; 1959 entityList = entityList->next; 1960 FREE(parser, openEntity); 1961 } 1962 /* free m_openAttributeEntities and m_freeAttributeEntities */ 1963 entityList = parser->m_openAttributeEntities; 1964 for (;;) { 1965 OPEN_INTERNAL_ENTITY *openEntity; 1966 if (entityList == NULL) { 1967 if (parser->m_freeAttributeEntities == NULL) 1968 break; 1969 entityList = parser->m_freeAttributeEntities; 1970 parser->m_freeAttributeEntities = NULL; 1971 } 1972 openEntity = entityList; 1973 entityList = entityList->next; 1974 FREE(parser, openEntity); 1975 } 1976 /* free m_openValueEntities and m_freeValueEntities */ 1977 entityList = parser->m_openValueEntities; 1978 for (;;) { 1979 OPEN_INTERNAL_ENTITY *openEntity; 1980 if (entityList == NULL) { 1981 if (parser->m_freeValueEntities == NULL) 1982 break; 1983 entityList = parser->m_freeValueEntities; 1984 parser->m_freeValueEntities = NULL; 1985 } 1986 openEntity 
= entityList; 1987 entityList = entityList->next; 1988 FREE(parser, openEntity); 1989 } 1990 destroyBindings(parser->m_freeBindingList, parser); 1991 destroyBindings(parser->m_inheritedBindings, parser); 1992 poolDestroy(&parser->m_tempPool); 1993 poolDestroy(&parser->m_temp2Pool); 1994 FREE(parser, (void *)parser->m_protocolEncodingName); 1995 #ifdef XML_DTD 1996 /* external parameter entity parsers share the DTD structure 1997 parser->m_dtd with the root parser, so we must not destroy it 1998 */ 1999 if (! parser->m_isParamEntity && parser->m_dtd) 2000 #else 2001 if (parser->m_dtd) 2002 #endif /* XML_DTD */ 2003 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser); 2004 FREE(parser, parser->m_atts); 2005 #ifdef XML_ATTR_INFO 2006 FREE(parser, parser->m_attInfo); 2007 #endif 2008 FREE(parser, parser->m_groupConnector); 2009 // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2010 // is not being allocated with MALLOC(..) but with plain 2011 // .malloc_fcn(..). 
2012 parser->m_mem.free_fcn(parser->m_buffer); 2013 FREE(parser, parser->m_dataBuf); 2014 FREE(parser, parser->m_nsAtts); 2015 FREE(parser, parser->m_unknownEncodingMem); 2016 if (parser->m_unknownEncodingRelease) 2017 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 2018 FREE(parser, parser); 2019 } 2020 2021 void XMLCALL 2022 XML_UseParserAsHandlerArg(XML_Parser parser) { 2023 if (parser != NULL) 2024 parser->m_handlerArg = parser; 2025 } 2026 2027 enum XML_Error XMLCALL 2028 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 2029 if (parser == NULL) 2030 return XML_ERROR_INVALID_ARGUMENT; 2031 #ifdef XML_DTD 2032 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2033 if (parserBusy(parser)) 2034 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 2035 parser->m_useForeignDTD = useDTD; 2036 return XML_ERROR_NONE; 2037 #else 2038 UNUSED_P(useDTD); 2039 return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 2040 #endif 2041 } 2042 2043 void XMLCALL 2044 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 2045 if (parser == NULL) 2046 return; 2047 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2048 if (parserBusy(parser)) 2049 return; 2050 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; 2051 } 2052 2053 void XMLCALL 2054 XML_SetUserData(XML_Parser parser, void *p) { 2055 if (parser == NULL) 2056 return; 2057 if (parser->m_handlerArg == parser->m_userData) 2058 parser->m_handlerArg = parser->m_userData = p; 2059 else 2060 parser->m_userData = p; 2061 } 2062 2063 enum XML_Status XMLCALL 2064 XML_SetBase(XML_Parser parser, const XML_Char *p) { 2065 if (parser == NULL) 2066 return XML_STATUS_ERROR; 2067 if (p) { 2068 p = poolCopyString(&parser->m_dtd->pool, p); 2069 if (! 
p) 2070 return XML_STATUS_ERROR; 2071 parser->m_curBase = p; 2072 } else 2073 parser->m_curBase = NULL; 2074 return XML_STATUS_OK; 2075 } 2076 2077 const XML_Char *XMLCALL 2078 XML_GetBase(XML_Parser parser) { 2079 if (parser == NULL) 2080 return NULL; 2081 return parser->m_curBase; 2082 } 2083 2084 int XMLCALL 2085 XML_GetSpecifiedAttributeCount(XML_Parser parser) { 2086 if (parser == NULL) 2087 return -1; 2088 return parser->m_nSpecifiedAtts; 2089 } 2090 2091 int XMLCALL 2092 XML_GetIdAttributeIndex(XML_Parser parser) { 2093 if (parser == NULL) 2094 return -1; 2095 return parser->m_idAttIndex; 2096 } 2097 2098 #ifdef XML_ATTR_INFO 2099 const XML_AttrInfo *XMLCALL 2100 XML_GetAttributeInfo(XML_Parser parser) { 2101 if (parser == NULL) 2102 return NULL; 2103 return parser->m_attInfo; 2104 } 2105 #endif 2106 2107 void XMLCALL 2108 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 2109 XML_EndElementHandler end) { 2110 if (parser == NULL) 2111 return; 2112 parser->m_startElementHandler = start; 2113 parser->m_endElementHandler = end; 2114 } 2115 2116 void XMLCALL 2117 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 2118 if (parser != NULL) 2119 parser->m_startElementHandler = start; 2120 } 2121 2122 void XMLCALL 2123 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 2124 if (parser != NULL) 2125 parser->m_endElementHandler = end; 2126 } 2127 2128 void XMLCALL 2129 XML_SetCharacterDataHandler(XML_Parser parser, 2130 XML_CharacterDataHandler handler) { 2131 if (parser != NULL) 2132 parser->m_characterDataHandler = handler; 2133 } 2134 2135 void XMLCALL 2136 XML_SetProcessingInstructionHandler(XML_Parser parser, 2137 XML_ProcessingInstructionHandler handler) { 2138 if (parser != NULL) 2139 parser->m_processingInstructionHandler = handler; 2140 } 2141 2142 void XMLCALL 2143 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 2144 if (parser != NULL) 2145 
parser->m_commentHandler = handler; 2146 } 2147 2148 void XMLCALL 2149 XML_SetCdataSectionHandler(XML_Parser parser, 2150 XML_StartCdataSectionHandler start, 2151 XML_EndCdataSectionHandler end) { 2152 if (parser == NULL) 2153 return; 2154 parser->m_startCdataSectionHandler = start; 2155 parser->m_endCdataSectionHandler = end; 2156 } 2157 2158 void XMLCALL 2159 XML_SetStartCdataSectionHandler(XML_Parser parser, 2160 XML_StartCdataSectionHandler start) { 2161 if (parser != NULL) 2162 parser->m_startCdataSectionHandler = start; 2163 } 2164 2165 void XMLCALL 2166 XML_SetEndCdataSectionHandler(XML_Parser parser, 2167 XML_EndCdataSectionHandler end) { 2168 if (parser != NULL) 2169 parser->m_endCdataSectionHandler = end; 2170 } 2171 2172 void XMLCALL 2173 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 2174 if (parser == NULL) 2175 return; 2176 parser->m_defaultHandler = handler; 2177 parser->m_defaultExpandInternalEntities = XML_FALSE; 2178 } 2179 2180 void XMLCALL 2181 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 2182 if (parser == NULL) 2183 return; 2184 parser->m_defaultHandler = handler; 2185 parser->m_defaultExpandInternalEntities = XML_TRUE; 2186 } 2187 2188 void XMLCALL 2189 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 2190 XML_EndDoctypeDeclHandler end) { 2191 if (parser == NULL) 2192 return; 2193 parser->m_startDoctypeDeclHandler = start; 2194 parser->m_endDoctypeDeclHandler = end; 2195 } 2196 2197 void XMLCALL 2198 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 2199 XML_StartDoctypeDeclHandler start) { 2200 if (parser != NULL) 2201 parser->m_startDoctypeDeclHandler = start; 2202 } 2203 2204 void XMLCALL 2205 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 2206 if (parser != NULL) 2207 parser->m_endDoctypeDeclHandler = end; 2208 } 2209 2210 void XMLCALL 2211 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 2212 
XML_UnparsedEntityDeclHandler handler) { 2213 if (parser != NULL) 2214 parser->m_unparsedEntityDeclHandler = handler; 2215 } 2216 2217 void XMLCALL 2218 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 2219 if (parser != NULL) 2220 parser->m_notationDeclHandler = handler; 2221 } 2222 2223 void XMLCALL 2224 XML_SetNamespaceDeclHandler(XML_Parser parser, 2225 XML_StartNamespaceDeclHandler start, 2226 XML_EndNamespaceDeclHandler end) { 2227 if (parser == NULL) 2228 return; 2229 parser->m_startNamespaceDeclHandler = start; 2230 parser->m_endNamespaceDeclHandler = end; 2231 } 2232 2233 void XMLCALL 2234 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 2235 XML_StartNamespaceDeclHandler start) { 2236 if (parser != NULL) 2237 parser->m_startNamespaceDeclHandler = start; 2238 } 2239 2240 void XMLCALL 2241 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 2242 XML_EndNamespaceDeclHandler end) { 2243 if (parser != NULL) 2244 parser->m_endNamespaceDeclHandler = end; 2245 } 2246 2247 void XMLCALL 2248 XML_SetNotStandaloneHandler(XML_Parser parser, 2249 XML_NotStandaloneHandler handler) { 2250 if (parser != NULL) 2251 parser->m_notStandaloneHandler = handler; 2252 } 2253 2254 void XMLCALL 2255 XML_SetExternalEntityRefHandler(XML_Parser parser, 2256 XML_ExternalEntityRefHandler handler) { 2257 if (parser != NULL) 2258 parser->m_externalEntityRefHandler = handler; 2259 } 2260 2261 void XMLCALL 2262 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { 2263 if (parser == NULL) 2264 return; 2265 if (arg) 2266 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 2267 else 2268 parser->m_externalEntityRefHandlerArg = parser; 2269 } 2270 2271 void XMLCALL 2272 XML_SetSkippedEntityHandler(XML_Parser parser, 2273 XML_SkippedEntityHandler handler) { 2274 if (parser != NULL) 2275 parser->m_skippedEntityHandler = handler; 2276 } 2277 2278 void XMLCALL 2279 XML_SetUnknownEncodingHandler(XML_Parser parser, 2280 XML_UnknownEncodingHandler 
handler, void *data) { 2281 if (parser == NULL) 2282 return; 2283 parser->m_unknownEncodingHandler = handler; 2284 parser->m_unknownEncodingHandlerData = data; 2285 } 2286 2287 void XMLCALL 2288 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 2289 if (parser != NULL) 2290 parser->m_elementDeclHandler = eldecl; 2291 } 2292 2293 void XMLCALL 2294 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 2295 if (parser != NULL) 2296 parser->m_attlistDeclHandler = attdecl; 2297 } 2298 2299 void XMLCALL 2300 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 2301 if (parser != NULL) 2302 parser->m_entityDeclHandler = handler; 2303 } 2304 2305 void XMLCALL 2306 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 2307 if (parser != NULL) 2308 parser->m_xmlDeclHandler = handler; 2309 } 2310 2311 int XMLCALL 2312 XML_SetParamEntityParsing(XML_Parser parser, 2313 enum XML_ParamEntityParsing peParsing) { 2314 if (parser == NULL) 2315 return 0; 2316 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2317 if (parserBusy(parser)) 2318 return 0; 2319 #ifdef XML_DTD 2320 parser->m_paramEntityParsing = peParsing; 2321 return 1; 2322 #else 2323 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 2324 #endif 2325 } 2326 2327 int XMLCALL 2328 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 2329 if (parser == NULL) 2330 return 0; 2331 2332 const XML_Parser rootParser = getRootParserOf(parser, NULL); 2333 assert(! 
rootParser->m_parentParser); 2334 2335 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 2336 if (parserBusy(rootParser)) 2337 return 0; 2338 rootParser->m_hash_secret_salt = hash_salt; 2339 return 1; 2340 } 2341 2342 enum XML_Status XMLCALL 2343 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { 2344 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { 2345 if (parser != NULL) 2346 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2347 return XML_STATUS_ERROR; 2348 } 2349 switch (parser->m_parsingStatus.parsing) { 2350 case XML_SUSPENDED: 2351 parser->m_errorCode = XML_ERROR_SUSPENDED; 2352 return XML_STATUS_ERROR; 2353 case XML_FINISHED: 2354 parser->m_errorCode = XML_ERROR_FINISHED; 2355 return XML_STATUS_ERROR; 2356 case XML_INITIALIZED: 2357 if (parser->m_parentParser == NULL && ! startParsing(parser)) { 2358 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2359 return XML_STATUS_ERROR; 2360 } 2361 /* fall through */ 2362 default: 2363 parser->m_parsingStatus.parsing = XML_PARSING; 2364 } 2365 2366 #if XML_CONTEXT_BYTES == 0 2367 if (parser->m_bufferPtr == parser->m_bufferEnd) { 2368 const char *end; 2369 int nLeftOver; 2370 enum XML_Status result; 2371 /* Detect overflow (a+b > MAX <==> b > MAX-a) */ 2372 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { 2373 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2374 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2375 parser->m_processor = errorProcessor; 2376 return XML_STATUS_ERROR; 2377 } 2378 // though this isn't a buffer request, we assume that `len` is the app's 2379 // preferred buffer fill size, and therefore save it here. 
2380 parser->m_lastBufferRequestSize = len; 2381 parser->m_parseEndByteIndex += len; 2382 parser->m_positionPtr = s; 2383 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2384 2385 parser->m_errorCode 2386 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); 2387 2388 if (parser->m_errorCode != XML_ERROR_NONE) { 2389 parser->m_eventEndPtr = parser->m_eventPtr; 2390 parser->m_processor = errorProcessor; 2391 return XML_STATUS_ERROR; 2392 } else { 2393 switch (parser->m_parsingStatus.parsing) { 2394 case XML_SUSPENDED: 2395 result = XML_STATUS_SUSPENDED; 2396 break; 2397 case XML_INITIALIZED: 2398 case XML_PARSING: 2399 if (isFinal) { 2400 parser->m_parsingStatus.parsing = XML_FINISHED; 2401 return XML_STATUS_OK; 2402 } 2403 /* fall through */ 2404 default: 2405 result = XML_STATUS_OK; 2406 } 2407 } 2408 2409 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, 2410 &parser->m_position); 2411 nLeftOver = s + len - end; 2412 if (nLeftOver) { 2413 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED 2414 // (and XML_ERROR_FINISHED) from XML_GetBuffer. 2415 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; 2416 parser->m_parsingStatus.parsing = XML_PARSING; 2417 void *const temp = XML_GetBuffer(parser, nLeftOver); 2418 parser->m_parsingStatus.parsing = originalStatus; 2419 // GetBuffer may have overwritten this, but we want to remember what the 2420 // app requested, not how many bytes were left over after parsing. 2421 parser->m_lastBufferRequestSize = len; 2422 if (temp == NULL) { 2423 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
2424 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2425 parser->m_processor = errorProcessor; 2426 return XML_STATUS_ERROR; 2427 } 2428 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we 2429 // don't have any data to preserve, and can copy straight into the start 2430 // of the buffer rather than the GetBuffer return pointer (which may be 2431 // pointing further into the allocated buffer). 2432 memcpy(parser->m_buffer, end, nLeftOver); 2433 } 2434 parser->m_bufferPtr = parser->m_buffer; 2435 parser->m_bufferEnd = parser->m_buffer + nLeftOver; 2436 parser->m_positionPtr = parser->m_bufferPtr; 2437 parser->m_parseEndPtr = parser->m_bufferEnd; 2438 parser->m_eventPtr = parser->m_bufferPtr; 2439 parser->m_eventEndPtr = parser->m_bufferPtr; 2440 return result; 2441 } 2442 #endif /* XML_CONTEXT_BYTES == 0 */ 2443 void *buff = XML_GetBuffer(parser, len); 2444 if (buff == NULL) 2445 return XML_STATUS_ERROR; 2446 if (len > 0) { 2447 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above 2448 memcpy(buff, s, len); 2449 } 2450 return XML_ParseBuffer(parser, len, isFinal); 2451 } 2452 2453 enum XML_Status XMLCALL 2454 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { 2455 const char *start; 2456 enum XML_Status result = XML_STATUS_OK; 2457 2458 if (parser == NULL) 2459 return XML_STATUS_ERROR; 2460 2461 if (len < 0) { 2462 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2463 return XML_STATUS_ERROR; 2464 } 2465 2466 switch (parser->m_parsingStatus.parsing) { 2467 case XML_SUSPENDED: 2468 parser->m_errorCode = XML_ERROR_SUSPENDED; 2469 return XML_STATUS_ERROR; 2470 case XML_FINISHED: 2471 parser->m_errorCode = XML_ERROR_FINISHED; 2472 return XML_STATUS_ERROR; 2473 case XML_INITIALIZED: 2474 /* Has someone called XML_GetBuffer successfully before? */ 2475 if (! parser->m_bufferPtr) { 2476 parser->m_errorCode = XML_ERROR_NO_BUFFER; 2477 return XML_STATUS_ERROR; 2478 } 2479 2480 if (parser->m_parentParser == NULL && ! 
startParsing(parser)) { 2481 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2482 return XML_STATUS_ERROR; 2483 } 2484 /* fall through */ 2485 default: 2486 parser->m_parsingStatus.parsing = XML_PARSING; 2487 } 2488 2489 start = parser->m_bufferPtr; 2490 parser->m_positionPtr = start; 2491 parser->m_bufferEnd += len; 2492 parser->m_parseEndPtr = parser->m_bufferEnd; 2493 parser->m_parseEndByteIndex += len; 2494 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2495 2496 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, 2497 &parser->m_bufferPtr); 2498 2499 if (parser->m_errorCode != XML_ERROR_NONE) { 2500 parser->m_eventEndPtr = parser->m_eventPtr; 2501 parser->m_processor = errorProcessor; 2502 return XML_STATUS_ERROR; 2503 } else { 2504 switch (parser->m_parsingStatus.parsing) { 2505 case XML_SUSPENDED: 2506 result = XML_STATUS_SUSPENDED; 2507 break; 2508 case XML_INITIALIZED: 2509 case XML_PARSING: 2510 if (isFinal) { 2511 parser->m_parsingStatus.parsing = XML_FINISHED; 2512 return result; 2513 } 2514 default:; /* should not happen */ 2515 } 2516 } 2517 2518 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2519 parser->m_bufferPtr, &parser->m_position); 2520 parser->m_positionPtr = parser->m_bufferPtr; 2521 return result; 2522 } 2523 2524 void *XMLCALL 2525 XML_GetBuffer(XML_Parser parser, int len) { 2526 if (parser == NULL) 2527 return NULL; 2528 if (len < 0) { 2529 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2530 return NULL; 2531 } 2532 switch (parser->m_parsingStatus.parsing) { 2533 case XML_SUSPENDED: 2534 parser->m_errorCode = XML_ERROR_SUSPENDED; 2535 return NULL; 2536 case XML_FINISHED: 2537 parser->m_errorCode = XML_ERROR_FINISHED; 2538 return NULL; 2539 default:; 2540 } 2541 2542 // whether or not the request succeeds, `len` seems to be the app's preferred 2543 // buffer fill size; remember it. 
2544 parser->m_lastBufferRequestSize = len; 2545 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) 2546 || parser->m_buffer == NULL) { 2547 #if XML_CONTEXT_BYTES > 0 2548 int keep; 2549 #endif /* XML_CONTEXT_BYTES > 0 */ 2550 /* Do not invoke signed arithmetic overflow: */ 2551 int neededSize = (int)((unsigned)len 2552 + (unsigned)EXPAT_SAFE_PTR_DIFF( 2553 parser->m_bufferEnd, parser->m_bufferPtr)); 2554 if (neededSize < 0) { 2555 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2556 return NULL; 2557 } 2558 #if XML_CONTEXT_BYTES > 0 2559 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 2560 if (keep > XML_CONTEXT_BYTES) 2561 keep = XML_CONTEXT_BYTES; 2562 /* Detect and prevent integer overflow */ 2563 if (keep > INT_MAX - neededSize) { 2564 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2565 return NULL; 2566 } 2567 neededSize += keep; 2568 #endif /* XML_CONTEXT_BYTES > 0 */ 2569 if (parser->m_buffer && parser->m_bufferPtr 2570 && neededSize 2571 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { 2572 #if XML_CONTEXT_BYTES > 0 2573 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { 2574 int offset 2575 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) 2576 - keep; 2577 /* The buffer pointers cannot be NULL here; we have at least some bytes 2578 * in the buffer */ 2579 memmove(parser->m_buffer, &parser->m_buffer[offset], 2580 parser->m_bufferEnd - parser->m_bufferPtr + keep); 2581 parser->m_bufferEnd -= offset; 2582 parser->m_bufferPtr -= offset; 2583 } 2584 #else 2585 memmove(parser->m_buffer, parser->m_bufferPtr, 2586 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2587 parser->m_bufferEnd 2588 = parser->m_buffer 2589 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2590 parser->m_bufferPtr = parser->m_buffer; 2591 #endif /* XML_CONTEXT_BYTES > 0 */ 2592 } else { 2593 char *newBuf; 2594 int bufferSize 2595 = 
(int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); 2596 if (bufferSize == 0) 2597 bufferSize = INIT_BUFFER_SIZE; 2598 do { 2599 /* Do not invoke signed arithmetic overflow: */ 2600 bufferSize = (int)(2U * (unsigned)bufferSize); 2601 } while (bufferSize < neededSize && bufferSize > 0); 2602 if (bufferSize <= 0) { 2603 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2604 return NULL; 2605 } 2606 // NOTE: We are avoiding MALLOC(..) here to leave limiting 2607 // the input size to the application using Expat. 2608 newBuf = parser->m_mem.malloc_fcn(bufferSize); 2609 if (newBuf == NULL) { 2610 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2611 return NULL; 2612 } 2613 parser->m_bufferLim = newBuf + bufferSize; 2614 #if XML_CONTEXT_BYTES > 0 2615 if (parser->m_bufferPtr) { 2616 memcpy(newBuf, &parser->m_bufferPtr[-keep], 2617 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2618 + keep); 2619 // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2620 // is not being allocated with MALLOC(..) but with plain 2621 // .malloc_fcn(..). 2622 parser->m_mem.free_fcn(parser->m_buffer); 2623 parser->m_buffer = newBuf; 2624 parser->m_bufferEnd 2625 = parser->m_buffer 2626 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2627 + keep; 2628 parser->m_bufferPtr = parser->m_buffer + keep; 2629 } else { 2630 /* This must be a brand new buffer with no data in it yet */ 2631 parser->m_bufferEnd = newBuf; 2632 parser->m_bufferPtr = parser->m_buffer = newBuf; 2633 } 2634 #else 2635 if (parser->m_bufferPtr) { 2636 memcpy(newBuf, parser->m_bufferPtr, 2637 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2638 // NOTE: We are avoiding FREE(..) here because parser->m_buffer 2639 // is not being allocated with MALLOC(..) but with plain 2640 // .malloc_fcn(..). 
/* XML_GetBuffer, continued -- `#else` arm of `#if XML_CONTEXT_BYTES > 0`:
   the pending bytes were copied into newBuf just above, so the old buffer
   is released and m_bufferEnd is re-based onto the new allocation. */
        parser->m_mem.free_fcn(parser->m_buffer);
        parser->m_bufferEnd
            = newBuf
              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
      } else {
        /* This must be a brand new buffer with no data in it yet */
        parser->m_bufferEnd = newBuf;
      }
      parser->m_bufferPtr = parser->m_buffer = newBuf;
#endif /* XML_CONTEXT_BYTES > 0 */
    }
    /* NOTE(review): the event/position pointers are cleared here, presumably
       because the bytes they referred to were just moved or freed -- confirm
       against the part of this function above this chunk. */
    parser->m_eventPtr = parser->m_eventEndPtr = NULL;
    parser->m_positionPtr = NULL;
  }
  return parser->m_bufferEnd;
}

/* Sets the parser's m_reenter flag. */
static void
triggerReenter(XML_Parser parser) {
  parser->m_reenter = XML_TRUE;
}

/* Requests that parsing stop.

   resumable selects the target state: XML_SUSPENDED when true,
   XML_FINISHED when false.

   Returns XML_STATUS_OK after changing the state, or XML_STATUS_ERROR when
   - parser is NULL (no error code can be recorded),
   - parsing has not started            (XML_ERROR_NOT_STARTED),
   - the parser is already suspended and a resumable stop is requested
                                        (XML_ERROR_SUSPENDED),
   - parsing has already finished       (XML_ERROR_FINISHED),
   - XML_DTD is defined, a resumable stop is requested and
     m_isParamEntity is set             (XML_ERROR_SUSPEND_PE).
*/
enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
  if (parser == NULL)
    return XML_STATUS_ERROR;
  switch (parser->m_parsingStatus.parsing) {
  case XML_INITIALIZED:
    parser->m_errorCode = XML_ERROR_NOT_STARTED;
    return XML_STATUS_ERROR;
  case XML_SUSPENDED:
    if (resumable) {
      parser->m_errorCode = XML_ERROR_SUSPENDED;
      return XML_STATUS_ERROR;
    }
    /* A non-resumable stop of a suspended parser finishes it. */
    parser->m_parsingStatus.parsing = XML_FINISHED;
    break;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
  case XML_PARSING:
    if (resumable) {
#ifdef XML_DTD
      if (parser->m_isParamEntity) {
        parser->m_errorCode = XML_ERROR_SUSPEND_PE;
        return XML_STATUS_ERROR;
      }
#endif
      parser->m_parsingStatus.parsing = XML_SUSPENDED;
    } else
      parser->m_parsingStatus.parsing = XML_FINISHED;
    break;
  default:
    assert(0);
  }
  return XML_STATUS_OK;
}

/* Resumes a parser that is in state XML_SUSPENDED.

   The state is set back to XML_PARSING and the current processor is run
   over [m_bufferPtr, m_parseEndPtr).

   Returns
   - XML_STATUS_ERROR if parser is NULL, if the parser is not suspended
     (XML_ERROR_NOT_SUSPENDED), or if the processor reports an error (the
     error processor is installed in that case);
   - XML_STATUS_SUSPENDED if the parser was suspended again while running;
   - XML_STATUS_OK otherwise.
*/
enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser) {
  enum XML_Status result = XML_STATUS_OK;

  if (parser == NULL)
    return XML_STATUS_ERROR;
  if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
    parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
    return XML_STATUS_ERROR;
  }
  parser->m_parsingStatus.parsing = XML_PARSING;

  parser->m_errorCode = callProcessor(
      parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);

  if (parser->m_errorCode != XML_ERROR_NONE) {
    parser->m_eventEndPtr = parser->m_eventPtr;
    parser->m_processor = errorProcessor;
    return XML_STATUS_ERROR;
  } else {
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      result = XML_STATUS_SUSPENDED;
      break;
    case XML_INITIALIZED:
    case XML_PARSING:
      if (parser->m_parsingStatus.finalBuffer) {
        /* Final buffer fully processed: finish without the position
           update below. */
        parser->m_parsingStatus.parsing = XML_FINISHED;
        return result;
      }
      /* not the final buffer: falls through to the empty default */
    default:;
    }
  }

  XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
                    parser->m_bufferPtr, &parser->m_position);
  parser->m_positionPtr = parser->m_bufferPtr;
  return result;
}

/* Copies the parser's current parsing status into *status.
   Does nothing when parser is NULL; status must not be NULL (asserted). */
void XMLCALL
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
  if (parser == NULL)
    return;
  assert(status != NULL);
  *status = parser->m_parsingStatus;
}

/* Returns the parser's stored error code, or XML_ERROR_INVALID_ARGUMENT
   when parser is NULL. */
enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser) {
  if (parser == NULL)
    return XML_ERROR_INVALID_ARGUMENT;
  return parser->m_errorCode;
}

/* Returns the byte index of the current event, computed as
   m_parseEndByteIndex minus the distance from m_eventPtr to m_parseEndPtr.
   Returns -1 when parser is NULL or no event pointer is set. */
XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser) {
  if (parser == NULL)
    return -1;
  if (parser->m_eventPtr)
    return (XML_Index)(parser->m_parseEndByteIndex
                       - (parser->m_parseEndPtr - parser->m_eventPtr));
  return -1;
}

/* Returns the length in bytes of the current event
   (m_eventEndPtr - m_eventPtr), or 0 when parser is NULL or either
   pointer is unset. */
int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser) {
  if (parser == NULL)
    return 0;
  if (parser->m_eventEndPtr && parser->m_eventPtr)
    return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
  return 0;
}

/* Exposes the parse buffer around the current event.

   Only available when built with XML_CONTEXT_BYTES > 0; otherwise all
   arguments are ignored and a null pointer is returned.  A null pointer is
   also returned when parser is NULL or when no event pointer or buffer is
   set.  On success the buffer start is returned, *offset (if non-NULL)
   receives the event's offset within it and *size (if non-NULL) receives
   the number of bytes from the buffer start to m_bufferEnd. */
const char *XMLCALL
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
#if XML_CONTEXT_BYTES > 0
  if (parser == NULL)
    return NULL;
  if (parser->m_eventPtr && parser->m_buffer) {
2779 if (offset != NULL) 2780 *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2781 if (size != NULL) 2782 *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2783 return parser->m_buffer; 2784 } 2785 #else 2786 (void)parser; 2787 (void)offset; 2788 (void)size; 2789 #endif /* XML_CONTEXT_BYTES > 0 */ 2790 return (const char *)0; 2791 } 2792 2793 XML_Size XMLCALL 2794 XML_GetCurrentLineNumber(XML_Parser parser) { 2795 if (parser == NULL) 2796 return 0; 2797 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2798 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2799 parser->m_eventPtr, &parser->m_position); 2800 parser->m_positionPtr = parser->m_eventPtr; 2801 } 2802 return parser->m_position.lineNumber + 1; 2803 } 2804 2805 XML_Size XMLCALL 2806 XML_GetCurrentColumnNumber(XML_Parser parser) { 2807 if (parser == NULL) 2808 return 0; 2809 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2810 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2811 parser->m_eventPtr, &parser->m_position); 2812 parser->m_positionPtr = parser->m_eventPtr; 2813 } 2814 return parser->m_position.columnNumber; 2815 } 2816 2817 void XMLCALL 2818 XML_FreeContentModel(XML_Parser parser, XML_Content *model) { 2819 if (parser == NULL) 2820 return; 2821 2822 // NOTE: We are avoiding FREE(..) here because the content model 2823 // has been created using plain .malloc_fcn(..) rather than MALLOC(..). 2824 parser->m_mem.free_fcn(model); 2825 } 2826 2827 void *XMLCALL 2828 XML_MemMalloc(XML_Parser parser, size_t size) { 2829 if (parser == NULL) 2830 return NULL; 2831 2832 // NOTE: We are avoiding MALLOC(..) here to not include 2833 // user allocations with allocation tracking and limiting. 2834 return parser->m_mem.malloc_fcn(size); 2835 } 2836 2837 void *XMLCALL 2838 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { 2839 if (parser == NULL) 2840 return NULL; 2841 2842 // NOTE: We are avoiding REALLOC(..) 
here to not include 2843 // user allocations with allocation tracking and limiting. 2844 return parser->m_mem.realloc_fcn(ptr, size); 2845 } 2846 2847 void XMLCALL 2848 XML_MemFree(XML_Parser parser, void *ptr) { 2849 if (parser == NULL) 2850 return; 2851 2852 // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and 2853 // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) 2854 // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. 2855 parser->m_mem.free_fcn(ptr); 2856 } 2857 2858 void XMLCALL 2859 XML_DefaultCurrent(XML_Parser parser) { 2860 if (parser == NULL) 2861 return; 2862 if (parser->m_defaultHandler) { 2863 if (parser->m_openInternalEntities) 2864 reportDefault(parser, parser->m_internalEncoding, 2865 parser->m_openInternalEntities->internalEventPtr, 2866 parser->m_openInternalEntities->internalEventEndPtr); 2867 else 2868 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, 2869 parser->m_eventEndPtr); 2870 } 2871 } 2872 2873 const XML_LChar *XMLCALL 2874 XML_ErrorString(enum XML_Error code) { 2875 switch (code) { 2876 case XML_ERROR_NONE: 2877 return NULL; 2878 case XML_ERROR_NO_MEMORY: 2879 return XML_L("out of memory"); 2880 case XML_ERROR_SYNTAX: 2881 return XML_L("syntax error"); 2882 case XML_ERROR_NO_ELEMENTS: 2883 return XML_L("no element found"); 2884 case XML_ERROR_INVALID_TOKEN: 2885 return XML_L("not well-formed (invalid token)"); 2886 case XML_ERROR_UNCLOSED_TOKEN: 2887 return XML_L("unclosed token"); 2888 case XML_ERROR_PARTIAL_CHAR: 2889 return XML_L("partial character"); 2890 case XML_ERROR_TAG_MISMATCH: 2891 return XML_L("mismatched tag"); 2892 case XML_ERROR_DUPLICATE_ATTRIBUTE: 2893 return XML_L("duplicate attribute"); 2894 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2895 return XML_L("junk after document element"); 2896 case XML_ERROR_PARAM_ENTITY_REF: 2897 return XML_L("illegal parameter entity reference"); 2898 case XML_ERROR_UNDEFINED_ENTITY: 2899 return XML_L("undefined entity"); 2900 case 
XML_ERROR_RECURSIVE_ENTITY_REF:
    return XML_L("recursive entity reference");
  case XML_ERROR_ASYNC_ENTITY:
    return XML_L("asynchronous entity");
  case XML_ERROR_BAD_CHAR_REF:
    return XML_L("reference to invalid character number");
  case XML_ERROR_BINARY_ENTITY_REF:
    return XML_L("reference to binary entity");
  case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
    return XML_L("reference to external entity in attribute");
  case XML_ERROR_MISPLACED_XML_PI:
    return XML_L("XML or text declaration not at start of entity");
  case XML_ERROR_UNKNOWN_ENCODING:
    return XML_L("unknown encoding");
  case XML_ERROR_INCORRECT_ENCODING:
    return XML_L("encoding specified in XML declaration is incorrect");
  case XML_ERROR_UNCLOSED_CDATA_SECTION:
    return XML_L("unclosed CDATA section");
  case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
    return XML_L("error in processing external entity reference");
  case XML_ERROR_NOT_STANDALONE:
    return XML_L("document is not standalone");
  case XML_ERROR_UNEXPECTED_STATE:
    return XML_L("unexpected parser state - please send a bug report");
  case XML_ERROR_ENTITY_DECLARED_IN_PE:
    return XML_L("entity declared in parameter entity");
  case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
    return XML_L("requested feature requires XML_DTD support in Expat");
  case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
    return XML_L("cannot change setting once parsing has begun");
  /* Added in 1.95.7. */
  case XML_ERROR_UNBOUND_PREFIX:
    return XML_L("unbound prefix");
  /* Added in 1.95.8. */
  case XML_ERROR_UNDECLARING_PREFIX:
    return XML_L("must not undeclare prefix");
  case XML_ERROR_INCOMPLETE_PE:
    return XML_L("incomplete markup in parameter entity");
  case XML_ERROR_XML_DECL:
    return XML_L("XML declaration not well-formed");
  case XML_ERROR_TEXT_DECL:
    return XML_L("text declaration not well-formed");
  case XML_ERROR_PUBLICID:
    return XML_L("illegal character(s) in public id");
  case XML_ERROR_SUSPENDED:
    return XML_L("parser suspended");
  case XML_ERROR_NOT_SUSPENDED:
    return XML_L("parser not suspended");
  case XML_ERROR_ABORTED:
    return XML_L("parsing aborted");
  case XML_ERROR_FINISHED:
    return XML_L("parsing finished");
  case XML_ERROR_SUSPEND_PE:
    return XML_L("cannot suspend in external parameter entity");
  /* Added in 2.0.0. */
  case XML_ERROR_RESERVED_PREFIX_XML:
    return XML_L(
        "reserved prefix (xml) must not be undeclared or bound to another namespace name");
  case XML_ERROR_RESERVED_PREFIX_XMLNS:
    return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
  case XML_ERROR_RESERVED_NAMESPACE_URI:
    return XML_L(
        "prefix must not be bound to one of the reserved namespace names");
  /* Added in 2.2.5. */
  case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
    return XML_L("invalid argument");
  /* Added in 2.3.0. */
  case XML_ERROR_NO_BUFFER:
    return XML_L(
        "a successful prior call to function XML_GetBuffer is required");
  /* Added in 2.4.0. */
  case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
    return XML_L(
        "limit on input amplification factor (from DTD and entities) breached");
  /* Added in 2.6.4. */
  case XML_ERROR_NOT_STARTED:
    return XML_L("parser not started");
  }
  /* Reached for any code that has no case above. */
  return NULL;
}

/* Returns the library version as a string literal of the form
   "expat_<major>.<minor>.<micro>", assembled at compile time from the
   XML_MAJOR_VERSION / XML_MINOR_VERSION / XML_MICRO_VERSION macros. */
const XML_LChar *XMLCALL
XML_ExpatVersion(void) {
  /* V1 is used to string-ize the version number. However, it would
     string-ize the actual version macro *names* unless we get them
     substituted before being passed to V1. CPP is defined to expand
     a macro, then rescan for more expansions. Thus, we use V2 to expand
     the version macros, then CPP will expand the resulting V1() macro
     with the correct numerals. */
  /* ### I'm assuming cpp is portable in this respect... */

#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
#define V2(a, b, c) XML_L("expat_") V1(a, b, c)

  return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);

#undef V1
#undef V2
}

/* Returns the library version as a struct with major, minor and micro
   fields, filled from the same three version macros. */
XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void) {
  XML_Expat_Version version;

  version.major = XML_MAJOR_VERSION;
  version.minor = XML_MINOR_VERSION;
  version.micro = XML_MICRO_VERSION;

  return version;
}

/* Returns a pointer to a static table describing the compile-time
   configuration.  Each entry is {feature id, name, value}; which entries
   are present depends on the build macros tested below.  The table is
   terminated by an entry with id XML_FEATURE_END and a NULL name. */
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
  static const XML_Feature features[] = {
      {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
       sizeof(XML_Char)},
      {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
       sizeof(XML_LChar)},
#ifdef XML_UNICODE
      {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
      {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
      {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
#if XML_CONTEXT_BYTES > 0
      {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
       XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
      {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
      {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
      {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
      {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
#if XML_GE == 1
      /* Added in Expat 2.4.0 for XML_DTD defined and
       * added in Expat 2.6.0 for XML_GE == 1. */
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_BLAP_MAX_AMP"),
       (long int)
           EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_BLAP_ACT_THRES"),
       EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
      /* Added in Expat 2.6.0. */
      {XML_FEATURE_GE, XML_L("XML_GE"), 0},
      /* Added in Expat 2.7.2. */
      {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT,
       XML_L("XML_AT_MAX_AMP"),
       (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT},
      {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT,
       XML_L("XML_AT_ACT_THRES"),
       (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT},
#endif
      {XML_FEATURE_END, NULL, 0}};

  return features;
}

#if XML_GE == 1
/* Sets the maximum amplification factor used by the parser's accounting.
   Returns XML_FALSE, changing nothing, when parser is NULL, when the
   parser has a parent parser, or when the factor is NaN or below 1.0;
   returns XML_TRUE otherwise. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
    XML_Parser parser, float maximumAmplificationFactor) {
  if ((parser == NULL) || (parser->m_parentParser != NULL)
      || isnan(maximumAmplificationFactor)
      || (maximumAmplificationFactor < 1.0f)) {
    return XML_FALSE;
  }
  parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
  return XML_TRUE;
}

/* Sets the activation threshold (in bytes) used by the parser's
   accounting.  Returns XML_FALSE, changing nothing, when parser is NULL or
   has a parent parser; returns XML_TRUE otherwise. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(
    XML_Parser parser, unsigned long long activationThresholdBytes) {
  if ((parser == NULL) || (parser->m_parentParser != NULL)) {
    return XML_FALSE;
  }
  parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
  return XML_TRUE;
}

/* Sets the maximum amplification factor of the parser's allocation
   tracker.  Rejects (returns XML_FALSE for) a NULL parser, a parser that
   has a parent parser, a NaN factor and a factor below 1.0. */
XML_Bool XMLCALL
XML_SetAllocTrackerMaximumAmplification(XML_Parser parser,
                                        float maximumAmplificationFactor) {
  if ((parser == NULL) ||
(parser->m_parentParser != NULL)
      || isnan(maximumAmplificationFactor)
      || (maximumAmplificationFactor < 1.0f)) {
    return XML_FALSE;
  }
  parser->m_alloc_tracker.maximumAmplificationFactor
      = maximumAmplificationFactor;
  return XML_TRUE;
}

/* Sets the activation threshold (in bytes) of the parser's allocation
   tracker.  Returns XML_FALSE, changing nothing, when parser is NULL or
   has a parent parser; returns XML_TRUE otherwise. */
XML_Bool XMLCALL
XML_SetAllocTrackerActivationThreshold(
    XML_Parser parser, unsigned long long activationThresholdBytes) {
  if ((parser == NULL) || (parser->m_parentParser != NULL)) {
    return XML_FALSE;
  }
  parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes;
  return XML_TRUE;
}
#endif /* XML_GE == 1 */

/* Enables or disables reparse deferral.  Only the exact values XML_TRUE
   and XML_FALSE are accepted; any other value, or a NULL parser, leaves
   the setting unchanged and returns XML_FALSE. */
XML_Bool XMLCALL
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
  if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
    parser->m_reparseDeferralEnabled = enabled;
    return XML_TRUE;
  }
  return XML_FALSE;
}

/* Initially tag->rawName always points into the parse buffer;
   for those TAG instances opened while the current parse buffer was
   processed, and not yet closed, we need to store tag->rawName in a more
   permanent location, since the parse buffer is about to be discarded.

   The raw name is copied into tag->buf directly after the converted name
   and its terminator, growing tag->buf when it is too small.  Returns
   XML_FALSE if the required size would exceed INT_MAX or if the
   reallocation fails, XML_TRUE otherwise.
*/
static XML_Bool
storeRawNames(XML_Parser parser) {
  TAG *tag = parser->m_tagStack;
  while (tag) {
    size_t bufSize;
    /* bytes taken by the converted name including its terminator */
    size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
    size_t rawNameLen;
    char *rawNameBuf = tag->buf.raw + nameLen;
    /* Stop if already stored.  Since m_tagStack is a stack, we can stop
       at the first entry that has already been copied; everything
       below it in the stack has already been accounted for in a
       previous call to this function.
    */
    if (tag->rawName == rawNameBuf)
      break;
    /* For reuse purposes we need to ensure that the
       size of tag->buf is a multiple of sizeof(XML_Char).
    */
    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
    /* Detect and prevent integer overflow. */
    if (rawNameLen > (size_t)INT_MAX - nameLen)
      return XML_FALSE;
    bufSize = nameLen + rawNameLen;
    if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) {
      char *temp = REALLOC(parser, tag->buf.raw, bufSize);
      if (temp == NULL)
        return XML_FALSE;
      /* NOTE(review): tag->buf still holds the pre-REALLOC address in the
         two fix-ups below; it is only compared and subtracted, never
         dereferenced, and is overwritten right after them. */
      /* if tag->name.str points to tag->buf.str (only when namespace
         processing is off) then we have to update it
      */
      if (tag->name.str == tag->buf.str)
        tag->name.str = (XML_Char *)temp;
      /* if tag->name.localPart is set (when namespace processing is on)
         then update it as well, since it will always point into tag->buf
      */
      if (tag->name.localPart)
        tag->name.localPart
            = (XML_Char *)temp + (tag->name.localPart - tag->buf.str);
      tag->buf.raw = temp;
      tag->bufEnd = temp + bufSize;
      rawNameBuf = temp + nameLen;
    }
    memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
    tag->rawName = rawNameBuf;
    tag = tag->parent;
  }
  return XML_TRUE;
}

/* Processor for content: runs doContent with a start tag level of 1 when
   this parser has a parent parser and 0 otherwise, accounting the input as
   XML_ACCOUNT_DIRECT.  On success the raw names of still-open tags are
   saved with storeRawNames; if that fails, XML_ERROR_NO_MEMORY is
   returned. */
static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
                 const char **endPtr) {
  enum XML_Error result = doContent(
      parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
      endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
      XML_ACCOUNT_DIRECT);
  if (result == XML_ERROR_NONE) {
    if (!
storeRawNames(parser))
      return XML_ERROR_NO_MEMORY;
  }
  return result;
}

/* First stage of external entity parsing: initializes the encoding, then
   installs and immediately runs externalEntityInitProcessor2 on the same
   input.  An encoding initialization error is returned unchanged. */
static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser, const char *start,
                            const char *end, const char **endPtr) {
  enum XML_Error result = initializeEncoding(parser);
  if (result != XML_ERROR_NONE)
    return result;
  parser->m_processor = externalEntityInitProcessor2;
  return externalEntityInitProcessor2(parser, start, end, endPtr);
}

/* Second stage: looks at the first token of the entity.

   - A byte order mark is accounted for (when XML_GE == 1) and skipped.
   - A partial token or partial character makes the processor return with
     *endPtr = start when more input may follow, or is reported as
     XML_ERROR_UNCLOSED_TOKEN / XML_ERROR_PARTIAL_CHAR on the final buffer.
   - Any other token is left untouched.

   Unless it returned early, the function installs and runs
   externalEntityInitProcessor3. */
static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser, const char *start,
                             const char *end, const char **endPtr) {
  const char *next = start; /* XmlContentTok doesn't always set the last arg */
  int tok = XmlContentTok(parser->m_encoding, start, end, &next);
  switch (tok) {
  case XML_TOK_BOM:
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
                                  XML_ACCOUNT_DIRECT)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif /* XML_GE == 1 */

    /* If we are at the end of the buffer, this would cause the next stage,
       i.e. externalEntityInitProcessor3, to pass control directly to
       doContent (by detecting XML_TOK_NONE) without processing any xml text
       declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
    */
    if (next == end && ! parser->m_parsingStatus.finalBuffer) {
      *endPtr = next;
      return XML_ERROR_NONE;
    }
    start = next;
    break;
  case XML_TOK_PARTIAL:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    parser->m_eventPtr = start;
    return XML_ERROR_UNCLOSED_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    parser->m_eventPtr = start;
    return XML_ERROR_PARTIAL_CHAR;
  }
  parser->m_processor = externalEntityInitProcessor3;
  return externalEntityInitProcessor3(parser, start, end, endPtr);
}

/* Third stage: handles an optional XML/text declaration at the start of
   the entity.

   - A declaration is passed to processXmlDecl; afterwards the parser state
     decides whether to return (suspended: *endPtr = next; finished:
     XML_ERROR_ABORTED) or to continue after the declaration.
   - A partial token or partial character is handled as in stage two,
     except that m_eventPtr was already set on entry.

   Unless it returned early, the function sets m_tagLevel to 1, then
   installs and runs externalEntityContentProcessor. */
static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser, const char *start,
                             const char *end, const char **endPtr) {
  int tok;
  const char *next = start; /* XmlContentTok doesn't always set the last arg */
  parser->m_eventPtr = start;
  tok = XmlContentTok(parser->m_encoding, start, end, &next);
  /* Note: These bytes are accounted later in:
           - processXmlDecl
           - externalEntityContentProcessor
  */
  parser->m_eventEndPtr = next;

  switch (tok) {
  case XML_TOK_XML_DECL: {
    enum XML_Error result;
    result = processXmlDecl(parser, 1, start, next);
    if (result != XML_ERROR_NONE)
      return result;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *endPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    case XML_PARSING:
      if (parser->m_reenter) {
        return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
      }
      /* Fall through */
    default:
      start = next;
    }
  } break;
  case XML_TOK_PARTIAL:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_UNCLOSED_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_PARTIAL_CHAR;
  }
  parser->m_processor = externalEntityContentProcessor;
  parser->m_tagLevel = 1;
  return externalEntityContentProcessor(parser, start, end, endPtr);
}

/* Content processor for external entities: like contentProcessor, but
   always uses a start tag level of 1 and accounts the input as
   XML_ACCOUNT_ENTITY_EXPANSION.  On success the raw names of still-open
   tags are saved with storeRawNames; if that fails, XML_ERROR_NO_MEMORY is
   returned. */
static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser, const char *start,
                               const char *end, const char **endPtr) {
  enum XML_Error result
      = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
                  (XML_Bool)! parser->m_parsingStatus.finalBuffer,
                  XML_ACCOUNT_ENTITY_EXPANSION);
  if (result == XML_ERROR_NONE) {
    if (! storeRawNames(parser))
      return XML_ERROR_NO_MEMORY;
  }
  return result;
}

static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#if XML_GE == 1
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (!
accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, 3340 account)) { 3341 accountingOnAbort(parser); 3342 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 3343 } 3344 #endif 3345 *eventEndPP = next; 3346 switch (tok) { 3347 case XML_TOK_TRAILING_CR: 3348 if (haveMore) { 3349 *nextPtr = s; 3350 return XML_ERROR_NONE; 3351 } 3352 *eventEndPP = end; 3353 if (parser->m_characterDataHandler) { 3354 XML_Char c = 0xA; 3355 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 3356 } else if (parser->m_defaultHandler) 3357 reportDefault(parser, enc, s, end); 3358 /* We are at the end of the final buffer, should we check for 3359 XML_SUSPENDED, XML_FINISHED? 3360 */ 3361 if (startTagLevel == 0) 3362 return XML_ERROR_NO_ELEMENTS; 3363 if (parser->m_tagLevel != startTagLevel) 3364 return XML_ERROR_ASYNC_ENTITY; 3365 *nextPtr = end; 3366 return XML_ERROR_NONE; 3367 case XML_TOK_NONE: 3368 if (haveMore) { 3369 *nextPtr = s; 3370 return XML_ERROR_NONE; 3371 } 3372 if (startTagLevel > 0) { 3373 if (parser->m_tagLevel != startTagLevel) 3374 return XML_ERROR_ASYNC_ENTITY; 3375 *nextPtr = s; 3376 return XML_ERROR_NONE; 3377 } 3378 return XML_ERROR_NO_ELEMENTS; 3379 case XML_TOK_INVALID: 3380 *eventPP = next; 3381 return XML_ERROR_INVALID_TOKEN; 3382 case XML_TOK_PARTIAL: 3383 if (haveMore) { 3384 *nextPtr = s; 3385 return XML_ERROR_NONE; 3386 } 3387 return XML_ERROR_UNCLOSED_TOKEN; 3388 case XML_TOK_PARTIAL_CHAR: 3389 if (haveMore) { 3390 *nextPtr = s; 3391 return XML_ERROR_NONE; 3392 } 3393 return XML_ERROR_PARTIAL_CHAR; 3394 case XML_TOK_ENTITY_REF: { 3395 const XML_Char *name; 3396 ENTITY *entity; 3397 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 3398 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 3399 if (ch) { 3400 #if XML_GE == 1 3401 /* NOTE: We are replacing 4-6 characters original input for 1 character 3402 * so there is no amplification and hence recording without 3403 * protection. 
*/ 3404 accountingDiffTolerated(parser, tok, (char *)&ch, 3405 ((char *)&ch) + sizeof(XML_Char), __LINE__, 3406 XML_ACCOUNT_ENTITY_EXPANSION); 3407 #endif /* XML_GE == 1 */ 3408 if (parser->m_characterDataHandler) 3409 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); 3410 else if (parser->m_defaultHandler) 3411 reportDefault(parser, enc, s, next); 3412 break; 3413 } 3414 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 3415 next - enc->minBytesPerChar); 3416 if (! name) 3417 return XML_ERROR_NO_MEMORY; 3418 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 3419 poolDiscard(&dtd->pool); 3420 /* First, determine if a check for an existing declaration is needed; 3421 if yes, check that the entity exists, and that it is internal, 3422 otherwise call the skipped entity or default handler. 3423 */ 3424 if (! dtd->hasParamEntityRefs || dtd->standalone) { 3425 if (! entity) 3426 return XML_ERROR_UNDEFINED_ENTITY; 3427 else if (! entity->is_internal) 3428 return XML_ERROR_ENTITY_DECLARED_IN_PE; 3429 } else if (! entity) { 3430 if (parser->m_skippedEntityHandler) 3431 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 3432 else if (parser->m_defaultHandler) 3433 reportDefault(parser, enc, s, next); 3434 break; 3435 } 3436 if (entity->open) 3437 return XML_ERROR_RECURSIVE_ENTITY_REF; 3438 if (entity->notation) 3439 return XML_ERROR_BINARY_ENTITY_REF; 3440 if (entity->textPtr) { 3441 enum XML_Error result; 3442 if (! 
parser->m_defaultExpandInternalEntities) { 3443 if (parser->m_skippedEntityHandler) 3444 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 3445 0); 3446 else if (parser->m_defaultHandler) 3447 reportDefault(parser, enc, s, next); 3448 break; 3449 } 3450 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL); 3451 if (result != XML_ERROR_NONE) 3452 return result; 3453 } else if (parser->m_externalEntityRefHandler) { 3454 const XML_Char *context; 3455 entity->open = XML_TRUE; 3456 context = getContext(parser); 3457 entity->open = XML_FALSE; 3458 if (! context) 3459 return XML_ERROR_NO_MEMORY; 3460 if (! parser->m_externalEntityRefHandler( 3461 parser->m_externalEntityRefHandlerArg, context, entity->base, 3462 entity->systemId, entity->publicId)) 3463 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 3464 poolDiscard(&parser->m_tempPool); 3465 } else if (parser->m_defaultHandler) 3466 reportDefault(parser, enc, s, next); 3467 break; 3468 } 3469 case XML_TOK_START_TAG_NO_ATTS: 3470 /* fall through */ 3471 case XML_TOK_START_TAG_WITH_ATTS: { 3472 TAG *tag; 3473 enum XML_Error result; 3474 XML_Char *toPtr; 3475 if (parser->m_freeTagList) { 3476 tag = parser->m_freeTagList; 3477 parser->m_freeTagList = parser->m_freeTagList->parent; 3478 } else { 3479 tag = MALLOC(parser, sizeof(TAG)); 3480 if (! tag) 3481 return XML_ERROR_NO_MEMORY; 3482 tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE); 3483 if (! 
tag->buf.raw) { 3484 FREE(parser, tag); 3485 return XML_ERROR_NO_MEMORY; 3486 } 3487 tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE; 3488 } 3489 tag->bindings = NULL; 3490 tag->parent = parser->m_tagStack; 3491 parser->m_tagStack = tag; 3492 tag->name.localPart = NULL; 3493 tag->name.prefix = NULL; 3494 tag->rawName = s + enc->minBytesPerChar; 3495 tag->rawNameLength = XmlNameLength(enc, tag->rawName); 3496 ++parser->m_tagLevel; 3497 { 3498 const char *rawNameEnd = tag->rawName + tag->rawNameLength; 3499 const char *fromPtr = tag->rawName; 3500 toPtr = tag->buf.str; 3501 for (;;) { 3502 int convLen; 3503 const enum XML_Convert_Result convert_res 3504 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, 3505 (ICHAR *)tag->bufEnd - 1); 3506 convLen = (int)(toPtr - tag->buf.str); 3507 if ((fromPtr >= rawNameEnd) 3508 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { 3509 tag->name.strLen = convLen; 3510 break; 3511 } 3512 if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw)) 3513 return XML_ERROR_NO_MEMORY; 3514 const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2; 3515 { 3516 char *temp = REALLOC(parser, tag->buf.raw, bufSize); 3517 if (temp == NULL) 3518 return XML_ERROR_NO_MEMORY; 3519 tag->buf.raw = temp; 3520 tag->bufEnd = temp + bufSize; 3521 toPtr = (XML_Char *)temp + convLen; 3522 } 3523 } 3524 } 3525 tag->name.str = tag->buf.str; 3526 *toPtr = XML_T('\0'); 3527 result 3528 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); 3529 if (result) 3530 return result; 3531 if (parser->m_startElementHandler) 3532 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, 3533 (const XML_Char **)parser->m_atts); 3534 else if (parser->m_defaultHandler) 3535 reportDefault(parser, enc, s, next); 3536 poolClear(&parser->m_tempPool); 3537 break; 3538 } 3539 case XML_TOK_EMPTY_ELEMENT_NO_ATTS: 3540 /* fall through */ 3541 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { 3542 const char *rawName = s + enc->minBytesPerChar; 3543 enum 
XML_Error result; 3544 BINDING *bindings = NULL; 3545 XML_Bool noElmHandlers = XML_TRUE; 3546 TAG_NAME name; 3547 name.str = poolStoreString(&parser->m_tempPool, enc, rawName, 3548 rawName + XmlNameLength(enc, rawName)); 3549 if (! name.str) 3550 return XML_ERROR_NO_MEMORY; 3551 poolFinish(&parser->m_tempPool); 3552 result = storeAtts(parser, enc, s, &name, &bindings, 3553 XML_ACCOUNT_NONE /* token spans whole start tag */); 3554 if (result != XML_ERROR_NONE) { 3555 freeBindings(parser, bindings); 3556 return result; 3557 } 3558 poolFinish(&parser->m_tempPool); 3559 if (parser->m_startElementHandler) { 3560 parser->m_startElementHandler(parser->m_handlerArg, name.str, 3561 (const XML_Char **)parser->m_atts); 3562 noElmHandlers = XML_FALSE; 3563 } 3564 if (parser->m_endElementHandler) { 3565 if (parser->m_startElementHandler) 3566 *eventPP = *eventEndPP; 3567 parser->m_endElementHandler(parser->m_handlerArg, name.str); 3568 noElmHandlers = XML_FALSE; 3569 } 3570 if (noElmHandlers && parser->m_defaultHandler) 3571 reportDefault(parser, enc, s, next); 3572 poolClear(&parser->m_tempPool); 3573 freeBindings(parser, bindings); 3574 } 3575 if ((parser->m_tagLevel == 0) 3576 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3577 if (parser->m_parsingStatus.parsing == XML_SUSPENDED 3578 || (parser->m_parsingStatus.parsing == XML_PARSING 3579 && parser->m_reenter)) 3580 parser->m_processor = epilogProcessor; 3581 else 3582 return epilogProcessor(parser, next, end, nextPtr); 3583 } 3584 break; 3585 case XML_TOK_END_TAG: 3586 if (parser->m_tagLevel == startTagLevel) 3587 return XML_ERROR_ASYNC_ENTITY; 3588 else { 3589 int len; 3590 const char *rawName; 3591 TAG *tag = parser->m_tagStack; 3592 rawName = s + enc->minBytesPerChar * 2; 3593 len = XmlNameLength(enc, rawName); 3594 if (len != tag->rawNameLength 3595 || memcmp(tag->rawName, rawName, len) != 0) { 3596 *eventPP = rawName; 3597 return XML_ERROR_TAG_MISMATCH; 3598 } 3599 parser->m_tagStack = tag->parent; 3600 
tag->parent = parser->m_freeTagList; 3601 parser->m_freeTagList = tag; 3602 --parser->m_tagLevel; 3603 if (parser->m_endElementHandler) { 3604 const XML_Char *localPart; 3605 const XML_Char *prefix; 3606 XML_Char *uri; 3607 localPart = tag->name.localPart; 3608 if (parser->m_ns && localPart) { 3609 /* localPart and prefix may have been overwritten in 3610 tag->name.str, since this points to the binding->uri 3611 buffer which gets reused; so we have to add them again 3612 */ 3613 uri = (XML_Char *)tag->name.str + tag->name.uriLen; 3614 /* don't need to check for space - already done in storeAtts() */ 3615 while (*localPart) 3616 *uri++ = *localPart++; 3617 prefix = tag->name.prefix; 3618 if (parser->m_ns_triplets && prefix) { 3619 *uri++ = parser->m_namespaceSeparator; 3620 while (*prefix) 3621 *uri++ = *prefix++; 3622 } 3623 *uri = XML_T('\0'); 3624 } 3625 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); 3626 } else if (parser->m_defaultHandler) 3627 reportDefault(parser, enc, s, next); 3628 while (tag->bindings) { 3629 BINDING *b = tag->bindings; 3630 if (parser->m_endNamespaceDeclHandler) 3631 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, 3632 b->prefix->name); 3633 tag->bindings = tag->bindings->nextTagBinding; 3634 b->nextTagBinding = parser->m_freeBindingList; 3635 parser->m_freeBindingList = b; 3636 b->prefix->binding = b->prevPrefixBinding; 3637 } 3638 if ((parser->m_tagLevel == 0) 3639 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3640 if (parser->m_parsingStatus.parsing == XML_SUSPENDED 3641 || (parser->m_parsingStatus.parsing == XML_PARSING 3642 && parser->m_reenter)) 3643 parser->m_processor = epilogProcessor; 3644 else 3645 return epilogProcessor(parser, next, end, nextPtr); 3646 } 3647 } 3648 break; 3649 case XML_TOK_CHAR_REF: { 3650 int n = XmlCharRefNumber(enc, s); 3651 if (n < 0) 3652 return XML_ERROR_BAD_CHAR_REF; 3653 if (parser->m_characterDataHandler) { 3654 XML_Char buf[XML_ENCODE_MAX]; 3655 
parser->m_characterDataHandler(parser->m_handlerArg, buf, 3656 XmlEncode(n, (ICHAR *)buf)); 3657 } else if (parser->m_defaultHandler) 3658 reportDefault(parser, enc, s, next); 3659 } break; 3660 case XML_TOK_XML_DECL: 3661 return XML_ERROR_MISPLACED_XML_PI; 3662 case XML_TOK_DATA_NEWLINE: 3663 if (parser->m_characterDataHandler) { 3664 XML_Char c = 0xA; 3665 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 3666 } else if (parser->m_defaultHandler) 3667 reportDefault(parser, enc, s, next); 3668 break; 3669 case XML_TOK_CDATA_SECT_OPEN: { 3670 enum XML_Error result; 3671 if (parser->m_startCdataSectionHandler) 3672 parser->m_startCdataSectionHandler(parser->m_handlerArg); 3673 /* BEGIN disabled code */ 3674 /* Suppose you doing a transformation on a document that involves 3675 changing only the character data. You set up a defaultHandler 3676 and a characterDataHandler. The defaultHandler simply copies 3677 characters through. The characterDataHandler does the 3678 transformation and writes the characters out escaping them as 3679 necessary. This case will fail to work if we leave out the 3680 following two lines (because & and < inside CDATA sections will 3681 be incorrectly escaped). 3682 3683 However, now we have a start/endCdataSectionHandler, so it seems 3684 easier to let the user deal with this. 3685 */ 3686 else if ((0) && parser->m_characterDataHandler) 3687 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3688 0); 3689 /* END disabled code */ 3690 else if (parser->m_defaultHandler) 3691 reportDefault(parser, enc, s, next); 3692 result 3693 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); 3694 if (result != XML_ERROR_NONE) 3695 return result; 3696 else if (! 
next) { 3697 parser->m_processor = cdataSectionProcessor; 3698 return result; 3699 } 3700 } break; 3701 case XML_TOK_TRAILING_RSQB: 3702 if (haveMore) { 3703 *nextPtr = s; 3704 return XML_ERROR_NONE; 3705 } 3706 if (parser->m_characterDataHandler) { 3707 if (MUST_CONVERT(enc, s)) { 3708 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3709 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3710 parser->m_characterDataHandler( 3711 parser->m_handlerArg, parser->m_dataBuf, 3712 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3713 } else 3714 parser->m_characterDataHandler( 3715 parser->m_handlerArg, (const XML_Char *)s, 3716 (int)((const XML_Char *)end - (const XML_Char *)s)); 3717 } else if (parser->m_defaultHandler) 3718 reportDefault(parser, enc, s, end); 3719 /* We are at the end of the final buffer, should we check for 3720 XML_SUSPENDED, XML_FINISHED? 3721 */ 3722 if (startTagLevel == 0) { 3723 *eventPP = end; 3724 return XML_ERROR_NO_ELEMENTS; 3725 } 3726 if (parser->m_tagLevel != startTagLevel) { 3727 *eventPP = end; 3728 return XML_ERROR_ASYNC_ENTITY; 3729 } 3730 *nextPtr = end; 3731 return XML_ERROR_NONE; 3732 case XML_TOK_DATA_CHARS: { 3733 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 3734 if (charDataHandler) { 3735 if (MUST_CONVERT(enc, s)) { 3736 for (;;) { 3737 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3738 const enum XML_Convert_Result convert_res = XmlConvert( 3739 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3740 *eventEndPP = s; 3741 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3742 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3743 if ((convert_res == XML_CONVERT_COMPLETED) 3744 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 3745 break; 3746 *eventPP = s; 3747 } 3748 } else 3749 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 3750 (int)((const XML_Char *)next - (const XML_Char *)s)); 3751 } else if (parser->m_defaultHandler) 3752 reportDefault(parser, enc, s, next); 
3753 } break; 3754 case XML_TOK_PI: 3755 if (! reportProcessingInstruction(parser, enc, s, next)) 3756 return XML_ERROR_NO_MEMORY; 3757 break; 3758 case XML_TOK_COMMENT: 3759 if (! reportComment(parser, enc, s, next)) 3760 return XML_ERROR_NO_MEMORY; 3761 break; 3762 default: 3763 /* All of the tokens produced by XmlContentTok() have their own 3764 * explicit cases, so this default is not strictly necessary. 3765 * However it is a useful safety net, so we retain the code and 3766 * simply exclude it from the coverage tests. 3767 * 3768 * LCOV_EXCL_START 3769 */ 3770 if (parser->m_defaultHandler) 3771 reportDefault(parser, enc, s, next); 3772 break; 3773 /* LCOV_EXCL_STOP */ 3774 } 3775 switch (parser->m_parsingStatus.parsing) { 3776 case XML_SUSPENDED: 3777 *eventPP = next; 3778 *nextPtr = next; 3779 return XML_ERROR_NONE; 3780 case XML_FINISHED: 3781 *eventPP = next; 3782 return XML_ERROR_ABORTED; 3783 case XML_PARSING: 3784 if (parser->m_reenter) { 3785 *nextPtr = next; 3786 return XML_ERROR_NONE; 3787 } 3788 /* Fall through */ 3789 default:; 3790 *eventPP = s = next; 3791 } 3792 } 3793 /* not reached */ 3794 }

/* Return every binding on the given per-tag list to the parser's free list.
 *
 * This function does not call free() on the allocated memory, merely
 * moving it to the parser's m_freeBindingList where it can be freed or
 * reused as appropriate.
 *
 * For each binding the end-namespace-declaration handler (if set) is
 * invoked with the prefix name, and the prefix's active binding is reset
 * to the binding that was in effect before this one (prevPrefixBinding).
 */
static void
freeBindings(XML_Parser parser, BINDING *bindings) {
  while (bindings) {
    BINDING *b = bindings;

    /* m_startNamespaceDeclHandler will have been called for this
     * binding in addBinding(), so call the end handler now.
     */
    if (parser->m_endNamespaceDeclHandler)
      parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);

    /* advance first: b->nextTagBinding is about to be reused as the
       free-list link */
    bindings = bindings->nextTagBinding;
    b->nextTagBinding = parser->m_freeBindingList;
    parser->m_freeBindingList = b;
    b->prefix->binding = b->prevPrefixBinding;
  }
}

/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      parser whose m_atts / m_attInfo / m_nsAtts / m_tempPool
                  are (re)used to build the attribute list
   - enc:         encoding of the start tag pointed to by attStr
   - attStr:      start of the start tag as handed to XmlGetAttributes()
   - tagNamePtr:  in: str holds the qualified element name;
                  out (namespace mode only): localPart, uriLen, prefix,
                  prefixLen are filled in and str is redirected to the
                  binding's uri buffer holding the expanded name
   - bindingsPtr: list head that addBinding() prepends new bindings to
   - account:     accounting mode forwarded to storeAttributeValue()

   Returns XML_ERROR_NONE on success; otherwise XML_ERROR_NO_MEMORY,
   XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_UNBOUND_PREFIX, or whatever
   storeAttributeValue() / addBinding() reported.

   Bookkeeping note: the XML_Char stored just before an attribute name
   (name[-1]) is used as a per-element flag: 0 = not seen, 1 = seen,
   2 = seen and prefixed (still needs namespace expansion).  On the
   success paths these flags are reset to 0 before returning.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0;
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = REALLOC(parser, parser->m_atts,
                   parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = REALLOC(parser, parser->m_attInfo,
                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* the first tokenizer pass could only fill oldAttsSize slots */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* m_atts doubles as the (name, value) pointer array handed to the
     application; entry i of m_atts is consumed before appAtts overwrites it */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
    */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1;
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* namespace declarations are not reported as attributes:
           drop the name slot that was just written */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* only default attributes that were not specified and have a value */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0;

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    unsigned int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        for (j = 0; j < (unsigned int)b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* skip past "prefix:" so that s points at the local part */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE;
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    while (binding->prefix->name[prefixLen++])
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* open tags whose name lives in the old uri buffer must follow it */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}

4296 static XML_Bool 4297 is_rfc3986_uri_char(XML_Char candidate) { 4298 // For the RFC 3986 ANBF grammar see 4299 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A 4300 4301 switch (candidate) { 4302 // From rule "ALPHA" (uppercase half) 4303 case 'A': 4304 case 'B': 4305 case 'C': 4306 case 'D': 4307 case 'E': 4308 case 'F': 4309 case 'G': 4310 case 'H': 4311 case 'I': 4312 case 'J': 4313 case 'K': 4314 case 'L': 4315 case 'M': 4316 case 'N': 4317 case 'O': 4318 case 'P': 4319 case 'Q': 4320 case 'R': 4321 case 'S': 4322 case 'T': 4323 case 'U': 4324 case 'V': 4325 case 'W': 4326 case 'X': 4327 case 'Y': 4328 case 'Z': 4329 4330 // From rule "ALPHA" (lowercase half) 4331 case 'a': 4332 case 'b': 4333 case 'c': 4334 case 'd': 4335 case 'e': 4336 case 'f': 4337 case 'g': 4338 case 'h': 4339 case 'i': 4340 case 'j': 4341 case 'k': 4342 case 'l': 4343 case 'm': 4344 case 'n': 4345 case 'o': 4346 case 'p': 4347 case 'q': 4348 case 'r': 4349 case 's': 4350 case 't': 4351 case 'u': 4352 case 'v': 4353 case 'w': 4354 case 'x': 4355 case 'y': 4356 case 'z': 4357 4358 // From rule "DIGIT" 4359 case '0': 4360 case '1': 4361 case '2':
4362 case '3': 4363 case '4': 4364 case '5': 4365 case '6': 4366 case '7': 4367 case '8': 4368 case '9': 4369 4370 // From rule "pct-encoded" 4371 case '%': 4372 4373 // From rule "unreserved" 4374 case '-': 4375 case '.': 4376 case '_': 4377 case '~': 4378 4379 // From rule "gen-delims" 4380 case ':': 4381 case '/': 4382 case '?': 4383 case '#': 4384 case '[': 4385 case ']': 4386 case '@': 4387 4388 // From rule "sub-delims" 4389 case '!': 4390 case '$': 4391 case '&': 4392 case '\'': 4393 case '(': 4394 case ')': 4395 case '*': 4396 case '+': 4397 case ',': 4398 case ';': 4399 case '=': 4400 return XML_TRUE; 4401 4402 default: 4403 return XML_FALSE; 4404 } 4405 } 4406 4407 /* addBinding() overwrites the value of prefix->binding without checking. 4408 Therefore one must keep track of the old value outside of addBinding(). 4409 */ 4410 static enum XML_Error 4411 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, 4412 const XML_Char *uri, BINDING **bindingsPtr) { 4413 // "http://www.w3.org/XML/1998/namespace" 4414 static const XML_Char xmlNamespace[] 4415 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 4416 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, 4417 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, 4418 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, 4419 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, 4420 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, 4421 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, 4422 ASCII_e, '\0'}; 4423 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 4424 // "http://www.w3.org/2000/xmlns/" 4425 static const XML_Char xmlnsNamespace[] 4426 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 4427 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 4428 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 4429 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 4430 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 4431 
static const int xmlnsLen 4432 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 4433 4434 XML_Bool mustBeXML = XML_FALSE; 4435 XML_Bool isXML = XML_TRUE; 4436 XML_Bool isXMLNS = XML_TRUE; 4437 4438 BINDING *b; 4439 int len; 4440 4441 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 4442 if (*uri == XML_T('\0') && prefix->name) 4443 return XML_ERROR_UNDECLARING_PREFIX; 4444 4445 if (prefix->name && prefix->name[0] == XML_T(ASCII_x) 4446 && prefix->name[1] == XML_T(ASCII_m) 4447 && prefix->name[2] == XML_T(ASCII_l)) { 4448 /* Not allowed to bind xmlns */ 4449 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) 4450 && prefix->name[5] == XML_T('\0')) 4451 return XML_ERROR_RESERVED_PREFIX_XMLNS; 4452 4453 if (prefix->name[3] == XML_T('\0')) 4454 mustBeXML = XML_TRUE; 4455 } 4456 4457 for (len = 0; uri[len]; len++) { 4458 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) 4459 isXML = XML_FALSE; 4460 4461 if (! mustBeXML && isXMLNS 4462 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) 4463 isXMLNS = XML_FALSE; 4464 4465 // NOTE: While Expat does not validate namespace URIs against RFC 3986 4466 // today (and is not REQUIRED to do so with regard to the XML 1.0 4467 // namespaces specification) we have to at least make sure, that 4468 // the application on top of Expat (that is likely splitting expanded 4469 // element names ("qualified names") of form 4470 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 4471 // in its element handler code) cannot be confused by an attacker 4472 // putting additional namespace separator characters into namespace 4473 // declarations. That would be ambiguous and not to be expected. 
4474 // 4475 // While the HTML API docs of function XML_ParserCreateNS have been 4476 // advising against use of a namespace separator character that can 4477 // appear in a URI for >20 years now, some widespread applications 4478 // are using URI characters (':' (colon) in particular) for a 4479 // namespace separator, in practice. To keep these applications 4480 // functional, we only reject namespaces URIs containing the 4481 // application-chosen namespace separator if the chosen separator 4482 // is a non-URI character with regard to RFC 3986. 4483 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) 4484 && ! is_rfc3986_uri_char(uri[len])) { 4485 return XML_ERROR_SYNTAX; 4486 } 4487 } 4488 isXML = isXML && len == xmlLen; 4489 isXMLNS = isXMLNS && len == xmlnsLen; 4490 4491 if (mustBeXML != isXML) 4492 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML 4493 : XML_ERROR_RESERVED_NAMESPACE_URI; 4494 4495 if (isXMLNS) 4496 return XML_ERROR_RESERVED_NAMESPACE_URI; 4497 4498 if (parser->m_namespaceSeparator) 4499 len++; 4500 if (parser->m_freeBindingList) { 4501 b = parser->m_freeBindingList; 4502 if (len > b->uriAlloc) { 4503 /* Detect and prevent integer overflow */ 4504 if (len > INT_MAX - EXPAND_SPARE) { 4505 return XML_ERROR_NO_MEMORY; 4506 } 4507 4508 /* Detect and prevent integer overflow. 4509 * The preprocessor guard addresses the "always false" warning 4510 * from -Wtype-limits on platforms where 4511 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4512 #if UINT_MAX >= SIZE_MAX 4513 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { 4514 return XML_ERROR_NO_MEMORY; 4515 } 4516 #endif 4517 4518 XML_Char *temp 4519 = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4520 if (temp == NULL) 4521 return XML_ERROR_NO_MEMORY; 4522 b->uri = temp; 4523 b->uriAlloc = len + EXPAND_SPARE; 4524 } 4525 parser->m_freeBindingList = b->nextTagBinding; 4526 } else { 4527 b = MALLOC(parser, sizeof(BINDING)); 4528 if (! 
b) 4529 return XML_ERROR_NO_MEMORY; 4530 4531 /* Detect and prevent integer overflow */ 4532 if (len > INT_MAX - EXPAND_SPARE) { 4533 return XML_ERROR_NO_MEMORY; 4534 } 4535 /* Detect and prevent integer overflow. 4536 * The preprocessor guard addresses the "always false" warning 4537 * from -Wtype-limits on platforms where 4538 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4539 #if UINT_MAX >= SIZE_MAX 4540 if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { 4541 return XML_ERROR_NO_MEMORY; 4542 } 4543 #endif 4544 4545 b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4546 if (! b->uri) { 4547 FREE(parser, b); 4548 return XML_ERROR_NO_MEMORY; 4549 } 4550 b->uriAlloc = len + EXPAND_SPARE; 4551 } 4552 b->uriLen = len; 4553 memcpy(b->uri, uri, len * sizeof(XML_Char)); 4554 if (parser->m_namespaceSeparator) 4555 b->uri[len - 1] = parser->m_namespaceSeparator; 4556 b->prefix = prefix; 4557 b->attId = attId; 4558 b->prevPrefixBinding = prefix->binding; 4559 /* NULL binding when default namespace undeclared */ 4560 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4561 prefix->binding = NULL; 4562 else 4563 prefix->binding = b; 4564 b->nextTagBinding = *bindingsPtr; 4565 *bindingsPtr = b; 4566 /* if attId == NULL then we are not starting a namespace scope */ 4567 if (attId && parser->m_startNamespaceDeclHandler) 4568 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, 4569 prefix->binding ? uri : 0); 4570 return XML_ERROR_NONE; 4571 } 4572 4573 /* The idea here is to avoid using stack for each CDATA section when 4574 the whole file is parsed with one call. 4575 */ 4576 static enum XML_Error PTRCALL 4577 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, 4578 const char **endPtr) { 4579 enum XML_Error result = doCdataSection( 4580 parser, parser->m_encoding, &start, end, endPtr, 4581 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4582 if (result != XML_ERROR_NONE) 4583 return result; 4584 if (start) { 4585 if (parser->m_parentParser) { /* we are parsing an external entity */ 4586 parser->m_processor = externalEntityContentProcessor; 4587 return externalEntityContentProcessor(parser, start, end, endPtr); 4588 } else { 4589 parser->m_processor = contentProcessor; 4590 return contentProcessor(parser, start, end, endPtr); 4591 } 4592 } 4593 return result; 4594 } 4595 4596 /* startPtr gets set to non-null if the section is closed, and to null if 4597 the section is not yet closed. 4598 */ 4599 static enum XML_Error 4600 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4601 const char *end, const char **nextPtr, XML_Bool haveMore, 4602 enum XML_Account account) { 4603 const char *s = *startPtr; 4604 const char **eventPP; 4605 const char **eventEndPP; 4606 if (enc == parser->m_encoding) { 4607 eventPP = &parser->m_eventPtr; 4608 *eventPP = s; 4609 eventEndPP = &parser->m_eventEndPtr; 4610 } else { 4611 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4612 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4613 } 4614 *eventPP = s; 4615 *startPtr = NULL; 4616 4617 for (;;) { 4618 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4619 int tok = XmlCdataSectionTok(enc, s, end, &next); 4620 #if XML_GE == 1 4621 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4622 accountingOnAbort(parser); 4623 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4624 } 4625 #else 4626 UNUSED_P(account); 4627 #endif 4628 *eventEndPP = next; 4629 switch (tok) { 4630 case XML_TOK_CDATA_SECT_CLOSE: 4631 if (parser->m_endCdataSectionHandler) 4632 parser->m_endCdataSectionHandler(parser->m_handlerArg); 4633 /* BEGIN disabled code */ 4634 /* see comment under XML_TOK_CDATA_SECT_OPEN */ 4635 else if ((0) && parser->m_characterDataHandler) 4636 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4637 0); 4638 /* END disabled code */ 4639 else if (parser->m_defaultHandler) 4640 reportDefault(parser, enc, s, next); 4641 *startPtr = next; 4642 *nextPtr = next; 4643 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4644 return XML_ERROR_ABORTED; 4645 else 4646 return XML_ERROR_NONE; 4647 case XML_TOK_DATA_NEWLINE: 4648 if (parser->m_characterDataHandler) { 4649 XML_Char c = 0xA; 4650 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 4651 } else if (parser->m_defaultHandler) 4652 reportDefault(parser, enc, s, next); 4653 break; 4654 case XML_TOK_DATA_CHARS: { 4655 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 4656 if (charDataHandler) { 4657 if (MUST_CONVERT(enc, s)) { 4658 for (;;) { 4659 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 4660 const enum XML_Convert_Result convert_res = XmlConvert( 4661 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 4662 *eventEndPP = next; 4663 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4664 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 4665 if ((convert_res == XML_CONVERT_COMPLETED) 4666 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 4667 break; 4668 *eventPP = s; 4669 } 4670 } else 4671 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 4672 (int)((const XML_Char *)next - (const XML_Char *)s)); 4673 } else if (parser->m_defaultHandler) 4674 reportDefault(parser, 
enc, s, next); 4675 } break; 4676 case XML_TOK_INVALID: 4677 *eventPP = next; 4678 return XML_ERROR_INVALID_TOKEN; 4679 case XML_TOK_PARTIAL_CHAR: 4680 if (haveMore) { 4681 *nextPtr = s; 4682 return XML_ERROR_NONE; 4683 } 4684 return XML_ERROR_PARTIAL_CHAR; 4685 case XML_TOK_PARTIAL: 4686 case XML_TOK_NONE: 4687 if (haveMore) { 4688 *nextPtr = s; 4689 return XML_ERROR_NONE; 4690 } 4691 return XML_ERROR_UNCLOSED_CDATA_SECTION; 4692 default: 4693 /* Every token returned by XmlCdataSectionTok() has its own 4694 * explicit case, so this default case will never be executed. 4695 * We retain it as a safety net and exclude it from the coverage 4696 * statistics. 4697 * 4698 * LCOV_EXCL_START 4699 */ 4700 *eventPP = next; 4701 return XML_ERROR_UNEXPECTED_STATE; 4702 /* LCOV_EXCL_STOP */ 4703 } 4704 4705 switch (parser->m_parsingStatus.parsing) { 4706 case XML_SUSPENDED: 4707 *eventPP = next; 4708 *nextPtr = next; 4709 return XML_ERROR_NONE; 4710 case XML_FINISHED: 4711 *eventPP = next; 4712 return XML_ERROR_ABORTED; 4713 case XML_PARSING: 4714 if (parser->m_reenter) { 4715 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 4716 } 4717 /* Fall through */ 4718 default:; 4719 *eventPP = s = next; 4720 } 4721 } 4722 /* not reached */ 4723 } 4724 4725 #ifdef XML_DTD 4726 4727 /* The idea here is to avoid using stack for each IGNORE section when 4728 the whole file is parsed with one call. 4729 */ 4730 static enum XML_Error PTRCALL 4731 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, 4732 const char **endPtr) { 4733 enum XML_Error result 4734 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, 4735 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer); 4736 if (result != XML_ERROR_NONE) 4737 return result; 4738 if (start) { 4739 parser->m_processor = prologProcessor; 4740 return prologProcessor(parser, start, end, endPtr); 4741 } 4742 return result; 4743 } 4744 4745 /* startPtr gets set to non-null is the section is closed, and to null 4746 if the section is not yet closed. 4747 */ 4748 static enum XML_Error 4749 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4750 const char *end, const char **nextPtr, XML_Bool haveMore) { 4751 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4752 int tok; 4753 const char *s = *startPtr; 4754 const char **eventPP; 4755 const char **eventEndPP; 4756 if (enc == parser->m_encoding) { 4757 eventPP = &parser->m_eventPtr; 4758 *eventPP = s; 4759 eventEndPP = &parser->m_eventEndPtr; 4760 } else { 4761 /* It's not entirely clear, but it seems the following two lines 4762 * of code cannot be executed. The only occasions on which 'enc' 4763 * is not 'encoding' are when this function is called 4764 * from the internal entity processing, and IGNORE sections are an 4765 * error in internal entities. 4766 * 4767 * Since it really isn't clear that this is true, we keep the code 4768 * and just remove it from our coverage tests. 4769 * 4770 * LCOV_EXCL_START 4771 */ 4772 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4773 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4774 /* LCOV_EXCL_STOP */ 4775 } 4776 *eventPP = s; 4777 *startPtr = NULL; 4778 tok = XmlIgnoreSectionTok(enc, s, end, &next); 4779 # if XML_GE == 1 4780 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4781 XML_ACCOUNT_DIRECT)) { 4782 accountingOnAbort(parser); 4783 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4784 } 4785 # endif 4786 *eventEndPP = next; 4787 switch (tok) { 4788 case XML_TOK_IGNORE_SECT: 4789 if (parser->m_defaultHandler) 4790 reportDefault(parser, enc, s, next); 4791 *startPtr = next; 4792 *nextPtr = next; 4793 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4794 return XML_ERROR_ABORTED; 4795 else 4796 return XML_ERROR_NONE; 4797 case XML_TOK_INVALID: 4798 *eventPP = next; 4799 return XML_ERROR_INVALID_TOKEN; 4800 case XML_TOK_PARTIAL_CHAR: 4801 if (haveMore) { 4802 *nextPtr = s; 4803 return XML_ERROR_NONE; 4804 } 4805 return XML_ERROR_PARTIAL_CHAR; 4806 case XML_TOK_PARTIAL: 4807 case XML_TOK_NONE: 4808 if (haveMore) { 4809 *nextPtr = s; 4810 return XML_ERROR_NONE; 4811 } 4812 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ 4813 default: 4814 /* All of the tokens that XmlIgnoreSectionTok() returns have 4815 * explicit cases to handle them, so this default case is never 4816 * executed. We keep it as a safety net anyway, and remove it 4817 * from our test coverage statistics. 4818 * 4819 * LCOV_EXCL_START 4820 */ 4821 *eventPP = next; 4822 return XML_ERROR_UNEXPECTED_STATE; 4823 /* LCOV_EXCL_STOP */ 4824 } 4825 /* not reached */ 4826 } 4827 4828 #endif /* XML_DTD */ 4829 4830 static enum XML_Error 4831 initializeEncoding(XML_Parser parser) { 4832 const char *s; 4833 #ifdef XML_UNICODE 4834 char encodingBuf[128]; 4835 /* See comments about `protocolEncodingName` in parserInit() */ 4836 if (! 
parser->m_protocolEncodingName) 4837 s = NULL; 4838 else { 4839 int i; 4840 for (i = 0; parser->m_protocolEncodingName[i]; i++) { 4841 if (i == sizeof(encodingBuf) - 1 4842 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { 4843 encodingBuf[0] = '\0'; 4844 break; 4845 } 4846 encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; 4847 } 4848 encodingBuf[i] = '\0'; 4849 s = encodingBuf; 4850 } 4851 #else 4852 s = parser->m_protocolEncodingName; 4853 #endif 4854 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( 4855 &parser->m_initEncoding, &parser->m_encoding, s)) 4856 return XML_ERROR_NONE; 4857 return handleUnknownEncoding(parser, parser->m_protocolEncodingName); 4858 } 4859 4860 static enum XML_Error 4861 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, 4862 const char *next) { 4863 const char *encodingName = NULL; 4864 const XML_Char *storedEncName = NULL; 4865 const ENCODING *newEncoding = NULL; 4866 const char *version = NULL; 4867 const char *versionend = NULL; 4868 const XML_Char *storedversion = NULL; 4869 int standalone = -1; 4870 4871 #if XML_GE == 1 4872 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, 4873 XML_ACCOUNT_DIRECT)) { 4874 accountingOnAbort(parser); 4875 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4876 } 4877 #endif 4878 4879 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( 4880 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, 4881 &version, &versionend, &encodingName, &newEncoding, &standalone)) { 4882 if (isGeneralTextEntity) 4883 return XML_ERROR_TEXT_DECL; 4884 else 4885 return XML_ERROR_XML_DECL; 4886 } 4887 if (! 
isGeneralTextEntity && standalone == 1) { 4888 parser->m_dtd->standalone = XML_TRUE; 4889 #ifdef XML_DTD 4890 if (parser->m_paramEntityParsing 4891 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) 4892 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 4893 #endif /* XML_DTD */ 4894 } 4895 if (parser->m_xmlDeclHandler) { 4896 if (encodingName != NULL) { 4897 storedEncName = poolStoreString( 4898 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4899 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4900 if (! storedEncName) 4901 return XML_ERROR_NO_MEMORY; 4902 poolFinish(&parser->m_temp2Pool); 4903 } 4904 if (version) { 4905 storedversion 4906 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, 4907 versionend - parser->m_encoding->minBytesPerChar); 4908 if (! storedversion) 4909 return XML_ERROR_NO_MEMORY; 4910 } 4911 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, 4912 standalone); 4913 } else if (parser->m_defaultHandler) 4914 reportDefault(parser, parser->m_encoding, s, next); 4915 if (parser->m_protocolEncodingName == NULL) { 4916 if (newEncoding) { 4917 /* Check that the specified encoding does not conflict with what 4918 * the parser has already deduced. Do we have the same number 4919 * of bytes in the smallest representation of a character? If 4920 * this is UTF-16, is it the same endianness? 4921 */ 4922 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar 4923 || (newEncoding->minBytesPerChar == 2 4924 && newEncoding != parser->m_encoding)) { 4925 parser->m_eventPtr = encodingName; 4926 return XML_ERROR_INCORRECT_ENCODING; 4927 } 4928 parser->m_encoding = newEncoding; 4929 } else if (encodingName) { 4930 enum XML_Error result; 4931 if (! storedEncName) { 4932 storedEncName = poolStoreString( 4933 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4934 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4935 if (! 
storedEncName) 4936 return XML_ERROR_NO_MEMORY; 4937 } 4938 result = handleUnknownEncoding(parser, storedEncName); 4939 poolClear(&parser->m_temp2Pool); 4940 if (result == XML_ERROR_UNKNOWN_ENCODING) 4941 parser->m_eventPtr = encodingName; 4942 return result; 4943 } 4944 } 4945 4946 if (storedEncName || storedversion) 4947 poolClear(&parser->m_temp2Pool); 4948 4949 return XML_ERROR_NONE; 4950 } 4951 4952 static enum XML_Error 4953 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { 4954 if (parser->m_unknownEncodingHandler) { 4955 XML_Encoding info; 4956 int i; 4957 for (i = 0; i < 256; i++) 4958 info.map[i] = -1; 4959 info.convert = NULL; 4960 info.data = NULL; 4961 info.release = NULL; 4962 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, 4963 encodingName, &info)) { 4964 ENCODING *enc; 4965 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); 4966 if (! parser->m_unknownEncodingMem) { 4967 if (info.release) 4968 info.release(info.data); 4969 return XML_ERROR_NO_MEMORY; 4970 } 4971 enc = (parser->m_ns ? 
XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( 4972 parser->m_unknownEncodingMem, info.map, info.convert, info.data); 4973 if (enc) { 4974 parser->m_unknownEncodingData = info.data; 4975 parser->m_unknownEncodingRelease = info.release; 4976 parser->m_encoding = enc; 4977 return XML_ERROR_NONE; 4978 } 4979 } 4980 if (info.release != NULL) 4981 info.release(info.data); 4982 } 4983 return XML_ERROR_UNKNOWN_ENCODING; 4984 } 4985 4986 static enum XML_Error PTRCALL 4987 prologInitProcessor(XML_Parser parser, const char *s, const char *end, 4988 const char **nextPtr) { 4989 enum XML_Error result = initializeEncoding(parser); 4990 if (result != XML_ERROR_NONE) 4991 return result; 4992 parser->m_processor = prologProcessor; 4993 return prologProcessor(parser, s, end, nextPtr); 4994 } 4995 4996 #ifdef XML_DTD 4997 4998 static enum XML_Error PTRCALL 4999 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, 5000 const char **nextPtr) { 5001 enum XML_Error result = initializeEncoding(parser); 5002 if (result != XML_ERROR_NONE) 5003 return result; 5004 5005 /* we know now that XML_Parse(Buffer) has been called, 5006 so we consider the external parameter entity read */ 5007 parser->m_dtd->paramEntityRead = XML_TRUE; 5008 5009 if (parser->m_prologState.inEntityValue) { 5010 parser->m_processor = entityValueInitProcessor; 5011 return entityValueInitProcessor(parser, s, end, nextPtr); 5012 } else { 5013 parser->m_processor = externalParEntProcessor; 5014 return externalParEntProcessor(parser, s, end, nextPtr); 5015 } 5016 } 5017 5018 static enum XML_Error PTRCALL 5019 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, 5020 const char **nextPtr) { 5021 int tok; 5022 const char *start = s; 5023 const char *next = start; 5024 parser->m_eventPtr = start; 5025 5026 for (;;) { 5027 tok = XmlPrologTok(parser->m_encoding, start, end, &next); 5028 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: 5029 - 
storeEntityValue 5030 - processXmlDecl 5031 */ 5032 parser->m_eventEndPtr = next; 5033 if (tok <= 0) { 5034 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5035 *nextPtr = s; 5036 return XML_ERROR_NONE; 5037 } 5038 switch (tok) { 5039 case XML_TOK_INVALID: 5040 return XML_ERROR_INVALID_TOKEN; 5041 case XML_TOK_PARTIAL: 5042 return XML_ERROR_UNCLOSED_TOKEN; 5043 case XML_TOK_PARTIAL_CHAR: 5044 return XML_ERROR_PARTIAL_CHAR; 5045 case XML_TOK_NONE: /* start == end */ 5046 default: 5047 break; 5048 } 5049 /* found end of entity value - can store it now */ 5050 return storeEntityValue(parser, parser->m_encoding, s, end, 5051 XML_ACCOUNT_DIRECT, NULL); 5052 } else if (tok == XML_TOK_XML_DECL) { 5053 enum XML_Error result; 5054 result = processXmlDecl(parser, 0, start, next); 5055 if (result != XML_ERROR_NONE) 5056 return result; 5057 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For 5058 * that to happen, a parameter entity parsing handler must have attempted 5059 * to suspend the parser, which fails and raises an error. The parser can 5060 * be aborted, but can't be suspended. 5061 */ 5062 if (parser->m_parsingStatus.parsing == XML_FINISHED) 5063 return XML_ERROR_ABORTED; 5064 *nextPtr = next; 5065 /* stop scanning for text declaration - we found one */ 5066 parser->m_processor = entityValueProcessor; 5067 return entityValueProcessor(parser, next, end, nextPtr); 5068 } 5069 /* XmlPrologTok has now set the encoding based on the BOM it found, and we 5070 must move s and nextPtr forward to consume the BOM. 5071 5072 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we 5073 would leave the BOM in the buffer and return. On the next call to this 5074 function, our XmlPrologTok call would return XML_TOK_INVALID, since it 5075 is not valid to have multiple BOMs. 5076 */ 5077 else if (tok == XML_TOK_BOM) { 5078 # if XML_GE == 1 5079 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5080 XML_ACCOUNT_DIRECT)) { 5081 accountingOnAbort(parser); 5082 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5083 } 5084 # endif 5085 5086 *nextPtr = next; 5087 s = next; 5088 } 5089 /* If we get this token, we have the start of what might be a 5090 normal tag, but not a declaration (i.e. it doesn't begin with 5091 "<!" or "<?"). In a DTD context, that isn't legal. 5092 */ 5093 else if (tok == XML_TOK_INSTANCE_START) { 5094 *nextPtr = next; 5095 return XML_ERROR_SYNTAX; 5096 } 5097 start = next; 5098 parser->m_eventPtr = start; 5099 } 5100 } 5101 5102 static enum XML_Error PTRCALL 5103 externalParEntProcessor(XML_Parser parser, const char *s, const char *end, 5104 const char **nextPtr) { 5105 const char *next = s; 5106 int tok; 5107 5108 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5109 if (tok <= 0) { 5110 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5111 *nextPtr = s; 5112 return XML_ERROR_NONE; 5113 } 5114 switch (tok) { 5115 case XML_TOK_INVALID: 5116 return XML_ERROR_INVALID_TOKEN; 5117 case XML_TOK_PARTIAL: 5118 return XML_ERROR_UNCLOSED_TOKEN; 5119 case XML_TOK_PARTIAL_CHAR: 5120 return XML_ERROR_PARTIAL_CHAR; 5121 case XML_TOK_NONE: /* start == end */ 5122 default: 5123 break; 5124 } 5125 } 5126 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. 5127 However, when parsing an external subset, doProlog will not accept a BOM 5128 as valid, and report a syntax error, so we have to skip the BOM, and 5129 account for the BOM bytes. 5130 */ 5131 else if (tok == XML_TOK_BOM) { 5132 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5133 XML_ACCOUNT_DIRECT)) { 5134 accountingOnAbort(parser); 5135 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5136 } 5137 5138 s = next; 5139 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5140 } 5141 5142 parser->m_processor = prologProcessor; 5143 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5144 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 5145 XML_ACCOUNT_DIRECT); 5146 } 5147 5148 static enum XML_Error PTRCALL 5149 entityValueProcessor(XML_Parser parser, const char *s, const char *end, 5150 const char **nextPtr) { 5151 const char *start = s; 5152 const char *next = s; 5153 const ENCODING *enc = parser->m_encoding; 5154 int tok; 5155 5156 for (;;) { 5157 tok = XmlPrologTok(enc, start, end, &next); 5158 /* Note: These bytes are accounted later in: 5159 - storeEntityValue 5160 */ 5161 if (tok <= 0) { 5162 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 5163 *nextPtr = s; 5164 return XML_ERROR_NONE; 5165 } 5166 switch (tok) { 5167 case XML_TOK_INVALID: 5168 return XML_ERROR_INVALID_TOKEN; 5169 case XML_TOK_PARTIAL: 5170 return XML_ERROR_UNCLOSED_TOKEN; 5171 case XML_TOK_PARTIAL_CHAR: 5172 return XML_ERROR_PARTIAL_CHAR; 5173 case XML_TOK_NONE: /* start == end */ 5174 default: 5175 break; 5176 } 5177 /* found end of entity value - can store it now */ 5178 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); 5179 } 5180 /* If we get this token, we have the start of what might be a 5181 normal tag, but not a declaration (i.e. it doesn't begin with 5182 "<!" or "<?"). In a DTD context, that isn't legal. 
5183 */ 5184 else if (tok == XML_TOK_INSTANCE_START) { 5185 *nextPtr = next; 5186 return XML_ERROR_SYNTAX; 5187 } 5188 5189 start = next; 5190 } 5191 } 5192 5193 #endif /* XML_DTD */ 5194 5195 static enum XML_Error PTRCALL 5196 prologProcessor(XML_Parser parser, const char *s, const char *end, 5197 const char **nextPtr) { 5198 const char *next = s; 5199 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5200 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5201 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 5202 XML_ACCOUNT_DIRECT); 5203 } 5204 5205 static enum XML_Error 5206 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, 5207 int tok, const char *next, const char **nextPtr, XML_Bool haveMore, 5208 XML_Bool allowClosingDoctype, enum XML_Account account) { 5209 #ifdef XML_DTD 5210 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; 5211 #endif /* XML_DTD */ 5212 static const XML_Char atypeCDATA[] 5213 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; 5214 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; 5215 static const XML_Char atypeIDREF[] 5216 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; 5217 static const XML_Char atypeIDREFS[] 5218 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; 5219 static const XML_Char atypeENTITY[] 5220 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; 5221 static const XML_Char atypeENTITIES[] 5222 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, 5223 ASCII_I, ASCII_E, ASCII_S, '\0'}; 5224 static const XML_Char atypeNMTOKEN[] 5225 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; 5226 static const XML_Char atypeNMTOKENS[] 5227 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, 5228 ASCII_E, ASCII_N, ASCII_S, '\0'}; 5229 static const XML_Char notationPrefix[] 5230 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, 5231 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; 5232 static 
const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; 5233 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; 5234 5235 #ifndef XML_DTD 5236 UNUSED_P(account); 5237 #endif 5238 5239 /* save one level of indirection */ 5240 DTD *const dtd = parser->m_dtd; 5241 5242 const char **eventPP; 5243 const char **eventEndPP; 5244 enum XML_Content_Quant quant; 5245 5246 if (enc == parser->m_encoding) { 5247 eventPP = &parser->m_eventPtr; 5248 eventEndPP = &parser->m_eventEndPtr; 5249 } else { 5250 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 5251 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 5252 } 5253 5254 for (;;) { 5255 int role; 5256 XML_Bool handleDefault = XML_TRUE; 5257 *eventPP = s; 5258 *eventEndPP = next; 5259 if (tok <= 0) { 5260 if (haveMore && tok != XML_TOK_INVALID) { 5261 *nextPtr = s; 5262 return XML_ERROR_NONE; 5263 } 5264 switch (tok) { 5265 case XML_TOK_INVALID: 5266 *eventPP = next; 5267 return XML_ERROR_INVALID_TOKEN; 5268 case XML_TOK_PARTIAL: 5269 return XML_ERROR_UNCLOSED_TOKEN; 5270 case XML_TOK_PARTIAL_CHAR: 5271 return XML_ERROR_PARTIAL_CHAR; 5272 case -XML_TOK_PROLOG_S: 5273 tok = -tok; 5274 break; 5275 case XML_TOK_NONE: 5276 #ifdef XML_DTD 5277 /* for internal PE NOT referenced between declarations */ 5278 if (enc != parser->m_encoding 5279 && ! parser->m_openInternalEntities->betweenDecl) { 5280 *nextPtr = s; 5281 return XML_ERROR_NONE; 5282 } 5283 /* WFC: PE Between Declarations - must check that PE contains 5284 complete markup, not only for external PEs, but also for 5285 internal PEs if the reference occurs between declarations. 
5286 */ 5287 if (parser->m_isParamEntity || enc != parser->m_encoding) { 5288 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) 5289 == XML_ROLE_ERROR) 5290 return XML_ERROR_INCOMPLETE_PE; 5291 *nextPtr = s; 5292 return XML_ERROR_NONE; 5293 } 5294 #endif /* XML_DTD */ 5295 return XML_ERROR_NO_ELEMENTS; 5296 default: 5297 tok = -tok; 5298 next = end; 5299 break; 5300 } 5301 } 5302 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); 5303 #if XML_GE == 1 5304 switch (role) { 5305 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor 5306 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl 5307 # ifdef XML_DTD 5308 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl 5309 # endif 5310 break; 5311 default: 5312 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 5313 accountingOnAbort(parser); 5314 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5315 } 5316 } 5317 #endif 5318 switch (role) { 5319 case XML_ROLE_XML_DECL: { 5320 enum XML_Error result = processXmlDecl(parser, 0, s, next); 5321 if (result != XML_ERROR_NONE) 5322 return result; 5323 enc = parser->m_encoding; 5324 handleDefault = XML_FALSE; 5325 } break; 5326 case XML_ROLE_DOCTYPE_NAME: 5327 if (parser->m_startDoctypeDeclHandler) { 5328 parser->m_doctypeName 5329 = poolStoreString(&parser->m_tempPool, enc, s, next); 5330 if (! 
parser->m_doctypeName) 5331 return XML_ERROR_NO_MEMORY; 5332 poolFinish(&parser->m_tempPool); 5333 parser->m_doctypePubid = NULL; 5334 handleDefault = XML_FALSE; 5335 } 5336 parser->m_doctypeSysid = NULL; /* always initialize to NULL */ 5337 break; 5338 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: 5339 if (parser->m_startDoctypeDeclHandler) { 5340 parser->m_startDoctypeDeclHandler( 5341 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 5342 parser->m_doctypePubid, 1); 5343 parser->m_doctypeName = NULL; 5344 poolClear(&parser->m_tempPool); 5345 handleDefault = XML_FALSE; 5346 } 5347 break; 5348 #ifdef XML_DTD 5349 case XML_ROLE_TEXT_DECL: { 5350 enum XML_Error result = processXmlDecl(parser, 1, s, next); 5351 if (result != XML_ERROR_NONE) 5352 return result; 5353 enc = parser->m_encoding; 5354 handleDefault = XML_FALSE; 5355 } break; 5356 #endif /* XML_DTD */ 5357 case XML_ROLE_DOCTYPE_PUBLIC_ID: 5358 #ifdef XML_DTD 5359 parser->m_useForeignDTD = XML_FALSE; 5360 parser->m_declEntity = (ENTITY *)lookup( 5361 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5362 if (! parser->m_declEntity) 5363 return XML_ERROR_NO_MEMORY; 5364 #endif /* XML_DTD */ 5365 dtd->hasParamEntityRefs = XML_TRUE; 5366 if (parser->m_startDoctypeDeclHandler) { 5367 XML_Char *pubId; 5368 if (! XmlIsPublicId(enc, s, next, eventPP)) 5369 return XML_ERROR_PUBLICID; 5370 pubId = poolStoreString(&parser->m_tempPool, enc, 5371 s + enc->minBytesPerChar, 5372 next - enc->minBytesPerChar); 5373 if (! pubId) 5374 return XML_ERROR_NO_MEMORY; 5375 normalizePublicId(pubId); 5376 poolFinish(&parser->m_tempPool); 5377 parser->m_doctypePubid = pubId; 5378 handleDefault = XML_FALSE; 5379 goto alreadyChecked; 5380 } 5381 /* fall through */ 5382 case XML_ROLE_ENTITY_PUBLIC_ID: 5383 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 5384 return XML_ERROR_PUBLICID; 5385 alreadyChecked: 5386 if (dtd->keepProcessing && parser->m_declEntity) { 5387 XML_Char *tem 5388 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5389 next - enc->minBytesPerChar); 5390 if (! tem) 5391 return XML_ERROR_NO_MEMORY; 5392 normalizePublicId(tem); 5393 parser->m_declEntity->publicId = tem; 5394 poolFinish(&dtd->pool); 5395 /* Don't suppress the default handler if we fell through from 5396 * the XML_ROLE_DOCTYPE_PUBLIC_ID case. 5397 */ 5398 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) 5399 handleDefault = XML_FALSE; 5400 } 5401 break; 5402 case XML_ROLE_DOCTYPE_CLOSE: 5403 if (allowClosingDoctype != XML_TRUE) { 5404 /* Must not close doctype from within expanded parameter entities */ 5405 return XML_ERROR_INVALID_TOKEN; 5406 } 5407 5408 if (parser->m_doctypeName) { 5409 parser->m_startDoctypeDeclHandler( 5410 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, 5411 parser->m_doctypePubid, 0); 5412 poolClear(&parser->m_tempPool); 5413 handleDefault = XML_FALSE; 5414 } 5415 /* parser->m_doctypeSysid will be non-NULL in the case of a previous 5416 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler 5417 was not set, indicating an external subset 5418 */ 5419 #ifdef XML_DTD 5420 if (parser->m_doctypeSysid || parser->m_useForeignDTD) { 5421 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 5422 dtd->hasParamEntityRefs = XML_TRUE; 5423 if (parser->m_paramEntityParsing 5424 && parser->m_externalEntityRefHandler) { 5425 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5426 externalSubsetName, sizeof(ENTITY)); 5427 if (! entity) { 5428 /* The external subset name "#" will have already been 5429 * inserted into the hash table at the start of the 5430 * external entity parsing, so no allocation will happen 5431 * and lookup() cannot fail. 
5432 */ 5433 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 5434 } 5435 if (parser->m_useForeignDTD) 5436 entity->base = parser->m_curBase; 5437 dtd->paramEntityRead = XML_FALSE; 5438 if (! parser->m_externalEntityRefHandler( 5439 parser->m_externalEntityRefHandlerArg, 0, entity->base, 5440 entity->systemId, entity->publicId)) 5441 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5442 if (dtd->paramEntityRead) { 5443 if (! dtd->standalone && parser->m_notStandaloneHandler 5444 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5445 return XML_ERROR_NOT_STANDALONE; 5446 } 5447 /* if we didn't read the foreign DTD then this means that there 5448 is no external subset and we must reset dtd->hasParamEntityRefs 5449 */ 5450 else if (! parser->m_doctypeSysid) 5451 dtd->hasParamEntityRefs = hadParamEntityRefs; 5452 /* end of DTD - no need to update dtd->keepProcessing */ 5453 } 5454 parser->m_useForeignDTD = XML_FALSE; 5455 } 5456 #endif /* XML_DTD */ 5457 if (parser->m_endDoctypeDeclHandler) { 5458 parser->m_endDoctypeDeclHandler(parser->m_handlerArg); 5459 handleDefault = XML_FALSE; 5460 } 5461 break; 5462 case XML_ROLE_INSTANCE_START: 5463 #ifdef XML_DTD 5464 /* if there is no DOCTYPE declaration then now is the 5465 last chance to read the foreign DTD 5466 */ 5467 if (parser->m_useForeignDTD) { 5468 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; 5469 dtd->hasParamEntityRefs = XML_TRUE; 5470 if (parser->m_paramEntityParsing 5471 && parser->m_externalEntityRefHandler) { 5472 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5473 externalSubsetName, sizeof(ENTITY)); 5474 if (! entity) 5475 return XML_ERROR_NO_MEMORY; 5476 entity->base = parser->m_curBase; 5477 dtd->paramEntityRead = XML_FALSE; 5478 if (! parser->m_externalEntityRefHandler( 5479 parser->m_externalEntityRefHandlerArg, 0, entity->base, 5480 entity->systemId, entity->publicId)) 5481 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 5482 if (dtd->paramEntityRead) { 5483 if (! 
dtd->standalone && parser->m_notStandaloneHandler 5484 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5485 return XML_ERROR_NOT_STANDALONE; 5486 } 5487 /* if we didn't read the foreign DTD then this means that there 5488 is no external subset and we must reset dtd->hasParamEntityRefs 5489 */ 5490 else 5491 dtd->hasParamEntityRefs = hadParamEntityRefs; 5492 /* end of DTD - no need to update dtd->keepProcessing */ 5493 } 5494 } 5495 #endif /* XML_DTD */ 5496 parser->m_processor = contentProcessor; 5497 return contentProcessor(parser, s, end, nextPtr); 5498 case XML_ROLE_ATTLIST_ELEMENT_NAME: 5499 parser->m_declElementType = getElementType(parser, enc, s, next); 5500 if (! parser->m_declElementType) 5501 return XML_ERROR_NO_MEMORY; 5502 goto checkAttListDeclHandler; 5503 case XML_ROLE_ATTRIBUTE_NAME: 5504 parser->m_declAttributeId = getAttributeId(parser, enc, s, next); 5505 if (! parser->m_declAttributeId) 5506 return XML_ERROR_NO_MEMORY; 5507 parser->m_declAttributeIsCdata = XML_FALSE; 5508 parser->m_declAttributeType = NULL; 5509 parser->m_declAttributeIsId = XML_FALSE; 5510 goto checkAttListDeclHandler; 5511 case XML_ROLE_ATTRIBUTE_TYPE_CDATA: 5512 parser->m_declAttributeIsCdata = XML_TRUE; 5513 parser->m_declAttributeType = atypeCDATA; 5514 goto checkAttListDeclHandler; 5515 case XML_ROLE_ATTRIBUTE_TYPE_ID: 5516 parser->m_declAttributeIsId = XML_TRUE; 5517 parser->m_declAttributeType = atypeID; 5518 goto checkAttListDeclHandler; 5519 case XML_ROLE_ATTRIBUTE_TYPE_IDREF: 5520 parser->m_declAttributeType = atypeIDREF; 5521 goto checkAttListDeclHandler; 5522 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: 5523 parser->m_declAttributeType = atypeIDREFS; 5524 goto checkAttListDeclHandler; 5525 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: 5526 parser->m_declAttributeType = atypeENTITY; 5527 goto checkAttListDeclHandler; 5528 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: 5529 parser->m_declAttributeType = atypeENTITIES; 5530 goto checkAttListDeclHandler; 5531 case 
XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: 5532 parser->m_declAttributeType = atypeNMTOKEN; 5533 goto checkAttListDeclHandler; 5534 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: 5535 parser->m_declAttributeType = atypeNMTOKENS; 5536 checkAttListDeclHandler: 5537 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 5538 handleDefault = XML_FALSE; 5539 break; 5540 case XML_ROLE_ATTRIBUTE_ENUM_VALUE: 5541 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: 5542 if (dtd->keepProcessing && parser->m_attlistDeclHandler) { 5543 const XML_Char *prefix; 5544 if (parser->m_declAttributeType) { 5545 prefix = enumValueSep; 5546 } else { 5547 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix 5548 : enumValueStart); 5549 } 5550 if (! poolAppendString(&parser->m_tempPool, prefix)) 5551 return XML_ERROR_NO_MEMORY; 5552 if (! poolAppend(&parser->m_tempPool, enc, s, next)) 5553 return XML_ERROR_NO_MEMORY; 5554 parser->m_declAttributeType = parser->m_tempPool.start; 5555 handleDefault = XML_FALSE; 5556 } 5557 break; 5558 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: 5559 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: 5560 if (dtd->keepProcessing) { 5561 if (! defineAttribute(parser->m_declElementType, 5562 parser->m_declAttributeId, 5563 parser->m_declAttributeIsCdata, 5564 parser->m_declAttributeIsId, 0, parser)) 5565 return XML_ERROR_NO_MEMORY; 5566 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5567 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5568 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5569 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5570 /* Enumerated or Notation type */ 5571 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5572 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5573 return XML_ERROR_NO_MEMORY; 5574 parser->m_declAttributeType = parser->m_tempPool.start; 5575 poolFinish(&parser->m_tempPool); 5576 } 5577 *eventEndPP = s; 5578 parser->m_attlistDeclHandler( 5579 parser->m_handlerArg, parser->m_declElementType->name, 5580 parser->m_declAttributeId->name, parser->m_declAttributeType, 0, 5581 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); 5582 handleDefault = XML_FALSE; 5583 } 5584 } 5585 poolClear(&parser->m_tempPool); 5586 break; 5587 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: 5588 case XML_ROLE_FIXED_ATTRIBUTE_VALUE: 5589 if (dtd->keepProcessing) { 5590 const XML_Char *attVal; 5591 enum XML_Error result = storeAttributeValue( 5592 parser, enc, parser->m_declAttributeIsCdata, 5593 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, 5594 XML_ACCOUNT_NONE); 5595 if (result) 5596 return result; 5597 attVal = poolStart(&dtd->pool); 5598 poolFinish(&dtd->pool); 5599 /* ID attributes aren't allowed to have a default */ 5600 if (! defineAttribute( 5601 parser->m_declElementType, parser->m_declAttributeId, 5602 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) 5603 return XML_ERROR_NO_MEMORY; 5604 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { 5605 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) 5606 || (*parser->m_declAttributeType == XML_T(ASCII_N) 5607 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { 5608 /* Enumerated or Notation type */ 5609 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) 5610 || ! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 5611 return XML_ERROR_NO_MEMORY; 5612 parser->m_declAttributeType = parser->m_tempPool.start; 5613 poolFinish(&parser->m_tempPool); 5614 } 5615 *eventEndPP = s; 5616 parser->m_attlistDeclHandler( 5617 parser->m_handlerArg, parser->m_declElementType->name, 5618 parser->m_declAttributeId->name, parser->m_declAttributeType, 5619 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); 5620 poolClear(&parser->m_tempPool); 5621 handleDefault = XML_FALSE; 5622 } 5623 } 5624 break; 5625 case XML_ROLE_ENTITY_VALUE: 5626 if (dtd->keepProcessing) { 5627 #if XML_GE == 1 5628 // This will store the given replacement text in 5629 // parser->m_declEntity->textPtr. 5630 enum XML_Error result = callStoreEntityValue( 5631 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, 5632 XML_ACCOUNT_NONE); 5633 if (parser->m_declEntity) { 5634 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); 5635 parser->m_declEntity->textLen 5636 = (int)(poolLength(&dtd->entityValuePool)); 5637 poolFinish(&dtd->entityValuePool); 5638 if (parser->m_entityDeclHandler) { 5639 *eventEndPP = s; 5640 parser->m_entityDeclHandler( 5641 parser->m_handlerArg, parser->m_declEntity->name, 5642 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5643 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5644 handleDefault = XML_FALSE; 5645 } 5646 } else 5647 poolDiscard(&dtd->entityValuePool); 5648 if (result != XML_ERROR_NONE) 5649 return result; 5650 #else 5651 // This will store "&entity123;" in parser->m_declEntity->textPtr 5652 // to end up as "&entity123;" in the handler. 
5653 if (parser->m_declEntity != NULL) { 5654 const enum XML_Error result 5655 = storeSelfEntityValue(parser, parser->m_declEntity); 5656 if (result != XML_ERROR_NONE) 5657 return result; 5658 5659 if (parser->m_entityDeclHandler) { 5660 *eventEndPP = s; 5661 parser->m_entityDeclHandler( 5662 parser->m_handlerArg, parser->m_declEntity->name, 5663 parser->m_declEntity->is_param, parser->m_declEntity->textPtr, 5664 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); 5665 handleDefault = XML_FALSE; 5666 } 5667 } 5668 #endif 5669 } 5670 break; 5671 case XML_ROLE_DOCTYPE_SYSTEM_ID: 5672 #ifdef XML_DTD 5673 parser->m_useForeignDTD = XML_FALSE; 5674 #endif /* XML_DTD */ 5675 dtd->hasParamEntityRefs = XML_TRUE; 5676 if (parser->m_startDoctypeDeclHandler) { 5677 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, 5678 s + enc->minBytesPerChar, 5679 next - enc->minBytesPerChar); 5680 if (parser->m_doctypeSysid == NULL) 5681 return XML_ERROR_NO_MEMORY; 5682 poolFinish(&parser->m_tempPool); 5683 handleDefault = XML_FALSE; 5684 } 5685 #ifdef XML_DTD 5686 else 5687 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL 5688 for the case where no parser->m_startDoctypeDeclHandler is set */ 5689 parser->m_doctypeSysid = externalSubsetName; 5690 #endif /* XML_DTD */ 5691 if (! dtd->standalone 5692 #ifdef XML_DTD 5693 && ! parser->m_paramEntityParsing 5694 #endif /* XML_DTD */ 5695 && parser->m_notStandaloneHandler 5696 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 5697 return XML_ERROR_NOT_STANDALONE; 5698 #ifndef XML_DTD 5699 break; 5700 #else /* XML_DTD */ 5701 if (! parser->m_declEntity) { 5702 parser->m_declEntity = (ENTITY *)lookup( 5703 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); 5704 if (! 
parser->m_declEntity) 5705 return XML_ERROR_NO_MEMORY; 5706 parser->m_declEntity->publicId = NULL; 5707 } 5708 #endif /* XML_DTD */ 5709 /* fall through */ 5710 case XML_ROLE_ENTITY_SYSTEM_ID: 5711 if (dtd->keepProcessing && parser->m_declEntity) { 5712 parser->m_declEntity->systemId 5713 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 5714 next - enc->minBytesPerChar); 5715 if (! parser->m_declEntity->systemId) 5716 return XML_ERROR_NO_MEMORY; 5717 parser->m_declEntity->base = parser->m_curBase; 5718 poolFinish(&dtd->pool); 5719 /* Don't suppress the default handler if we fell through from 5720 * the XML_ROLE_DOCTYPE_SYSTEM_ID case. 5721 */ 5722 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) 5723 handleDefault = XML_FALSE; 5724 } 5725 break; 5726 case XML_ROLE_ENTITY_COMPLETE: 5727 #if XML_GE == 0 5728 // This will store "&entity123;" in entity->textPtr 5729 // to end up as "&entity123;" in the handler. 5730 if (parser->m_declEntity != NULL) { 5731 const enum XML_Error result 5732 = storeSelfEntityValue(parser, parser->m_declEntity); 5733 if (result != XML_ERROR_NONE) 5734 return result; 5735 } 5736 #endif 5737 if (dtd->keepProcessing && parser->m_declEntity 5738 && parser->m_entityDeclHandler) { 5739 *eventEndPP = s; 5740 parser->m_entityDeclHandler( 5741 parser->m_handlerArg, parser->m_declEntity->name, 5742 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, 5743 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); 5744 handleDefault = XML_FALSE; 5745 } 5746 break; 5747 case XML_ROLE_ENTITY_NOTATION_NAME: 5748 if (dtd->keepProcessing && parser->m_declEntity) { 5749 parser->m_declEntity->notation 5750 = poolStoreString(&dtd->pool, enc, s, next); 5751 if (! 
parser->m_declEntity->notation) 5752 return XML_ERROR_NO_MEMORY; 5753 poolFinish(&dtd->pool); 5754 if (parser->m_unparsedEntityDeclHandler) { 5755 *eventEndPP = s; 5756 parser->m_unparsedEntityDeclHandler( 5757 parser->m_handlerArg, parser->m_declEntity->name, 5758 parser->m_declEntity->base, parser->m_declEntity->systemId, 5759 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5760 handleDefault = XML_FALSE; 5761 } else if (parser->m_entityDeclHandler) { 5762 *eventEndPP = s; 5763 parser->m_entityDeclHandler( 5764 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, 5765 parser->m_declEntity->base, parser->m_declEntity->systemId, 5766 parser->m_declEntity->publicId, parser->m_declEntity->notation); 5767 handleDefault = XML_FALSE; 5768 } 5769 } 5770 break; 5771 case XML_ROLE_GENERAL_ENTITY_NAME: { 5772 if (XmlPredefinedEntityName(enc, s, next)) { 5773 parser->m_declEntity = NULL; 5774 break; 5775 } 5776 if (dtd->keepProcessing) { 5777 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5778 if (! name) 5779 return XML_ERROR_NO_MEMORY; 5780 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, 5781 name, sizeof(ENTITY)); 5782 if (! parser->m_declEntity) 5783 return XML_ERROR_NO_MEMORY; 5784 if (parser->m_declEntity->name != name) { 5785 poolDiscard(&dtd->pool); 5786 parser->m_declEntity = NULL; 5787 } else { 5788 poolFinish(&dtd->pool); 5789 parser->m_declEntity->publicId = NULL; 5790 parser->m_declEntity->is_param = XML_FALSE; 5791 /* if we have a parent parser or are reading an internal parameter 5792 entity, then the entity declaration is not considered "internal" 5793 */ 5794 parser->m_declEntity->is_internal 5795 = ! 
(parser->m_parentParser || parser->m_openInternalEntities); 5796 if (parser->m_entityDeclHandler) 5797 handleDefault = XML_FALSE; 5798 } 5799 } else { 5800 poolDiscard(&dtd->pool); 5801 parser->m_declEntity = NULL; 5802 } 5803 } break; 5804 case XML_ROLE_PARAM_ENTITY_NAME: 5805 #ifdef XML_DTD 5806 if (dtd->keepProcessing) { 5807 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); 5808 if (! name) 5809 return XML_ERROR_NO_MEMORY; 5810 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, 5811 name, sizeof(ENTITY)); 5812 if (! parser->m_declEntity) 5813 return XML_ERROR_NO_MEMORY; 5814 if (parser->m_declEntity->name != name) { 5815 poolDiscard(&dtd->pool); 5816 parser->m_declEntity = NULL; 5817 } else { 5818 poolFinish(&dtd->pool); 5819 parser->m_declEntity->publicId = NULL; 5820 parser->m_declEntity->is_param = XML_TRUE; 5821 /* if we have a parent parser or are reading an internal parameter 5822 entity, then the entity declaration is not considered "internal" 5823 */ 5824 parser->m_declEntity->is_internal 5825 = ! (parser->m_parentParser || parser->m_openInternalEntities); 5826 if (parser->m_entityDeclHandler) 5827 handleDefault = XML_FALSE; 5828 } 5829 } else { 5830 poolDiscard(&dtd->pool); 5831 parser->m_declEntity = NULL; 5832 } 5833 #else /* not XML_DTD */ 5834 parser->m_declEntity = NULL; 5835 #endif /* XML_DTD */ 5836 break; 5837 case XML_ROLE_NOTATION_NAME: 5838 parser->m_declNotationPublicId = NULL; 5839 parser->m_declNotationName = NULL; 5840 if (parser->m_notationDeclHandler) { 5841 parser->m_declNotationName 5842 = poolStoreString(&parser->m_tempPool, enc, s, next); 5843 if (! parser->m_declNotationName) 5844 return XML_ERROR_NO_MEMORY; 5845 poolFinish(&parser->m_tempPool); 5846 handleDefault = XML_FALSE; 5847 } 5848 break; 5849 case XML_ROLE_NOTATION_PUBLIC_ID: 5850 if (! 
XmlIsPublicId(enc, s, next, eventPP)) 5851 return XML_ERROR_PUBLICID; 5852 if (parser 5853 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ 5854 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, 5855 s + enc->minBytesPerChar, 5856 next - enc->minBytesPerChar); 5857 if (! tem) 5858 return XML_ERROR_NO_MEMORY; 5859 normalizePublicId(tem); 5860 parser->m_declNotationPublicId = tem; 5861 poolFinish(&parser->m_tempPool); 5862 handleDefault = XML_FALSE; 5863 } 5864 break; 5865 case XML_ROLE_NOTATION_SYSTEM_ID: 5866 if (parser->m_declNotationName && parser->m_notationDeclHandler) { 5867 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, 5868 s + enc->minBytesPerChar, 5869 next - enc->minBytesPerChar); 5870 if (! systemId) 5871 return XML_ERROR_NO_MEMORY; 5872 *eventEndPP = s; 5873 parser->m_notationDeclHandler( 5874 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5875 systemId, parser->m_declNotationPublicId); 5876 handleDefault = XML_FALSE; 5877 } 5878 poolClear(&parser->m_tempPool); 5879 break; 5880 case XML_ROLE_NOTATION_NO_SYSTEM_ID: 5881 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { 5882 *eventEndPP = s; 5883 parser->m_notationDeclHandler( 5884 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 5885 0, parser->m_declNotationPublicId); 5886 handleDefault = XML_FALSE; 5887 } 5888 poolClear(&parser->m_tempPool); 5889 break; 5890 case XML_ROLE_ERROR: 5891 switch (tok) { 5892 case XML_TOK_PARAM_ENTITY_REF: 5893 /* PE references in internal subset are 5894 not allowed within declarations. 
*/ 5895 return XML_ERROR_PARAM_ENTITY_REF; 5896 case XML_TOK_XML_DECL: 5897 return XML_ERROR_MISPLACED_XML_PI; 5898 default: 5899 return XML_ERROR_SYNTAX; 5900 } 5901 #ifdef XML_DTD 5902 case XML_ROLE_IGNORE_SECT: { 5903 enum XML_Error result; 5904 if (parser->m_defaultHandler) 5905 reportDefault(parser, enc, s, next); 5906 handleDefault = XML_FALSE; 5907 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); 5908 if (result != XML_ERROR_NONE) 5909 return result; 5910 else if (! next) { 5911 parser->m_processor = ignoreSectionProcessor; 5912 return result; 5913 } 5914 } break; 5915 #endif /* XML_DTD */ 5916 case XML_ROLE_GROUP_OPEN: 5917 if (parser->m_prologState.level >= parser->m_groupSize) { 5918 if (parser->m_groupSize) { 5919 { 5920 /* Detect and prevent integer overflow */ 5921 if (parser->m_groupSize > (unsigned int)(-1) / 2u) { 5922 return XML_ERROR_NO_MEMORY; 5923 } 5924 5925 char *const new_connector = REALLOC( 5926 parser, parser->m_groupConnector, parser->m_groupSize *= 2); 5927 if (new_connector == NULL) { 5928 parser->m_groupSize /= 2; 5929 return XML_ERROR_NO_MEMORY; 5930 } 5931 parser->m_groupConnector = new_connector; 5932 } 5933 5934 if (dtd->scaffIndex) { 5935 /* Detect and prevent integer overflow. 5936 * The preprocessor guard addresses the "always false" warning 5937 * from -Wtype-limits on platforms where 5938 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 5939 #if UINT_MAX >= SIZE_MAX 5940 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { 5941 parser->m_groupSize /= 2; 5942 return XML_ERROR_NO_MEMORY; 5943 } 5944 #endif 5945 5946 int *const new_scaff_index = REALLOC( 5947 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); 5948 if (new_scaff_index == NULL) { 5949 parser->m_groupSize /= 2; 5950 return XML_ERROR_NO_MEMORY; 5951 } 5952 dtd->scaffIndex = new_scaff_index; 5953 } 5954 } else { 5955 parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32); 5956 if (! 
parser->m_groupConnector) { 5957 parser->m_groupSize = 0; 5958 return XML_ERROR_NO_MEMORY; 5959 } 5960 } 5961 } 5962 parser->m_groupConnector[parser->m_prologState.level] = 0; 5963 if (dtd->in_eldecl) { 5964 int myindex = nextScaffoldPart(parser); 5965 if (myindex < 0) 5966 return XML_ERROR_NO_MEMORY; 5967 assert(dtd->scaffIndex != NULL); 5968 dtd->scaffIndex[dtd->scaffLevel] = myindex; 5969 dtd->scaffLevel++; 5970 dtd->scaffold[myindex].type = XML_CTYPE_SEQ; 5971 if (parser->m_elementDeclHandler) 5972 handleDefault = XML_FALSE; 5973 } 5974 break; 5975 case XML_ROLE_GROUP_SEQUENCE: 5976 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) 5977 return XML_ERROR_SYNTAX; 5978 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; 5979 if (dtd->in_eldecl && parser->m_elementDeclHandler) 5980 handleDefault = XML_FALSE; 5981 break; 5982 case XML_ROLE_GROUP_CHOICE: 5983 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) 5984 return XML_ERROR_SYNTAX; 5985 if (dtd->in_eldecl 5986 && ! parser->m_groupConnector[parser->m_prologState.level] 5987 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5988 != XML_CTYPE_MIXED)) { 5989 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 5990 = XML_CTYPE_CHOICE; 5991 if (parser->m_elementDeclHandler) 5992 handleDefault = XML_FALSE; 5993 } 5994 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; 5995 break; 5996 case XML_ROLE_PARAM_ENTITY_REF: 5997 #ifdef XML_DTD 5998 case XML_ROLE_INNER_PARAM_ENTITY_REF: 5999 dtd->hasParamEntityRefs = XML_TRUE; 6000 if (! parser->m_paramEntityParsing) 6001 dtd->keepProcessing = dtd->standalone; 6002 else { 6003 const XML_Char *name; 6004 ENTITY *entity; 6005 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 6006 next - enc->minBytesPerChar); 6007 if (! 
name) 6008 return XML_ERROR_NO_MEMORY; 6009 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 6010 poolDiscard(&dtd->pool); 6011 /* first, determine if a check for an existing declaration is needed; 6012 if yes, check that the entity exists, and that it is internal, 6013 otherwise call the skipped entity handler 6014 */ 6015 if (parser->m_prologState.documentEntity 6016 && (dtd->standalone ? ! parser->m_openInternalEntities 6017 : ! dtd->hasParamEntityRefs)) { 6018 if (! entity) 6019 return XML_ERROR_UNDEFINED_ENTITY; 6020 else if (! entity->is_internal) { 6021 /* It's hard to exhaustively search the code to be sure, 6022 * but there doesn't seem to be a way of executing the 6023 * following line. There are two cases: 6024 * 6025 * If 'standalone' is false, the DTD must have no 6026 * parameter entities or we wouldn't have passed the outer 6027 * 'if' statement. That means the only entity in the hash 6028 * table is the external subset name "#" which cannot be 6029 * given as a parameter entity name in XML syntax, so the 6030 * lookup must have returned NULL and we don't even reach 6031 * the test for an internal entity. 6032 * 6033 * If 'standalone' is true, it does not seem to be 6034 * possible to create entities taking this code path that 6035 * are not internal entities, so fail the test above. 6036 * 6037 * Because this analysis is very uncertain, the code is 6038 * being left in place and merely removed from the 6039 * coverage test statistics. 6040 */ 6041 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ 6042 } 6043 } else if (! 
entity) { 6044 dtd->keepProcessing = dtd->standalone; 6045 /* cannot report skipped entities in declarations */ 6046 if ((role == XML_ROLE_PARAM_ENTITY_REF) 6047 && parser->m_skippedEntityHandler) { 6048 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); 6049 handleDefault = XML_FALSE; 6050 } 6051 break; 6052 } 6053 if (entity->open) 6054 return XML_ERROR_RECURSIVE_ENTITY_REF; 6055 if (entity->textPtr) { 6056 enum XML_Error result; 6057 XML_Bool betweenDecl 6058 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); 6059 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); 6060 if (result != XML_ERROR_NONE) 6061 return result; 6062 handleDefault = XML_FALSE; 6063 break; 6064 } 6065 if (parser->m_externalEntityRefHandler) { 6066 dtd->paramEntityRead = XML_FALSE; 6067 entity->open = XML_TRUE; 6068 entityTrackingOnOpen(parser, entity, __LINE__); 6069 if (! parser->m_externalEntityRefHandler( 6070 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6071 entity->systemId, entity->publicId)) { 6072 entityTrackingOnClose(parser, entity, __LINE__); 6073 entity->open = XML_FALSE; 6074 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6075 } 6076 entityTrackingOnClose(parser, entity, __LINE__); 6077 entity->open = XML_FALSE; 6078 handleDefault = XML_FALSE; 6079 if (! dtd->paramEntityRead) { 6080 dtd->keepProcessing = dtd->standalone; 6081 break; 6082 } 6083 } else { 6084 dtd->keepProcessing = dtd->standalone; 6085 break; 6086 } 6087 } 6088 #endif /* XML_DTD */ 6089 if (! dtd->standalone && parser->m_notStandaloneHandler 6090 && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) 6091 return XML_ERROR_NOT_STANDALONE; 6092 break; 6093 6094 /* Element declaration stuff */ 6095 6096 case XML_ROLE_ELEMENT_NAME: 6097 if (parser->m_elementDeclHandler) { 6098 parser->m_declElementType = getElementType(parser, enc, s, next); 6099 if (! 
parser->m_declElementType) 6100 return XML_ERROR_NO_MEMORY; 6101 dtd->scaffLevel = 0; 6102 dtd->scaffCount = 0; 6103 dtd->in_eldecl = XML_TRUE; 6104 handleDefault = XML_FALSE; 6105 } 6106 break; 6107 6108 case XML_ROLE_CONTENT_ANY: 6109 case XML_ROLE_CONTENT_EMPTY: 6110 if (dtd->in_eldecl) { 6111 if (parser->m_elementDeclHandler) { 6112 // NOTE: We are avoiding MALLOC(..) here to so that 6113 // applications that are not using XML_FreeContentModel but 6114 // plain free(..) or .free_fcn() to free the content model's 6115 // memory are safe. 6116 XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content)); 6117 if (! content) 6118 return XML_ERROR_NO_MEMORY; 6119 content->quant = XML_CQUANT_NONE; 6120 content->name = NULL; 6121 content->numchildren = 0; 6122 content->children = NULL; 6123 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY 6124 : XML_CTYPE_EMPTY); 6125 *eventEndPP = s; 6126 parser->m_elementDeclHandler( 6127 parser->m_handlerArg, parser->m_declElementType->name, content); 6128 handleDefault = XML_FALSE; 6129 } 6130 dtd->in_eldecl = XML_FALSE; 6131 } 6132 break; 6133 6134 case XML_ROLE_CONTENT_PCDATA: 6135 if (dtd->in_eldecl) { 6136 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type 6137 = XML_CTYPE_MIXED; 6138 if (parser->m_elementDeclHandler) 6139 handleDefault = XML_FALSE; 6140 } 6141 break; 6142 6143 case XML_ROLE_CONTENT_ELEMENT: 6144 quant = XML_CQUANT_NONE; 6145 goto elementContent; 6146 case XML_ROLE_CONTENT_ELEMENT_OPT: 6147 quant = XML_CQUANT_OPT; 6148 goto elementContent; 6149 case XML_ROLE_CONTENT_ELEMENT_REP: 6150 quant = XML_CQUANT_REP; 6151 goto elementContent; 6152 case XML_ROLE_CONTENT_ELEMENT_PLUS: 6153 quant = XML_CQUANT_PLUS; 6154 elementContent: 6155 if (dtd->in_eldecl) { 6156 ELEMENT_TYPE *el; 6157 const XML_Char *name; 6158 size_t nameLen; 6159 const char *nxt 6160 = (quant == XML_CQUANT_NONE ? 
next : next - enc->minBytesPerChar); 6161 int myindex = nextScaffoldPart(parser); 6162 if (myindex < 0) 6163 return XML_ERROR_NO_MEMORY; 6164 dtd->scaffold[myindex].type = XML_CTYPE_NAME; 6165 dtd->scaffold[myindex].quant = quant; 6166 el = getElementType(parser, enc, s, nxt); 6167 if (! el) 6168 return XML_ERROR_NO_MEMORY; 6169 name = el->name; 6170 dtd->scaffold[myindex].name = name; 6171 nameLen = 0; 6172 while (name[nameLen++]) 6173 ; 6174 6175 /* Detect and prevent integer overflow */ 6176 if (nameLen > UINT_MAX - dtd->contentStringLen) { 6177 return XML_ERROR_NO_MEMORY; 6178 } 6179 6180 dtd->contentStringLen += (unsigned)nameLen; 6181 if (parser->m_elementDeclHandler) 6182 handleDefault = XML_FALSE; 6183 } 6184 break; 6185 6186 case XML_ROLE_GROUP_CLOSE: 6187 quant = XML_CQUANT_NONE; 6188 goto closeGroup; 6189 case XML_ROLE_GROUP_CLOSE_OPT: 6190 quant = XML_CQUANT_OPT; 6191 goto closeGroup; 6192 case XML_ROLE_GROUP_CLOSE_REP: 6193 quant = XML_CQUANT_REP; 6194 goto closeGroup; 6195 case XML_ROLE_GROUP_CLOSE_PLUS: 6196 quant = XML_CQUANT_PLUS; 6197 closeGroup: 6198 if (dtd->in_eldecl) { 6199 if (parser->m_elementDeclHandler) 6200 handleDefault = XML_FALSE; 6201 dtd->scaffLevel--; 6202 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; 6203 if (dtd->scaffLevel == 0) { 6204 if (! handleDefault) { 6205 XML_Content *model = build_model(parser); 6206 if (! model) 6207 return XML_ERROR_NO_MEMORY; 6208 *eventEndPP = s; 6209 parser->m_elementDeclHandler( 6210 parser->m_handlerArg, parser->m_declElementType->name, model); 6211 } 6212 dtd->in_eldecl = XML_FALSE; 6213 dtd->contentStringLen = 0; 6214 } 6215 } 6216 break; 6217 /* End element declaration stuff */ 6218 6219 case XML_ROLE_PI: 6220 if (! reportProcessingInstruction(parser, enc, s, next)) 6221 return XML_ERROR_NO_MEMORY; 6222 handleDefault = XML_FALSE; 6223 break; 6224 case XML_ROLE_COMMENT: 6225 if (! 
reportComment(parser, enc, s, next)) 6226 return XML_ERROR_NO_MEMORY; 6227 handleDefault = XML_FALSE; 6228 break; 6229 case XML_ROLE_NONE: 6230 switch (tok) { 6231 case XML_TOK_BOM: 6232 handleDefault = XML_FALSE; 6233 break; 6234 } 6235 break; 6236 case XML_ROLE_DOCTYPE_NONE: 6237 if (parser->m_startDoctypeDeclHandler) 6238 handleDefault = XML_FALSE; 6239 break; 6240 case XML_ROLE_ENTITY_NONE: 6241 if (dtd->keepProcessing && parser->m_entityDeclHandler) 6242 handleDefault = XML_FALSE; 6243 break; 6244 case XML_ROLE_NOTATION_NONE: 6245 if (parser->m_notationDeclHandler) 6246 handleDefault = XML_FALSE; 6247 break; 6248 case XML_ROLE_ATTLIST_NONE: 6249 if (dtd->keepProcessing && parser->m_attlistDeclHandler) 6250 handleDefault = XML_FALSE; 6251 break; 6252 case XML_ROLE_ELEMENT_NONE: 6253 if (parser->m_elementDeclHandler) 6254 handleDefault = XML_FALSE; 6255 break; 6256 } /* end of big switch */ 6257 6258 if (handleDefault && parser->m_defaultHandler) 6259 reportDefault(parser, enc, s, next); 6260 6261 switch (parser->m_parsingStatus.parsing) { 6262 case XML_SUSPENDED: 6263 *nextPtr = next; 6264 return XML_ERROR_NONE; 6265 case XML_FINISHED: 6266 return XML_ERROR_ABORTED; 6267 case XML_PARSING: 6268 if (parser->m_reenter) { 6269 *nextPtr = next; 6270 return XML_ERROR_NONE; 6271 } 6272 /* Fall through */ 6273 default: 6274 s = next; 6275 tok = XmlPrologTok(enc, s, end, &next); 6276 } 6277 } 6278 /* not reached */ 6279 } 6280 6281 static enum XML_Error PTRCALL 6282 epilogProcessor(XML_Parser parser, const char *s, const char *end, 6283 const char **nextPtr) { 6284 parser->m_processor = epilogProcessor; 6285 parser->m_eventPtr = s; 6286 for (;;) { 6287 const char *next = NULL; 6288 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 6289 #if XML_GE == 1 6290 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 6291 XML_ACCOUNT_DIRECT)) { 6292 accountingOnAbort(parser); 6293 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6294 } 6295 #endif 6296 parser->m_eventEndPtr = next; 6297 switch (tok) { 6298 /* report partial linebreak - it might be the last token */ 6299 case -XML_TOK_PROLOG_S: 6300 if (parser->m_defaultHandler) { 6301 reportDefault(parser, parser->m_encoding, s, next); 6302 if (parser->m_parsingStatus.parsing == XML_FINISHED) 6303 return XML_ERROR_ABORTED; 6304 } 6305 *nextPtr = next; 6306 return XML_ERROR_NONE; 6307 case XML_TOK_NONE: 6308 *nextPtr = s; 6309 return XML_ERROR_NONE; 6310 case XML_TOK_PROLOG_S: 6311 if (parser->m_defaultHandler) 6312 reportDefault(parser, parser->m_encoding, s, next); 6313 break; 6314 case XML_TOK_PI: 6315 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) 6316 return XML_ERROR_NO_MEMORY; 6317 break; 6318 case XML_TOK_COMMENT: 6319 if (! reportComment(parser, parser->m_encoding, s, next)) 6320 return XML_ERROR_NO_MEMORY; 6321 break; 6322 case XML_TOK_INVALID: 6323 parser->m_eventPtr = next; 6324 return XML_ERROR_INVALID_TOKEN; 6325 case XML_TOK_PARTIAL: 6326 if (! parser->m_parsingStatus.finalBuffer) { 6327 *nextPtr = s; 6328 return XML_ERROR_NONE; 6329 } 6330 return XML_ERROR_UNCLOSED_TOKEN; 6331 case XML_TOK_PARTIAL_CHAR: 6332 if (! 
parser->m_parsingStatus.finalBuffer) { 6333 *nextPtr = s; 6334 return XML_ERROR_NONE; 6335 } 6336 return XML_ERROR_PARTIAL_CHAR; 6337 default: 6338 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 6339 } 6340 switch (parser->m_parsingStatus.parsing) { 6341 case XML_SUSPENDED: 6342 parser->m_eventPtr = next; 6343 *nextPtr = next; 6344 return XML_ERROR_NONE; 6345 case XML_FINISHED: 6346 parser->m_eventPtr = next; 6347 return XML_ERROR_ABORTED; 6348 case XML_PARSING: 6349 if (parser->m_reenter) { 6350 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE 6351 } 6352 /* Fall through */ 6353 default:; 6354 parser->m_eventPtr = s = next; 6355 } 6356 } 6357 } 6358 6359 static enum XML_Error 6360 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, 6361 enum EntityType type) { 6362 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; 6363 switch (type) { 6364 case ENTITY_INTERNAL: 6365 parser->m_processor = internalEntityProcessor; 6366 openEntityList = &parser->m_openInternalEntities; 6367 freeEntityList = &parser->m_freeInternalEntities; 6368 break; 6369 case ENTITY_ATTRIBUTE: 6370 openEntityList = &parser->m_openAttributeEntities; 6371 freeEntityList = &parser->m_freeAttributeEntities; 6372 break; 6373 case ENTITY_VALUE: 6374 openEntityList = &parser->m_openValueEntities; 6375 freeEntityList = &parser->m_freeValueEntities; 6376 break; 6377 /* default case serves merely as a safety net in case of a 6378 * wrong entityType. Therefore we exclude the following lines 6379 * from the test coverage. 6380 * 6381 * LCOV_EXCL_START 6382 */ 6383 default: 6384 // Should not reach here 6385 assert(0); 6386 /* LCOV_EXCL_STOP */ 6387 } 6388 6389 if (*freeEntityList) { 6390 openEntity = *freeEntityList; 6391 *freeEntityList = openEntity->next; 6392 } else { 6393 openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); 6394 if (! 
openEntity) 6395 return XML_ERROR_NO_MEMORY; 6396 } 6397 entity->open = XML_TRUE; 6398 entity->hasMore = XML_TRUE; 6399 #if XML_GE == 1 6400 entityTrackingOnOpen(parser, entity, __LINE__); 6401 #endif 6402 entity->processed = 0; 6403 openEntity->next = *openEntityList; 6404 *openEntityList = openEntity; 6405 openEntity->entity = entity; 6406 openEntity->type = type; 6407 openEntity->startTagLevel = parser->m_tagLevel; 6408 openEntity->betweenDecl = betweenDecl; 6409 openEntity->internalEventPtr = NULL; 6410 openEntity->internalEventEndPtr = NULL; 6411 6412 // Only internal entities make use of the reenter flag 6413 // therefore no need to set it for other entity types 6414 if (type == ENTITY_INTERNAL) { 6415 triggerReenter(parser); 6416 } 6417 return XML_ERROR_NONE; 6418 } 6419 6420 static enum XML_Error PTRCALL 6421 internalEntityProcessor(XML_Parser parser, const char *s, const char *end, 6422 const char **nextPtr) { 6423 UNUSED_P(s); 6424 UNUSED_P(end); 6425 UNUSED_P(nextPtr); 6426 ENTITY *entity; 6427 const char *textStart, *textEnd; 6428 const char *next; 6429 enum XML_Error result; 6430 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; 6431 if (! 
openEntity) 6432 return XML_ERROR_UNEXPECTED_STATE; 6433 6434 entity = openEntity->entity; 6435 6436 // This will return early 6437 if (entity->hasMore) { 6438 textStart = ((const char *)entity->textPtr) + entity->processed; 6439 textEnd = (const char *)(entity->textPtr + entity->textLen); 6440 /* Set a safe default value in case 'next' does not get set */ 6441 next = textStart; 6442 6443 if (entity->is_param) { 6444 int tok 6445 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 6446 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 6447 tok, next, &next, XML_FALSE, XML_FALSE, 6448 XML_ACCOUNT_ENTITY_EXPANSION); 6449 } else { 6450 result = doContent(parser, openEntity->startTagLevel, 6451 parser->m_internalEncoding, textStart, textEnd, &next, 6452 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); 6453 } 6454 6455 if (result != XML_ERROR_NONE) 6456 return result; 6457 // Check if entity is complete, if not, mark down how much of it is 6458 // processed 6459 if (textEnd != next 6460 && (parser->m_parsingStatus.parsing == XML_SUSPENDED 6461 || (parser->m_parsingStatus.parsing == XML_PARSING 6462 && parser->m_reenter))) { 6463 entity->processed = (int)(next - (const char *)entity->textPtr); 6464 return result; 6465 } 6466 6467 // Entity is complete. We cannot close it here since we need to first 6468 // process its possible inner entities (which are added to the 6469 // m_openInternalEntities during doProlog or doContent calls above) 6470 entity->hasMore = XML_FALSE; 6471 if (! entity->is_param 6472 && (openEntity->startTagLevel != parser->m_tagLevel)) { 6473 return XML_ERROR_ASYNC_ENTITY; 6474 } 6475 triggerReenter(parser); 6476 return result; 6477 } // End of entity processing, "if" block will return here 6478 6479 // Remove fully processed openEntity from open entity list. 
#if XML_GE == 1
  entityTrackingOnClose(parser, entity, __LINE__);
#endif
  // openEntity is m_openInternalEntities' head, as we set it at the start of
  // this function and we skipped doProlog and doContent calls with hasMore set
  // to false. This means we can directly remove the head of
  // m_openInternalEntities
  assert(parser->m_openInternalEntities == openEntity);
  entity->open = XML_FALSE;
  parser->m_openInternalEntities = parser->m_openInternalEntities->next;

  /* put openEntity back in list of free instances */
  openEntity->next = parser->m_freeInternalEntities;
  parser->m_freeInternalEntities = openEntity;

  if (parser->m_openInternalEntities == NULL) {
    parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
  }
  triggerReenter(parser);
  return XML_ERROR_NONE;
}

/* Processor that consumes nothing: 's', 'end' and 'nextPtr' are ignored and
   the error code already stored in parser->m_errorCode is returned. */
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser, const char *s, const char *end,
               const char **nextPtr) {
  UNUSED_P(s);
  UNUSED_P(end);
  UNUSED_P(nextPtr);
  return parser->m_errorCode;
}

/* Build one complete attribute value in 'pool'.

   The work is done by appendAttributeValue(), driven from a loop here:
   - while parser->m_openAttributeEntities is empty, the caller's text
     [next, end) is processed in encoding 'enc' with the caller's 'account';
   - otherwise the head of parser->m_openAttributeEntities is processed:
     first its remaining text (in parser->m_internalEncoding, accounted as
     XML_ACCOUNT_ENTITY_EXPANSION), and on a later iteration, once hasMore
     has been cleared, the entity is closed and its list node is moved to
     parser->m_freeAttributeEntities.
   The loop ends on the first error, or when no entity is open and 'next'
   has reached 'end'.

   On success, for non-CDATA values one trailing 0x20 is chopped from the
   pool, then a terminating NUL is appended.

   Returns XML_ERROR_NONE on success, XML_ERROR_NO_MEMORY if the terminator
   cannot be appended, or the error reported by appendAttributeValue(). */
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                    const char *ptr, const char *end, STRING_POOL *pool,
                    enum XML_Account account) {
  const char *next = ptr;
  enum XML_Error result = XML_ERROR_NONE;

  while (1) {
    if (! parser->m_openAttributeEntities) {
      result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
                                    account, &next);
    } else {
      OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
      /* NOTE(review): this branch is only entered when
         m_openAttributeEntities is non-NULL, so this check looks purely
         defensive. */
      if (! openEntity)
        return XML_ERROR_UNEXPECTED_STATE;

      ENTITY *const entity = openEntity->entity;
      /* Resume where the previous pass over this entity stopped */
      const char *const textStart
          = ((const char *)entity->textPtr) + entity->processed;
      const char *const textEnd
          = (const char *)(entity->textPtr + entity->textLen);
      /* Set a safe default value in case 'next' does not get set */
      const char *nextInEntity = textStart;
      if (entity->hasMore) {
        result = appendAttributeValue(
            parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
            pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
        if (result != XML_ERROR_NONE)
          break;
        // Check if entity is complete, if not, mark down how much of it is
        // processed. A XML_SUSPENDED check here is not required as
        // appendAttributeValue will never suspend the parser.
        if (textEnd != nextInEntity) {
          entity->processed
              = (int)(nextInEntity - (const char *)entity->textPtr);
          continue;
        }

        // Entity is complete. We cannot close it here since we need to first
        // process its possible inner entities (which are added to the
        // m_openAttributeEntities during appendAttributeValue)
        entity->hasMore = XML_FALSE;
        continue;
      } // End of entity processing, "if" block skips the rest

      // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
      entityTrackingOnClose(parser, entity, __LINE__);
#endif
      // openEntity is m_openAttributeEntities' head, since we set it at the
      // start of this function and because we skipped appendAttributeValue call
      // with hasMore set to false. This means we can directly remove the head
      // of m_openAttributeEntities
      assert(parser->m_openAttributeEntities == openEntity);
      entity->open = XML_FALSE;
      parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;

      /* put openEntity back in list of free instances */
      openEntity->next = parser->m_freeAttributeEntities;
      parser->m_freeAttributeEntities = openEntity;
    }

    // Break if an error occurred or there is nothing left to process
    if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
      break;
    }
  }

  if (result)
    return result;
  /* Non-CDATA values must not end in a space: drop one trailing 0x20 */
  if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
    poolChop(pool);
  if (! poolAppendChar(pool, XML_T('\0')))
    return XML_ERROR_NO_MEMORY;
  return XML_ERROR_NONE;
}

/* Tokenize [ptr, end) with XmlAttributeValueTok() and append the resulting
   characters to 'pool'.

   The function returns when the input is exhausted (XML_TOK_NONE), on the
   first error, or directly after handing a reference to an internal entity
   to processEntity(). When it returns XML_ERROR_NONE and 'nextPtr' is not
   NULL, *nextPtr is set to the position following the last token consumed.

   'isCdata' selects whether whitespace is collapsed: for non-CDATA values
   a space is not appended when the pool is empty or already ends in 0x20. */
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
                     const char *ptr, const char *end, STRING_POOL *pool,
                     enum XML_Account account, const char **nextPtr) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
  UNUSED_P(account);
#endif

  for (;;) {
    const char *next
        = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
#if XML_GE == 1
    if (!
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    switch (tok) {
    case XML_TOK_NONE:
      /* Input exhausted: report how far we got and finish successfully */
      if (nextPtr) {
        *nextPtr = next;
      }
      return XML_ERROR_NONE;
    case XML_TOK_INVALID:
      /* The event pointer is only updated when tokenizing the parser's own
         input encoding, i.e. not while reading internal entity text */
      if (enc == parser->m_encoding)
        parser->m_eventPtr = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (enc == parser->m_encoding)
        parser->m_eventPtr = ptr;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_CHAR_REF: {
      XML_Char buf[XML_ENCODE_MAX];
      int i;
      int n = XmlCharRefNumber(enc, ptr);
      if (n < 0) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BAD_CHAR_REF;
      }
      /* In a non-CDATA value, a referenced space is dropped when it would
         be leading or would follow another space */
      if (! isCdata && n == 0x20 /* space */
          && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      n = XmlEncode(n, (ICHAR *)buf);
      /* The XmlEncode() functions can never return 0 here.  That
       * error return happens if the code point passed in is either
       * negative or greater than or equal to 0x110000.  The
       * XmlCharRefNumber() functions will all return a number
       * strictly less than 0x110000 or a negative value if an error
       * occurred.  The negative value is intercepted above, so
       * XmlEncode() is never passed a value it might return an
       * error for.
       */
      for (i = 0; i < n; i++) {
        if (! poolAppendChar(pool, buf[i]))
          return XML_ERROR_NO_MEMORY;
      }
    } break;
    case XML_TOK_DATA_CHARS:
      if (! poolAppend(pool, enc, ptr, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_TRAILING_CR:
      /* Consume exactly one character for the trailing CR */
      next = ptr + enc->minBytesPerChar;
      /* fall through */
    case XML_TOK_ATTRIBUTE_VALUE_S:
    case XML_TOK_DATA_NEWLINE:
      /* All three tokens are stored as a single 0x20; in a non-CDATA value
         it is dropped when leading or when following another space */
      if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
        break;
      if (! poolAppendChar(pool, 0x20))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      bool checkEntityDecl;
      /* Non-zero when the reference names one of the predefined entities;
         the value is then the replacement character itself */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (! poolAppendChar(pool, ch))
          return XML_ERROR_NO_MEMORY;
        break;
      }
      /* The name is only needed for the lookup, so it is stored in the
         scratch pool and discarded straight away */
      name = poolStoreString(&parser->m_temp2Pool, enc,
                             ptr + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&parser->m_temp2Pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal.
      */
      if (pool == &dtd->pool) /* are we called from prolog? */
        checkEntityDecl =
#ifdef XML_DTD
            parser->m_prologState.documentEntity &&
#endif /* XML_DTD */
            (dtd->standalone ? ! parser->m_openInternalEntities
                             : ! dtd->hasParamEntityRefs);
      else /* if (pool == &parser->m_tempPool): we are called from content */
        checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
      if (checkEntityDecl) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        /* Cannot report skipped entity here - see comments on
           parser->m_skippedEntityHandler.
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        */
        /* Cannot call the default handler because this would be
           out of sync with the call to the startElementHandler.
        if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
          reportDefault(parser, enc, ptr, next);
        */
        break;
      }
      if (entity->open) {
        if (enc == parser->m_encoding) {
          /* It does not appear that this line can be executed.
           *
           * The "if (entity->open)" check catches recursive entity
           * definitions.  In order to be called with an open
           * entity, it must have gone through this code before and
           * been through the recursive call to
           * appendAttributeValue() some lines below.  That call
           * sets the local encoding ("enc") to the parser's
           * internal encoding (internal_utf8 or internal_utf16),
           * which can never be the same as the principal encoding.
           * It doesn't appear there is another code path that gets
           * here with entity->open being TRUE.
           *
           * Since it is not certain that this logic is watertight,
           * we keep the line and merely exclude it from coverage
           * tests.
           */
          parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
        }
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      }
      if (entity->notation) {
        if (enc == parser->m_encoding)
          parser->m_eventPtr = ptr;
        return XML_ERROR_BINARY_ENTITY_REF;
      }
      if (!
entity->textPtr) { 6747 if (enc == parser->m_encoding) 6748 parser->m_eventPtr = ptr; 6749 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 6750 } else { 6751 enum XML_Error result; 6752 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); 6753 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { 6754 *nextPtr = next; 6755 } 6756 return result; 6757 } 6758 } break; 6759 default: 6760 /* The only token returned by XmlAttributeValueTok() that does 6761 * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6762 * Getting that would require an entity name to contain an 6763 * incomplete XML character (e.g. \xE2\x82); however previous 6764 * tokenisers will have already recognised and rejected such 6765 * names before XmlAttributeValueTok() gets a look-in. This 6766 * default case should be retained as a safety net, but the code 6767 * excluded from coverage tests. 6768 * 6769 * LCOV_EXCL_START 6770 */ 6771 if (enc == parser->m_encoding) 6772 parser->m_eventPtr = ptr; 6773 return XML_ERROR_UNEXPECTED_STATE; 6774 /* LCOV_EXCL_STOP */ 6775 } 6776 ptr = next; 6777 } 6778 /* not reached */ 6779 } 6780 6781 #if XML_GE == 1 6782 static enum XML_Error 6783 storeEntityValue(XML_Parser parser, const ENCODING *enc, 6784 const char *entityTextPtr, const char *entityTextEnd, 6785 enum XML_Account account, const char **nextPtr) { 6786 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6787 STRING_POOL *pool = &(dtd->entityValuePool); 6788 enum XML_Error result = XML_ERROR_NONE; 6789 # ifdef XML_DTD 6790 int oldInEntityValue = parser->m_prologState.inEntityValue; 6791 parser->m_prologState.inEntityValue = 1; 6792 # else 6793 UNUSED_P(account); 6794 # endif /* XML_DTD */ 6795 /* never return Null for the value argument in EntityDeclHandler, 6796 since this would indicate an external entity; therefore we 6797 have to make sure that entityValuePool.start is not null */ 6798 if (! pool->blocks) { 6799 if (! 
poolGrow(pool)) 6800 return XML_ERROR_NO_MEMORY; 6801 } 6802 6803 const char *next = entityTextPtr; 6804 6805 /* Nothing to tokenize. */ 6806 if (entityTextPtr >= entityTextEnd) { 6807 result = XML_ERROR_NONE; 6808 goto endEntityValue; 6809 } 6810 6811 for (;;) { 6812 next 6813 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ 6814 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); 6815 6816 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, 6817 account)) { 6818 accountingOnAbort(parser); 6819 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6820 goto endEntityValue; 6821 } 6822 6823 switch (tok) { 6824 case XML_TOK_PARAM_ENTITY_REF: 6825 # ifdef XML_DTD 6826 if (parser->m_isParamEntity || enc != parser->m_encoding) { 6827 const XML_Char *name; 6828 ENTITY *entity; 6829 name = poolStoreString(&parser->m_tempPool, enc, 6830 entityTextPtr + enc->minBytesPerChar, 6831 next - enc->minBytesPerChar); 6832 if (! name) { 6833 result = XML_ERROR_NO_MEMORY; 6834 goto endEntityValue; 6835 } 6836 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 6837 poolDiscard(&parser->m_tempPool); 6838 if (! entity) { 6839 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ 6840 /* cannot report skipped entity here - see comments on 6841 parser->m_skippedEntityHandler 6842 if (parser->m_skippedEntityHandler) 6843 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6844 */ 6845 dtd->keepProcessing = dtd->standalone; 6846 goto endEntityValue; 6847 } 6848 if (entity->open || (entity == parser->m_declEntity)) { 6849 if (enc == parser->m_encoding) 6850 parser->m_eventPtr = entityTextPtr; 6851 result = XML_ERROR_RECURSIVE_ENTITY_REF; 6852 goto endEntityValue; 6853 } 6854 if (entity->systemId) { 6855 if (parser->m_externalEntityRefHandler) { 6856 dtd->paramEntityRead = XML_FALSE; 6857 entity->open = XML_TRUE; 6858 entityTrackingOnOpen(parser, entity, __LINE__); 6859 if (! 
parser->m_externalEntityRefHandler( 6860 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6861 entity->systemId, entity->publicId)) { 6862 entityTrackingOnClose(parser, entity, __LINE__); 6863 entity->open = XML_FALSE; 6864 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6865 goto endEntityValue; 6866 } 6867 entityTrackingOnClose(parser, entity, __LINE__); 6868 entity->open = XML_FALSE; 6869 if (! dtd->paramEntityRead) 6870 dtd->keepProcessing = dtd->standalone; 6871 } else 6872 dtd->keepProcessing = dtd->standalone; 6873 } else { 6874 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE); 6875 goto endEntityValue; 6876 } 6877 break; 6878 } 6879 # endif /* XML_DTD */ 6880 /* In the internal subset, PE references are not legal 6881 within markup declarations, e.g entity values in this case. */ 6882 parser->m_eventPtr = entityTextPtr; 6883 result = XML_ERROR_PARAM_ENTITY_REF; 6884 goto endEntityValue; 6885 case XML_TOK_NONE: 6886 result = XML_ERROR_NONE; 6887 goto endEntityValue; 6888 case XML_TOK_ENTITY_REF: 6889 case XML_TOK_DATA_CHARS: 6890 if (! poolAppend(pool, enc, entityTextPtr, next)) { 6891 result = XML_ERROR_NO_MEMORY; 6892 goto endEntityValue; 6893 } 6894 break; 6895 case XML_TOK_TRAILING_CR: 6896 next = entityTextPtr + enc->minBytesPerChar; 6897 /* fall through */ 6898 case XML_TOK_DATA_NEWLINE: 6899 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6900 result = XML_ERROR_NO_MEMORY; 6901 goto endEntityValue; 6902 } 6903 *(pool->ptr)++ = 0xA; 6904 break; 6905 case XML_TOK_CHAR_REF: { 6906 XML_Char buf[XML_ENCODE_MAX]; 6907 int i; 6908 int n = XmlCharRefNumber(enc, entityTextPtr); 6909 if (n < 0) { 6910 if (enc == parser->m_encoding) 6911 parser->m_eventPtr = entityTextPtr; 6912 result = XML_ERROR_BAD_CHAR_REF; 6913 goto endEntityValue; 6914 } 6915 n = XmlEncode(n, (ICHAR *)buf); 6916 /* The XmlEncode() functions can never return 0 here. 
That 6917 * error return happens if the code point passed in is either 6918 * negative or greater than or equal to 0x110000. The 6919 * XmlCharRefNumber() functions will all return a number 6920 * strictly less than 0x110000 or a negative value if an error 6921 * occurred. The negative value is intercepted above, so 6922 * XmlEncode() is never passed a value it might return an 6923 * error for. 6924 */ 6925 for (i = 0; i < n; i++) { 6926 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6927 result = XML_ERROR_NO_MEMORY; 6928 goto endEntityValue; 6929 } 6930 *(pool->ptr)++ = buf[i]; 6931 } 6932 } break; 6933 case XML_TOK_PARTIAL: 6934 if (enc == parser->m_encoding) 6935 parser->m_eventPtr = entityTextPtr; 6936 result = XML_ERROR_INVALID_TOKEN; 6937 goto endEntityValue; 6938 case XML_TOK_INVALID: 6939 if (enc == parser->m_encoding) 6940 parser->m_eventPtr = next; 6941 result = XML_ERROR_INVALID_TOKEN; 6942 goto endEntityValue; 6943 default: 6944 /* This default case should be unnecessary -- all the tokens 6945 * that XmlEntityValueTok() can return have their own explicit 6946 * cases -- but should be retained for safety. We do however 6947 * exclude it from the coverage statistics. 
6948 * 6949 * LCOV_EXCL_START 6950 */ 6951 if (enc == parser->m_encoding) 6952 parser->m_eventPtr = entityTextPtr; 6953 result = XML_ERROR_UNEXPECTED_STATE; 6954 goto endEntityValue; 6955 /* LCOV_EXCL_STOP */ 6956 } 6957 entityTextPtr = next; 6958 } 6959 endEntityValue: 6960 # ifdef XML_DTD 6961 parser->m_prologState.inEntityValue = oldInEntityValue; 6962 # endif /* XML_DTD */ 6963 // If 'nextPtr' is given, it should be updated during the processing 6964 if (nextPtr != NULL) { 6965 *nextPtr = next; 6966 } 6967 return result; 6968 } 6969 6970 static enum XML_Error 6971 callStoreEntityValue(XML_Parser parser, const ENCODING *enc, 6972 const char *entityTextPtr, const char *entityTextEnd, 6973 enum XML_Account account) { 6974 const char *next = entityTextPtr; 6975 enum XML_Error result = XML_ERROR_NONE; 6976 while (1) { 6977 if (! parser->m_openValueEntities) { 6978 result 6979 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next); 6980 } else { 6981 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities; 6982 if (! openEntity) 6983 return XML_ERROR_UNEXPECTED_STATE; 6984 6985 ENTITY *const entity = openEntity->entity; 6986 const char *const textStart 6987 = ((const char *)entity->textPtr) + entity->processed; 6988 const char *const textEnd 6989 = (const char *)(entity->textPtr + entity->textLen); 6990 /* Set a safe default value in case 'next' does not get set */ 6991 const char *nextInEntity = textStart; 6992 if (entity->hasMore) { 6993 result = storeEntityValue(parser, parser->m_internalEncoding, textStart, 6994 textEnd, XML_ACCOUNT_ENTITY_EXPANSION, 6995 &nextInEntity); 6996 if (result != XML_ERROR_NONE) 6997 break; 6998 // Check if entity is complete, if not, mark down how much of it is 6999 // processed. A XML_SUSPENDED check here is not required as 7000 // appendAttributeValue will never suspend the parser. 
7001 if (textEnd != nextInEntity) { 7002 entity->processed 7003 = (int)(nextInEntity - (const char *)entity->textPtr); 7004 continue; 7005 } 7006 7007 // Entity is complete. We cannot close it here since we need to first 7008 // process its possible inner entities (which are added to the 7009 // m_openValueEntities during storeEntityValue) 7010 entity->hasMore = XML_FALSE; 7011 continue; 7012 } // End of entity processing, "if" block skips the rest 7013 7014 // Remove fully processed openEntity from open entity list. 7015 # if XML_GE == 1 7016 entityTrackingOnClose(parser, entity, __LINE__); 7017 # endif 7018 // openEntity is m_openValueEntities' head, since we set it at the 7019 // start of this function and because we skipped storeEntityValue call 7020 // with hasMore set to false. This means we can directly remove the head 7021 // of m_openValueEntities 7022 assert(parser->m_openValueEntities == openEntity); 7023 entity->open = XML_FALSE; 7024 parser->m_openValueEntities = parser->m_openValueEntities->next; 7025 7026 /* put openEntity back in list of free instances */ 7027 openEntity->next = parser->m_freeValueEntities; 7028 parser->m_freeValueEntities = openEntity; 7029 } 7030 7031 // Break if an error occurred or there is nothing left to process 7032 if (result 7033 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) { 7034 break; 7035 } 7036 } 7037 7038 return result; 7039 } 7040 7041 #else /* XML_GE == 0 */ 7042 7043 static enum XML_Error 7044 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { 7045 // This will store "&entity123;" in entity->textPtr 7046 // to end up as "&entity123;" in the handler. 7047 const char *const entity_start = "&"; 7048 const char *const entity_end = ";"; 7049 7050 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); 7051 if (! poolAppendString(pool, entity_start) 7052 || ! poolAppendString(pool, entity->name) 7053 || ! 
poolAppendString(pool, entity_end)) { 7054 poolDiscard(pool); 7055 return XML_ERROR_NO_MEMORY; 7056 } 7057 7058 entity->textPtr = poolStart(pool); 7059 entity->textLen = (int)(poolLength(pool)); 7060 poolFinish(pool); 7061 7062 return XML_ERROR_NONE; 7063 } 7064 7065 #endif /* XML_GE == 0 */ 7066 7067 static void FASTCALL 7068 normalizeLines(XML_Char *s) { 7069 XML_Char *p; 7070 for (;; s++) { 7071 if (*s == XML_T('\0')) 7072 return; 7073 if (*s == 0xD) 7074 break; 7075 } 7076 p = s; 7077 do { 7078 if (*s == 0xD) { 7079 *p++ = 0xA; 7080 if (*++s == 0xA) 7081 s++; 7082 } else 7083 *p++ = *s++; 7084 } while (*s); 7085 *p = XML_T('\0'); 7086 } 7087 7088 static int 7089 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 7090 const char *start, const char *end) { 7091 const XML_Char *target; 7092 XML_Char *data; 7093 const char *tem; 7094 if (! parser->m_processingInstructionHandler) { 7095 if (parser->m_defaultHandler) 7096 reportDefault(parser, enc, start, end); 7097 return 1; 7098 } 7099 start += enc->minBytesPerChar * 2; 7100 tem = start + XmlNameLength(enc, start); 7101 target = poolStoreString(&parser->m_tempPool, enc, start, tem); 7102 if (! target) 7103 return 0; 7104 poolFinish(&parser->m_tempPool); 7105 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), 7106 end - enc->minBytesPerChar * 2); 7107 if (! data) 7108 return 0; 7109 normalizeLines(data); 7110 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 7111 poolClear(&parser->m_tempPool); 7112 return 1; 7113 } 7114 7115 static int 7116 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, 7117 const char *end) { 7118 XML_Char *data; 7119 if (! parser->m_commentHandler) { 7120 if (parser->m_defaultHandler) 7121 reportDefault(parser, enc, start, end); 7122 return 1; 7123 } 7124 data = poolStoreString(&parser->m_tempPool, enc, 7125 start + enc->minBytesPerChar * 4, 7126 end - enc->minBytesPerChar * 3); 7127 if (! 
data)
    return 0;
  normalizeLines(data);
  parser->m_commentHandler(parser->m_handlerArg, data);
  poolClear(&parser->m_tempPool);
  return 1;
}

/* Pass the text [s, end) to parser->m_defaultHandler.

   If the encoding requires conversion, the text is converted piecewise
   through parser->m_dataBuf and the handler is called once per piece, with
   the event pointers advanced around each call. Otherwise the handler is
   called once, directly on the input.

   The caller must have checked that a default handler is set; the pointer
   is not tested here. */
static void
reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
              const char *end) {
  if (MUST_CONVERT(enc, s)) {
    enum XML_Convert_Result convert_res;
    const char **eventPP;
    const char **eventEndPP;
    if (enc == parser->m_encoding) {
      eventPP = &parser->m_eventPtr;
      eventEndPP = &parser->m_eventEndPtr;
    } else {
      /* To get here, two things must be true; the parser must be
       * using a character encoding that is not the same as the
       * encoding passed in, and the encoding passed in must need
       * conversion to the internal format (UTF-8 unless XML_UNICODE
       * is defined).  The only occasions on which the encoding passed
       * in is not the same as the parser's encoding are when it is
       * the internal encoding (e.g. a previously defined parameter
       * entity, already converted to internal format).  This by
       * definition doesn't need conversion, so the whole branch never
       * gets executed.
       *
       * For safety's sake we don't delete these lines and merely
       * exclude them from coverage statistics.
       *
       * LCOV_EXCL_START
       */
      eventPP = &(parser->m_openInternalEntities->internalEventPtr);
      eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
      /* LCOV_EXCL_STOP */
    }
    do {
      ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
      convert_res
          = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
      /* During the callback the event spans exactly the piece being
         reported; afterwards the event start catches up with its end */
      *eventEndPP = s;
      parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
                               (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
      *eventPP = s;
    } while ((convert_res != XML_CONVERT_COMPLETED)
             && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
  } else
    parser->m_defaultHandler(
        parser->m_handlerArg, (const XML_Char *)s,
        (int)((const XML_Char *)end - (const XML_Char *)s));
}

/* Append a default-attribute record (attId, value, isCdata) to
   type->defaultAtts.

   When 'value' is non-NULL or 'isId' is set and 'attId' is already present
   in the array, nothing is added and 1 is returned. With 'isId' set, the
   first such attribute that is not an xmlns attribute becomes type->idAtt.

   The array starts with room for 8 records and doubles when full.

   Returns 1 on success and 0 on allocation failure or when the new size
   would overflow. */
static int
defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
                XML_Bool isId, const XML_Char *value, XML_Parser parser) {
  DEFAULT_ATTRIBUTE *att;
  if (value || isId) {
    /* The handling of default attributes gets messed up if we have
       a default which duplicates a non-default. */
    int i;
    for (i = 0; i < type->nDefaultAtts; i++)
      if (attId == type->defaultAtts[i].id)
        return 1;
    if (isId && ! type->idAtt && ! attId->xmlns)
      type->idAtt = attId;
  }
  if (type->nDefaultAtts == type->allocDefaultAtts) {
    if (type->allocDefaultAtts == 0) {
      type->allocDefaultAtts = 8;
      type->defaultAtts
          = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! type->defaultAtts) {
        /* Keep the recorded capacity in step with the (absent) array */
        type->allocDefaultAtts = 0;
        return 0;
      }
    } else {
      DEFAULT_ATTRIBUTE *temp;

      /* Detect and prevent integer overflow */
      if (type->allocDefaultAtts > INT_MAX / 2) {
        return 0;
      }

      int count = type->allocDefaultAtts * 2;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) {
        return 0;
      }
#endif

      /* The result goes to a temporary so that the old array stays
         reachable if the reallocation fails */
      temp = REALLOC(parser, type->defaultAtts,
                     (count * sizeof(DEFAULT_ATTRIBUTE)));
      if (temp == NULL)
        return 0;
      type->allocDefaultAtts = count;
      type->defaultAtts = temp;
    }
  }
  att = type->defaultAtts + type->nDefaultAtts;
  att->id = attId;
  att->value = value;
  att->isCdata = isCdata;
  if (! isCdata)
    attId->maybeTokenized = XML_TRUE;
  type->nDefaultAtts += 1;
  return 1;
}

/* If elementType->name contains a colon, find or create in dtd->prefixes
   the PREFIX named by the text before the first colon and store it in
   elementType->prefix. A name without a colon is left untouched.

   Returns 1 on success and 0 if the prefix name could not be stored or the
   lookup failed. */
static int
setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  const XML_Char *name;
  for (name = elementType->name; *name; name++) {
    if (*name == XML_T(ASCII_COLON)) {
      PREFIX *prefix;
      const XML_Char *s;
      for (s = elementType->name; s != name; s++) {
        if (! poolAppendChar(&dtd->pool, *s))
          return 0;
      }
      if (! poolAppendChar(&dtd->pool, XML_T('\0')))
        return 0;
      prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
                                sizeof(PREFIX));
      if (! prefix)
        return 0;
      /* Keep the pool string only if the table entry now refers to it;
         otherwise the prefix existed already and the copy is dropped */
      if (prefix->name == poolStart(&dtd->pool))
        poolFinish(&dtd->pool);
      else
        poolDiscard(&dtd->pool);
      elementType->prefix = prefix;
      break;
    }
  }
  return 1;
}

/* Find or create the ATTRIBUTE_ID for the attribute name [start, end).

   The name is stored in dtd->pool behind one spare leading character and
   looked up in dtd->attributeIds. For a newly created id, and only when
   namespace processing (parser->m_ns) is on, the prefix is resolved: names
   "xmlns" and "xmlns:..." are flagged as xmlns attributes, any other name
   containing a colon gets the PREFIX named by the text before the colon.

   Returns the id, or NULL on failure. */
static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
               const char *end) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ATTRIBUTE_ID *id;
  const XML_Char *name;
  if (!
poolAppendChar(&dtd->pool, XML_T('\0'))) 7279 return NULL; 7280 name = poolStoreString(&dtd->pool, enc, start, end); 7281 if (! name) 7282 return NULL; 7283 /* skip quotation mark - its storage will be reused (like in name[-1]) */ 7284 ++name; 7285 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, 7286 sizeof(ATTRIBUTE_ID)); 7287 if (! id) 7288 return NULL; 7289 if (id->name != name) 7290 poolDiscard(&dtd->pool); 7291 else { 7292 poolFinish(&dtd->pool); 7293 if (! parser->m_ns) 7294 ; 7295 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) 7296 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) 7297 && name[4] == XML_T(ASCII_s) 7298 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 7299 if (name[5] == XML_T('\0')) 7300 id->prefix = &dtd->defaultPrefix; 7301 else 7302 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, 7303 sizeof(PREFIX)); 7304 id->xmlns = XML_TRUE; 7305 } else { 7306 int i; 7307 for (i = 0; name[i]; i++) { 7308 /* attributes without prefix are *not* in the default namespace */ 7309 if (name[i] == XML_T(ASCII_COLON)) { 7310 int j; 7311 for (j = 0; j < i; j++) { 7312 if (! poolAppendChar(&dtd->pool, name[j])) 7313 return NULL; 7314 } 7315 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 7316 return NULL; 7317 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, 7318 poolStart(&dtd->pool), sizeof(PREFIX)); 7319 if (! id->prefix) 7320 return NULL; 7321 if (id->prefix->name == poolStart(&dtd->pool)) 7322 poolFinish(&dtd->pool); 7323 else 7324 poolDiscard(&dtd->pool); 7325 break; 7326 } 7327 } 7328 } 7329 } 7330 return id; 7331 } 7332 7333 #define CONTEXT_SEP XML_T(ASCII_FF) 7334 7335 static const XML_Char * 7336 getContext(XML_Parser parser) { 7337 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7338 HASH_TABLE_ITER iter; 7339 XML_Bool needSep = XML_FALSE; 7340 7341 if (dtd->defaultPrefix.binding) { 7342 int i; 7343 int len; 7344 if (! 
poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 7345 return NULL; 7346 len = dtd->defaultPrefix.binding->uriLen; 7347 if (parser->m_namespaceSeparator) 7348 len--; 7349 for (i = 0; i < len; i++) { 7350 if (! poolAppendChar(&parser->m_tempPool, 7351 dtd->defaultPrefix.binding->uri[i])) { 7352 /* Because of memory caching, I don't believe this line can be 7353 * executed. 7354 * 7355 * This is part of a loop copying the default prefix binding 7356 * URI into the parser's temporary string pool. Previously, 7357 * that URI was copied into the same string pool, with a 7358 * terminating NUL character, as part of setContext(). When 7359 * the pool was cleared, that leaves a block definitely big 7360 * enough to hold the URI on the free block list of the pool. 7361 * The URI copy in getContext() therefore cannot run out of 7362 * memory. 7363 * 7364 * If the pool is used between the setContext() and 7365 * getContext() calls, the worst it can do is leave a bigger 7366 * block on the front of the free list. Given that this is 7367 * all somewhat inobvious and program logic can be changed, we 7368 * don't delete the line but we do exclude it from the test 7369 * coverage statistics. 7370 */ 7371 return NULL; /* LCOV_EXCL_LINE */ 7372 } 7373 } 7374 needSep = XML_TRUE; 7375 } 7376 7377 hashTableIterInit(&iter, &(dtd->prefixes)); 7378 for (;;) { 7379 int i; 7380 int len; 7381 const XML_Char *s; 7382 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); 7383 if (! prefix) 7384 break; 7385 if (! prefix->binding) { 7386 /* This test appears to be (justifiable) paranoia. There does 7387 * not seem to be a way of injecting a prefix without a binding 7388 * that doesn't get errored long before this function is called. 7389 * The test should remain for safety's sake, so we instead 7390 * exclude the following line from the coverage statistics. 7391 */ 7392 continue; /* LCOV_EXCL_LINE */ 7393 } 7394 if (needSep && ! 
poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 7395 return NULL; 7396 for (s = prefix->name; *s; s++) 7397 if (! poolAppendChar(&parser->m_tempPool, *s)) 7398 return NULL; 7399 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 7400 return NULL; 7401 len = prefix->binding->uriLen; 7402 if (parser->m_namespaceSeparator) 7403 len--; 7404 for (i = 0; i < len; i++) 7405 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) 7406 return NULL; 7407 needSep = XML_TRUE; 7408 } 7409 7410 hashTableIterInit(&iter, &(dtd->generalEntities)); 7411 for (;;) { 7412 const XML_Char *s; 7413 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 7414 if (! e) 7415 break; 7416 if (! e->open) 7417 continue; 7418 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 7419 return NULL; 7420 for (s = e->name; *s; s++) 7421 if (! poolAppendChar(&parser->m_tempPool, *s)) 7422 return 0; 7423 needSep = XML_TRUE; 7424 } 7425 7426 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7427 return NULL; 7428 return parser->m_tempPool.start; 7429 } 7430 7431 static XML_Bool 7432 setContext(XML_Parser parser, const XML_Char *context) { 7433 if (context == NULL) { 7434 return XML_FALSE; 7435 } 7436 7437 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7438 const XML_Char *s = context; 7439 7440 while (*context != XML_T('\0')) { 7441 if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 7442 ENTITY *e; 7443 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7444 return XML_FALSE; 7445 e = (ENTITY *)lookup(parser, &dtd->generalEntities, 7446 poolStart(&parser->m_tempPool), 0); 7447 if (e) 7448 e->open = XML_TRUE; 7449 if (*s != XML_T('\0')) 7450 s++; 7451 context = s; 7452 poolDiscard(&parser->m_tempPool); 7453 } else if (*s == XML_T(ASCII_EQUALS)) { 7454 PREFIX *prefix; 7455 if (poolLength(&parser->m_tempPool) == 0) 7456 prefix = &dtd->defaultPrefix; 7457 else { 7458 if (! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7459 return XML_FALSE; 7460 const XML_Char *const prefixName = poolCopyStringNoFinish( 7461 &dtd->pool, poolStart(&parser->m_tempPool)); 7462 if (! prefixName) { 7463 return XML_FALSE; 7464 } 7465 7466 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName, 7467 sizeof(PREFIX)); 7468 7469 const bool prefixNameUsed = prefix && prefix->name == prefixName; 7470 if (prefixNameUsed) 7471 poolFinish(&dtd->pool); 7472 else 7473 poolDiscard(&dtd->pool); 7474 7475 if (! prefix) 7476 return XML_FALSE; 7477 7478 poolDiscard(&parser->m_tempPool); 7479 } 7480 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); 7481 context++) 7482 if (! poolAppendChar(&parser->m_tempPool, *context)) 7483 return XML_FALSE; 7484 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 7485 return XML_FALSE; 7486 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), 7487 &parser->m_inheritedBindings) 7488 != XML_ERROR_NONE) 7489 return XML_FALSE; 7490 poolDiscard(&parser->m_tempPool); 7491 if (*context != XML_T('\0')) 7492 ++context; 7493 s = context; 7494 } else { 7495 if (! 
poolAppendChar(&parser->m_tempPool, *s)) 7496 return XML_FALSE; 7497 s++; 7498 } 7499 } 7500 return XML_TRUE; 7501 } 7502 7503 static void FASTCALL 7504 normalizePublicId(XML_Char *publicId) { 7505 XML_Char *p = publicId; 7506 XML_Char *s; 7507 for (s = publicId; *s; s++) { 7508 switch (*s) { 7509 case 0x20: 7510 case 0xD: 7511 case 0xA: 7512 if (p != publicId && p[-1] != 0x20) 7513 *p++ = 0x20; 7514 break; 7515 default: 7516 *p++ = *s; 7517 } 7518 } 7519 if (p != publicId && p[-1] == 0x20) 7520 --p; 7521 *p = XML_T('\0'); 7522 } 7523 7524 static DTD * 7525 dtdCreate(XML_Parser parser) { 7526 DTD *p = MALLOC(parser, sizeof(DTD)); 7527 if (p == NULL) 7528 return p; 7529 poolInit(&(p->pool), parser); 7530 poolInit(&(p->entityValuePool), parser); 7531 hashTableInit(&(p->generalEntities), parser); 7532 hashTableInit(&(p->elementTypes), parser); 7533 hashTableInit(&(p->attributeIds), parser); 7534 hashTableInit(&(p->prefixes), parser); 7535 #ifdef XML_DTD 7536 p->paramEntityRead = XML_FALSE; 7537 hashTableInit(&(p->paramEntities), parser); 7538 #endif /* XML_DTD */ 7539 p->defaultPrefix.name = NULL; 7540 p->defaultPrefix.binding = NULL; 7541 7542 p->in_eldecl = XML_FALSE; 7543 p->scaffIndex = NULL; 7544 p->scaffold = NULL; 7545 p->scaffLevel = 0; 7546 p->scaffSize = 0; 7547 p->scaffCount = 0; 7548 p->contentStringLen = 0; 7549 7550 p->keepProcessing = XML_TRUE; 7551 p->hasParamEntityRefs = XML_FALSE; 7552 p->standalone = XML_FALSE; 7553 return p; 7554 } 7555 7556 static void 7557 dtdReset(DTD *p, XML_Parser parser) { 7558 HASH_TABLE_ITER iter; 7559 hashTableIterInit(&iter, &(p->elementTypes)); 7560 for (;;) { 7561 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7562 if (! 
e) 7563 break; 7564 if (e->allocDefaultAtts != 0) 7565 FREE(parser, e->defaultAtts); 7566 } 7567 hashTableClear(&(p->generalEntities)); 7568 #ifdef XML_DTD 7569 p->paramEntityRead = XML_FALSE; 7570 hashTableClear(&(p->paramEntities)); 7571 #endif /* XML_DTD */ 7572 hashTableClear(&(p->elementTypes)); 7573 hashTableClear(&(p->attributeIds)); 7574 hashTableClear(&(p->prefixes)); 7575 poolClear(&(p->pool)); 7576 poolClear(&(p->entityValuePool)); 7577 p->defaultPrefix.name = NULL; 7578 p->defaultPrefix.binding = NULL; 7579 7580 p->in_eldecl = XML_FALSE; 7581 7582 FREE(parser, p->scaffIndex); 7583 p->scaffIndex = NULL; 7584 FREE(parser, p->scaffold); 7585 p->scaffold = NULL; 7586 7587 p->scaffLevel = 0; 7588 p->scaffSize = 0; 7589 p->scaffCount = 0; 7590 p->contentStringLen = 0; 7591 7592 p->keepProcessing = XML_TRUE; 7593 p->hasParamEntityRefs = XML_FALSE; 7594 p->standalone = XML_FALSE; 7595 } 7596 7597 static void 7598 dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { 7599 HASH_TABLE_ITER iter; 7600 hashTableIterInit(&iter, &(p->elementTypes)); 7601 for (;;) { 7602 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7603 if (! e) 7604 break; 7605 if (e->allocDefaultAtts != 0) 7606 FREE(parser, e->defaultAtts); 7607 } 7608 hashTableDestroy(&(p->generalEntities)); 7609 #ifdef XML_DTD 7610 hashTableDestroy(&(p->paramEntities)); 7611 #endif /* XML_DTD */ 7612 hashTableDestroy(&(p->elementTypes)); 7613 hashTableDestroy(&(p->attributeIds)); 7614 hashTableDestroy(&(p->prefixes)); 7615 poolDestroy(&(p->pool)); 7616 poolDestroy(&(p->entityValuePool)); 7617 if (isDocEntity) { 7618 FREE(parser, p->scaffIndex); 7619 FREE(parser, p->scaffold); 7620 } 7621 FREE(parser, p); 7622 } 7623 7624 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. 7625 The new DTD has already been initialized. 
7626 */ 7627 static int 7628 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 7629 XML_Parser parser) { 7630 HASH_TABLE_ITER iter; 7631 7632 /* Copy the prefix table. */ 7633 7634 hashTableIterInit(&iter, &(oldDtd->prefixes)); 7635 for (;;) { 7636 const XML_Char *name; 7637 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); 7638 if (! oldP) 7639 break; 7640 name = poolCopyString(&(newDtd->pool), oldP->name); 7641 if (! name) 7642 return 0; 7643 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) 7644 return 0; 7645 } 7646 7647 hashTableIterInit(&iter, &(oldDtd->attributeIds)); 7648 7649 /* Copy the attribute id table. */ 7650 7651 for (;;) { 7652 ATTRIBUTE_ID *newA; 7653 const XML_Char *name; 7654 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); 7655 7656 if (! oldA) 7657 break; 7658 /* Remember to allocate the scratch byte before the name. */ 7659 if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) 7660 return 0; 7661 name = poolCopyString(&(newDtd->pool), oldA->name); 7662 if (! name) 7663 return 0; 7664 ++name; 7665 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, 7666 sizeof(ATTRIBUTE_ID)); 7667 if (! newA) 7668 return 0; 7669 newA->maybeTokenized = oldA->maybeTokenized; 7670 if (oldA->prefix) { 7671 newA->xmlns = oldA->xmlns; 7672 if (oldA->prefix == &oldDtd->defaultPrefix) 7673 newA->prefix = &newDtd->defaultPrefix; 7674 else 7675 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7676 oldA->prefix->name, 0); 7677 } 7678 } 7679 7680 /* Copy the element type table. */ 7681 7682 hashTableIterInit(&iter, &(oldDtd->elementTypes)); 7683 7684 for (;;) { 7685 int i; 7686 ELEMENT_TYPE *newE; 7687 const XML_Char *name; 7688 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7689 if (! oldE) 7690 break; 7691 name = poolCopyString(&(newDtd->pool), oldE->name); 7692 if (! 
name) 7693 return 0; 7694 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, 7695 sizeof(ELEMENT_TYPE)); 7696 if (! newE) 7697 return 0; 7698 if (oldE->nDefaultAtts) { 7699 /* Detect and prevent integer overflow. 7700 * The preprocessor guard addresses the "always false" warning 7701 * from -Wtype-limits on platforms where 7702 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ 7703 #if UINT_MAX >= SIZE_MAX 7704 if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { 7705 return 0; 7706 } 7707 #endif 7708 newE->defaultAtts 7709 = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 7710 if (! newE->defaultAtts) { 7711 return 0; 7712 } 7713 } 7714 if (oldE->idAtt) 7715 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), 7716 oldE->idAtt->name, 0); 7717 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 7718 if (oldE->prefix) 7719 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7720 oldE->prefix->name, 0); 7721 for (i = 0; i < newE->nDefaultAtts; i++) { 7722 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( 7723 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 7724 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 7725 if (oldE->defaultAtts[i].value) { 7726 newE->defaultAtts[i].value 7727 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 7728 if (! newE->defaultAtts[i].value) 7729 return 0; 7730 } else 7731 newE->defaultAtts[i].value = NULL; 7732 } 7733 } 7734 7735 /* Copy the entity tables. */ 7736 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), 7737 &(oldDtd->generalEntities))) 7738 return 0; 7739 7740 #ifdef XML_DTD 7741 if (! 
copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), 7742 &(oldDtd->paramEntities))) 7743 return 0; 7744 newDtd->paramEntityRead = oldDtd->paramEntityRead; 7745 #endif /* XML_DTD */ 7746 7747 newDtd->keepProcessing = oldDtd->keepProcessing; 7748 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; 7749 newDtd->standalone = oldDtd->standalone; 7750 7751 /* Don't want deep copying for scaffolding */ 7752 newDtd->in_eldecl = oldDtd->in_eldecl; 7753 newDtd->scaffold = oldDtd->scaffold; 7754 newDtd->contentStringLen = oldDtd->contentStringLen; 7755 newDtd->scaffSize = oldDtd->scaffSize; 7756 newDtd->scaffLevel = oldDtd->scaffLevel; 7757 newDtd->scaffIndex = oldDtd->scaffIndex; 7758 7759 return 1; 7760 } /* End dtdCopy */ 7761 7762 static int 7763 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 7764 STRING_POOL *newPool, const HASH_TABLE *oldTable) { 7765 HASH_TABLE_ITER iter; 7766 const XML_Char *cachedOldBase = NULL; 7767 const XML_Char *cachedNewBase = NULL; 7768 7769 hashTableIterInit(&iter, oldTable); 7770 7771 for (;;) { 7772 ENTITY *newE; 7773 const XML_Char *name; 7774 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); 7775 if (! oldE) 7776 break; 7777 name = poolCopyString(newPool, oldE->name); 7778 if (! name) 7779 return 0; 7780 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); 7781 if (! newE) 7782 return 0; 7783 if (oldE->systemId) { 7784 const XML_Char *tem = poolCopyString(newPool, oldE->systemId); 7785 if (! tem) 7786 return 0; 7787 newE->systemId = tem; 7788 if (oldE->base) { 7789 if (oldE->base == cachedOldBase) 7790 newE->base = cachedNewBase; 7791 else { 7792 cachedOldBase = oldE->base; 7793 tem = poolCopyString(newPool, cachedOldBase); 7794 if (! tem) 7795 return 0; 7796 cachedNewBase = newE->base = tem; 7797 } 7798 } 7799 if (oldE->publicId) { 7800 tem = poolCopyString(newPool, oldE->publicId); 7801 if (! 
tem) 7802 return 0; 7803 newE->publicId = tem; 7804 } 7805 } else { 7806 const XML_Char *tem 7807 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 7808 if (! tem) 7809 return 0; 7810 newE->textPtr = tem; 7811 newE->textLen = oldE->textLen; 7812 } 7813 if (oldE->notation) { 7814 const XML_Char *tem = poolCopyString(newPool, oldE->notation); 7815 if (! tem) 7816 return 0; 7817 newE->notation = tem; 7818 } 7819 newE->is_param = oldE->is_param; 7820 newE->is_internal = oldE->is_internal; 7821 } 7822 return 1; 7823 } 7824 7825 #define INIT_POWER 6 7826 7827 static XML_Bool FASTCALL 7828 keyeq(KEY s1, KEY s2) { 7829 for (; *s1 == *s2; s1++, s2++) 7830 if (*s1 == 0) 7831 return XML_TRUE; 7832 return XML_FALSE; 7833 } 7834 7835 static size_t 7836 keylen(KEY s) { 7837 size_t len = 0; 7838 for (; *s; s++, len++) 7839 ; 7840 return len; 7841 } 7842 7843 static void 7844 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { 7845 key->k[0] = 0; 7846 key->k[1] = get_hash_secret_salt(parser); 7847 } 7848 7849 static unsigned long FASTCALL 7850 hash(XML_Parser parser, KEY s) { 7851 struct siphash state; 7852 struct sipkey key; 7853 (void)sip24_valid; 7854 copy_salt_to_sipkey(parser, &key); 7855 sip24_init(&state, &key); 7856 sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 7857 return (unsigned long)sip24_final(&state); 7858 } 7859 7860 static NAMED * 7861 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { 7862 size_t i; 7863 if (table->size == 0) { 7864 size_t tsize; 7865 if (! createSize) 7866 return NULL; 7867 table->power = INIT_POWER; 7868 /* table->size is a power of 2 */ 7869 table->size = (size_t)1 << INIT_POWER; 7870 tsize = table->size * sizeof(NAMED *); 7871 table->v = MALLOC(table->parser, tsize); 7872 if (! 
table->v) { 7873 table->size = 0; 7874 return NULL; 7875 } 7876 memset(table->v, 0, tsize); 7877 i = hash(parser, name) & ((unsigned long)table->size - 1); 7878 } else { 7879 unsigned long h = hash(parser, name); 7880 unsigned long mask = (unsigned long)table->size - 1; 7881 unsigned char step = 0; 7882 i = h & mask; 7883 while (table->v[i]) { 7884 if (keyeq(name, table->v[i]->name)) 7885 return table->v[i]; 7886 if (! step) 7887 step = PROBE_STEP(h, mask, table->power); 7888 i < step ? (i += table->size - step) : (i -= step); 7889 } 7890 if (! createSize) 7891 return NULL; 7892 7893 /* check for overflow (table is half full) */ 7894 if (table->used >> (table->power - 1)) { 7895 unsigned char newPower = table->power + 1; 7896 7897 /* Detect and prevent invalid shift */ 7898 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 7899 return NULL; 7900 } 7901 7902 size_t newSize = (size_t)1 << newPower; 7903 unsigned long newMask = (unsigned long)newSize - 1; 7904 7905 /* Detect and prevent integer overflow */ 7906 if (newSize > SIZE_MAX / sizeof(NAMED *)) { 7907 return NULL; 7908 } 7909 7910 size_t tsize = newSize * sizeof(NAMED *); 7911 NAMED **newV = MALLOC(table->parser, tsize); 7912 if (! newV) 7913 return NULL; 7914 memset(newV, 0, tsize); 7915 for (i = 0; i < table->size; i++) 7916 if (table->v[i]) { 7917 unsigned long newHash = hash(parser, table->v[i]->name); 7918 size_t j = newHash & newMask; 7919 step = 0; 7920 while (newV[j]) { 7921 if (! step) 7922 step = PROBE_STEP(newHash, newMask, newPower); 7923 j < step ? (j += newSize - step) : (j -= step); 7924 } 7925 newV[j] = table->v[i]; 7926 } 7927 FREE(table->parser, table->v); 7928 table->v = newV; 7929 table->power = newPower; 7930 table->size = newSize; 7931 i = h & newMask; 7932 step = 0; 7933 while (table->v[i]) { 7934 if (! step) 7935 step = PROBE_STEP(h, newMask, newPower); 7936 i < step ? 
(i += newSize - step) : (i -= step); 7937 } 7938 } 7939 } 7940 table->v[i] = MALLOC(table->parser, createSize); 7941 if (! table->v[i]) 7942 return NULL; 7943 memset(table->v[i], 0, createSize); 7944 table->v[i]->name = name; 7945 (table->used)++; 7946 return table->v[i]; 7947 } 7948 7949 static void FASTCALL 7950 hashTableClear(HASH_TABLE *table) { 7951 size_t i; 7952 for (i = 0; i < table->size; i++) { 7953 FREE(table->parser, table->v[i]); 7954 table->v[i] = NULL; 7955 } 7956 table->used = 0; 7957 } 7958 7959 static void FASTCALL 7960 hashTableDestroy(HASH_TABLE *table) { 7961 size_t i; 7962 for (i = 0; i < table->size; i++) 7963 FREE(table->parser, table->v[i]); 7964 FREE(table->parser, table->v); 7965 } 7966 7967 static void FASTCALL 7968 hashTableInit(HASH_TABLE *p, XML_Parser parser) { 7969 p->power = 0; 7970 p->size = 0; 7971 p->used = 0; 7972 p->v = NULL; 7973 p->parser = parser; 7974 } 7975 7976 static void FASTCALL 7977 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { 7978 iter->p = table->v; 7979 iter->end = iter->p ? iter->p + table->size : NULL; 7980 } 7981 7982 static NAMED *FASTCALL 7983 hashTableIterNext(HASH_TABLE_ITER *iter) { 7984 while (iter->p != iter->end) { 7985 NAMED *tem = *(iter->p)++; 7986 if (tem) 7987 return tem; 7988 } 7989 return NULL; 7990 } 7991 7992 static void FASTCALL 7993 poolInit(STRING_POOL *pool, XML_Parser parser) { 7994 pool->blocks = NULL; 7995 pool->freeBlocks = NULL; 7996 pool->start = NULL; 7997 pool->ptr = NULL; 7998 pool->end = NULL; 7999 pool->parser = parser; 8000 } 8001 8002 static void FASTCALL 8003 poolClear(STRING_POOL *pool) { 8004 if (! 
pool->freeBlocks) 8005 pool->freeBlocks = pool->blocks; 8006 else { 8007 BLOCK *p = pool->blocks; 8008 while (p) { 8009 BLOCK *tem = p->next; 8010 p->next = pool->freeBlocks; 8011 pool->freeBlocks = p; 8012 p = tem; 8013 } 8014 } 8015 pool->blocks = NULL; 8016 pool->start = NULL; 8017 pool->ptr = NULL; 8018 pool->end = NULL; 8019 } 8020 8021 static void FASTCALL 8022 poolDestroy(STRING_POOL *pool) { 8023 BLOCK *p = pool->blocks; 8024 while (p) { 8025 BLOCK *tem = p->next; 8026 FREE(pool->parser, p); 8027 p = tem; 8028 } 8029 p = pool->freeBlocks; 8030 while (p) { 8031 BLOCK *tem = p->next; 8032 FREE(pool->parser, p); 8033 p = tem; 8034 } 8035 } 8036 8037 static XML_Char * 8038 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 8039 const char *end) { 8040 if (! pool->ptr && ! poolGrow(pool)) 8041 return NULL; 8042 for (;;) { 8043 const enum XML_Convert_Result convert_res = XmlConvert( 8044 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); 8045 if ((convert_res == XML_CONVERT_COMPLETED) 8046 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 8047 break; 8048 if (! poolGrow(pool)) 8049 return NULL; 8050 } 8051 return pool->start; 8052 } 8053 8054 static const XML_Char *FASTCALL 8055 poolCopyString(STRING_POOL *pool, const XML_Char *s) { 8056 do { 8057 if (! poolAppendChar(pool, *s)) 8058 return NULL; 8059 } while (*s++); 8060 s = pool->start; 8061 poolFinish(pool); 8062 return s; 8063 } 8064 8065 // A version of `poolCopyString` that does not call `poolFinish` 8066 // and reverts any partial advancement upon failure. 8067 static const XML_Char *FASTCALL 8068 poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) { 8069 const XML_Char *const original = s; 8070 do { 8071 if (! 
poolAppendChar(pool, *s)) { 8072 // Revert any previously successful advancement 8073 const ptrdiff_t advancedBy = s - original; 8074 if (advancedBy > 0) 8075 pool->ptr -= advancedBy; 8076 return NULL; 8077 } 8078 } while (*s++); 8079 return pool->start; 8080 } 8081 8082 static const XML_Char * 8083 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { 8084 if (! pool->ptr && ! poolGrow(pool)) { 8085 /* The following line is unreachable given the current usage of 8086 * poolCopyStringN(). Currently it is called from exactly one 8087 * place to copy the text of a simple general entity. By that 8088 * point, the name of the entity is already stored in the pool, so 8089 * pool->ptr cannot be NULL. 8090 * 8091 * If poolCopyStringN() is used elsewhere as it well might be, 8092 * this line may well become executable again. Regardless, this 8093 * sort of check shouldn't be removed lightly, so we just exclude 8094 * it from the coverage statistics. 8095 */ 8096 return NULL; /* LCOV_EXCL_LINE */ 8097 } 8098 for (; n > 0; --n, s++) { 8099 if (! poolAppendChar(pool, *s)) 8100 return NULL; 8101 } 8102 s = pool->start; 8103 poolFinish(pool); 8104 return s; 8105 } 8106 8107 static const XML_Char *FASTCALL 8108 poolAppendString(STRING_POOL *pool, const XML_Char *s) { 8109 while (*s) { 8110 if (! poolAppendChar(pool, *s)) 8111 return NULL; 8112 s++; 8113 } 8114 return pool->start; 8115 } 8116 8117 static XML_Char * 8118 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 8119 const char *end) { 8120 if (! poolAppend(pool, enc, ptr, end)) 8121 return NULL; 8122 if (pool->ptr == pool->end && ! 
poolGrow(pool)) 8123 return NULL; 8124 *(pool->ptr)++ = 0; 8125 return pool->start; 8126 } 8127 8128 static size_t 8129 poolBytesToAllocateFor(int blockSize) { 8130 /* Unprotected math would be: 8131 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 8132 ** 8133 ** Detect overflow, avoiding _signed_ overflow undefined behavior 8134 ** For a + b * c we check b * c in isolation first, so that addition of a 8135 ** on top has no chance of making us accept a small non-negative number 8136 */ 8137 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 8138 8139 if (blockSize <= 0) 8140 return 0; 8141 8142 if (blockSize > (int)(INT_MAX / stretch)) 8143 return 0; 8144 8145 { 8146 const int stretchedBlockSize = blockSize * (int)stretch; 8147 const int bytesToAllocate 8148 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 8149 if (bytesToAllocate < 0) 8150 return 0; 8151 8152 return (size_t)bytesToAllocate; 8153 } 8154 } 8155 8156 static XML_Bool FASTCALL 8157 poolGrow(STRING_POOL *pool) { 8158 if (pool->freeBlocks) { 8159 if (pool->start == NULL) { 8160 pool->blocks = pool->freeBlocks; 8161 pool->freeBlocks = pool->freeBlocks->next; 8162 pool->blocks->next = NULL; 8163 pool->start = pool->blocks->s; 8164 pool->end = pool->start + pool->blocks->size; 8165 pool->ptr = pool->start; 8166 return XML_TRUE; 8167 } 8168 if (pool->end - pool->start < pool->freeBlocks->size) { 8169 BLOCK *tem = pool->freeBlocks->next; 8170 pool->freeBlocks->next = pool->blocks; 8171 pool->blocks = pool->freeBlocks; 8172 pool->freeBlocks = tem; 8173 memcpy(pool->blocks->s, pool->start, 8174 (pool->end - pool->start) * sizeof(XML_Char)); 8175 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 8176 pool->start = pool->blocks->s; 8177 pool->end = pool->start + pool->blocks->size; 8178 return XML_TRUE; 8179 } 8180 } 8181 if (pool->blocks && pool->start == pool->blocks->s) { 8182 BLOCK *temp; 8183 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 8184 
size_t bytesToAllocate; 8185 8186 /* NOTE: Needs to be calculated prior to calling `realloc` 8187 to avoid dangling pointers: */ 8188 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 8189 8190 if (blockSize < 0) { 8191 /* This condition traps a situation where either more than 8192 * INT_MAX/2 bytes have already been allocated. This isn't 8193 * readily testable, since it is unlikely that an average 8194 * machine will have that much memory, so we exclude it from the 8195 * coverage statistics. 8196 */ 8197 return XML_FALSE; /* LCOV_EXCL_LINE */ 8198 } 8199 8200 bytesToAllocate = poolBytesToAllocateFor(blockSize); 8201 if (bytesToAllocate == 0) 8202 return XML_FALSE; 8203 8204 temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate); 8205 if (temp == NULL) 8206 return XML_FALSE; 8207 pool->blocks = temp; 8208 pool->blocks->size = blockSize; 8209 pool->ptr = pool->blocks->s + offsetInsideBlock; 8210 pool->start = pool->blocks->s; 8211 pool->end = pool->start + blockSize; 8212 } else { 8213 BLOCK *tem; 8214 int blockSize = (int)(pool->end - pool->start); 8215 size_t bytesToAllocate; 8216 8217 if (blockSize < 0) { 8218 /* This condition traps a situation where either more than 8219 * INT_MAX bytes have already been allocated (which is prevented 8220 * by various pieces of program logic, not least this one, never 8221 * mind the unlikelihood of actually having that much memory) or 8222 * the pool control fields have been corrupted (which could 8223 * conceivably happen in an extremely buggy user handler 8224 * function). Either way it isn't readily testable, so we 8225 * exclude it from the coverage statistics. 
8226 */ 8227 return XML_FALSE; /* LCOV_EXCL_LINE */ 8228 } 8229 8230 if (blockSize < INIT_BLOCK_SIZE) 8231 blockSize = INIT_BLOCK_SIZE; 8232 else { 8233 /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 8234 if ((int)((unsigned)blockSize * 2U) < 0) { 8235 return XML_FALSE; 8236 } 8237 blockSize *= 2; 8238 } 8239 8240 bytesToAllocate = poolBytesToAllocateFor(blockSize); 8241 if (bytesToAllocate == 0) 8242 return XML_FALSE; 8243 8244 tem = MALLOC(pool->parser, bytesToAllocate); 8245 if (! tem) 8246 return XML_FALSE; 8247 tem->size = blockSize; 8248 tem->next = pool->blocks; 8249 pool->blocks = tem; 8250 if (pool->ptr != pool->start) 8251 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 8252 pool->ptr = tem->s + (pool->ptr - pool->start); 8253 pool->start = tem->s; 8254 pool->end = tem->s + blockSize; 8255 } 8256 return XML_TRUE; 8257 } 8258 8259 static int FASTCALL 8260 nextScaffoldPart(XML_Parser parser) { 8261 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8262 CONTENT_SCAFFOLD *me; 8263 int next; 8264 8265 if (! dtd->scaffIndex) { 8266 /* Detect and prevent integer overflow. 8267 * The preprocessor guard addresses the "always false" warning 8268 * from -Wtype-limits on platforms where 8269 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 8270 #if UINT_MAX >= SIZE_MAX 8271 if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { 8272 return -1; 8273 } 8274 #endif 8275 dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int)); 8276 if (! dtd->scaffIndex) 8277 return -1; 8278 dtd->scaffIndex[0] = 0; 8279 } 8280 8281 // Will casting to int be safe further down? 8282 if (dtd->scaffCount > INT_MAX) { 8283 return -1; 8284 } 8285 8286 if (dtd->scaffCount >= dtd->scaffSize) { 8287 CONTENT_SCAFFOLD *temp; 8288 if (dtd->scaffold) { 8289 /* Detect and prevent integer overflow */ 8290 if (dtd->scaffSize > UINT_MAX / 2u) { 8291 return -1; 8292 } 8293 /* Detect and prevent integer overflow. 
8294 * The preprocessor guard addresses the "always false" warning 8295 * from -Wtype-limits on platforms where 8296 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 8297 #if UINT_MAX >= SIZE_MAX 8298 if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) { 8299 return -1; 8300 } 8301 #endif 8302 8303 temp = REALLOC(parser, dtd->scaffold, 8304 dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); 8305 if (temp == NULL) 8306 return -1; 8307 dtd->scaffSize *= 2; 8308 } else { 8309 temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); 8310 if (temp == NULL) 8311 return -1; 8312 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; 8313 } 8314 dtd->scaffold = temp; 8315 } 8316 next = (int)dtd->scaffCount++; 8317 me = &dtd->scaffold[next]; 8318 if (dtd->scaffLevel) { 8319 CONTENT_SCAFFOLD *parent 8320 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; 8321 if (parent->lastchild) { 8322 dtd->scaffold[parent->lastchild].nextsib = next; 8323 } 8324 if (! parent->childcnt) 8325 parent->firstchild = next; 8326 parent->lastchild = next; 8327 parent->childcnt++; 8328 } 8329 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; 8330 return next; 8331 } 8332 8333 static XML_Content * 8334 build_model(XML_Parser parser) { 8335 /* Function build_model transforms the existing parser->m_dtd->scaffold 8336 * array of CONTENT_SCAFFOLD tree nodes into a new array of 8337 * XML_Content tree nodes followed by a gapless list of zero-terminated 8338 * strings. */ 8339 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8340 XML_Content *ret; 8341 XML_Char *str; /* the current string writing location */ 8342 8343 /* Detect and prevent integer overflow. 8344 * The preprocessor guard addresses the "always false" warning 8345 * from -Wtype-limits on platforms where 8346 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 8347 #if UINT_MAX >= SIZE_MAX 8348 if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) { 8349 return NULL; 8350 } 8351 if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) { 8352 return NULL; 8353 } 8354 #endif 8355 if (dtd->scaffCount * sizeof(XML_Content) 8356 > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) { 8357 return NULL; 8358 } 8359 8360 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) 8361 + (dtd->contentStringLen * sizeof(XML_Char))); 8362 8363 // NOTE: We are avoiding MALLOC(..) here to so that 8364 // applications that are not using XML_FreeContentModel but plain 8365 // free(..) or .free_fcn() to free the content model's memory are safe. 8366 ret = parser->m_mem.malloc_fcn(allocsize); 8367 if (! ret) 8368 return NULL; 8369 8370 /* What follows is an iterative implementation (of what was previously done 8371 * recursively in a dedicated function called "build_node". The old recursive 8372 * build_node could be forced into stack exhaustion from input as small as a 8373 * few megabyte, and so that was a security issue. Hence, a function call 8374 * stack is avoided now by resolving recursion.) 8375 * 8376 * The iterative approach works as follows: 8377 * 8378 * - We have two writing pointers, both walking up the result array; one does 8379 * the work, the other creates "jobs" for its colleague to do, and leads 8380 * the way: 8381 * 8382 * - The faster one, pointer jobDest, always leads and writes "what job 8383 * to do" by the other, once they reach that place in the 8384 * array: leader "jobDest" stores the source node array index (relative 8385 * to array dtd->scaffold) in field "numchildren". 8386 * 8387 * - The slower one, pointer dest, looks at the value stored in the 8388 * "numchildren" field (which actually holds a source node array index 8389 * at that time) and puts the real data from dtd->scaffold in. 
8390 * 8391 * - Before the loop starts, jobDest writes source array index 0 8392 * (where the root node is located) so that dest will have something to do 8393 * when it starts operation. 8394 * 8395 * - Whenever nodes with children are encountered, jobDest appends 8396 * them as new jobs, in order. As a result, tree node siblings are 8397 * adjacent in the resulting array, for example: 8398 * 8399 * [0] root, has two children 8400 * [1] first child of 0, has three children 8401 * [3] first child of 1, does not have children 8402 * [4] second child of 1, does not have children 8403 * [5] third child of 1, does not have children 8404 * [2] second child of 0, does not have children 8405 * 8406 * Or (the same data) presented in flat array view: 8407 * 8408 * [0] root, has two children 8409 * 8410 * [1] first child of 0, has three children 8411 * [2] second child of 0, does not have children 8412 * 8413 * [3] first child of 1, does not have children 8414 * [4] second child of 1, does not have children 8415 * [5] third child of 1, does not have children 8416 * 8417 * - The algorithm repeats until all target array indices have been processed. 
8418 */ 8419 XML_Content *dest = ret; /* tree node writing location, moves upwards */ 8420 XML_Content *const destLimit = &ret[dtd->scaffCount]; 8421 XML_Content *jobDest = ret; /* next free writing location in target array */ 8422 str = (XML_Char *)&ret[dtd->scaffCount]; 8423 8424 /* Add the starting job, the root node (index 0) of the source tree */ 8425 (jobDest++)->numchildren = 0; 8426 8427 for (; dest < destLimit; dest++) { 8428 /* Retrieve source tree array index from job storage */ 8429 const int src_node = (int)dest->numchildren; 8430 8431 /* Convert item */ 8432 dest->type = dtd->scaffold[src_node].type; 8433 dest->quant = dtd->scaffold[src_node].quant; 8434 if (dest->type == XML_CTYPE_NAME) { 8435 const XML_Char *src; 8436 dest->name = str; 8437 src = dtd->scaffold[src_node].name; 8438 for (;;) { 8439 *str++ = *src; 8440 if (! *src) 8441 break; 8442 src++; 8443 } 8444 dest->numchildren = 0; 8445 dest->children = NULL; 8446 } else { 8447 unsigned int i; 8448 int cn; 8449 dest->name = NULL; 8450 dest->numchildren = dtd->scaffold[src_node].childcnt; 8451 dest->children = jobDest; 8452 8453 /* Append scaffold indices of children to array */ 8454 for (i = 0, cn = dtd->scaffold[src_node].firstchild; 8455 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) 8456 (jobDest++)->numchildren = (unsigned int)cn; 8457 } 8458 } 8459 8460 return ret; 8461 } 8462 8463 static ELEMENT_TYPE * 8464 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, 8465 const char *end) { 8466 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 8467 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); 8468 ELEMENT_TYPE *ret; 8469 8470 if (! name) 8471 return NULL; 8472 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 8473 sizeof(ELEMENT_TYPE)); 8474 if (! ret) 8475 return NULL; 8476 if (ret->name != name) 8477 poolDiscard(&dtd->pool); 8478 else { 8479 poolFinish(&dtd->pool); 8480 if (! 
setElementTypePrefix(parser, ret)) 8481 return NULL; 8482 } 8483 return ret; 8484 } 8485 8486 static XML_Char * 8487 copyString(const XML_Char *s, XML_Parser parser) { 8488 size_t charsRequired = 0; 8489 XML_Char *result; 8490 8491 /* First determine how long the string is */ 8492 while (s[charsRequired] != 0) { 8493 charsRequired++; 8494 } 8495 /* Include the terminator */ 8496 charsRequired++; 8497 8498 /* Now allocate space for the copy */ 8499 result = MALLOC(parser, charsRequired * sizeof(XML_Char)); 8500 if (result == NULL) 8501 return NULL; 8502 /* Copy the original into place */ 8503 memcpy(result, s, charsRequired * sizeof(XML_Char)); 8504 return result; 8505 } 8506 8507 #if XML_GE == 1 8508 8509 static float 8510 accountingGetCurrentAmplification(XML_Parser rootParser) { 8511 // 1.........1.........12 => 22 8512 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; 8513 const XmlBigCount countBytesOutput 8514 = rootParser->m_accounting.countBytesDirect 8515 + rootParser->m_accounting.countBytesIndirect; 8516 const float amplificationFactor 8517 = rootParser->m_accounting.countBytesDirect 8518 ? ((float)countBytesOutput 8519 / (float)(rootParser->m_accounting.countBytesDirect)) 8520 : ((float)(lenOfShortestInclude 8521 + rootParser->m_accounting.countBytesIndirect) 8522 / (float)lenOfShortestInclude); 8523 assert(! rootParser->m_parentParser); 8524 return amplificationFactor; 8525 } 8526 8527 static void 8528 accountingReportStats(XML_Parser originParser, const char *epilog) { 8529 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8530 assert(! 
rootParser->m_parentParser); 8531 8532 if (rootParser->m_accounting.debugLevel == 0u) { 8533 return; 8534 } 8535 8536 const float amplificationFactor 8537 = accountingGetCurrentAmplification(rootParser); 8538 fprintf(stderr, 8539 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( 8540 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 8541 (void *)rootParser, rootParser->m_accounting.countBytesDirect, 8542 rootParser->m_accounting.countBytesIndirect, 8543 (double)amplificationFactor, epilog); 8544 } 8545 8546 static void 8547 accountingOnAbort(XML_Parser originParser) { 8548 accountingReportStats(originParser, " ABORTING\n"); 8549 } 8550 8551 static void 8552 accountingReportDiff(XML_Parser rootParser, 8553 unsigned int levelsAwayFromRootParser, const char *before, 8554 const char *after, ptrdiff_t bytesMore, int source_line, 8555 enum XML_Account account) { 8556 assert(! rootParser->m_parentParser); 8557 8558 fprintf(stderr, 8559 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"", 8560 bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", 8561 levelsAwayFromRootParser, source_line, 10, ""); 8562 8563 const char ellipis[] = "[..]"; 8564 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 8565 const unsigned int contextLength = 10; 8566 8567 /* Note: Performance is of no concern here */ 8568 const char *walker = before; 8569 if ((rootParser->m_accounting.debugLevel >= 3u) 8570 || (after - before) 8571 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 8572 for (; walker < after; walker++) { 8573 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8574 } 8575 } else { 8576 for (; walker < before + contextLength; walker++) { 8577 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8578 } 8579 fprintf(stderr, ellipis); 8580 walker = after - contextLength; 8581 for (; walker < after; walker++) { 8582 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 8583 } 8584 } 8585 fprintf(stderr, "\"\n"); 8586 } 8587 8588 static XML_Bool 8589 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, 8590 const char *after, int source_line, 8591 enum XML_Account account) { 8592 /* Note: We need to check the token type *first* to be sure that 8593 * we can even access variable <after>, safely. 8594 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ 8595 switch (tok) { 8596 case XML_TOK_INVALID: 8597 case XML_TOK_PARTIAL: 8598 case XML_TOK_PARTIAL_CHAR: 8599 case XML_TOK_NONE: 8600 return XML_TRUE; 8601 } 8602 8603 if (account == XML_ACCOUNT_NONE) 8604 return XML_TRUE; /* because these bytes have been accounted for, already */ 8605 8606 unsigned int levelsAwayFromRootParser; 8607 const XML_Parser rootParser 8608 = getRootParserOf(originParser, &levelsAwayFromRootParser); 8609 assert(! 
rootParser->m_parentParser); 8610 8611 const int isDirect 8612 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 8613 const ptrdiff_t bytesMore = after - before; 8614 8615 XmlBigCount *const additionTarget 8616 = isDirect ? &rootParser->m_accounting.countBytesDirect 8617 : &rootParser->m_accounting.countBytesIndirect; 8618 8619 /* Detect and avoid integer overflow */ 8620 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) 8621 return XML_FALSE; 8622 *additionTarget += bytesMore; 8623 8624 const XmlBigCount countBytesOutput 8625 = rootParser->m_accounting.countBytesDirect 8626 + rootParser->m_accounting.countBytesIndirect; 8627 const float amplificationFactor 8628 = accountingGetCurrentAmplification(rootParser); 8629 const XML_Bool tolerated 8630 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) 8631 || (amplificationFactor 8632 <= rootParser->m_accounting.maximumAmplificationFactor); 8633 8634 if (rootParser->m_accounting.debugLevel >= 2u) { 8635 accountingReportStats(rootParser, ""); 8636 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, 8637 bytesMore, source_line, account); 8638 } 8639 8640 return tolerated; 8641 } 8642 8643 unsigned long long 8644 testingAccountingGetCountBytesDirect(XML_Parser parser) { 8645 if (! parser) 8646 return 0; 8647 return parser->m_accounting.countBytesDirect; 8648 } 8649 8650 unsigned long long 8651 testingAccountingGetCountBytesIndirect(XML_Parser parser) { 8652 if (! parser) 8653 return 0; 8654 return parser->m_accounting.countBytesIndirect; 8655 } 8656 8657 static void 8658 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, 8659 const char *action, int sourceLine) { 8660 assert(! 
rootParser->m_parentParser); 8661 if (rootParser->m_entity_stats.debugLevel == 0u) 8662 return; 8663 8664 # if defined(XML_UNICODE) 8665 const char *const entityName = "[..]"; 8666 # else 8667 const char *const entityName = entity->name; 8668 # endif 8669 8670 fprintf( 8671 stderr, 8672 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n", 8673 (void *)rootParser, rootParser->m_entity_stats.countEverOpened, 8674 rootParser->m_entity_stats.currentDepth, 8675 rootParser->m_entity_stats.maximumDepthSeen, 8676 ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "", 8677 entity->is_param ? "%" : "&", entityName, action, entity->textLen, 8678 sourceLine); 8679 } 8680 8681 static void 8682 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { 8683 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8684 assert(! rootParser->m_parentParser); 8685 8686 rootParser->m_entity_stats.countEverOpened++; 8687 rootParser->m_entity_stats.currentDepth++; 8688 if (rootParser->m_entity_stats.currentDepth 8689 > rootParser->m_entity_stats.maximumDepthSeen) { 8690 rootParser->m_entity_stats.maximumDepthSeen++; 8691 } 8692 8693 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 8694 } 8695 8696 static void 8697 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { 8698 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 8699 assert(! rootParser->m_parentParser); 8700 8701 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 8702 rootParser->m_entity_stats.currentDepth--; 8703 } 8704 8705 #endif /* XML_GE == 1 */ 8706 8707 static XML_Parser 8708 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { 8709 XML_Parser rootParser = parser; 8710 unsigned int stepsTakenUpwards = 0; 8711 while (rootParser->m_parentParser) { 8712 rootParser = rootParser->m_parentParser; 8713 stepsTakenUpwards++; 8714 } 8715 assert(! 
rootParser->m_parentParser); 8716 if (outLevelDiff != NULL) { 8717 *outLevelDiff = stepsTakenUpwards; 8718 } 8719 return rootParser; 8720 } 8721 8722 #if XML_GE == 1 8723 8724 const char * 8725 unsignedCharToPrintable(unsigned char c) { 8726 switch (c) { 8727 case 0: 8728 return "\\0"; 8729 case 1: 8730 return "\\x1"; 8731 case 2: 8732 return "\\x2"; 8733 case 3: 8734 return "\\x3"; 8735 case 4: 8736 return "\\x4"; 8737 case 5: 8738 return "\\x5"; 8739 case 6: 8740 return "\\x6"; 8741 case 7: 8742 return "\\x7"; 8743 case 8: 8744 return "\\x8"; 8745 case 9: 8746 return "\\t"; 8747 case 10: 8748 return "\\n"; 8749 case 11: 8750 return "\\xB"; 8751 case 12: 8752 return "\\xC"; 8753 case 13: 8754 return "\\r"; 8755 case 14: 8756 return "\\xE"; 8757 case 15: 8758 return "\\xF"; 8759 case 16: 8760 return "\\x10"; 8761 case 17: 8762 return "\\x11"; 8763 case 18: 8764 return "\\x12"; 8765 case 19: 8766 return "\\x13"; 8767 case 20: 8768 return "\\x14"; 8769 case 21: 8770 return "\\x15"; 8771 case 22: 8772 return "\\x16"; 8773 case 23: 8774 return "\\x17"; 8775 case 24: 8776 return "\\x18"; 8777 case 25: 8778 return "\\x19"; 8779 case 26: 8780 return "\\x1A"; 8781 case 27: 8782 return "\\x1B"; 8783 case 28: 8784 return "\\x1C"; 8785 case 29: 8786 return "\\x1D"; 8787 case 30: 8788 return "\\x1E"; 8789 case 31: 8790 return "\\x1F"; 8791 case 32: 8792 return " "; 8793 case 33: 8794 return "!"; 8795 case 34: 8796 return "\\\""; 8797 case 35: 8798 return "#"; 8799 case 36: 8800 return "$"; 8801 case 37: 8802 return "%"; 8803 case 38: 8804 return "&"; 8805 case 39: 8806 return "'"; 8807 case 40: 8808 return "("; 8809 case 41: 8810 return ")"; 8811 case 42: 8812 return "*"; 8813 case 43: 8814 return "+"; 8815 case 44: 8816 return ","; 8817 case 45: 8818 return "-"; 8819 case 46: 8820 return "."; 8821 case 47: 8822 return "/"; 8823 case 48: 8824 return "0"; 8825 case 49: 8826 return "1"; 8827 case 50: 8828 return "2"; 8829 case 51: 8830 return "3"; 8831 case 52: 8832 return "4"; 
8833 case 53: 8834 return "5"; 8835 case 54: 8836 return "6"; 8837 case 55: 8838 return "7"; 8839 case 56: 8840 return "8"; 8841 case 57: 8842 return "9"; 8843 case 58: 8844 return ":"; 8845 case 59: 8846 return ";"; 8847 case 60: 8848 return "<"; 8849 case 61: 8850 return "="; 8851 case 62: 8852 return ">"; 8853 case 63: 8854 return "?"; 8855 case 64: 8856 return "@"; 8857 case 65: 8858 return "A"; 8859 case 66: 8860 return "B"; 8861 case 67: 8862 return "C"; 8863 case 68: 8864 return "D"; 8865 case 69: 8866 return "E"; 8867 case 70: 8868 return "F"; 8869 case 71: 8870 return "G"; 8871 case 72: 8872 return "H"; 8873 case 73: 8874 return "I"; 8875 case 74: 8876 return "J"; 8877 case 75: 8878 return "K"; 8879 case 76: 8880 return "L"; 8881 case 77: 8882 return "M"; 8883 case 78: 8884 return "N"; 8885 case 79: 8886 return "O"; 8887 case 80: 8888 return "P"; 8889 case 81: 8890 return "Q"; 8891 case 82: 8892 return "R"; 8893 case 83: 8894 return "S"; 8895 case 84: 8896 return "T"; 8897 case 85: 8898 return "U"; 8899 case 86: 8900 return "V"; 8901 case 87: 8902 return "W"; 8903 case 88: 8904 return "X"; 8905 case 89: 8906 return "Y"; 8907 case 90: 8908 return "Z"; 8909 case 91: 8910 return "["; 8911 case 92: 8912 return "\\\\"; 8913 case 93: 8914 return "]"; 8915 case 94: 8916 return "^"; 8917 case 95: 8918 return "_"; 8919 case 96: 8920 return "`"; 8921 case 97: 8922 return "a"; 8923 case 98: 8924 return "b"; 8925 case 99: 8926 return "c"; 8927 case 100: 8928 return "d"; 8929 case 101: 8930 return "e"; 8931 case 102: 8932 return "f"; 8933 case 103: 8934 return "g"; 8935 case 104: 8936 return "h"; 8937 case 105: 8938 return "i"; 8939 case 106: 8940 return "j"; 8941 case 107: 8942 return "k"; 8943 case 108: 8944 return "l"; 8945 case 109: 8946 return "m"; 8947 case 110: 8948 return "n"; 8949 case 111: 8950 return "o"; 8951 case 112: 8952 return "p"; 8953 case 113: 8954 return "q"; 8955 case 114: 8956 return "r"; 8957 case 115: 8958 return "s"; 8959 case 116: 8960 return 
"t"; 8961 case 117: 8962 return "u"; 8963 case 118: 8964 return "v"; 8965 case 119: 8966 return "w"; 8967 case 120: 8968 return "x"; 8969 case 121: 8970 return "y"; 8971 case 122: 8972 return "z"; 8973 case 123: 8974 return "{"; 8975 case 124: 8976 return "|"; 8977 case 125: 8978 return "}"; 8979 case 126: 8980 return "~"; 8981 case 127: 8982 return "\\x7F"; 8983 case 128: 8984 return "\\x80"; 8985 case 129: 8986 return "\\x81"; 8987 case 130: 8988 return "\\x82"; 8989 case 131: 8990 return "\\x83"; 8991 case 132: 8992 return "\\x84"; 8993 case 133: 8994 return "\\x85"; 8995 case 134: 8996 return "\\x86"; 8997 case 135: 8998 return "\\x87"; 8999 case 136: 9000 return "\\x88"; 9001 case 137: 9002 return "\\x89"; 9003 case 138: 9004 return "\\x8A"; 9005 case 139: 9006 return "\\x8B"; 9007 case 140: 9008 return "\\x8C"; 9009 case 141: 9010 return "\\x8D"; 9011 case 142: 9012 return "\\x8E"; 9013 case 143: 9014 return "\\x8F"; 9015 case 144: 9016 return "\\x90"; 9017 case 145: 9018 return "\\x91"; 9019 case 146: 9020 return "\\x92"; 9021 case 147: 9022 return "\\x93"; 9023 case 148: 9024 return "\\x94"; 9025 case 149: 9026 return "\\x95"; 9027 case 150: 9028 return "\\x96"; 9029 case 151: 9030 return "\\x97"; 9031 case 152: 9032 return "\\x98"; 9033 case 153: 9034 return "\\x99"; 9035 case 154: 9036 return "\\x9A"; 9037 case 155: 9038 return "\\x9B"; 9039 case 156: 9040 return "\\x9C"; 9041 case 157: 9042 return "\\x9D"; 9043 case 158: 9044 return "\\x9E"; 9045 case 159: 9046 return "\\x9F"; 9047 case 160: 9048 return "\\xA0"; 9049 case 161: 9050 return "\\xA1"; 9051 case 162: 9052 return "\\xA2"; 9053 case 163: 9054 return "\\xA3"; 9055 case 164: 9056 return "\\xA4"; 9057 case 165: 9058 return "\\xA5"; 9059 case 166: 9060 return "\\xA6"; 9061 case 167: 9062 return "\\xA7"; 9063 case 168: 9064 return "\\xA8"; 9065 case 169: 9066 return "\\xA9"; 9067 case 170: 9068 return "\\xAA"; 9069 case 171: 9070 return "\\xAB"; 9071 case 172: 9072 return "\\xAC"; 9073 case 173: 
9074 return "\\xAD"; 9075 case 174: 9076 return "\\xAE"; 9077 case 175: 9078 return "\\xAF"; 9079 case 176: 9080 return "\\xB0"; 9081 case 177: 9082 return "\\xB1"; 9083 case 178: 9084 return "\\xB2"; 9085 case 179: 9086 return "\\xB3"; 9087 case 180: 9088 return "\\xB4"; 9089 case 181: 9090 return "\\xB5"; 9091 case 182: 9092 return "\\xB6"; 9093 case 183: 9094 return "\\xB7"; 9095 case 184: 9096 return "\\xB8"; 9097 case 185: 9098 return "\\xB9"; 9099 case 186: 9100 return "\\xBA"; 9101 case 187: 9102 return "\\xBB"; 9103 case 188: 9104 return "\\xBC"; 9105 case 189: 9106 return "\\xBD"; 9107 case 190: 9108 return "\\xBE"; 9109 case 191: 9110 return "\\xBF"; 9111 case 192: 9112 return "\\xC0"; 9113 case 193: 9114 return "\\xC1"; 9115 case 194: 9116 return "\\xC2"; 9117 case 195: 9118 return "\\xC3"; 9119 case 196: 9120 return "\\xC4"; 9121 case 197: 9122 return "\\xC5"; 9123 case 198: 9124 return "\\xC6"; 9125 case 199: 9126 return "\\xC7"; 9127 case 200: 9128 return "\\xC8"; 9129 case 201: 9130 return "\\xC9"; 9131 case 202: 9132 return "\\xCA"; 9133 case 203: 9134 return "\\xCB"; 9135 case 204: 9136 return "\\xCC"; 9137 case 205: 9138 return "\\xCD"; 9139 case 206: 9140 return "\\xCE"; 9141 case 207: 9142 return "\\xCF"; 9143 case 208: 9144 return "\\xD0"; 9145 case 209: 9146 return "\\xD1"; 9147 case 210: 9148 return "\\xD2"; 9149 case 211: 9150 return "\\xD3"; 9151 case 212: 9152 return "\\xD4"; 9153 case 213: 9154 return "\\xD5"; 9155 case 214: 9156 return "\\xD6"; 9157 case 215: 9158 return "\\xD7"; 9159 case 216: 9160 return "\\xD8"; 9161 case 217: 9162 return "\\xD9"; 9163 case 218: 9164 return "\\xDA"; 9165 case 219: 9166 return "\\xDB"; 9167 case 220: 9168 return "\\xDC"; 9169 case 221: 9170 return "\\xDD"; 9171 case 222: 9172 return "\\xDE"; 9173 case 223: 9174 return "\\xDF"; 9175 case 224: 9176 return "\\xE0"; 9177 case 225: 9178 return "\\xE1"; 9179 case 226: 9180 return "\\xE2"; 9181 case 227: 9182 return "\\xE3"; 9183 case 228: 9184 return 
"\\xE4"; 9185 case 229: 9186 return "\\xE5"; 9187 case 230: 9188 return "\\xE6"; 9189 case 231: 9190 return "\\xE7"; 9191 case 232: 9192 return "\\xE8"; 9193 case 233: 9194 return "\\xE9"; 9195 case 234: 9196 return "\\xEA"; 9197 case 235: 9198 return "\\xEB"; 9199 case 236: 9200 return "\\xEC"; 9201 case 237: 9202 return "\\xED"; 9203 case 238: 9204 return "\\xEE"; 9205 case 239: 9206 return "\\xEF"; 9207 case 240: 9208 return "\\xF0"; 9209 case 241: 9210 return "\\xF1"; 9211 case 242: 9212 return "\\xF2"; 9213 case 243: 9214 return "\\xF3"; 9215 case 244: 9216 return "\\xF4"; 9217 case 245: 9218 return "\\xF5"; 9219 case 246: 9220 return "\\xF6"; 9221 case 247: 9222 return "\\xF7"; 9223 case 248: 9224 return "\\xF8"; 9225 case 249: 9226 return "\\xF9"; 9227 case 250: 9228 return "\\xFA"; 9229 case 251: 9230 return "\\xFB"; 9231 case 252: 9232 return "\\xFC"; 9233 case 253: 9234 return "\\xFD"; 9235 case 254: 9236 return "\\xFE"; 9237 case 255: 9238 return "\\xFF"; 9239 // LCOV_EXCL_START 9240 default: 9241 assert(0); /* never gets here */ 9242 return "dead code"; 9243 } 9244 assert(0); /* never gets here */ 9245 // LCOV_EXCL_STOP 9246 } 9247 9248 #endif /* XML_GE == 1 */ 9249 9250 static unsigned long 9251 getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { 9252 const char *const valueOrNull = getenv(variableName); 9253 if (valueOrNull == NULL) { 9254 return defaultDebugLevel; 9255 } 9256 const char *const value = valueOrNull; 9257 9258 errno = 0; 9259 char *afterValue = NULL; 9260 unsigned long debugLevel = strtoul(value, &afterValue, 10); 9261 if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 9262 errno = 0; 9263 return defaultDebugLevel; 9264 } 9265 9266 return debugLevel; 9267 } 9268