1 /* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+) 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> 38 Copyright (c) 2022 Jann Horn 
<jannh@google.com> 39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 42 Copyright (c) 2024 Berkay Eren Ürün <berkay.ueruen@siemens.com> 43 Licensed under the MIT license: 44 45 Permission is hereby granted, free of charge, to any person obtaining 46 a copy of this software and associated documentation files (the 47 "Software"), to deal in the Software without restriction, including 48 without limitation the rights to use, copy, modify, merge, publish, 49 distribute, sublicense, and/or sell copies of the Software, and to permit 50 persons to whom the Software is furnished to do so, subject to the 51 following conditions: 52 53 The above copyright notice and this permission notice shall be included 54 in all copies or substantial portions of the Software. 55 56 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 57 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 58 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 59 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 60 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 61 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 62 USE OR OTHER DEALINGS IN THE SOFTWARE. 63 */ 64 65 #define XML_BUILDING_EXPAT 1 66 67 #include "expat_config.h" 68 69 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 70 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 71 #endif 72 73 #if defined(XML_DTD) && XML_GE == 0 74 # error Either undefine XML_DTD or define XML_GE to 1. 75 #endif 76 77 #if ! 
defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ 78 || (XML_CONTEXT_BYTES + 0 < 0) 79 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 80 #endif 81 82 #if defined(HAVE_SYSCALL_GETRANDOM) 83 # if ! defined(_GNU_SOURCE) 84 # define _GNU_SOURCE 1 /* syscall prototype */ 85 # endif 86 #endif 87 88 #ifdef _WIN32 89 /* force stdlib to define rand_s() */ 90 # if ! defined(_CRT_RAND_S) 91 # define _CRT_RAND_S 92 # endif 93 #endif 94 95 #include <stdbool.h> 96 #include <stddef.h> 97 #include <string.h> /* memset(), memcpy() */ 98 #include <assert.h> 99 #include <limits.h> /* UINT_MAX */ 100 #include <stdio.h> /* fprintf */ 101 #include <stdlib.h> /* getenv, rand_s */ 102 #include <stdint.h> /* uintptr_t */ 103 #include <math.h> /* isnan */ 104 105 #ifdef _WIN32 106 # define getpid GetCurrentProcessId 107 #else 108 # include <sys/time.h> /* gettimeofday() */ 109 # include <sys/types.h> /* getpid() */ 110 # include <unistd.h> /* getpid() */ 111 # include <fcntl.h> /* O_RDONLY */ 112 # include <errno.h> 113 #endif 114 115 #ifdef _WIN32 116 # include "winconfig.h" 117 #endif 118 119 #include "ascii.h" 120 #include "expat.h" 121 #include "siphash.h" 122 123 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 124 # if defined(HAVE_GETRANDOM) 125 # include <sys/random.h> /* getrandom */ 126 # else 127 # include <unistd.h> /* syscall */ 128 # include <sys/syscall.h> /* SYS_getrandom */ 129 # endif 130 # if ! defined(GRND_NONBLOCK) 131 # define GRND_NONBLOCK 0x0001 132 # endif /* defined(GRND_NONBLOCK) */ 133 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 134 135 #if defined(HAVE_LIBBSD) \ 136 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) 137 # include <bsd/stdlib.h> 138 #endif 139 140 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 141 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 142 #endif 143 144 #if ! 
defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ 145 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ 146 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ 147 && ! defined(XML_POOR_ENTROPY) 148 # error You do not have support for any sources of high quality entropy \ 149 enabled. For end user security, that is probably not what you want. \ 150 \ 151 Your options include: \ 152 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 153 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 154 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 155 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 156 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ 157 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ 158 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 159 * Windows >=Vista (rand_s): _WIN32. \ 160 \ 161 If insist on not using any of these, bypass this error by defining \ 162 XML_POOR_ENTROPY; you have been warned. \ 163 \ 164 If you have reasons to patch this detection code away or need changes \ 165 to the build system, please open a bug. Thank you! 166 #endif 167 168 #ifdef XML_UNICODE 169 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 170 # define XmlConvert XmlUtf16Convert 171 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 172 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 173 # define XmlEncode XmlUtf16Encode 174 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) 175 typedef unsigned short ICHAR; 176 #else 177 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 178 # define XmlConvert XmlUtf8Convert 179 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 180 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS 181 # define XmlEncode XmlUtf8Encode 182 # define MUST_CONVERT(enc, s) (! 
(enc)->isUtf8) 183 typedef char ICHAR; 184 #endif 185 186 #ifndef XML_NS 187 188 # define XmlInitEncodingNS XmlInitEncoding 189 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding 190 # undef XmlGetInternalEncodingNS 191 # define XmlGetInternalEncodingNS XmlGetInternalEncoding 192 # define XmlParseXmlDeclNS XmlParseXmlDecl 193 194 #endif 195 196 #ifdef XML_UNICODE 197 198 # ifdef XML_UNICODE_WCHAR_T 199 # define XML_T(x) (const wchar_t) x 200 # define XML_L(x) L##x 201 # else 202 # define XML_T(x) (const unsigned short)x 203 # define XML_L(x) x 204 # endif 205 206 #else 207 208 # define XML_T(x) x 209 # define XML_L(x) x 210 211 #endif 212 213 /* Round up n to be a multiple of sz, where sz is a power of 2. */ 214 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) 215 216 /* Do safe (NULL-aware) pointer arithmetic */ 217 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) 218 219 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) 220 221 #include "internal.h" 222 #include "xmltok.h" 223 #include "xmlrole.h" 224 225 typedef const XML_Char *KEY; 226 227 typedef struct { 228 KEY name; 229 } NAMED; 230 231 typedef struct { 232 NAMED **v; 233 unsigned char power; 234 size_t size; 235 size_t used; 236 const XML_Memory_Handling_Suite *mem; 237 } HASH_TABLE; 238 239 static size_t keylen(KEY s); 240 241 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); 242 243 /* For probing (after a collision) we need a step size relative prime 244 to the hash table size, which is a power of 2. We use double-hashing, 245 since we can calculate a second hash value cheaply by taking those bits 246 of the first hash value that were discarded (masked out) when the table 247 index was calculated: index = hash & mask, where mask = table->size - 1. 248 We limit the maximum step size to table->size / 4 (mask >> 2) and make 249 it odd, since odd numbers are always relative prime to a power of 2. 
250 */ 251 #define SECOND_HASH(hash, mask, power) \ 252 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) 253 #define PROBE_STEP(hash, mask, power) \ 254 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) 255 256 typedef struct { 257 NAMED **p; 258 NAMED **end; 259 } HASH_TABLE_ITER; 260 261 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 262 #define INIT_DATA_BUF_SIZE 1024 263 #define INIT_ATTS_SIZE 16 264 #define INIT_ATTS_VERSION 0xFFFFFFFF 265 #define INIT_BLOCK_SIZE 1024 266 #define INIT_BUFFER_SIZE 1024 267 268 #define EXPAND_SPARE 24 269 270 typedef struct binding { 271 struct prefix *prefix; 272 struct binding *nextTagBinding; 273 struct binding *prevPrefixBinding; 274 const struct attribute_id *attId; 275 XML_Char *uri; 276 int uriLen; 277 int uriAlloc; 278 } BINDING; 279 280 typedef struct prefix { 281 const XML_Char *name; 282 BINDING *binding; 283 } PREFIX; 284 285 typedef struct { 286 const XML_Char *str; 287 const XML_Char *localPart; 288 const XML_Char *prefix; 289 int strLen; 290 int uriLen; 291 int prefixLen; 292 } TAG_NAME; 293 294 /* TAG represents an open element. 295 The name of the element is stored in both the document and API 296 encodings. The memory buffer 'buf' is a separately-allocated 297 memory area which stores the name. During the XML_Parse()/ 298 XML_ParseBuffer() when the element is open, the memory for the 'raw' 299 version of the name (in the document encoding) is shared with the 300 document buffer. If the element is open across calls to 301 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 302 contain the 'raw' name as well. 303 304 A parser reuses these structures, maintaining a list of allocated 305 TAG objects in a free list. 
306 */ 307 typedef struct tag { 308 struct tag *parent; /* parent of this element */ 309 const char *rawName; /* tagName in the original encoding */ 310 int rawNameLength; 311 TAG_NAME name; /* tagName in the API encoding */ 312 char *buf; /* buffer for name components */ 313 char *bufEnd; /* end of the buffer */ 314 BINDING *bindings; 315 } TAG; 316 317 typedef struct { 318 const XML_Char *name; 319 const XML_Char *textPtr; 320 int textLen; /* length in XML_Chars */ 321 int processed; /* # of processed bytes - when suspended */ 322 const XML_Char *systemId; 323 const XML_Char *base; 324 const XML_Char *publicId; 325 const XML_Char *notation; 326 XML_Bool open; 327 XML_Bool is_param; 328 XML_Bool is_internal; /* true if declared in internal subset outside PE */ 329 } ENTITY; 330 331 typedef struct { 332 enum XML_Content_Type type; 333 enum XML_Content_Quant quant; 334 const XML_Char *name; 335 int firstchild; 336 int lastchild; 337 int childcnt; 338 int nextsib; 339 } CONTENT_SCAFFOLD; 340 341 #define INIT_SCAFFOLD_ELEMENTS 32 342 343 typedef struct block { 344 struct block *next; 345 int size; 346 XML_Char s[1]; 347 } BLOCK; 348 349 typedef struct { 350 BLOCK *blocks; 351 BLOCK *freeBlocks; 352 const XML_Char *end; 353 XML_Char *ptr; 354 XML_Char *start; 355 const XML_Memory_Handling_Suite *mem; 356 } STRING_POOL; 357 358 /* The XML_Char before the name is used to determine whether 359 an attribute has been specified. 
*/ 360 typedef struct attribute_id { 361 XML_Char *name; 362 PREFIX *prefix; 363 XML_Bool maybeTokenized; 364 XML_Bool xmlns; 365 } ATTRIBUTE_ID; 366 367 typedef struct { 368 const ATTRIBUTE_ID *id; 369 XML_Bool isCdata; 370 const XML_Char *value; 371 } DEFAULT_ATTRIBUTE; 372 373 typedef struct { 374 unsigned long version; 375 unsigned long hash; 376 const XML_Char *uriName; 377 } NS_ATT; 378 379 typedef struct { 380 const XML_Char *name; 381 PREFIX *prefix; 382 const ATTRIBUTE_ID *idAtt; 383 int nDefaultAtts; 384 int allocDefaultAtts; 385 DEFAULT_ATTRIBUTE *defaultAtts; 386 } ELEMENT_TYPE; 387 388 typedef struct { 389 HASH_TABLE generalEntities; 390 HASH_TABLE elementTypes; 391 HASH_TABLE attributeIds; 392 HASH_TABLE prefixes; 393 STRING_POOL pool; 394 STRING_POOL entityValuePool; 395 /* false once a parameter entity reference has been skipped */ 396 XML_Bool keepProcessing; 397 /* true once an internal or external PE reference has been encountered; 398 this includes the reference to an external subset */ 399 XML_Bool hasParamEntityRefs; 400 XML_Bool standalone; 401 #ifdef XML_DTD 402 /* indicates if external PE has been read */ 403 XML_Bool paramEntityRead; 404 HASH_TABLE paramEntities; 405 #endif /* XML_DTD */ 406 PREFIX defaultPrefix; 407 /* === scaffolding for building content model === */ 408 XML_Bool in_eldecl; 409 CONTENT_SCAFFOLD *scaffold; 410 unsigned contentStringLen; 411 unsigned scaffSize; 412 unsigned scaffCount; 413 int scaffLevel; 414 int *scaffIndex; 415 } DTD; 416 417 typedef struct open_internal_entity { 418 const char *internalEventPtr; 419 const char *internalEventEndPtr; 420 struct open_internal_entity *next; 421 ENTITY *entity; 422 int startTagLevel; 423 XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 424 } OPEN_INTERNAL_ENTITY; 425 426 enum XML_Account { 427 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ 428 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 429 expansion */ 430 
XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 431 }; 432 433 #if XML_GE == 1 434 typedef unsigned long long XmlBigCount; 435 typedef struct accounting { 436 XmlBigCount countBytesDirect; 437 XmlBigCount countBytesIndirect; 438 unsigned long debugLevel; 439 float maximumAmplificationFactor; // >=1.0 440 unsigned long long activationThresholdBytes; 441 } ACCOUNTING; 442 443 typedef struct entity_stats { 444 unsigned int countEverOpened; 445 unsigned int currentDepth; 446 unsigned int maximumDepthSeen; 447 unsigned long debugLevel; 448 } ENTITY_STATS; 449 #endif /* XML_GE == 1 */ 450 451 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, 452 const char *end, const char **endPtr); 453 454 static Processor prologProcessor; 455 static Processor prologInitProcessor; 456 static Processor contentProcessor; 457 static Processor cdataSectionProcessor; 458 #ifdef XML_DTD 459 static Processor ignoreSectionProcessor; 460 static Processor externalParEntProcessor; 461 static Processor externalParEntInitProcessor; 462 static Processor entityValueProcessor; 463 static Processor entityValueInitProcessor; 464 #endif /* XML_DTD */ 465 static Processor epilogProcessor; 466 static Processor errorProcessor; 467 static Processor externalEntityInitProcessor; 468 static Processor externalEntityInitProcessor2; 469 static Processor externalEntityInitProcessor3; 470 static Processor externalEntityContentProcessor; 471 static Processor internalEntityProcessor; 472 473 static enum XML_Error handleUnknownEncoding(XML_Parser parser, 474 const XML_Char *encodingName); 475 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 476 const char *s, const char *next); 477 static enum XML_Error initializeEncoding(XML_Parser parser); 478 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, 479 const char *s, const char *end, int tok, 480 const char *next, const char **nextPtr, 481 XML_Bool haveMore, XML_Bool 
allowClosingDoctype, 482 enum XML_Account account); 483 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, 484 XML_Bool betweenDecl); 485 static enum XML_Error doContent(XML_Parser parser, int startTagLevel, 486 const ENCODING *enc, const char *start, 487 const char *end, const char **endPtr, 488 XML_Bool haveMore, enum XML_Account account); 489 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, 490 const char **startPtr, const char *end, 491 const char **nextPtr, XML_Bool haveMore, 492 enum XML_Account account); 493 #ifdef XML_DTD 494 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, 495 const char **startPtr, const char *end, 496 const char **nextPtr, XML_Bool haveMore); 497 #endif /* XML_DTD */ 498 499 static void freeBindings(XML_Parser parser, BINDING *bindings); 500 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 501 const char *attStr, TAG_NAME *tagNamePtr, 502 BINDING **bindingsPtr, 503 enum XML_Account account); 504 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, 505 const ATTRIBUTE_ID *attId, const XML_Char *uri, 506 BINDING **bindingsPtr); 507 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, 508 XML_Bool isCdata, XML_Bool isId, 509 const XML_Char *value, XML_Parser parser); 510 static enum XML_Error storeAttributeValue(XML_Parser parser, 511 const ENCODING *enc, XML_Bool isCdata, 512 const char *ptr, const char *end, 513 STRING_POOL *pool, 514 enum XML_Account account); 515 static enum XML_Error appendAttributeValue(XML_Parser parser, 516 const ENCODING *enc, 517 XML_Bool isCdata, const char *ptr, 518 const char *end, STRING_POOL *pool, 519 enum XML_Account account); 520 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, 521 const char *start, const char *end); 522 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); 523 #if XML_GE == 1 524 static enum XML_Error 
storeEntityValue(XML_Parser parser, const ENCODING *enc, 525 const char *start, const char *end, 526 enum XML_Account account); 527 #else 528 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); 529 #endif 530 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 531 const char *start, const char *end); 532 static int reportComment(XML_Parser parser, const ENCODING *enc, 533 const char *start, const char *end); 534 static void reportDefault(XML_Parser parser, const ENCODING *enc, 535 const char *start, const char *end); 536 537 static const XML_Char *getContext(XML_Parser parser); 538 static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 539 540 static void FASTCALL normalizePublicId(XML_Char *s); 541 542 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); 543 /* do not call if m_parentParser != NULL */ 544 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); 545 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, 546 const XML_Memory_Handling_Suite *ms); 547 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 548 const XML_Memory_Handling_Suite *ms); 549 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 550 STRING_POOL *newPool, const HASH_TABLE *oldTable); 551 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, 552 size_t createSize); 553 static void FASTCALL hashTableInit(HASH_TABLE *table, 554 const XML_Memory_Handling_Suite *ms); 555 static void FASTCALL hashTableClear(HASH_TABLE *table); 556 static void FASTCALL hashTableDestroy(HASH_TABLE *table); 557 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, 558 const HASH_TABLE *table); 559 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); 560 561 static void FASTCALL poolInit(STRING_POOL *pool, 562 const XML_Memory_Handling_Suite *ms); 563 static void FASTCALL poolClear(STRING_POOL *pool); 564 static void FASTCALL poolDestroy(STRING_POOL 
*pool); 565 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 566 const char *ptr, const char *end); 567 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 568 const char *ptr, const char *end); 569 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); 570 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, 571 const XML_Char *s); 572 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, 573 int n); 574 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, 575 const XML_Char *s); 576 577 static int FASTCALL nextScaffoldPart(XML_Parser parser); 578 static XML_Content *build_model(XML_Parser parser); 579 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, 580 const char *ptr, const char *end); 581 582 static XML_Char *copyString(const XML_Char *s, 583 const XML_Memory_Handling_Suite *memsuite); 584 585 static unsigned long generate_hash_secret_salt(XML_Parser parser); 586 static XML_Bool startParsing(XML_Parser parser); 587 588 static XML_Parser parserCreate(const XML_Char *encodingName, 589 const XML_Memory_Handling_Suite *memsuite, 590 const XML_Char *nameSep, DTD *dtd); 591 592 static void parserInit(XML_Parser parser, const XML_Char *encodingName); 593 594 #if XML_GE == 1 595 static float accountingGetCurrentAmplification(XML_Parser rootParser); 596 static void accountingReportStats(XML_Parser originParser, const char *epilog); 597 static void accountingOnAbort(XML_Parser originParser); 598 static void accountingReportDiff(XML_Parser rootParser, 599 unsigned int levelsAwayFromRootParser, 600 const char *before, const char *after, 601 ptrdiff_t bytesMore, int source_line, 602 enum XML_Account account); 603 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, 604 const char *before, const char *after, 605 int source_line, 606 enum XML_Account account); 607 608 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, 
609 const char *action, int sourceLine); 610 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, 611 int sourceLine); 612 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, 613 int sourceLine); 614 615 static XML_Parser getRootParserOf(XML_Parser parser, 616 unsigned int *outLevelDiff); 617 #endif /* XML_GE == 1 */ 618 619 static unsigned long getDebugLevel(const char *variableName, 620 unsigned long defaultDebugLevel); 621 622 #define poolStart(pool) ((pool)->start) 623 #define poolLength(pool) ((pool)->ptr - (pool)->start) 624 #define poolChop(pool) ((void)--(pool->ptr)) 625 #define poolLastChar(pool) (((pool)->ptr)[-1]) 626 #define poolDiscard(pool) ((pool)->ptr = (pool)->start) 627 #define poolFinish(pool) ((pool)->start = (pool)->ptr) 628 #define poolAppendChar(pool, c) \ 629 (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ 630 ? 0 \ 631 : ((*((pool)->ptr)++ = c), 1)) 632 633 #if ! defined(XML_TESTING) 634 const 635 #endif 636 XML_Bool g_reparseDeferralEnabledDefault 637 = XML_TRUE; // write ONLY in runtests.c 638 #if defined(XML_TESTING) 639 unsigned int g_bytesScanned = 0; // used for testing only 640 #endif 641 642 struct XML_ParserStruct { 643 /* The first member must be m_userData so that the XML_GetUserData 644 macro works. 
*/ 645 void *m_userData; 646 void *m_handlerArg; 647 648 // How the four parse buffer pointers below relate in time and space: 649 // 650 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim 651 // | | | | 652 // <--parsed-->| | | 653 // <---parsing--->| | 654 // <--unoccupied-->| 655 // <---------total-malloced/realloced-------->| 656 657 char *m_buffer; // malloc/realloc base pointer of parse buffer 658 const XML_Memory_Handling_Suite m_mem; 659 const char *m_bufferPtr; // first character to be parsed 660 char *m_bufferEnd; // past last character to be parsed 661 const char *m_bufferLim; // allocated end of m_buffer 662 663 XML_Index m_parseEndByteIndex; 664 const char *m_parseEndPtr; 665 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ 666 XML_Bool m_reparseDeferralEnabled; 667 int m_lastBufferRequestSize; 668 XML_Char *m_dataBuf; 669 XML_Char *m_dataBufEnd; 670 XML_StartElementHandler m_startElementHandler; 671 XML_EndElementHandler m_endElementHandler; 672 XML_CharacterDataHandler m_characterDataHandler; 673 XML_ProcessingInstructionHandler m_processingInstructionHandler; 674 XML_CommentHandler m_commentHandler; 675 XML_StartCdataSectionHandler m_startCdataSectionHandler; 676 XML_EndCdataSectionHandler m_endCdataSectionHandler; 677 XML_DefaultHandler m_defaultHandler; 678 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 679 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 680 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 681 XML_NotationDeclHandler m_notationDeclHandler; 682 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 683 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 684 XML_NotStandaloneHandler m_notStandaloneHandler; 685 XML_ExternalEntityRefHandler m_externalEntityRefHandler; 686 XML_Parser m_externalEntityRefHandlerArg; 687 XML_SkippedEntityHandler m_skippedEntityHandler; 688 XML_UnknownEncodingHandler m_unknownEncodingHandler; 689 XML_ElementDeclHandler m_elementDeclHandler; 690 
XML_AttlistDeclHandler m_attlistDeclHandler; 691 XML_EntityDeclHandler m_entityDeclHandler; 692 XML_XmlDeclHandler m_xmlDeclHandler; 693 const ENCODING *m_encoding; 694 INIT_ENCODING m_initEncoding; 695 const ENCODING *m_internalEncoding; 696 const XML_Char *m_protocolEncodingName; 697 XML_Bool m_ns; 698 XML_Bool m_ns_triplets; 699 void *m_unknownEncodingMem; 700 void *m_unknownEncodingData; 701 void *m_unknownEncodingHandlerData; 702 void(XMLCALL *m_unknownEncodingRelease)(void *); 703 PROLOG_STATE m_prologState; 704 Processor *m_processor; 705 enum XML_Error m_errorCode; 706 const char *m_eventPtr; 707 const char *m_eventEndPtr; 708 const char *m_positionPtr; 709 OPEN_INTERNAL_ENTITY *m_openInternalEntities; 710 OPEN_INTERNAL_ENTITY *m_freeInternalEntities; 711 XML_Bool m_defaultExpandInternalEntities; 712 int m_tagLevel; 713 ENTITY *m_declEntity; 714 const XML_Char *m_doctypeName; 715 const XML_Char *m_doctypeSysid; 716 const XML_Char *m_doctypePubid; 717 const XML_Char *m_declAttributeType; 718 const XML_Char *m_declNotationName; 719 const XML_Char *m_declNotationPublicId; 720 ELEMENT_TYPE *m_declElementType; 721 ATTRIBUTE_ID *m_declAttributeId; 722 XML_Bool m_declAttributeIsCdata; 723 XML_Bool m_declAttributeIsId; 724 DTD *m_dtd; 725 const XML_Char *m_curBase; 726 TAG *m_tagStack; 727 TAG *m_freeTagList; 728 BINDING *m_inheritedBindings; 729 BINDING *m_freeBindingList; 730 int m_attsSize; 731 int m_nSpecifiedAtts; 732 int m_idAttIndex; 733 ATTRIBUTE *m_atts; 734 NS_ATT *m_nsAtts; 735 unsigned long m_nsAttsVersion; 736 unsigned char m_nsAttsPower; 737 #ifdef XML_ATTR_INFO 738 XML_AttrInfo *m_attInfo; 739 #endif 740 POSITION m_position; 741 STRING_POOL m_tempPool; 742 STRING_POOL m_temp2Pool; 743 char *m_groupConnector; 744 unsigned int m_groupSize; 745 XML_Char m_namespaceSeparator; 746 XML_Parser m_parentParser; 747 XML_ParsingStatus m_parsingStatus; 748 #ifdef XML_DTD 749 XML_Bool m_isParamEntity; 750 XML_Bool m_useForeignDTD; 751 enum XML_ParamEntityParsing 
m_paramEntityParsing; 752 #endif 753 unsigned long m_hash_secret_salt; 754 #if XML_GE == 1 755 ACCOUNTING m_accounting; 756 ENTITY_STATS m_entity_stats; 757 #endif 758 }; 759 760 #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 761 #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 762 #define FREE(parser, p) (parser->m_mem.free_fcn((p))) 763 764 XML_Parser XMLCALL 765 XML_ParserCreate(const XML_Char *encodingName) { 766 return XML_ParserCreate_MM(encodingName, NULL, NULL); 767 } 768 769 XML_Parser XMLCALL 770 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { 771 XML_Char tmp[2] = {nsSep, 0}; 772 return XML_ParserCreate_MM(encodingName, NULL, tmp); 773 } 774 775 // "xml=http://www.w3.org/XML/1998/namespace" 776 static const XML_Char implicitContext[] 777 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, 778 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 779 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, 780 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 781 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, 782 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, 783 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, 784 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, 785 '\0'}; 786 787 /* To avoid warnings about unused functions: */ 788 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) 789 790 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 791 792 /* Obtain entropy on Linux 3.17+ */ 793 static int 794 writeRandomBytes_getrandom_nonblock(void *target, size_t count) { 795 int success = 0; /* full count bytes written? 
*/ 796 size_t bytesWrittenTotal = 0; 797 const unsigned int getrandomFlags = GRND_NONBLOCK; 798 799 do { 800 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 801 const size_t bytesToWrite = count - bytesWrittenTotal; 802 803 const int bytesWrittenMore = 804 # if defined(HAVE_GETRANDOM) 805 getrandom(currentTarget, bytesToWrite, getrandomFlags); 806 # else 807 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); 808 # endif 809 810 if (bytesWrittenMore > 0) { 811 bytesWrittenTotal += bytesWrittenMore; 812 if (bytesWrittenTotal >= count) 813 success = 1; 814 } 815 } while (! success && (errno == EINTR)); 816 817 return success; 818 } 819 820 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 821 822 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) 823 824 /* Extract entropy from /dev/urandom */ 825 static int 826 writeRandomBytes_dev_urandom(void *target, size_t count) { 827 int success = 0; /* full count bytes written? */ 828 size_t bytesWrittenTotal = 0; 829 830 const int fd = open("/dev/urandom", O_RDONLY); 831 if (fd < 0) { 832 return 0; 833 } 834 835 do { 836 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 837 const size_t bytesToWrite = count - bytesWrittenTotal; 838 839 const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); 840 841 if (bytesWrittenMore > 0) { 842 bytesWrittenTotal += bytesWrittenMore; 843 if (bytesWrittenTotal >= count) 844 success = 1; 845 } 846 } while (! success && (errno == EINTR)); 847 848 close(fd); 849 return success; 850 } 851 852 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 853 854 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 855 856 #if defined(HAVE_ARC4RANDOM) && ! 
defined(HAVE_ARC4RANDOM_BUF) 857 858 static void 859 writeRandomBytes_arc4random(void *target, size_t count) { 860 size_t bytesWrittenTotal = 0; 861 862 while (bytesWrittenTotal < count) { 863 const uint32_t random32 = arc4random(); 864 size_t i = 0; 865 866 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 867 i++, bytesWrittenTotal++) { 868 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 869 ((uint8_t *)target)[bytesWrittenTotal] = random8; 870 } 871 } 872 } 873 874 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ 875 876 #ifdef _WIN32 877 878 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), 879 as it didn't declare it in its header prior to version 5.3.0 of its 880 runtime package (mingwrt, containing stdlib.h). The upstream fix 881 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ 882 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ 883 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) 884 __declspec(dllimport) int rand_s(unsigned int *); 885 # endif 886 887 /* Obtain entropy on Windows using the rand_s() function which 888 * generates cryptographically secure random numbers. Internally it 889 * uses RtlGenRandom API which is present in Windows XP and later. 890 */ 891 static int 892 writeRandomBytes_rand_s(void *target, size_t count) { 893 size_t bytesWrittenTotal = 0; 894 895 while (bytesWrittenTotal < count) { 896 unsigned int random32 = 0; 897 size_t i = 0; 898 899 if (rand_s(&random32)) 900 return 0; /* failure */ 901 902 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 903 i++, bytesWrittenTotal++) { 904 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 905 ((uint8_t *)target)[bytesWrittenTotal] = random8; 906 } 907 } 908 return 1; /* success */ 909 } 910 911 #endif /* _WIN32 */ 912 913 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) 914 915 static unsigned long 916 gather_time_entropy(void) { 917 # ifdef _WIN32 918 FILETIME ft; 919 GetSystemTimeAsFileTime(&ft); /* never fails */ 920 return ft.dwHighDateTime ^ ft.dwLowDateTime; 921 # else 922 struct timeval tv; 923 int gettimeofday_res; 924 925 gettimeofday_res = gettimeofday(&tv, NULL); 926 927 # if defined(NDEBUG) 928 (void)gettimeofday_res; 929 # else 930 assert(gettimeofday_res == 0); 931 # endif /* defined(NDEBUG) */ 932 933 /* Microseconds time is <20 bits entropy */ 934 return tv.tv_usec; 935 # endif 936 } 937 938 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 939 940 static unsigned long 941 ENTROPY_DEBUG(const char *label, unsigned long entropy) { 942 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { 943 fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, 944 (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); 945 } 946 return entropy; 947 } 948 949 static unsigned long 950 generate_hash_secret_salt(XML_Parser parser) { 951 unsigned long entropy; 952 (void)parser; 953 954 /* "Failproof" high quality providers: */ 955 #if defined(HAVE_ARC4RANDOM_BUF) 956 arc4random_buf(&entropy, sizeof(entropy)); 957 return ENTROPY_DEBUG("arc4random_buf", entropy); 958 #elif defined(HAVE_ARC4RANDOM) 959 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); 960 return ENTROPY_DEBUG("arc4random", entropy); 961 #else 962 /* Try high quality providers first .. */ 963 # ifdef _WIN32 964 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { 965 return ENTROPY_DEBUG("rand_s", entropy); 966 } 967 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 968 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { 969 return ENTROPY_DEBUG("getrandom", entropy); 970 } 971 # endif 972 # if ! 
defined(_WIN32) && defined(XML_DEV_URANDOM) 973 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { 974 return ENTROPY_DEBUG("/dev/urandom", entropy); 975 } 976 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 977 /* .. and self-made low quality for backup: */ 978 979 /* Process ID is 0 bits entropy if attacker has local access */ 980 entropy = gather_time_entropy() ^ getpid(); 981 982 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ 983 if (sizeof(unsigned long) == 4) { 984 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); 985 } else { 986 return ENTROPY_DEBUG("fallback(8)", 987 entropy * (unsigned long)2305843009213693951ULL); 988 } 989 #endif 990 } 991 992 static unsigned long 993 get_hash_secret_salt(XML_Parser parser) { 994 if (parser->m_parentParser != NULL) 995 return get_hash_secret_salt(parser->m_parentParser); 996 return parser->m_hash_secret_salt; 997 } 998 999 static enum XML_Error 1000 callProcessor(XML_Parser parser, const char *start, const char *end, 1001 const char **endPtr) { 1002 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); 1003 1004 if (parser->m_reparseDeferralEnabled 1005 && ! parser->m_parsingStatus.finalBuffer) { 1006 // Heuristic: don't try to parse a partial token again until the amount of 1007 // available data has increased significantly. 1008 const size_t had_before = parser->m_partialTokenBytesBefore; 1009 // ...but *do* try anyway if we're close to causing a reallocation. 
1010 size_t available_buffer 1011 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 1012 #if XML_CONTEXT_BYTES > 0 1013 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); 1014 #endif 1015 available_buffer 1016 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); 1017 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok 1018 const bool enough 1019 = (have_now >= 2 * had_before) 1020 || ((size_t)parser->m_lastBufferRequestSize > available_buffer); 1021 1022 if (! enough) { 1023 *endPtr = start; // callers may expect this to be set 1024 return XML_ERROR_NONE; 1025 } 1026 } 1027 #if defined(XML_TESTING) 1028 g_bytesScanned += (unsigned)have_now; 1029 #endif 1030 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); 1031 if (ret == XML_ERROR_NONE) { 1032 // if we consumed nothing, remember what we had on this parse attempt. 1033 if (*endPtr == start) { 1034 parser->m_partialTokenBytesBefore = have_now; 1035 } else { 1036 parser->m_partialTokenBytesBefore = 0; 1037 } 1038 } 1039 return ret; 1040 } 1041 1042 static XML_Bool /* only valid for root parser */ 1043 startParsing(XML_Parser parser) { 1044 /* hash functions must be initialized before setContext() is called */ 1045 if (parser->m_hash_secret_salt == 0) 1046 parser->m_hash_secret_salt = generate_hash_secret_salt(parser); 1047 if (parser->m_ns) { 1048 /* implicit context only set for root parser, since child 1049 parsers (i.e. 
external entity parsers) will inherit it 1050 */ 1051 return setContext(parser, implicitContext); 1052 } 1053 return XML_TRUE; 1054 } 1055 1056 XML_Parser XMLCALL 1057 XML_ParserCreate_MM(const XML_Char *encodingName, 1058 const XML_Memory_Handling_Suite *memsuite, 1059 const XML_Char *nameSep) { 1060 return parserCreate(encodingName, memsuite, nameSep, NULL); 1061 } 1062 1063 static XML_Parser 1064 parserCreate(const XML_Char *encodingName, 1065 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, 1066 DTD *dtd) { 1067 XML_Parser parser; 1068 1069 if (memsuite) { 1070 XML_Memory_Handling_Suite *mtemp; 1071 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); 1072 if (parser != NULL) { 1073 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1074 mtemp->malloc_fcn = memsuite->malloc_fcn; 1075 mtemp->realloc_fcn = memsuite->realloc_fcn; 1076 mtemp->free_fcn = memsuite->free_fcn; 1077 } 1078 } else { 1079 XML_Memory_Handling_Suite *mtemp; 1080 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); 1081 if (parser != NULL) { 1082 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1083 mtemp->malloc_fcn = malloc; 1084 mtemp->realloc_fcn = realloc; 1085 mtemp->free_fcn = free; 1086 } 1087 } 1088 1089 if (! 
parser) 1090 return parser; 1091 1092 parser->m_buffer = NULL; 1093 parser->m_bufferLim = NULL; 1094 1095 parser->m_attsSize = INIT_ATTS_SIZE; 1096 parser->m_atts 1097 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); 1098 if (parser->m_atts == NULL) { 1099 FREE(parser, parser); 1100 return NULL; 1101 } 1102 #ifdef XML_ATTR_INFO 1103 parser->m_attInfo = (XML_AttrInfo *)MALLOC( 1104 parser, parser->m_attsSize * sizeof(XML_AttrInfo)); 1105 if (parser->m_attInfo == NULL) { 1106 FREE(parser, parser->m_atts); 1107 FREE(parser, parser); 1108 return NULL; 1109 } 1110 #endif 1111 parser->m_dataBuf 1112 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); 1113 if (parser->m_dataBuf == NULL) { 1114 FREE(parser, parser->m_atts); 1115 #ifdef XML_ATTR_INFO 1116 FREE(parser, parser->m_attInfo); 1117 #endif 1118 FREE(parser, parser); 1119 return NULL; 1120 } 1121 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; 1122 1123 if (dtd) 1124 parser->m_dtd = dtd; 1125 else { 1126 parser->m_dtd = dtdCreate(&parser->m_mem); 1127 if (parser->m_dtd == NULL) { 1128 FREE(parser, parser->m_dataBuf); 1129 FREE(parser, parser->m_atts); 1130 #ifdef XML_ATTR_INFO 1131 FREE(parser, parser->m_attInfo); 1132 #endif 1133 FREE(parser, parser); 1134 return NULL; 1135 } 1136 } 1137 1138 parser->m_freeBindingList = NULL; 1139 parser->m_freeTagList = NULL; 1140 parser->m_freeInternalEntities = NULL; 1141 1142 parser->m_groupSize = 0; 1143 parser->m_groupConnector = NULL; 1144 1145 parser->m_unknownEncodingHandler = NULL; 1146 parser->m_unknownEncodingHandlerData = NULL; 1147 1148 parser->m_namespaceSeparator = ASCII_EXCL; 1149 parser->m_ns = XML_FALSE; 1150 parser->m_ns_triplets = XML_FALSE; 1151 1152 parser->m_nsAtts = NULL; 1153 parser->m_nsAttsVersion = 0; 1154 parser->m_nsAttsPower = 0; 1155 1156 parser->m_protocolEncodingName = NULL; 1157 1158 poolInit(&parser->m_tempPool, &(parser->m_mem)); 1159 poolInit(&parser->m_temp2Pool, &(parser->m_mem)); 1160 
parserInit(parser, encodingName); 1161 1162 if (encodingName && ! parser->m_protocolEncodingName) { 1163 if (dtd) { 1164 // We need to stop the upcoming call to XML_ParserFree from happily 1165 // destroying parser->m_dtd because the DTD is shared with the parent 1166 // parser and the only guard that keeps XML_ParserFree from destroying 1167 // parser->m_dtd is parser->m_isParamEntity but it will be set to 1168 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). 1169 parser->m_dtd = NULL; 1170 } 1171 XML_ParserFree(parser); 1172 return NULL; 1173 } 1174 1175 if (nameSep) { 1176 parser->m_ns = XML_TRUE; 1177 parser->m_internalEncoding = XmlGetInternalEncodingNS(); 1178 parser->m_namespaceSeparator = *nameSep; 1179 } else { 1180 parser->m_internalEncoding = XmlGetInternalEncoding(); 1181 } 1182 1183 return parser; 1184 } 1185 1186 static void 1187 parserInit(XML_Parser parser, const XML_Char *encodingName) { 1188 parser->m_processor = prologInitProcessor; 1189 XmlPrologStateInit(&parser->m_prologState); 1190 if (encodingName != NULL) { 1191 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1192 } 1193 parser->m_curBase = NULL; 1194 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); 1195 parser->m_userData = NULL; 1196 parser->m_handlerArg = NULL; 1197 parser->m_startElementHandler = NULL; 1198 parser->m_endElementHandler = NULL; 1199 parser->m_characterDataHandler = NULL; 1200 parser->m_processingInstructionHandler = NULL; 1201 parser->m_commentHandler = NULL; 1202 parser->m_startCdataSectionHandler = NULL; 1203 parser->m_endCdataSectionHandler = NULL; 1204 parser->m_defaultHandler = NULL; 1205 parser->m_startDoctypeDeclHandler = NULL; 1206 parser->m_endDoctypeDeclHandler = NULL; 1207 parser->m_unparsedEntityDeclHandler = NULL; 1208 parser->m_notationDeclHandler = NULL; 1209 parser->m_startNamespaceDeclHandler = NULL; 1210 parser->m_endNamespaceDeclHandler = NULL; 1211 parser->m_notStandaloneHandler = 
NULL; 1212 parser->m_externalEntityRefHandler = NULL; 1213 parser->m_externalEntityRefHandlerArg = parser; 1214 parser->m_skippedEntityHandler = NULL; 1215 parser->m_elementDeclHandler = NULL; 1216 parser->m_attlistDeclHandler = NULL; 1217 parser->m_entityDeclHandler = NULL; 1218 parser->m_xmlDeclHandler = NULL; 1219 parser->m_bufferPtr = parser->m_buffer; 1220 parser->m_bufferEnd = parser->m_buffer; 1221 parser->m_parseEndByteIndex = 0; 1222 parser->m_parseEndPtr = NULL; 1223 parser->m_partialTokenBytesBefore = 0; 1224 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; 1225 parser->m_lastBufferRequestSize = 0; 1226 parser->m_declElementType = NULL; 1227 parser->m_declAttributeId = NULL; 1228 parser->m_declEntity = NULL; 1229 parser->m_doctypeName = NULL; 1230 parser->m_doctypeSysid = NULL; 1231 parser->m_doctypePubid = NULL; 1232 parser->m_declAttributeType = NULL; 1233 parser->m_declNotationName = NULL; 1234 parser->m_declNotationPublicId = NULL; 1235 parser->m_declAttributeIsCdata = XML_FALSE; 1236 parser->m_declAttributeIsId = XML_FALSE; 1237 memset(&parser->m_position, 0, sizeof(POSITION)); 1238 parser->m_errorCode = XML_ERROR_NONE; 1239 parser->m_eventPtr = NULL; 1240 parser->m_eventEndPtr = NULL; 1241 parser->m_positionPtr = NULL; 1242 parser->m_openInternalEntities = NULL; 1243 parser->m_defaultExpandInternalEntities = XML_TRUE; 1244 parser->m_tagLevel = 0; 1245 parser->m_tagStack = NULL; 1246 parser->m_inheritedBindings = NULL; 1247 parser->m_nSpecifiedAtts = 0; 1248 parser->m_unknownEncodingMem = NULL; 1249 parser->m_unknownEncodingRelease = NULL; 1250 parser->m_unknownEncodingData = NULL; 1251 parser->m_parentParser = NULL; 1252 parser->m_parsingStatus.parsing = XML_INITIALIZED; 1253 #ifdef XML_DTD 1254 parser->m_isParamEntity = XML_FALSE; 1255 parser->m_useForeignDTD = XML_FALSE; 1256 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 1257 #endif 1258 parser->m_hash_secret_salt = 0; 1259 1260 #if XML_GE == 1 1261 
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); 1262 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); 1263 parser->m_accounting.maximumAmplificationFactor 1264 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; 1265 parser->m_accounting.activationThresholdBytes 1266 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; 1267 1268 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); 1269 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); 1270 #endif 1271 } 1272 1273 /* moves list of bindings to m_freeBindingList */ 1274 static void FASTCALL 1275 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { 1276 while (bindings) { 1277 BINDING *b = bindings; 1278 bindings = bindings->nextTagBinding; 1279 b->nextTagBinding = parser->m_freeBindingList; 1280 parser->m_freeBindingList = b; 1281 } 1282 } 1283 1284 XML_Bool XMLCALL 1285 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { 1286 TAG *tStk; 1287 OPEN_INTERNAL_ENTITY *openEntityList; 1288 1289 if (parser == NULL) 1290 return XML_FALSE; 1291 1292 if (parser->m_parentParser) 1293 return XML_FALSE; 1294 /* move m_tagStack to m_freeTagList */ 1295 tStk = parser->m_tagStack; 1296 while (tStk) { 1297 TAG *tag = tStk; 1298 tStk = tStk->parent; 1299 tag->parent = parser->m_freeTagList; 1300 moveToFreeBindingList(parser, tag->bindings); 1301 tag->bindings = NULL; 1302 parser->m_freeTagList = tag; 1303 } 1304 /* move m_openInternalEntities to m_freeInternalEntities */ 1305 openEntityList = parser->m_openInternalEntities; 1306 while (openEntityList) { 1307 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1308 openEntityList = openEntity->next; 1309 openEntity->next = parser->m_freeInternalEntities; 1310 parser->m_freeInternalEntities = openEntity; 1311 } 1312 moveToFreeBindingList(parser, parser->m_inheritedBindings); 1313 FREE(parser, parser->m_unknownEncodingMem); 1314 if 
(parser->m_unknownEncodingRelease) 1315 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1316 poolClear(&parser->m_tempPool); 1317 poolClear(&parser->m_temp2Pool); 1318 FREE(parser, (void *)parser->m_protocolEncodingName); 1319 parser->m_protocolEncodingName = NULL; 1320 parserInit(parser, encodingName); 1321 dtdReset(parser->m_dtd, &parser->m_mem); 1322 return XML_TRUE; 1323 } 1324 1325 enum XML_Status XMLCALL 1326 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { 1327 if (parser == NULL) 1328 return XML_STATUS_ERROR; 1329 /* Block after XML_Parse()/XML_ParseBuffer() has been called. 1330 XXX There's no way for the caller to determine which of the 1331 XXX possible error cases caused the XML_STATUS_ERROR return. 1332 */ 1333 if (parser->m_parsingStatus.parsing == XML_PARSING 1334 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1335 return XML_STATUS_ERROR; 1336 1337 /* Get rid of any previous encoding name */ 1338 FREE(parser, (void *)parser->m_protocolEncodingName); 1339 1340 if (encodingName == NULL) 1341 /* No new encoding name */ 1342 parser->m_protocolEncodingName = NULL; 1343 else { 1344 /* Copy the new encoding name into allocated memory */ 1345 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1346 if (! 
parser->m_protocolEncodingName) 1347 return XML_STATUS_ERROR; 1348 } 1349 return XML_STATUS_OK; 1350 } 1351 1352 XML_Parser XMLCALL 1353 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, 1354 const XML_Char *encodingName) { 1355 XML_Parser parser = oldParser; 1356 DTD *newDtd = NULL; 1357 DTD *oldDtd; 1358 XML_StartElementHandler oldStartElementHandler; 1359 XML_EndElementHandler oldEndElementHandler; 1360 XML_CharacterDataHandler oldCharacterDataHandler; 1361 XML_ProcessingInstructionHandler oldProcessingInstructionHandler; 1362 XML_CommentHandler oldCommentHandler; 1363 XML_StartCdataSectionHandler oldStartCdataSectionHandler; 1364 XML_EndCdataSectionHandler oldEndCdataSectionHandler; 1365 XML_DefaultHandler oldDefaultHandler; 1366 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; 1367 XML_NotationDeclHandler oldNotationDeclHandler; 1368 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; 1369 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; 1370 XML_NotStandaloneHandler oldNotStandaloneHandler; 1371 XML_ExternalEntityRefHandler oldExternalEntityRefHandler; 1372 XML_SkippedEntityHandler oldSkippedEntityHandler; 1373 XML_UnknownEncodingHandler oldUnknownEncodingHandler; 1374 XML_ElementDeclHandler oldElementDeclHandler; 1375 XML_AttlistDeclHandler oldAttlistDeclHandler; 1376 XML_EntityDeclHandler oldEntityDeclHandler; 1377 XML_XmlDeclHandler oldXmlDeclHandler; 1378 ELEMENT_TYPE *oldDeclElementType; 1379 1380 void *oldUserData; 1381 void *oldHandlerArg; 1382 XML_Bool oldDefaultExpandInternalEntities; 1383 XML_Parser oldExternalEntityRefHandlerArg; 1384 #ifdef XML_DTD 1385 enum XML_ParamEntityParsing oldParamEntityParsing; 1386 int oldInEntityValue; 1387 #endif 1388 XML_Bool oldns_triplets; 1389 /* Note that the new parser shares the same hash secret as the old 1390 parser, so that dtdCopy and copyEntityTable can lookup values 1391 from hash tables associated with either parser without us having 1392 to worry 
which hash secrets each table has. 1393 */ 1394 unsigned long oldhash_secret_salt; 1395 XML_Bool oldReparseDeferralEnabled; 1396 1397 /* Validate the oldParser parameter before we pull everything out of it */ 1398 if (oldParser == NULL) 1399 return NULL; 1400 1401 /* Stash the original parser contents on the stack */ 1402 oldDtd = parser->m_dtd; 1403 oldStartElementHandler = parser->m_startElementHandler; 1404 oldEndElementHandler = parser->m_endElementHandler; 1405 oldCharacterDataHandler = parser->m_characterDataHandler; 1406 oldProcessingInstructionHandler = parser->m_processingInstructionHandler; 1407 oldCommentHandler = parser->m_commentHandler; 1408 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; 1409 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; 1410 oldDefaultHandler = parser->m_defaultHandler; 1411 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; 1412 oldNotationDeclHandler = parser->m_notationDeclHandler; 1413 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; 1414 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; 1415 oldNotStandaloneHandler = parser->m_notStandaloneHandler; 1416 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; 1417 oldSkippedEntityHandler = parser->m_skippedEntityHandler; 1418 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; 1419 oldElementDeclHandler = parser->m_elementDeclHandler; 1420 oldAttlistDeclHandler = parser->m_attlistDeclHandler; 1421 oldEntityDeclHandler = parser->m_entityDeclHandler; 1422 oldXmlDeclHandler = parser->m_xmlDeclHandler; 1423 oldDeclElementType = parser->m_declElementType; 1424 1425 oldUserData = parser->m_userData; 1426 oldHandlerArg = parser->m_handlerArg; 1427 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; 1428 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; 1429 #ifdef XML_DTD 1430 oldParamEntityParsing = parser->m_paramEntityParsing; 1431 
oldInEntityValue = parser->m_prologState.inEntityValue; 1432 #endif 1433 oldns_triplets = parser->m_ns_triplets; 1434 /* Note that the new parser shares the same hash secret as the old 1435 parser, so that dtdCopy and copyEntityTable can lookup values 1436 from hash tables associated with either parser without us having 1437 to worry which hash secrets each table has. 1438 */ 1439 oldhash_secret_salt = parser->m_hash_secret_salt; 1440 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; 1441 1442 #ifdef XML_DTD 1443 if (! context) 1444 newDtd = oldDtd; 1445 #endif /* XML_DTD */ 1446 1447 /* Note that the magical uses of the pre-processor to make field 1448 access look more like C++ require that `parser' be overwritten 1449 here. This makes this function more painful to follow than it 1450 would be otherwise. 1451 */ 1452 if (parser->m_ns) { 1453 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; 1454 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); 1455 } else { 1456 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); 1457 } 1458 1459 if (! 
parser) 1460 return NULL; 1461 1462 parser->m_startElementHandler = oldStartElementHandler; 1463 parser->m_endElementHandler = oldEndElementHandler; 1464 parser->m_characterDataHandler = oldCharacterDataHandler; 1465 parser->m_processingInstructionHandler = oldProcessingInstructionHandler; 1466 parser->m_commentHandler = oldCommentHandler; 1467 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; 1468 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; 1469 parser->m_defaultHandler = oldDefaultHandler; 1470 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; 1471 parser->m_notationDeclHandler = oldNotationDeclHandler; 1472 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; 1473 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; 1474 parser->m_notStandaloneHandler = oldNotStandaloneHandler; 1475 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; 1476 parser->m_skippedEntityHandler = oldSkippedEntityHandler; 1477 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; 1478 parser->m_elementDeclHandler = oldElementDeclHandler; 1479 parser->m_attlistDeclHandler = oldAttlistDeclHandler; 1480 parser->m_entityDeclHandler = oldEntityDeclHandler; 1481 parser->m_xmlDeclHandler = oldXmlDeclHandler; 1482 parser->m_declElementType = oldDeclElementType; 1483 parser->m_userData = oldUserData; 1484 if (oldUserData == oldHandlerArg) 1485 parser->m_handlerArg = parser->m_userData; 1486 else 1487 parser->m_handlerArg = parser; 1488 if (oldExternalEntityRefHandlerArg != oldParser) 1489 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; 1490 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; 1491 parser->m_ns_triplets = oldns_triplets; 1492 parser->m_hash_secret_salt = oldhash_secret_salt; 1493 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; 1494 parser->m_parentParser = oldParser; 1495 #ifdef XML_DTD 1496 parser->m_paramEntityParsing = 
oldParamEntityParsing; 1497 parser->m_prologState.inEntityValue = oldInEntityValue; 1498 if (context) { 1499 #endif /* XML_DTD */ 1500 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) 1501 || ! setContext(parser, context)) { 1502 XML_ParserFree(parser); 1503 return NULL; 1504 } 1505 parser->m_processor = externalEntityInitProcessor; 1506 #ifdef XML_DTD 1507 } else { 1508 /* The DTD instance referenced by parser->m_dtd is shared between the 1509 document's root parser and external PE parsers, therefore one does not 1510 need to call setContext. In addition, one also *must* not call 1511 setContext, because this would overwrite existing prefix->binding 1512 pointers in parser->m_dtd with ones that get destroyed with the external 1513 PE parser. This would leave those prefixes with dangling pointers. 1514 */ 1515 parser->m_isParamEntity = XML_TRUE; 1516 XmlPrologStateInitExternalEntity(&parser->m_prologState); 1517 parser->m_processor = externalParEntInitProcessor; 1518 } 1519 #endif /* XML_DTD */ 1520 return parser; 1521 } 1522 1523 static void FASTCALL 1524 destroyBindings(BINDING *bindings, XML_Parser parser) { 1525 for (;;) { 1526 BINDING *b = bindings; 1527 if (! 
b) 1528 break; 1529 bindings = b->nextTagBinding; 1530 FREE(parser, b->uri); 1531 FREE(parser, b); 1532 } 1533 } 1534 1535 void XMLCALL 1536 XML_ParserFree(XML_Parser parser) { 1537 TAG *tagList; 1538 OPEN_INTERNAL_ENTITY *entityList; 1539 if (parser == NULL) 1540 return; 1541 /* free m_tagStack and m_freeTagList */ 1542 tagList = parser->m_tagStack; 1543 for (;;) { 1544 TAG *p; 1545 if (tagList == NULL) { 1546 if (parser->m_freeTagList == NULL) 1547 break; 1548 tagList = parser->m_freeTagList; 1549 parser->m_freeTagList = NULL; 1550 } 1551 p = tagList; 1552 tagList = tagList->parent; 1553 FREE(parser, p->buf); 1554 destroyBindings(p->bindings, parser); 1555 FREE(parser, p); 1556 } 1557 /* free m_openInternalEntities and m_freeInternalEntities */ 1558 entityList = parser->m_openInternalEntities; 1559 for (;;) { 1560 OPEN_INTERNAL_ENTITY *openEntity; 1561 if (entityList == NULL) { 1562 if (parser->m_freeInternalEntities == NULL) 1563 break; 1564 entityList = parser->m_freeInternalEntities; 1565 parser->m_freeInternalEntities = NULL; 1566 } 1567 openEntity = entityList; 1568 entityList = entityList->next; 1569 FREE(parser, openEntity); 1570 } 1571 1572 destroyBindings(parser->m_freeBindingList, parser); 1573 destroyBindings(parser->m_inheritedBindings, parser); 1574 poolDestroy(&parser->m_tempPool); 1575 poolDestroy(&parser->m_temp2Pool); 1576 FREE(parser, (void *)parser->m_protocolEncodingName); 1577 #ifdef XML_DTD 1578 /* external parameter entity parsers share the DTD structure 1579 parser->m_dtd with the root parser, so we must not destroy it 1580 */ 1581 if (! parser->m_isParamEntity && parser->m_dtd) 1582 #else 1583 if (parser->m_dtd) 1584 #endif /* XML_DTD */ 1585 dtdDestroy(parser->m_dtd, (XML_Bool)! 
parser->m_parentParser, 1586 &parser->m_mem); 1587 FREE(parser, (void *)parser->m_atts); 1588 #ifdef XML_ATTR_INFO 1589 FREE(parser, (void *)parser->m_attInfo); 1590 #endif 1591 FREE(parser, parser->m_groupConnector); 1592 FREE(parser, parser->m_buffer); 1593 FREE(parser, parser->m_dataBuf); 1594 FREE(parser, parser->m_nsAtts); 1595 FREE(parser, parser->m_unknownEncodingMem); 1596 if (parser->m_unknownEncodingRelease) 1597 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1598 FREE(parser, parser); 1599 } 1600 1601 void XMLCALL 1602 XML_UseParserAsHandlerArg(XML_Parser parser) { 1603 if (parser != NULL) 1604 parser->m_handlerArg = parser; 1605 } 1606 1607 enum XML_Error XMLCALL 1608 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 1609 if (parser == NULL) 1610 return XML_ERROR_INVALID_ARGUMENT; 1611 #ifdef XML_DTD 1612 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1613 if (parser->m_parsingStatus.parsing == XML_PARSING 1614 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1615 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 1616 parser->m_useForeignDTD = useDTD; 1617 return XML_ERROR_NONE; 1618 #else 1619 UNUSED_P(useDTD); 1620 return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 1621 #endif 1622 } 1623 1624 void XMLCALL 1625 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 1626 if (parser == NULL) 1627 return; 1628 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1629 if (parser->m_parsingStatus.parsing == XML_PARSING 1630 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1631 return; 1632 parser->m_ns_triplets = do_nst ? 
XML_TRUE : XML_FALSE; 1633 } 1634 1635 void XMLCALL 1636 XML_SetUserData(XML_Parser parser, void *p) { 1637 if (parser == NULL) 1638 return; 1639 if (parser->m_handlerArg == parser->m_userData) 1640 parser->m_handlerArg = parser->m_userData = p; 1641 else 1642 parser->m_userData = p; 1643 } 1644 1645 enum XML_Status XMLCALL 1646 XML_SetBase(XML_Parser parser, const XML_Char *p) { 1647 if (parser == NULL) 1648 return XML_STATUS_ERROR; 1649 if (p) { 1650 p = poolCopyString(&parser->m_dtd->pool, p); 1651 if (! p) 1652 return XML_STATUS_ERROR; 1653 parser->m_curBase = p; 1654 } else 1655 parser->m_curBase = NULL; 1656 return XML_STATUS_OK; 1657 } 1658 1659 const XML_Char *XMLCALL 1660 XML_GetBase(XML_Parser parser) { 1661 if (parser == NULL) 1662 return NULL; 1663 return parser->m_curBase; 1664 } 1665 1666 int XMLCALL 1667 XML_GetSpecifiedAttributeCount(XML_Parser parser) { 1668 if (parser == NULL) 1669 return -1; 1670 return parser->m_nSpecifiedAtts; 1671 } 1672 1673 int XMLCALL 1674 XML_GetIdAttributeIndex(XML_Parser parser) { 1675 if (parser == NULL) 1676 return -1; 1677 return parser->m_idAttIndex; 1678 } 1679 1680 #ifdef XML_ATTR_INFO 1681 const XML_AttrInfo *XMLCALL 1682 XML_GetAttributeInfo(XML_Parser parser) { 1683 if (parser == NULL) 1684 return NULL; 1685 return parser->m_attInfo; 1686 } 1687 #endif 1688 1689 void XMLCALL 1690 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 1691 XML_EndElementHandler end) { 1692 if (parser == NULL) 1693 return; 1694 parser->m_startElementHandler = start; 1695 parser->m_endElementHandler = end; 1696 } 1697 1698 void XMLCALL 1699 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 1700 if (parser != NULL) 1701 parser->m_startElementHandler = start; 1702 } 1703 1704 void XMLCALL 1705 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 1706 if (parser != NULL) 1707 parser->m_endElementHandler = end; 1708 } 1709 1710 void XMLCALL 1711 
XML_SetCharacterDataHandler(XML_Parser parser, 1712 XML_CharacterDataHandler handler) { 1713 if (parser != NULL) 1714 parser->m_characterDataHandler = handler; 1715 } 1716 1717 void XMLCALL 1718 XML_SetProcessingInstructionHandler(XML_Parser parser, 1719 XML_ProcessingInstructionHandler handler) { 1720 if (parser != NULL) 1721 parser->m_processingInstructionHandler = handler; 1722 } 1723 1724 void XMLCALL 1725 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 1726 if (parser != NULL) 1727 parser->m_commentHandler = handler; 1728 } 1729 1730 void XMLCALL 1731 XML_SetCdataSectionHandler(XML_Parser parser, 1732 XML_StartCdataSectionHandler start, 1733 XML_EndCdataSectionHandler end) { 1734 if (parser == NULL) 1735 return; 1736 parser->m_startCdataSectionHandler = start; 1737 parser->m_endCdataSectionHandler = end; 1738 } 1739 1740 void XMLCALL 1741 XML_SetStartCdataSectionHandler(XML_Parser parser, 1742 XML_StartCdataSectionHandler start) { 1743 if (parser != NULL) 1744 parser->m_startCdataSectionHandler = start; 1745 } 1746 1747 void XMLCALL 1748 XML_SetEndCdataSectionHandler(XML_Parser parser, 1749 XML_EndCdataSectionHandler end) { 1750 if (parser != NULL) 1751 parser->m_endCdataSectionHandler = end; 1752 } 1753 1754 void XMLCALL 1755 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 1756 if (parser == NULL) 1757 return; 1758 parser->m_defaultHandler = handler; 1759 parser->m_defaultExpandInternalEntities = XML_FALSE; 1760 } 1761 1762 void XMLCALL 1763 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 1764 if (parser == NULL) 1765 return; 1766 parser->m_defaultHandler = handler; 1767 parser->m_defaultExpandInternalEntities = XML_TRUE; 1768 } 1769 1770 void XMLCALL 1771 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 1772 XML_EndDoctypeDeclHandler end) { 1773 if (parser == NULL) 1774 return; 1775 parser->m_startDoctypeDeclHandler = start; 1776 
parser->m_endDoctypeDeclHandler = end; 1777 } 1778 1779 void XMLCALL 1780 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 1781 XML_StartDoctypeDeclHandler start) { 1782 if (parser != NULL) 1783 parser->m_startDoctypeDeclHandler = start; 1784 } 1785 1786 void XMLCALL 1787 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 1788 if (parser != NULL) 1789 parser->m_endDoctypeDeclHandler = end; 1790 } 1791 1792 void XMLCALL 1793 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 1794 XML_UnparsedEntityDeclHandler handler) { 1795 if (parser != NULL) 1796 parser->m_unparsedEntityDeclHandler = handler; 1797 } 1798 1799 void XMLCALL 1800 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 1801 if (parser != NULL) 1802 parser->m_notationDeclHandler = handler; 1803 } 1804 1805 void XMLCALL 1806 XML_SetNamespaceDeclHandler(XML_Parser parser, 1807 XML_StartNamespaceDeclHandler start, 1808 XML_EndNamespaceDeclHandler end) { 1809 if (parser == NULL) 1810 return; 1811 parser->m_startNamespaceDeclHandler = start; 1812 parser->m_endNamespaceDeclHandler = end; 1813 } 1814 1815 void XMLCALL 1816 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 1817 XML_StartNamespaceDeclHandler start) { 1818 if (parser != NULL) 1819 parser->m_startNamespaceDeclHandler = start; 1820 } 1821 1822 void XMLCALL 1823 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 1824 XML_EndNamespaceDeclHandler end) { 1825 if (parser != NULL) 1826 parser->m_endNamespaceDeclHandler = end; 1827 } 1828 1829 void XMLCALL 1830 XML_SetNotStandaloneHandler(XML_Parser parser, 1831 XML_NotStandaloneHandler handler) { 1832 if (parser != NULL) 1833 parser->m_notStandaloneHandler = handler; 1834 } 1835 1836 void XMLCALL 1837 XML_SetExternalEntityRefHandler(XML_Parser parser, 1838 XML_ExternalEntityRefHandler handler) { 1839 if (parser != NULL) 1840 parser->m_externalEntityRefHandler = handler; 1841 } 1842 1843 void XMLCALL 1844 
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { 1845 if (parser == NULL) 1846 return; 1847 if (arg) 1848 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 1849 else 1850 parser->m_externalEntityRefHandlerArg = parser; 1851 } 1852 1853 void XMLCALL 1854 XML_SetSkippedEntityHandler(XML_Parser parser, 1855 XML_SkippedEntityHandler handler) { 1856 if (parser != NULL) 1857 parser->m_skippedEntityHandler = handler; 1858 } 1859 1860 void XMLCALL 1861 XML_SetUnknownEncodingHandler(XML_Parser parser, 1862 XML_UnknownEncodingHandler handler, void *data) { 1863 if (parser == NULL) 1864 return; 1865 parser->m_unknownEncodingHandler = handler; 1866 parser->m_unknownEncodingHandlerData = data; 1867 } 1868 1869 void XMLCALL 1870 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 1871 if (parser != NULL) 1872 parser->m_elementDeclHandler = eldecl; 1873 } 1874 1875 void XMLCALL 1876 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 1877 if (parser != NULL) 1878 parser->m_attlistDeclHandler = attdecl; 1879 } 1880 1881 void XMLCALL 1882 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 1883 if (parser != NULL) 1884 parser->m_entityDeclHandler = handler; 1885 } 1886 1887 void XMLCALL 1888 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 1889 if (parser != NULL) 1890 parser->m_xmlDeclHandler = handler; 1891 } 1892 1893 int XMLCALL 1894 XML_SetParamEntityParsing(XML_Parser parser, 1895 enum XML_ParamEntityParsing peParsing) { 1896 if (parser == NULL) 1897 return 0; 1898 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1899 if (parser->m_parsingStatus.parsing == XML_PARSING 1900 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1901 return 0; 1902 #ifdef XML_DTD 1903 parser->m_paramEntityParsing = peParsing; 1904 return 1; 1905 #else 1906 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 1907 #endif 1908 } 1909 1910 int XMLCALL 1911 
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 1912 if (parser == NULL) 1913 return 0; 1914 if (parser->m_parentParser) 1915 return XML_SetHashSalt(parser->m_parentParser, hash_salt); 1916 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1917 if (parser->m_parsingStatus.parsing == XML_PARSING 1918 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1919 return 0; 1920 parser->m_hash_secret_salt = hash_salt; 1921 return 1; 1922 } 1923 1924 enum XML_Status XMLCALL 1925 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { 1926 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { 1927 if (parser != NULL) 1928 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 1929 return XML_STATUS_ERROR; 1930 } 1931 switch (parser->m_parsingStatus.parsing) { 1932 case XML_SUSPENDED: 1933 parser->m_errorCode = XML_ERROR_SUSPENDED; 1934 return XML_STATUS_ERROR; 1935 case XML_FINISHED: 1936 parser->m_errorCode = XML_ERROR_FINISHED; 1937 return XML_STATUS_ERROR; 1938 case XML_INITIALIZED: 1939 if (parser->m_parentParser == NULL && ! startParsing(parser)) { 1940 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1941 return XML_STATUS_ERROR; 1942 } 1943 /* fall through */ 1944 default: 1945 parser->m_parsingStatus.parsing = XML_PARSING; 1946 } 1947 1948 #if XML_CONTEXT_BYTES == 0 1949 if (parser->m_bufferPtr == parser->m_bufferEnd) { 1950 const char *end; 1951 int nLeftOver; 1952 enum XML_Status result; 1953 /* Detect overflow (a+b > MAX <==> b > MAX-a) */ 1954 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { 1955 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1956 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 1957 parser->m_processor = errorProcessor; 1958 return XML_STATUS_ERROR; 1959 } 1960 // though this isn't a buffer request, we assume that `len` is the app's 1961 // preferred buffer fill size, and therefore save it here. 
1962 parser->m_lastBufferRequestSize = len; 1963 parser->m_parseEndByteIndex += len; 1964 parser->m_positionPtr = s; 1965 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 1966 1967 parser->m_errorCode 1968 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); 1969 1970 if (parser->m_errorCode != XML_ERROR_NONE) { 1971 parser->m_eventEndPtr = parser->m_eventPtr; 1972 parser->m_processor = errorProcessor; 1973 return XML_STATUS_ERROR; 1974 } else { 1975 switch (parser->m_parsingStatus.parsing) { 1976 case XML_SUSPENDED: 1977 result = XML_STATUS_SUSPENDED; 1978 break; 1979 case XML_INITIALIZED: 1980 case XML_PARSING: 1981 if (isFinal) { 1982 parser->m_parsingStatus.parsing = XML_FINISHED; 1983 return XML_STATUS_OK; 1984 } 1985 /* fall through */ 1986 default: 1987 result = XML_STATUS_OK; 1988 } 1989 } 1990 1991 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, 1992 &parser->m_position); 1993 nLeftOver = s + len - end; 1994 if (nLeftOver) { 1995 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED 1996 // (and XML_ERROR_FINISHED) from XML_GetBuffer. 1997 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; 1998 parser->m_parsingStatus.parsing = XML_PARSING; 1999 void *const temp = XML_GetBuffer(parser, nLeftOver); 2000 parser->m_parsingStatus.parsing = originalStatus; 2001 // GetBuffer may have overwritten this, but we want to remember what the 2002 // app requested, not how many bytes were left over after parsing. 2003 parser->m_lastBufferRequestSize = len; 2004 if (temp == NULL) { 2005 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
2006 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2007 parser->m_processor = errorProcessor; 2008 return XML_STATUS_ERROR; 2009 } 2010 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we 2011 // don't have any data to preserve, and can copy straight into the start 2012 // of the buffer rather than the GetBuffer return pointer (which may be 2013 // pointing further into the allocated buffer). 2014 memcpy(parser->m_buffer, end, nLeftOver); 2015 } 2016 parser->m_bufferPtr = parser->m_buffer; 2017 parser->m_bufferEnd = parser->m_buffer + nLeftOver; 2018 parser->m_positionPtr = parser->m_bufferPtr; 2019 parser->m_parseEndPtr = parser->m_bufferEnd; 2020 parser->m_eventPtr = parser->m_bufferPtr; 2021 parser->m_eventEndPtr = parser->m_bufferPtr; 2022 return result; 2023 } 2024 #endif /* XML_CONTEXT_BYTES == 0 */ 2025 void *buff = XML_GetBuffer(parser, len); 2026 if (buff == NULL) 2027 return XML_STATUS_ERROR; 2028 if (len > 0) { 2029 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above 2030 memcpy(buff, s, len); 2031 } 2032 return XML_ParseBuffer(parser, len, isFinal); 2033 } 2034 2035 enum XML_Status XMLCALL 2036 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { 2037 const char *start; 2038 enum XML_Status result = XML_STATUS_OK; 2039 2040 if (parser == NULL) 2041 return XML_STATUS_ERROR; 2042 2043 if (len < 0) { 2044 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 2045 return XML_STATUS_ERROR; 2046 } 2047 2048 switch (parser->m_parsingStatus.parsing) { 2049 case XML_SUSPENDED: 2050 parser->m_errorCode = XML_ERROR_SUSPENDED; 2051 return XML_STATUS_ERROR; 2052 case XML_FINISHED: 2053 parser->m_errorCode = XML_ERROR_FINISHED; 2054 return XML_STATUS_ERROR; 2055 case XML_INITIALIZED: 2056 /* Has someone called XML_GetBuffer successfully before? */ 2057 if (! parser->m_bufferPtr) { 2058 parser->m_errorCode = XML_ERROR_NO_BUFFER; 2059 return XML_STATUS_ERROR; 2060 } 2061 2062 if (parser->m_parentParser == NULL && ! 
startParsing(parser)) { 2063 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2064 return XML_STATUS_ERROR; 2065 } 2066 /* fall through */ 2067 default: 2068 parser->m_parsingStatus.parsing = XML_PARSING; 2069 } 2070 2071 start = parser->m_bufferPtr; 2072 parser->m_positionPtr = start; 2073 parser->m_bufferEnd += len; 2074 parser->m_parseEndPtr = parser->m_bufferEnd; 2075 parser->m_parseEndByteIndex += len; 2076 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2077 2078 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, 2079 &parser->m_bufferPtr); 2080 2081 if (parser->m_errorCode != XML_ERROR_NONE) { 2082 parser->m_eventEndPtr = parser->m_eventPtr; 2083 parser->m_processor = errorProcessor; 2084 return XML_STATUS_ERROR; 2085 } else { 2086 switch (parser->m_parsingStatus.parsing) { 2087 case XML_SUSPENDED: 2088 result = XML_STATUS_SUSPENDED; 2089 break; 2090 case XML_INITIALIZED: 2091 case XML_PARSING: 2092 if (isFinal) { 2093 parser->m_parsingStatus.parsing = XML_FINISHED; 2094 return result; 2095 } 2096 default:; /* should not happen */ 2097 } 2098 } 2099 2100 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2101 parser->m_bufferPtr, &parser->m_position); 2102 parser->m_positionPtr = parser->m_bufferPtr; 2103 return result; 2104 } 2105 2106 void *XMLCALL 2107 XML_GetBuffer(XML_Parser parser, int len) { 2108 if (parser == NULL) 2109 return NULL; 2110 if (len < 0) { 2111 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2112 return NULL; 2113 } 2114 switch (parser->m_parsingStatus.parsing) { 2115 case XML_SUSPENDED: 2116 parser->m_errorCode = XML_ERROR_SUSPENDED; 2117 return NULL; 2118 case XML_FINISHED: 2119 parser->m_errorCode = XML_ERROR_FINISHED; 2120 return NULL; 2121 default:; 2122 } 2123 2124 // whether or not the request succeeds, `len` seems to be the app's preferred 2125 // buffer fill size; remember it. 
2126 parser->m_lastBufferRequestSize = len; 2127 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) 2128 || parser->m_buffer == NULL) { 2129 #if XML_CONTEXT_BYTES > 0 2130 int keep; 2131 #endif /* XML_CONTEXT_BYTES > 0 */ 2132 /* Do not invoke signed arithmetic overflow: */ 2133 int neededSize = (int)((unsigned)len 2134 + (unsigned)EXPAT_SAFE_PTR_DIFF( 2135 parser->m_bufferEnd, parser->m_bufferPtr)); 2136 if (neededSize < 0) { 2137 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2138 return NULL; 2139 } 2140 #if XML_CONTEXT_BYTES > 0 2141 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 2142 if (keep > XML_CONTEXT_BYTES) 2143 keep = XML_CONTEXT_BYTES; 2144 /* Detect and prevent integer overflow */ 2145 if (keep > INT_MAX - neededSize) { 2146 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2147 return NULL; 2148 } 2149 neededSize += keep; 2150 #endif /* XML_CONTEXT_BYTES > 0 */ 2151 if (parser->m_buffer && parser->m_bufferPtr 2152 && neededSize 2153 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { 2154 #if XML_CONTEXT_BYTES > 0 2155 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { 2156 int offset 2157 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) 2158 - keep; 2159 /* The buffer pointers cannot be NULL here; we have at least some bytes 2160 * in the buffer */ 2161 memmove(parser->m_buffer, &parser->m_buffer[offset], 2162 parser->m_bufferEnd - parser->m_bufferPtr + keep); 2163 parser->m_bufferEnd -= offset; 2164 parser->m_bufferPtr -= offset; 2165 } 2166 #else 2167 memmove(parser->m_buffer, parser->m_bufferPtr, 2168 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2169 parser->m_bufferEnd 2170 = parser->m_buffer 2171 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2172 parser->m_bufferPtr = parser->m_buffer; 2173 #endif /* XML_CONTEXT_BYTES > 0 */ 2174 } else { 2175 char *newBuf; 2176 int bufferSize 2177 = 
(int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); 2178 if (bufferSize == 0) 2179 bufferSize = INIT_BUFFER_SIZE; 2180 do { 2181 /* Do not invoke signed arithmetic overflow: */ 2182 bufferSize = (int)(2U * (unsigned)bufferSize); 2183 } while (bufferSize < neededSize && bufferSize > 0); 2184 if (bufferSize <= 0) { 2185 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2186 return NULL; 2187 } 2188 newBuf = (char *)MALLOC(parser, bufferSize); 2189 if (newBuf == 0) { 2190 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2191 return NULL; 2192 } 2193 parser->m_bufferLim = newBuf + bufferSize; 2194 #if XML_CONTEXT_BYTES > 0 2195 if (parser->m_bufferPtr) { 2196 memcpy(newBuf, &parser->m_bufferPtr[-keep], 2197 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2198 + keep); 2199 FREE(parser, parser->m_buffer); 2200 parser->m_buffer = newBuf; 2201 parser->m_bufferEnd 2202 = parser->m_buffer 2203 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2204 + keep; 2205 parser->m_bufferPtr = parser->m_buffer + keep; 2206 } else { 2207 /* This must be a brand new buffer with no data in it yet */ 2208 parser->m_bufferEnd = newBuf; 2209 parser->m_bufferPtr = parser->m_buffer = newBuf; 2210 } 2211 #else 2212 if (parser->m_bufferPtr) { 2213 memcpy(newBuf, parser->m_bufferPtr, 2214 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2215 FREE(parser, parser->m_buffer); 2216 parser->m_bufferEnd 2217 = newBuf 2218 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2219 } else { 2220 /* This must be a brand new buffer with no data in it yet */ 2221 parser->m_bufferEnd = newBuf; 2222 } 2223 parser->m_bufferPtr = parser->m_buffer = newBuf; 2224 #endif /* XML_CONTEXT_BYTES > 0 */ 2225 } 2226 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2227 parser->m_positionPtr = NULL; 2228 } 2229 return parser->m_bufferEnd; 2230 } 2231 2232 enum XML_Status XMLCALL 2233 XML_StopParser(XML_Parser parser, XML_Bool resumable) { 2234 if (parser == NULL) 
2235 return XML_STATUS_ERROR; 2236 switch (parser->m_parsingStatus.parsing) { 2237 case XML_SUSPENDED: 2238 if (resumable) { 2239 parser->m_errorCode = XML_ERROR_SUSPENDED; 2240 return XML_STATUS_ERROR; 2241 } 2242 parser->m_parsingStatus.parsing = XML_FINISHED; 2243 break; 2244 case XML_FINISHED: 2245 parser->m_errorCode = XML_ERROR_FINISHED; 2246 return XML_STATUS_ERROR; 2247 default: 2248 if (resumable) { 2249 #ifdef XML_DTD 2250 if (parser->m_isParamEntity) { 2251 parser->m_errorCode = XML_ERROR_SUSPEND_PE; 2252 return XML_STATUS_ERROR; 2253 } 2254 #endif 2255 parser->m_parsingStatus.parsing = XML_SUSPENDED; 2256 } else 2257 parser->m_parsingStatus.parsing = XML_FINISHED; 2258 } 2259 return XML_STATUS_OK; 2260 } 2261 2262 enum XML_Status XMLCALL 2263 XML_ResumeParser(XML_Parser parser) { 2264 enum XML_Status result = XML_STATUS_OK; 2265 2266 if (parser == NULL) 2267 return XML_STATUS_ERROR; 2268 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { 2269 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; 2270 return XML_STATUS_ERROR; 2271 } 2272 parser->m_parsingStatus.parsing = XML_PARSING; 2273 2274 parser->m_errorCode = callProcessor( 2275 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); 2276 2277 if (parser->m_errorCode != XML_ERROR_NONE) { 2278 parser->m_eventEndPtr = parser->m_eventPtr; 2279 parser->m_processor = errorProcessor; 2280 return XML_STATUS_ERROR; 2281 } else { 2282 switch (parser->m_parsingStatus.parsing) { 2283 case XML_SUSPENDED: 2284 result = XML_STATUS_SUSPENDED; 2285 break; 2286 case XML_INITIALIZED: 2287 case XML_PARSING: 2288 if (parser->m_parsingStatus.finalBuffer) { 2289 parser->m_parsingStatus.parsing = XML_FINISHED; 2290 return result; 2291 } 2292 default:; 2293 } 2294 } 2295 2296 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2297 parser->m_bufferPtr, &parser->m_position); 2298 parser->m_positionPtr = parser->m_bufferPtr; 2299 return result; 2300 } 2301 2302 void XMLCALL 2303 
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { 2304 if (parser == NULL) 2305 return; 2306 assert(status != NULL); 2307 *status = parser->m_parsingStatus; 2308 } 2309 2310 enum XML_Error XMLCALL 2311 XML_GetErrorCode(XML_Parser parser) { 2312 if (parser == NULL) 2313 return XML_ERROR_INVALID_ARGUMENT; 2314 return parser->m_errorCode; 2315 } 2316 2317 XML_Index XMLCALL 2318 XML_GetCurrentByteIndex(XML_Parser parser) { 2319 if (parser == NULL) 2320 return -1; 2321 if (parser->m_eventPtr) 2322 return (XML_Index)(parser->m_parseEndByteIndex 2323 - (parser->m_parseEndPtr - parser->m_eventPtr)); 2324 return -1; 2325 } 2326 2327 int XMLCALL 2328 XML_GetCurrentByteCount(XML_Parser parser) { 2329 if (parser == NULL) 2330 return 0; 2331 if (parser->m_eventEndPtr && parser->m_eventPtr) 2332 return (int)(parser->m_eventEndPtr - parser->m_eventPtr); 2333 return 0; 2334 } 2335 2336 const char *XMLCALL 2337 XML_GetInputContext(XML_Parser parser, int *offset, int *size) { 2338 #if XML_CONTEXT_BYTES > 0 2339 if (parser == NULL) 2340 return NULL; 2341 if (parser->m_eventPtr && parser->m_buffer) { 2342 if (offset != NULL) 2343 *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2344 if (size != NULL) 2345 *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2346 return parser->m_buffer; 2347 } 2348 #else 2349 (void)parser; 2350 (void)offset; 2351 (void)size; 2352 #endif /* XML_CONTEXT_BYTES > 0 */ 2353 return (const char *)0; 2354 } 2355 2356 XML_Size XMLCALL 2357 XML_GetCurrentLineNumber(XML_Parser parser) { 2358 if (parser == NULL) 2359 return 0; 2360 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2361 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2362 parser->m_eventPtr, &parser->m_position); 2363 parser->m_positionPtr = parser->m_eventPtr; 2364 } 2365 return parser->m_position.lineNumber + 1; 2366 } 2367 2368 XML_Size XMLCALL 2369 XML_GetCurrentColumnNumber(XML_Parser parser) { 2370 if (parser == NULL) 2371 
return 0; 2372 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2373 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2374 parser->m_eventPtr, &parser->m_position); 2375 parser->m_positionPtr = parser->m_eventPtr; 2376 } 2377 return parser->m_position.columnNumber; 2378 } 2379 2380 void XMLCALL 2381 XML_FreeContentModel(XML_Parser parser, XML_Content *model) { 2382 if (parser != NULL) 2383 FREE(parser, model); 2384 } 2385 2386 void *XMLCALL 2387 XML_MemMalloc(XML_Parser parser, size_t size) { 2388 if (parser == NULL) 2389 return NULL; 2390 return MALLOC(parser, size); 2391 } 2392 2393 void *XMLCALL 2394 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { 2395 if (parser == NULL) 2396 return NULL; 2397 return REALLOC(parser, ptr, size); 2398 } 2399 2400 void XMLCALL 2401 XML_MemFree(XML_Parser parser, void *ptr) { 2402 if (parser != NULL) 2403 FREE(parser, ptr); 2404 } 2405 2406 void XMLCALL 2407 XML_DefaultCurrent(XML_Parser parser) { 2408 if (parser == NULL) 2409 return; 2410 if (parser->m_defaultHandler) { 2411 if (parser->m_openInternalEntities) 2412 reportDefault(parser, parser->m_internalEncoding, 2413 parser->m_openInternalEntities->internalEventPtr, 2414 parser->m_openInternalEntities->internalEventEndPtr); 2415 else 2416 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, 2417 parser->m_eventEndPtr); 2418 } 2419 } 2420 2421 const XML_LChar *XMLCALL 2422 XML_ErrorString(enum XML_Error code) { 2423 switch (code) { 2424 case XML_ERROR_NONE: 2425 return NULL; 2426 case XML_ERROR_NO_MEMORY: 2427 return XML_L("out of memory"); 2428 case XML_ERROR_SYNTAX: 2429 return XML_L("syntax error"); 2430 case XML_ERROR_NO_ELEMENTS: 2431 return XML_L("no element found"); 2432 case XML_ERROR_INVALID_TOKEN: 2433 return XML_L("not well-formed (invalid token)"); 2434 case XML_ERROR_UNCLOSED_TOKEN: 2435 return XML_L("unclosed token"); 2436 case XML_ERROR_PARTIAL_CHAR: 2437 return XML_L("partial character"); 2438 case 
XML_ERROR_TAG_MISMATCH: 2439 return XML_L("mismatched tag"); 2440 case XML_ERROR_DUPLICATE_ATTRIBUTE: 2441 return XML_L("duplicate attribute"); 2442 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2443 return XML_L("junk after document element"); 2444 case XML_ERROR_PARAM_ENTITY_REF: 2445 return XML_L("illegal parameter entity reference"); 2446 case XML_ERROR_UNDEFINED_ENTITY: 2447 return XML_L("undefined entity"); 2448 case XML_ERROR_RECURSIVE_ENTITY_REF: 2449 return XML_L("recursive entity reference"); 2450 case XML_ERROR_ASYNC_ENTITY: 2451 return XML_L("asynchronous entity"); 2452 case XML_ERROR_BAD_CHAR_REF: 2453 return XML_L("reference to invalid character number"); 2454 case XML_ERROR_BINARY_ENTITY_REF: 2455 return XML_L("reference to binary entity"); 2456 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: 2457 return XML_L("reference to external entity in attribute"); 2458 case XML_ERROR_MISPLACED_XML_PI: 2459 return XML_L("XML or text declaration not at start of entity"); 2460 case XML_ERROR_UNKNOWN_ENCODING: 2461 return XML_L("unknown encoding"); 2462 case XML_ERROR_INCORRECT_ENCODING: 2463 return XML_L("encoding specified in XML declaration is incorrect"); 2464 case XML_ERROR_UNCLOSED_CDATA_SECTION: 2465 return XML_L("unclosed CDATA section"); 2466 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: 2467 return XML_L("error in processing external entity reference"); 2468 case XML_ERROR_NOT_STANDALONE: 2469 return XML_L("document is not standalone"); 2470 case XML_ERROR_UNEXPECTED_STATE: 2471 return XML_L("unexpected parser state - please send a bug report"); 2472 case XML_ERROR_ENTITY_DECLARED_IN_PE: 2473 return XML_L("entity declared in parameter entity"); 2474 case XML_ERROR_FEATURE_REQUIRES_XML_DTD: 2475 return XML_L("requested feature requires XML_DTD support in Expat"); 2476 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: 2477 return XML_L("cannot change setting once parsing has begun"); 2478 /* Added in 1.95.7. 
*/ 2479 case XML_ERROR_UNBOUND_PREFIX: 2480 return XML_L("unbound prefix"); 2481 /* Added in 1.95.8. */ 2482 case XML_ERROR_UNDECLARING_PREFIX: 2483 return XML_L("must not undeclare prefix"); 2484 case XML_ERROR_INCOMPLETE_PE: 2485 return XML_L("incomplete markup in parameter entity"); 2486 case XML_ERROR_XML_DECL: 2487 return XML_L("XML declaration not well-formed"); 2488 case XML_ERROR_TEXT_DECL: 2489 return XML_L("text declaration not well-formed"); 2490 case XML_ERROR_PUBLICID: 2491 return XML_L("illegal character(s) in public id"); 2492 case XML_ERROR_SUSPENDED: 2493 return XML_L("parser suspended"); 2494 case XML_ERROR_NOT_SUSPENDED: 2495 return XML_L("parser not suspended"); 2496 case XML_ERROR_ABORTED: 2497 return XML_L("parsing aborted"); 2498 case XML_ERROR_FINISHED: 2499 return XML_L("parsing finished"); 2500 case XML_ERROR_SUSPEND_PE: 2501 return XML_L("cannot suspend in external parameter entity"); 2502 /* Added in 2.0.0. */ 2503 case XML_ERROR_RESERVED_PREFIX_XML: 2504 return XML_L( 2505 "reserved prefix (xml) must not be undeclared or bound to another namespace name"); 2506 case XML_ERROR_RESERVED_PREFIX_XMLNS: 2507 return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); 2508 case XML_ERROR_RESERVED_NAMESPACE_URI: 2509 return XML_L( 2510 "prefix must not be bound to one of the reserved namespace names"); 2511 /* Added in 2.2.5. */ 2512 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ 2513 return XML_L("invalid argument"); 2514 /* Added in 2.3.0. */ 2515 case XML_ERROR_NO_BUFFER: 2516 return XML_L( 2517 "a successful prior call to function XML_GetBuffer is required"); 2518 /* Added in 2.4.0. */ 2519 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: 2520 return XML_L( 2521 "limit on input amplification factor (from DTD and entities) breached"); 2522 } 2523 return NULL; 2524 } 2525 2526 const XML_LChar *XMLCALL 2527 XML_ExpatVersion(void) { 2528 /* V1 is used to string-ize the version number. 
However, it would 2529 string-ize the actual version macro *names* unless we get them 2530 substituted before being passed to V1. CPP is defined to expand 2531 a macro, then rescan for more expansions. Thus, we use V2 to expand 2532 the version macros, then CPP will expand the resulting V1() macro 2533 with the correct numerals. */ 2534 /* ### I'm assuming cpp is portable in this respect... */ 2535 2536 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) 2537 #define V2(a, b, c) XML_L("expat_") V1(a, b, c) 2538 2539 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); 2540 2541 #undef V1 2542 #undef V2 2543 } 2544 2545 XML_Expat_Version XMLCALL 2546 XML_ExpatVersionInfo(void) { 2547 XML_Expat_Version version; 2548 2549 version.major = XML_MAJOR_VERSION; 2550 version.minor = XML_MINOR_VERSION; 2551 version.micro = XML_MICRO_VERSION; 2552 2553 return version; 2554 } 2555 2556 const XML_Feature *XMLCALL 2557 XML_GetFeatureList(void) { 2558 static const XML_Feature features[] = { 2559 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), 2560 sizeof(XML_Char)}, 2561 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), 2562 sizeof(XML_LChar)}, 2563 #ifdef XML_UNICODE 2564 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, 2565 #endif 2566 #ifdef XML_UNICODE_WCHAR_T 2567 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, 2568 #endif 2569 #ifdef XML_DTD 2570 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, 2571 #endif 2572 #if XML_CONTEXT_BYTES > 0 2573 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), 2574 XML_CONTEXT_BYTES}, 2575 #endif 2576 #ifdef XML_MIN_SIZE 2577 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, 2578 #endif 2579 #ifdef XML_NS 2580 {XML_FEATURE_NS, XML_L("XML_NS"), 0}, 2581 #endif 2582 #ifdef XML_LARGE_SIZE 2583 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, 2584 #endif 2585 #ifdef XML_ATTR_INFO 2586 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, 2587 #endif 2588 #if XML_GE == 1 2589 /* 
Added in Expat 2.4.0 for XML_DTD defined and 2590 * added in Expat 2.6.0 for XML_GE == 1. */ 2591 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, 2592 XML_L("XML_BLAP_MAX_AMP"), 2593 (long int) 2594 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, 2595 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, 2596 XML_L("XML_BLAP_ACT_THRES"), 2597 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, 2598 /* Added in Expat 2.6.0. */ 2599 {XML_FEATURE_GE, XML_L("XML_GE"), 0}, 2600 #endif 2601 {XML_FEATURE_END, NULL, 0}}; 2602 2603 return features; 2604 } 2605 2606 #if XML_GE == 1 2607 XML_Bool XMLCALL 2608 XML_SetBillionLaughsAttackProtectionMaximumAmplification( 2609 XML_Parser parser, float maximumAmplificationFactor) { 2610 if ((parser == NULL) || (parser->m_parentParser != NULL) 2611 || isnan(maximumAmplificationFactor) 2612 || (maximumAmplificationFactor < 1.0f)) { 2613 return XML_FALSE; 2614 } 2615 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; 2616 return XML_TRUE; 2617 } 2618 2619 XML_Bool XMLCALL 2620 XML_SetBillionLaughsAttackProtectionActivationThreshold( 2621 XML_Parser parser, unsigned long long activationThresholdBytes) { 2622 if ((parser == NULL) || (parser->m_parentParser != NULL)) { 2623 return XML_FALSE; 2624 } 2625 parser->m_accounting.activationThresholdBytes = activationThresholdBytes; 2626 return XML_TRUE; 2627 } 2628 #endif /* XML_GE == 1 */ 2629 2630 XML_Bool XMLCALL 2631 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { 2632 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { 2633 parser->m_reparseDeferralEnabled = enabled; 2634 return XML_TRUE; 2635 } 2636 return XML_FALSE; 2637 } 2638 2639 /* Initially tag->rawName always points into the parse buffer; 2640 for those TAG instances opened while the current parse buffer was 2641 processed, and not yet closed, we need to store tag->rawName in 
a more 2642 permanent location, since the parse buffer is about to be discarded. 2643 */ 2644 static XML_Bool 2645 storeRawNames(XML_Parser parser) { 2646 TAG *tag = parser->m_tagStack; 2647 while (tag) { 2648 int bufSize; 2649 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); 2650 size_t rawNameLen; 2651 char *rawNameBuf = tag->buf + nameLen; 2652 /* Stop if already stored. Since m_tagStack is a stack, we can stop 2653 at the first entry that has already been copied; everything 2654 below it in the stack is already been accounted for in a 2655 previous call to this function. 2656 */ 2657 if (tag->rawName == rawNameBuf) 2658 break; 2659 /* For reuse purposes we need to ensure that the 2660 size of tag->buf is a multiple of sizeof(XML_Char). 2661 */ 2662 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); 2663 /* Detect and prevent integer overflow. */ 2664 if (rawNameLen > (size_t)INT_MAX - nameLen) 2665 return XML_FALSE; 2666 bufSize = nameLen + (int)rawNameLen; 2667 if (bufSize > tag->bufEnd - tag->buf) { 2668 char *temp = (char *)REALLOC(parser, tag->buf, bufSize); 2669 if (temp == NULL) 2670 return XML_FALSE; 2671 /* if tag->name.str points to tag->buf (only when namespace 2672 processing is off) then we have to update it 2673 */ 2674 if (tag->name.str == (XML_Char *)tag->buf) 2675 tag->name.str = (XML_Char *)temp; 2676 /* if tag->name.localPart is set (when namespace processing is on) 2677 then update it as well, since it will always point into tag->buf 2678 */ 2679 if (tag->name.localPart) 2680 tag->name.localPart 2681 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); 2682 tag->buf = temp; 2683 tag->bufEnd = temp + bufSize; 2684 rawNameBuf = temp + nameLen; 2685 } 2686 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); 2687 tag->rawName = rawNameBuf; 2688 tag = tag->parent; 2689 } 2690 return XML_TRUE; 2691 } 2692 2693 static enum XML_Error PTRCALL 2694 contentProcessor(XML_Parser parser, const char *start, const char *end, 
2695 const char **endPtr) { 2696 enum XML_Error result = doContent( 2697 parser, 0, parser->m_encoding, start, end, endPtr, 2698 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 2699 if (result == XML_ERROR_NONE) { 2700 if (! storeRawNames(parser)) 2701 return XML_ERROR_NO_MEMORY; 2702 } 2703 return result; 2704 } 2705 2706 static enum XML_Error PTRCALL 2707 externalEntityInitProcessor(XML_Parser parser, const char *start, 2708 const char *end, const char **endPtr) { 2709 enum XML_Error result = initializeEncoding(parser); 2710 if (result != XML_ERROR_NONE) 2711 return result; 2712 parser->m_processor = externalEntityInitProcessor2; 2713 return externalEntityInitProcessor2(parser, start, end, endPtr); 2714 } 2715 2716 static enum XML_Error PTRCALL 2717 externalEntityInitProcessor2(XML_Parser parser, const char *start, 2718 const char *end, const char **endPtr) { 2719 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 2720 int tok = XmlContentTok(parser->m_encoding, start, end, &next); 2721 switch (tok) { 2722 case XML_TOK_BOM: 2723 #if XML_GE == 1 2724 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, 2725 XML_ACCOUNT_DIRECT)) { 2726 accountingOnAbort(parser); 2727 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2728 } 2729 #endif /* XML_GE == 1 */ 2730 2731 /* If we are at the end of the buffer, this would cause the next stage, 2732 i.e. externalEntityInitProcessor3, to pass control directly to 2733 doContent (by detecting XML_TOK_NONE) without processing any xml text 2734 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. 2735 */ 2736 if (next == end && ! parser->m_parsingStatus.finalBuffer) { 2737 *endPtr = next; 2738 return XML_ERROR_NONE; 2739 } 2740 start = next; 2741 break; 2742 case XML_TOK_PARTIAL: 2743 if (! 
parser->m_parsingStatus.finalBuffer) { 2744 *endPtr = start; 2745 return XML_ERROR_NONE; 2746 } 2747 parser->m_eventPtr = start; 2748 return XML_ERROR_UNCLOSED_TOKEN; 2749 case XML_TOK_PARTIAL_CHAR: 2750 if (! parser->m_parsingStatus.finalBuffer) { 2751 *endPtr = start; 2752 return XML_ERROR_NONE; 2753 } 2754 parser->m_eventPtr = start; 2755 return XML_ERROR_PARTIAL_CHAR; 2756 } 2757 parser->m_processor = externalEntityInitProcessor3; 2758 return externalEntityInitProcessor3(parser, start, end, endPtr); 2759 } 2760 2761 static enum XML_Error PTRCALL 2762 externalEntityInitProcessor3(XML_Parser parser, const char *start, 2763 const char *end, const char **endPtr) { 2764 int tok; 2765 const char *next = start; /* XmlContentTok doesn't always set the last arg */ 2766 parser->m_eventPtr = start; 2767 tok = XmlContentTok(parser->m_encoding, start, end, &next); 2768 /* Note: These bytes are accounted later in: 2769 - processXmlDecl 2770 - externalEntityContentProcessor 2771 */ 2772 parser->m_eventEndPtr = next; 2773 2774 switch (tok) { 2775 case XML_TOK_XML_DECL: { 2776 enum XML_Error result; 2777 result = processXmlDecl(parser, 1, start, next); 2778 if (result != XML_ERROR_NONE) 2779 return result; 2780 switch (parser->m_parsingStatus.parsing) { 2781 case XML_SUSPENDED: 2782 *endPtr = next; 2783 return XML_ERROR_NONE; 2784 case XML_FINISHED: 2785 return XML_ERROR_ABORTED; 2786 default: 2787 start = next; 2788 } 2789 } break; 2790 case XML_TOK_PARTIAL: 2791 if (! parser->m_parsingStatus.finalBuffer) { 2792 *endPtr = start; 2793 return XML_ERROR_NONE; 2794 } 2795 return XML_ERROR_UNCLOSED_TOKEN; 2796 case XML_TOK_PARTIAL_CHAR: 2797 if (! 
parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_PARTIAL_CHAR;
  }
  parser->m_processor = externalEntityContentProcessor;
  parser->m_tagLevel = 1;
  return externalEntityContentProcessor(parser, start, end, endPtr);
}

/* Processor for the content of an external entity.

   Runs doContent() with startTagLevel 1 and XML_ACCOUNT_ENTITY_EXPANSION
   accounting; "haveMore" is the negation of m_parsingStatus.finalBuffer.
   When doContent() succeeds, storeRawNames() is called and a failure there
   is reported as XML_ERROR_NO_MEMORY.
*/
static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser, const char *start,
                               const char *end, const char **endPtr) {
  enum XML_Error result
      = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
                  (XML_Bool)! parser->m_parsingStatus.finalBuffer,
                  XML_ACCOUNT_ENTITY_EXPANSION);
  if (result == XML_ERROR_NONE) {
    if (! storeRawNames(parser))
      return XML_ERROR_NO_MEMORY;
  }
  return result;
}

/* Tokenize and process element content in the range [s, end).

   Each token returned by XmlContentTok() is dispatched to the matching
   application handler, or to the default handler where the code below falls
   back to it.  The loop runs until the input is exhausted, an error occurs,
   the parser is suspended/finished, or control is handed over to another
   processor (epilogProcessor, cdataSectionProcessor).

   Parameters:
   - parser:        the parser whose handlers and state are used.
   - startTagLevel: the value m_tagLevel must have when this content ends;
                    with 0, running out of input yields
                    XML_ERROR_NO_ELEMENTS, and a mismatch yields
                    XML_ERROR_ASYNC_ENTITY.
   - enc:           encoding of the input; if it differs from
                    parser->m_encoding, event positions are recorded in
                    parser->m_openInternalEntities instead of the parser.
   - s, end:        the bytes to process.
   - nextPtr:       receives the position to resume from whenever the
                    function returns XML_ERROR_NONE after stopping early.
   - haveMore:      when true, trailing/incomplete tokens are not errors;
                    *nextPtr is set to the token start so that it is
                    re-tokenized once more data is available.
   - account:       accounting category forwarded to
                    accountingDiffTolerated(), storeAtts() and
                    doCdataSection().

   Returns XML_ERROR_NONE or the error code describing why processing
   stopped.
*/
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  /* Select where event start/end positions are recorded (see "enc" above) */
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#if XML_GE == 1
    /* A trailing "]" or CR token is either not consumed at all (more input
       is expected) or consumed up to "end"; account bytes accordingly. */
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_TRAILING_CR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      *eventEndPP = end;
      if (parser->m_characterDataHandler) {
        /* The trailing CR is reported to the application as a line feed */
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0)
        return XML_ERROR_NO_ELEMENTS;
      if (parser->m_tagLevel != startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      if (startTagLevel > 0) {
        if (parser->m_tagLevel != startTagLevel)
          return XML_ERROR_ASYNC_ENTITY;
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_NO_ELEMENTS;
    case XML_TOK_INVALID:
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      /* Non-zero if the reference names a predefined entity; the name is the
         token without its first and last character unit */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#if XML_GE == 1
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_GE == 1 */
        if (parser->m_characterDataHandler)
          parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      /* The name is stored in the DTD pool only for the lookup and is
         discarded again right after it */
      name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&dtd->pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal,
         otherwise call the skipped entity or default handler.
      */
      if (! dtd->hasParamEntityRefs || dtd->standalone) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      if (entity->open)
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      if (entity->notation)
        return XML_ERROR_BINARY_ENTITY_REF;
      if (entity->textPtr) {
        /* Entity with stored replacement text: expand it, or report it as
           skipped when expansion is disabled */
        enum XML_Error result;
        if (! parser->m_defaultExpandInternalEntities) {
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
                                           0);
          else if (parser->m_defaultHandler)
            reportDefault(parser, enc, s, next);
          break;
        }
        result = processInternalEntity(parser, entity, XML_FALSE);
        if (result != XML_ERROR_NONE)
          return result;
      } else if (parser->m_externalEntityRefHandler) {
        const XML_Char *context;
        /* The entity is flagged open only while the context is computed */
        entity->open = XML_TRUE;
        context = getContext(parser);
        entity->open = XML_FALSE;
        if (! context)
          return XML_ERROR_NO_MEMORY;
        if (! parser->m_externalEntityRefHandler(
                parser->m_externalEntityRefHandlerArg, context, entity->base,
                entity->systemId, entity->publicId))
          return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        poolDiscard(&parser->m_tempPool);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    }
    case XML_TOK_START_TAG_NO_ATTS:
      /* fall through */
    case XML_TOK_START_TAG_WITH_ATTS: {
      TAG *tag;
      enum XML_Error result;
      XML_Char *toPtr;
      /* Reuse a TAG from the free list if there is one, else allocate */
      if (parser->m_freeTagList) {
        tag = parser->m_freeTagList;
        parser->m_freeTagList = parser->m_freeTagList->parent;
      } else {
        tag = (TAG *)MALLOC(parser, sizeof(TAG));
        if (! tag)
          return XML_ERROR_NO_MEMORY;
        tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
        if (! tag->buf) {
          FREE(parser, tag);
          return XML_ERROR_NO_MEMORY;
        }
        tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
      }
      /* Push the tag onto the stack of open elements */
      tag->bindings = NULL;
      tag->parent = parser->m_tagStack;
      parser->m_tagStack = tag;
      tag->name.localPart = NULL;
      tag->name.prefix = NULL;
      tag->rawName = s + enc->minBytesPerChar;
      tag->rawNameLength = XmlNameLength(enc, tag->rawName);
      ++parser->m_tagLevel;
      {
        /* Convert the raw name into tag->buf, doubling the buffer until the
           whole name fits; one unit is kept free for the terminator */
        const char *rawNameEnd = tag->rawName + tag->rawNameLength;
        const char *fromPtr = tag->rawName;
        toPtr = (XML_Char *)tag->buf;
        for (;;) {
          int bufSize;
          int convLen;
          const enum XML_Convert_Result convert_res
              = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
                           (ICHAR *)tag->bufEnd - 1);
          convLen = (int)(toPtr - (XML_Char *)tag->buf);
          if ((fromPtr >= rawNameEnd)
              || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
            tag->name.strLen = convLen;
            break;
          }
          bufSize = (int)(tag->bufEnd - tag->buf) << 1;
          {
            char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
            if (temp == NULL)
              return XML_ERROR_NO_MEMORY;
            tag->buf = temp;
            tag->bufEnd = temp + bufSize;
            toPtr = (XML_Char *)temp + convLen;
          }
        }
      }
      tag->name.str = (XML_Char *)tag->buf;
      *toPtr = XML_T('\0');
      result
          = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
      if (result)
        return result;
      if (parser->m_startElementHandler)
        parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
                                      (const XML_Char **)parser->m_atts);
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      break;
    }
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
      /* No TAG is pushed for an empty element; name and bindings are local
         to this case */
      const char *rawName = s + enc->minBytesPerChar;
      enum XML_Error result;
      BINDING *bindings = NULL;
      XML_Bool noElmHandlers = XML_TRUE;
      TAG_NAME name;
      name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
                                 rawName + XmlNameLength(enc, rawName));
      if (! name.str)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
      result = storeAtts(parser, enc, s, &name, &bindings,
                         XML_ACCOUNT_NONE /* token spans whole start tag */);
      if (result != XML_ERROR_NONE) {
        freeBindings(parser, bindings);
        return result;
      }
      poolFinish(&parser->m_tempPool);
      if (parser->m_startElementHandler) {
        parser->m_startElementHandler(parser->m_handlerArg, name.str,
                                      (const XML_Char **)parser->m_atts);
        noElmHandlers = XML_FALSE;
      }
      if (parser->m_endElementHandler) {
        /* If a start event was already reported for this token, the end
           event is positioned at the end of the token */
        if (parser->m_startElementHandler)
          *eventPP = *eventEndPP;
        parser->m_endElementHandler(parser->m_handlerArg, name.str);
        noElmHandlers = XML_FALSE;
      }
      if (noElmHandlers && parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      freeBindings(parser, bindings);
    }
      /* An empty element at tag level 0 ends the content: continue with the
         epilog (directly, or on resume if the parser was suspended) */
      if ((parser->m_tagLevel == 0)
          && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
        if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
          parser->m_processor = epilogProcessor;
        else
          return epilogProcessor(parser, next, end, nextPtr);
      }
      break;
    case XML_TOK_END_TAG:
      if (parser->m_tagLevel == startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      else {
        int len;
        const char *rawName;
        TAG *tag = parser->m_tagStack;
        rawName = s + enc->minBytesPerChar * 2;
        len = XmlNameLength(enc, rawName);
        /* The end tag must match the innermost open tag byte for byte */
        if (len != tag->rawNameLength
            || memcmp(tag->rawName, rawName, len) != 0) {
          *eventPP = rawName;
          return XML_ERROR_TAG_MISMATCH;
        }
        /* Pop the tag and put it on the free list for reuse */
        parser->m_tagStack = tag->parent;
        tag->parent = parser->m_freeTagList;
        parser->m_freeTagList = tag;
        --parser->m_tagLevel;
        if (parser->m_endElementHandler) {
          const XML_Char *localPart;
          const XML_Char *prefix;
          XML_Char *uri;
          localPart = tag->name.localPart;
          if (parser->m_ns && localPart) {
            /* localPart and prefix may have been overwritten in
               tag->name.str, since this points to the binding->uri
               buffer which gets reused; so we have to add them again
            */
            uri = (XML_Char *)tag->name.str + tag->name.uriLen;
            /* don't need to check for space - already done in storeAtts() */
            while (*localPart)
              *uri++ = *localPart++;
            prefix = tag->name.prefix;
            if (parser->m_ns_triplets && prefix) {
              *uri++ = parser->m_namespaceSeparator;
              while (*prefix)
                *uri++ = *prefix++;
            }
            *uri = XML_T('\0');
          }
          parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
        } else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        /* Report the end of each namespace binding made by this tag, restore
           the prefix's previous binding and recycle the BINDING */
        while (tag->bindings) {
          BINDING *b = tag->bindings;
          if (parser->m_endNamespaceDeclHandler)
            parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
                                              b->prefix->name);
          tag->bindings = tag->bindings->nextTagBinding;
          b->nextTagBinding = parser->m_freeBindingList;
          parser->m_freeBindingList = b;
          b->prefix->binding = b->prevPrefixBinding;
        }
        /* Tag level back at 0: continue with the epilog (directly, or on
           resume if the parser was suspended) */
        if ((parser->m_tagLevel == 0)
            && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
          if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
            parser->m_processor = epilogProcessor;
          else
            return epilogProcessor(parser, next, end, nextPtr);
        }
      }
      break;
    case XML_TOK_CHAR_REF: {
      int n = XmlCharRefNumber(enc, s);
      if (n < 0)
        return XML_ERROR_BAD_CHAR_REF;
      if (parser->m_characterDataHandler) {
        XML_Char buf[XML_ENCODE_MAX];
        parser->m_characterDataHandler(parser->m_handlerArg, buf,
                                       XmlEncode(n, (ICHAR *)buf));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_XML_DECL:
      return XML_ERROR_MISPLACED_XML_PI;
    case XML_TOK_DATA_NEWLINE:
      if (parser->m_characterDataHandler) {
        /* Newline tokens are reported to the application as a line feed */
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_CDATA_SECT_OPEN: {
      enum XML_Error result;
      if (parser->m_startCdataSectionHandler)
        parser->m_startCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
         changing only the character data.  You set up a defaultHandler
         and a characterDataHandler.  The defaultHandler simply copies
         characters through.  The characterDataHandler does the
         transformation and writes the characters out escaping them as
         necessary.  This case will fail to work if we leave out the
         following two lines (because & and < inside CDATA sections will
         be incorrectly escaped).

         However, now we have a start/endCdataSectionHandler, so it seems
         easier to let the user deal with this.
      */
      else if ((0) && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      result
          = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doCdataSection() left "next" NULL: continue in the CDATA section
           processor on the next call */
        parser->m_processor = cdataSectionProcessor;
        return result;
      }
    } break;
    case XML_TOK_TRAILING_RSQB:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      if (parser->m_characterDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
          XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
          parser->m_characterDataHandler(
              parser->m_handlerArg, parser->m_dataBuf,
              (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
        } else
          parser->m_characterDataHandler(
              parser->m_handlerArg, (const XML_Char *)s,
              (int)((const XML_Char *)end - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0) {
        *eventPP = end;
        return XML_ERROR_NO_ELEMENTS;
      }
      if (parser->m_tagLevel != startTagLevel) {
        *eventPP = end;
        return XML_ERROR_ASYNC_ENTITY;
      }
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_CHARS: {
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert through m_dataBuf and report one buffer-load at a time,
             advancing the event pointers after each chunk */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = s;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
                          (int)((const XML_Char *)next - (const XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    default:
      /* All of the tokens produced by XmlContentTok() have their own
       * explicit cases, so this default is not strictly necessary.
       * However it is a useful safety net, so we retain the code and
       * simply exclude it from the coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
      /* LCOV_EXCL_STOP */
    }
    /* Token consumed: advance, then honor a suspend/stop request that a
       handler may have made while this token was processed */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}

/* This function does not call free() on the allocated memory, merely
 * moving it to the parser's m_freeBindingList where it can be freed or
 * reused as appropriate.
 */
static void
freeBindings(XML_Parser parser, BINDING *bindings) {
  while (bindings) {
    BINDING *b = bindings;

    /* m_startNamespaceDeclHandler will have been called for this
     * binding in addBindings(), so call the end handler now.
*/
    if (parser->m_endNamespaceDeclHandler)
      parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);

    bindings = bindings->nextTagBinding;
    b->nextTagBinding = parser->m_freeBindingList;
    parser->m_freeBindingList = b;
    b->prefix->binding = b->prevPrefixBinding;
  }
}

/* Precondition: all arguments must be non-NULL;
   Purpose:
   - normalize attributes
   - check attributes for well-formedness
   - generate namespace aware attribute names (URI, prefix)
   - build list of attributes for startElementHandler
   - default attributes
   - process namespace declarations (check and report them)
   - generate namespace aware element name (URI, prefix)

   Parameters:
   - parser:      the parser; its m_atts array (grown here when needed)
                  receives the name/value list for the application.
   - enc:         encoding of the tag text.
   - attStr:      the tag text handed to XmlGetAttributes().
   - tagNamePtr:  in/out element name; when namespace processing applies,
                  its str, localPart, uriLen, prefix and prefixLen members
                  are set to describe the expanded name.
   - bindingsPtr: list that addBinding() extends with the namespace
                  bindings declared by this tag.
   - account:     accounting category forwarded to storeAttributeValue().

   Returns XML_ERROR_NONE on success, otherwise XML_ERROR_NO_MEMORY,
   XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_UNBOUND_PREFIX or the error
   returned by storeAttributeValue() / addBinding().

   The character just before an attribute id's name ((attId->name)[-1]) is
   used as a per-call flag: non-zero means the attribute was already seen,
   2 marks a prefixed name that still has to be expanded.  The flags of the
   attributes in the list are cleared again further down.
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
          TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
          enum XML_Account account) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  ELEMENT_TYPE *elementType;
  int nDefaultAtts;
  const XML_Char **appAtts; /* the attribute list for the application */
  int attIndex = 0;
  int prefixLen;
  int i;
  int n;
  XML_Char *uri;
  int nPrefixes = 0;
  BINDING *binding;
  const XML_Char *localPart;

  /* lookup the element type name */
  elementType
      = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
  if (! elementType) {
    /* Unknown element type: register it under a copy of the name kept in
       the DTD pool */
    const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
    if (! name)
      return XML_ERROR_NO_MEMORY;
    elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
                                         sizeof(ELEMENT_TYPE));
    if (! elementType)
      return XML_ERROR_NO_MEMORY;
    if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
      return XML_ERROR_NO_MEMORY;
  }
  nDefaultAtts = elementType->nDefaultAtts;

  /* get the attributes from the tokenizer */
  n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);

  /* Detect and prevent integer overflow */
  if (n > INT_MAX - nDefaultAtts) {
    return XML_ERROR_NO_MEMORY;
  }

  /* Grow m_atts (and m_attInfo) so that specified plus defaulted attributes
     fit; m_attsSize is restored whenever growing fails */
  if (n + nDefaultAtts > parser->m_attsSize) {
    int oldAttsSize = parser->m_attsSize;
    ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
    XML_AttrInfo *temp2;
#endif

    /* Detect and prevent integer overflow */
    if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
        || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
      return XML_ERROR_NO_MEMORY;
    }

    parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;

    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#endif

    temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
                                parser->m_attsSize * sizeof(ATTRIBUTE));
    if (temp == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_atts = temp;
#ifdef XML_ATTR_INFO
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#  if UINT_MAX >= SIZE_MAX
    if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
#  endif

    temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
                                    parser->m_attsSize * sizeof(XML_AttrInfo));
    if (temp2 == NULL) {
      parser->m_attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
    }
    parser->m_attInfo = temp2;
#endif
    /* The first call could store at most oldAttsSize attributes; fetch them
       again now that the array is large enough */
    if (n > oldAttsSize)
      XmlGetAttributes(enc, attStr, n, parser->m_atts);
  }

  /* The application's name/value list is built in place in m_atts */
  appAtts = (const XML_Char **)parser->m_atts;
  for (i = 0; i < n; i++) {
    ATTRIBUTE *currAtt = &parser->m_atts[i];
#ifdef XML_ATTR_INFO
    XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
#endif
    /* add the name and value to the attribute list */
    ATTRIBUTE_ID *attId
        = getAttributeId(parser, enc, currAtt->name,
                         currAtt->name + XmlNameLength(enc, currAtt->name));
    if (! attId)
      return XML_ERROR_NO_MEMORY;
#ifdef XML_ATTR_INFO
    /* Positions are computed relative to the end of the parse buffer */
    currAttInfo->nameStart
        = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
    currAttInfo->nameEnd
        = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
    currAttInfo->valueStart = parser->m_parseEndByteIndex
                              - (parser->m_parseEndPtr - currAtt->valuePtr);
    currAttInfo->valueEnd = parser->m_parseEndByteIndex
                            - (parser->m_parseEndPtr - currAtt->valueEnd);
#endif
    /* Detect duplicate attributes by their QNames. This does not work when
       namespace processing is turned on and different prefixes for the same
       namespace are used. For this case we have a check further down.
    */
    if ((attId->name)[-1]) {
      if (enc == parser->m_encoding)
        parser->m_eventPtr = parser->m_atts[i].name;
      return XML_ERROR_DUPLICATE_ATTRIBUTE;
    }
    (attId->name)[-1] = 1;
    appAtts[attIndex++] = attId->name;
    if (! parser->m_atts[i].normalized) {
      enum XML_Error result;
      XML_Bool isCdata = XML_TRUE;

      /* figure out whether declared as other than CDATA */
      if (attId->maybeTokenized) {
        int j;
        for (j = 0; j < nDefaultAtts; j++) {
          if (attId == elementType->defaultAtts[j].id) {
            isCdata = elementType->defaultAtts[j].isCdata;
            break;
          }
        }
      }

      /* normalize the attribute value */
      result = storeAttributeValue(
          parser, enc, isCdata, parser->m_atts[i].valuePtr,
          parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
      if (result)
        return result;
      appAtts[attIndex] = poolStart(&parser->m_tempPool);
      poolFinish(&parser->m_tempPool);
    } else {
      /* the value did not need normalizing */
      appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
                                          parser->m_atts[i].valuePtr,
                                          parser->m_atts[i].valueEnd);
      if (appAtts[attIndex] == 0)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
    }
    /* handle prefixed attribute names */
    if (attId->prefix) {
      if (attId->xmlns) {
        /* deal with namespace declarations here */
        enum XML_Error result = addBinding(parser, attId->prefix, attId,
                                           appAtts[attIndex], bindingsPtr);
        if (result)
          return result;
        /* Namespace declarations are not passed on as attributes: give the
           name slot back */
        --attIndex;
      } else {
        /* deal with other prefixed names later */
        attIndex++;
        nPrefixes++;
        (attId->name)[-1] = 2;
      }
    } else
      attIndex++;
  }

  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
  parser->m_nSpecifiedAtts = attIndex;
  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
    for (i = 0; i < attIndex; i += 2)
      if (appAtts[i] == elementType->idAtt->name) {
        parser->m_idAttIndex = i;
        break;
      }
  } else
    parser->m_idAttIndex = -1;

  /* do attribute defaulting */
  for (i = 0; i < nDefaultAtts; i++) {
    const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
    /* Only attributes that were not specified and have a default value */
    if (! (da->id->name)[-1] && da->value) {
      if (da->id->prefix) {
        if (da->id->xmlns) {
          enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
                                             da->value, bindingsPtr);
          if (result)
            return result;
        } else {
          (da->id->name)[-1] = 2;
          nPrefixes++;
          appAtts[attIndex++] = da->id->name;
          appAtts[attIndex++] = da->value;
        }
      } else {
        (da->id->name)[-1] = 1;
        appAtts[attIndex++] = da->id->name;
        appAtts[attIndex++] = da->value;
      }
    }
  }
  appAtts[attIndex] = 0;

  /* expand prefixed attribute names, check for duplicates,
     and clear flags that say whether attributes were specified */
  i = 0;
  if (nPrefixes) {
    int j; /* hash table index */
    unsigned long version = parser->m_nsAttsVersion;

    /* Detect and prevent invalid shift */
    if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
      return XML_ERROR_NO_MEMORY;
    }

    unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
    unsigned char oldNsAttsPower = parser->m_nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1)
        >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
      NS_ATT *temp;
      /* hash table size must also be a power of 2 and >= 8 */
      while (nPrefixes >> parser->m_nsAttsPower++)
        ;
      if (parser->m_nsAttsPower < 3)
        parser->m_nsAttsPower = 3;

      /* Detect and prevent invalid shift */
      if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }

      nsAttsSize = 1u << parser->m_nsAttsPower;

      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
#endif

      temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
                               nsAttsSize * sizeof(NS_ATT));
      if (! temp) {
        /* Restore actual size of memory in m_nsAtts */
        parser->m_nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
      }
      parser->m_nsAtts = temp;
      version = 0; /* force re-initialization of m_nsAtts hash table */
    }
    /* using a version flag saves us from initializing m_nsAtts every time */
    if (! version) { /* initialize version flags when version wraps around */
      version = INIT_ATTS_VERSION;
      for (j = nsAttsSize; j != 0;)
        parser->m_nsAtts[--j].version = version;
    }
    /* Slots carrying the new version number belong to this call; every
       other slot counts as empty */
    parser->m_nsAttsVersion = --version;

    /* expand prefixed names and check for duplicates */
    for (; i < attIndex; i += 2) {
      const XML_Char *s = appAtts[i];
      if (s[-1] == 2) { /* prefixed */
        ATTRIBUTE_ID *id;
        const BINDING *b;
        unsigned long uriHash;
        struct siphash sip_state;
        struct sipkey sip_key;

        copy_salt_to_sipkey(parser, &sip_key);
        sip24_init(&sip_state, &sip_key);

        ((XML_Char *)s)[-1] = 0; /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
        if (! id || ! id->prefix) {
          /* This code is walking through the appAtts array, dealing
           * with (in this case) a prefixed attribute name.  To be in
           * the array, the attribute must have already been bound, so
           * has to have passed through the hash table lookup once
           * already.  That implies that an entry for it already
           * exists, so the lookup above will return a pointer to
           * already allocated memory.  There is no opportunity for
           * the allocator to fail, so the condition above cannot be
           * fulfilled.
           *
           * Since it is difficult to be certain that the above
           * analysis is complete, we retain the test and merely
           * remove the code from coverage tests.
           */
          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
        }
        b = id->prefix->binding;
        if (! b)
          return XML_ERROR_UNBOUND_PREFIX;

        /* Build the expanded name in the temp pool: namespace URI first ... */
        for (j = 0; j < b->uriLen; j++) {
          const XML_Char c = b->uri[j];
          if (! poolAppendChar(&parser->m_tempPool, c))
            return XML_ERROR_NO_MEMORY;
        }

        sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));

        /* ... then skip past the prefix and its colon ... */
        while (*s++ != XML_T(ASCII_COLON))
          ;

        sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));

        /* ... and append the local part */
        do { /* copies null terminator */
          if (! poolAppendChar(&parser->m_tempPool, *s))
            return XML_ERROR_NO_MEMORY;
        } while (*s++);

        uriHash = (unsigned long)sip24_final(&sip_state);

        { /* Check hash table for duplicate of expanded name (uriName).
             Derived from code in lookup(parser, HASH_TABLE *table, ...).
          */
          unsigned char step = 0;
          unsigned long mask = nsAttsSize - 1;
          j = uriHash & mask; /* index into hash table */
          while (parser->m_nsAtts[j].version == version) {
            /* for speed we compare stored hash values first */
            if (uriHash == parser->m_nsAtts[j].hash) {
              const XML_Char *s1 = poolStart(&parser->m_tempPool);
              const XML_Char *s2 = parser->m_nsAtts[j].uriName;
              /* s1 is null terminated, but not s2 */
              for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
                ;
              if (*s1 == 0)
                return XML_ERROR_DUPLICATE_ATTRIBUTE;
            }
            /* Slot taken by another name: step to the next probe position,
               wrapping around the table */
            if (! step)
              step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
            j < step ? (j += nsAttsSize - step) : (j -= step);
          }
        }

        if (parser->m_ns_triplets) { /* append namespace separator and prefix */
          parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
          s = b->prefix->name;
          do {
            if (! poolAppendChar(&parser->m_tempPool, *s))
              return XML_ERROR_NO_MEMORY;
          } while (*s++);
        }

        /* store expanded name in attribute list */
        s = poolStart(&parser->m_tempPool);
        poolFinish(&parser->m_tempPool);
        appAtts[i] = s;

        /* fill empty slot with new version, uriName and hash value */
        parser->m_nsAtts[j].version = version;
        parser->m_nsAtts[j].hash = uriHash;
        parser->m_nsAtts[j].uriName = s;

        /* All prefixed names expanded: the loop below only has to clear the
           flags of the remaining attributes */
        if (! --nPrefixes) {
          i += 2;
          break;
        }
      } else                     /* not prefixed */
        ((XML_Char *)s)[-1] = 0; /* clear flag */
    }
  }
  /* clear flags for the remaining attributes */
  for (; i < attIndex; i += 2)
    ((XML_Char *)(appAtts[i]))[-1] = 0;
  /* Namespace declarations are not in appAtts, so their flags are cleared
     via the bindings list */
  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
    binding->attId->name[-1] = 0;

  if (! parser->m_ns)
    return XML_ERROR_NONE;

  /* expand the element type name */
  if (elementType->prefix) {
    binding = elementType->prefix->binding;
    if (! binding)
      return XML_ERROR_UNBOUND_PREFIX;
    localPart = tagNamePtr->str;
    while (*localPart++ != XML_T(ASCII_COLON))
      ;
  } else if (dtd->defaultPrefix.binding) {
    binding = dtd->defaultPrefix.binding;
    localPart = tagNamePtr->str;
  } else
    return XML_ERROR_NONE;
  prefixLen = 0;
  if (parser->m_ns_triplets && binding->prefix->name) {
    for (; binding->prefix->name[prefixLen++];)
      ; /* prefixLen includes null terminator */
  }
  tagNamePtr->localPart = localPart;
  tagNamePtr->uriLen = binding->uriLen;
  tagNamePtr->prefix = binding->prefix->name;
  tagNamePtr->prefixLen = prefixLen;
  for (i = 0; localPart[i++];)
    ; /* i includes null terminator */

  /* Detect and prevent integer overflow */
  if (binding->uriLen > INT_MAX - prefixLen
      || i > INT_MAX - (binding->uriLen + prefixLen)) {
    return XML_ERROR_NO_MEMORY;
  }

  /* The expanded name is assembled in the binding's own URI buffer, right
     after the URI; enlarge that buffer first if it is too small */
  n = i + binding->uriLen + prefixLen;
  if (n > binding->uriAlloc) {
    TAG *p;

    /* Detect and prevent integer overflow */
    if (n > INT_MAX - EXPAND_SPARE) {
      return XML_ERROR_NO_MEMORY;
    }
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
      return XML_ERROR_NO_MEMORY;
    }
#endif

    uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
    if (! uri)
      return XML_ERROR_NO_MEMORY;
    binding->uriAlloc = n + EXPAND_SPARE;
    memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
    /* Open tags whose name points at the old buffer must follow the move */
    for (p = parser->m_tagStack; p; p = p->parent)
      if (p->name.str == binding->uri)
        p->name.str = uri;
    FREE(parser, binding->uri);
    binding->uri = uri;
  }
  /* if m_namespaceSeparator != '\0' then uri includes it already */
  uri = binding->uri + binding->uriLen;
  memcpy(uri, localPart, i * sizeof(XML_Char));
  /* we always have a namespace separator between localPart and prefix */
  if (prefixLen) {
    uri += i - 1;
    *uri = parser->m_namespaceSeparator; /* replace null terminator */
    memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
  }
  tagNamePtr->str = binding->uri;
  return XML_ERROR_NONE;
}

static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
  // For the RFC 3986 ABNF grammar see
  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A

  switch (candidate) {
  // From rule "ALPHA" (uppercase half)
  case 'A':
  case 'B':
  case 'C':
  case 'D':
  case 'E':
  case 'F':
  case 'G':
  case 'H':
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P':
  case 'Q':
  case 'R':
  case 'S':
  case 'T':
  case 'U':
  case 'V':
  case 'W':
  case 'X':
  case 'Y':
  case 'Z':

  // From rule "ALPHA" (lowercase half)
  case 'a':
  case 'b':
  case 'c':
  case 'd':
  case 'e':
  case 'f':
  case 'g':
  case 'h':
  case 'i':
  case 'j':
  case 'k':
  case 'l':
  case 'm':
  case 'n':
  case 'o':
  case 'p':
  case 'q':
  case 'r':
  case 's':
  case 't':
  case 'u':
  case 'v':
  case 'w':
  case 'x':
  case 'y':
  case 'z':

  // From rule "DIGIT"
  case '0':
  case '1':
  case '2':
3859 case '3': 3860 case '4': 3861 case '5': 3862 case '6': 3863 case '7': 3864 case '8': 3865 case '9': 3866 3867 // From rule "pct-encoded" 3868 case '%': 3869 3870 // From rule "unreserved" 3871 case '-': 3872 case '.': 3873 case '_': 3874 case '~': 3875 3876 // From rule "gen-delims" 3877 case ':': 3878 case '/': 3879 case '?': 3880 case '#': 3881 case '[': 3882 case ']': 3883 case '@': 3884 3885 // From rule "sub-delims" 3886 case '!': 3887 case '$': 3888 case '&': 3889 case '\'': 3890 case '(': 3891 case ')': 3892 case '*': 3893 case '+': 3894 case ',': 3895 case ';': 3896 case '=': 3897 return XML_TRUE; 3898 3899 default: 3900 return XML_FALSE; 3901 } 3902 } 3903 3904 /* addBinding() overwrites the value of prefix->binding without checking. 3905 Therefore one must keep track of the old value outside of addBinding(). 3906 */ 3907 static enum XML_Error 3908 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, 3909 const XML_Char *uri, BINDING **bindingsPtr) { 3910 // "http://www.w3.org/XML/1998/namespace" 3911 static const XML_Char xmlNamespace[] 3912 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 3913 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, 3914 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, 3915 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, 3916 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, 3917 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, 3918 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, 3919 ASCII_e, '\0'}; 3920 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 3921 // "http://www.w3.org/2000/xmlns/" 3922 static const XML_Char xmlnsNamespace[] 3923 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 3924 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 3925 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 3926 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 3927 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 3928 
static const int xmlnsLen 3929 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 3930 3931 XML_Bool mustBeXML = XML_FALSE; 3932 XML_Bool isXML = XML_TRUE; 3933 XML_Bool isXMLNS = XML_TRUE; 3934 3935 BINDING *b; 3936 int len; 3937 3938 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 3939 if (*uri == XML_T('\0') && prefix->name) 3940 return XML_ERROR_UNDECLARING_PREFIX; 3941 3942 if (prefix->name && prefix->name[0] == XML_T(ASCII_x) 3943 && prefix->name[1] == XML_T(ASCII_m) 3944 && prefix->name[2] == XML_T(ASCII_l)) { 3945 /* Not allowed to bind xmlns */ 3946 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) 3947 && prefix->name[5] == XML_T('\0')) 3948 return XML_ERROR_RESERVED_PREFIX_XMLNS; 3949 3950 if (prefix->name[3] == XML_T('\0')) 3951 mustBeXML = XML_TRUE; 3952 } 3953 3954 for (len = 0; uri[len]; len++) { 3955 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) 3956 isXML = XML_FALSE; 3957 3958 if (! mustBeXML && isXMLNS 3959 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) 3960 isXMLNS = XML_FALSE; 3961 3962 // NOTE: While Expat does not validate namespace URIs against RFC 3986 3963 // today (and is not REQUIRED to do so with regard to the XML 1.0 3964 // namespaces specification) we have to at least make sure, that 3965 // the application on top of Expat (that is likely splitting expanded 3966 // element names ("qualified names") of form 3967 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 3968 // in its element handler code) cannot be confused by an attacker 3969 // putting additional namespace separator characters into namespace 3970 // declarations. That would be ambiguous and not to be expected. 
3971 // 3972 // While the HTML API docs of function XML_ParserCreateNS have been 3973 // advising against use of a namespace separator character that can 3974 // appear in a URI for >20 years now, some widespread applications 3975 // are using URI characters (':' (colon) in particular) for a 3976 // namespace separator, in practice. To keep these applications 3977 // functional, we only reject namespaces URIs containing the 3978 // application-chosen namespace separator if the chosen separator 3979 // is a non-URI character with regard to RFC 3986. 3980 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) 3981 && ! is_rfc3986_uri_char(uri[len])) { 3982 return XML_ERROR_SYNTAX; 3983 } 3984 } 3985 isXML = isXML && len == xmlLen; 3986 isXMLNS = isXMLNS && len == xmlnsLen; 3987 3988 if (mustBeXML != isXML) 3989 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML 3990 : XML_ERROR_RESERVED_NAMESPACE_URI; 3991 3992 if (isXMLNS) 3993 return XML_ERROR_RESERVED_NAMESPACE_URI; 3994 3995 if (parser->m_namespaceSeparator) 3996 len++; 3997 if (parser->m_freeBindingList) { 3998 b = parser->m_freeBindingList; 3999 if (len > b->uriAlloc) { 4000 /* Detect and prevent integer overflow */ 4001 if (len > INT_MAX - EXPAND_SPARE) { 4002 return XML_ERROR_NO_MEMORY; 4003 } 4004 4005 /* Detect and prevent integer overflow. 4006 * The preprocessor guard addresses the "always false" warning 4007 * from -Wtype-limits on platforms where 4008 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 4009 #if UINT_MAX >= SIZE_MAX 4010 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4011 return XML_ERROR_NO_MEMORY; 4012 } 4013 #endif 4014 4015 XML_Char *temp = (XML_Char *)REALLOC( 4016 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4017 if (temp == NULL) 4018 return XML_ERROR_NO_MEMORY; 4019 b->uri = temp; 4020 b->uriAlloc = len + EXPAND_SPARE; 4021 } 4022 parser->m_freeBindingList = b->nextTagBinding; 4023 } else { 4024 b = (BINDING *)MALLOC(parser, sizeof(BINDING)); 4025 if (! b) 4026 return XML_ERROR_NO_MEMORY; 4027 4028 /* Detect and prevent integer overflow */ 4029 if (len > INT_MAX - EXPAND_SPARE) { 4030 return XML_ERROR_NO_MEMORY; 4031 } 4032 /* Detect and prevent integer overflow. 4033 * The preprocessor guard addresses the "always false" warning 4034 * from -Wtype-limits on platforms where 4035 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4036 #if UINT_MAX >= SIZE_MAX 4037 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4038 return XML_ERROR_NO_MEMORY; 4039 } 4040 #endif 4041 4042 b->uri 4043 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4044 if (! 
b->uri) { 4045 FREE(parser, b); 4046 return XML_ERROR_NO_MEMORY; 4047 } 4048 b->uriAlloc = len + EXPAND_SPARE; 4049 } 4050 b->uriLen = len; 4051 memcpy(b->uri, uri, len * sizeof(XML_Char)); 4052 if (parser->m_namespaceSeparator) 4053 b->uri[len - 1] = parser->m_namespaceSeparator; 4054 b->prefix = prefix; 4055 b->attId = attId; 4056 b->prevPrefixBinding = prefix->binding; 4057 /* NULL binding when default namespace undeclared */ 4058 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4059 prefix->binding = NULL; 4060 else 4061 prefix->binding = b; 4062 b->nextTagBinding = *bindingsPtr; 4063 *bindingsPtr = b; 4064 /* if attId == NULL then we are not starting a namespace scope */ 4065 if (attId && parser->m_startNamespaceDeclHandler) 4066 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, 4067 prefix->binding ? uri : 0); 4068 return XML_ERROR_NONE; 4069 } 4070 4071 /* The idea here is to avoid using stack for each CDATA section when 4072 the whole file is parsed with one call. 4073 */ 4074 static enum XML_Error PTRCALL 4075 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, 4076 const char **endPtr) { 4077 enum XML_Error result = doCdataSection( 4078 parser, parser->m_encoding, &start, end, endPtr, 4079 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4080 if (result != XML_ERROR_NONE) 4081 return result; 4082 if (start) { 4083 if (parser->m_parentParser) { /* we are parsing an external entity */ 4084 parser->m_processor = externalEntityContentProcessor; 4085 return externalEntityContentProcessor(parser, start, end, endPtr); 4086 } else { 4087 parser->m_processor = contentProcessor; 4088 return contentProcessor(parser, start, end, endPtr); 4089 } 4090 } 4091 return result; 4092 } 4093 4094 /* startPtr gets set to non-null if the section is closed, and to null if 4095 the section is not yet closed. 
4096 */ 4097 static enum XML_Error 4098 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4099 const char *end, const char **nextPtr, XML_Bool haveMore, 4100 enum XML_Account account) { 4101 const char *s = *startPtr; 4102 const char **eventPP; 4103 const char **eventEndPP; 4104 if (enc == parser->m_encoding) { 4105 eventPP = &parser->m_eventPtr; 4106 *eventPP = s; 4107 eventEndPP = &parser->m_eventEndPtr; 4108 } else { 4109 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4110 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4111 } 4112 *eventPP = s; 4113 *startPtr = NULL; 4114 4115 for (;;) { 4116 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4117 int tok = XmlCdataSectionTok(enc, s, end, &next); 4118 #if XML_GE == 1 4119 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4120 accountingOnAbort(parser); 4121 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4122 } 4123 #else 4124 UNUSED_P(account); 4125 #endif 4126 *eventEndPP = next; 4127 switch (tok) { 4128 case XML_TOK_CDATA_SECT_CLOSE: 4129 if (parser->m_endCdataSectionHandler) 4130 parser->m_endCdataSectionHandler(parser->m_handlerArg); 4131 /* BEGIN disabled code */ 4132 /* see comment under XML_TOK_CDATA_SECT_OPEN */ 4133 else if ((0) && parser->m_characterDataHandler) 4134 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4135 0); 4136 /* END disabled code */ 4137 else if (parser->m_defaultHandler) 4138 reportDefault(parser, enc, s, next); 4139 *startPtr = next; 4140 *nextPtr = next; 4141 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4142 return XML_ERROR_ABORTED; 4143 else 4144 return XML_ERROR_NONE; 4145 case XML_TOK_DATA_NEWLINE: 4146 if (parser->m_characterDataHandler) { 4147 XML_Char c = 0xA; 4148 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 4149 } else if (parser->m_defaultHandler) 4150 reportDefault(parser, enc, s, next); 4151 break; 4152 case 
XML_TOK_DATA_CHARS: { 4153 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 4154 if (charDataHandler) { 4155 if (MUST_CONVERT(enc, s)) { 4156 for (;;) { 4157 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 4158 const enum XML_Convert_Result convert_res = XmlConvert( 4159 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 4160 *eventEndPP = next; 4161 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4162 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 4163 if ((convert_res == XML_CONVERT_COMPLETED) 4164 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 4165 break; 4166 *eventPP = s; 4167 } 4168 } else 4169 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 4170 (int)((const XML_Char *)next - (const XML_Char *)s)); 4171 } else if (parser->m_defaultHandler) 4172 reportDefault(parser, enc, s, next); 4173 } break; 4174 case XML_TOK_INVALID: 4175 *eventPP = next; 4176 return XML_ERROR_INVALID_TOKEN; 4177 case XML_TOK_PARTIAL_CHAR: 4178 if (haveMore) { 4179 *nextPtr = s; 4180 return XML_ERROR_NONE; 4181 } 4182 return XML_ERROR_PARTIAL_CHAR; 4183 case XML_TOK_PARTIAL: 4184 case XML_TOK_NONE: 4185 if (haveMore) { 4186 *nextPtr = s; 4187 return XML_ERROR_NONE; 4188 } 4189 return XML_ERROR_UNCLOSED_CDATA_SECTION; 4190 default: 4191 /* Every token returned by XmlCdataSectionTok() has its own 4192 * explicit case, so this default case will never be executed. 4193 * We retain it as a safety net and exclude it from the coverage 4194 * statistics. 
4195 * 4196 * LCOV_EXCL_START 4197 */ 4198 *eventPP = next; 4199 return XML_ERROR_UNEXPECTED_STATE; 4200 /* LCOV_EXCL_STOP */ 4201 } 4202 4203 *eventPP = s = next; 4204 switch (parser->m_parsingStatus.parsing) { 4205 case XML_SUSPENDED: 4206 *nextPtr = next; 4207 return XML_ERROR_NONE; 4208 case XML_FINISHED: 4209 return XML_ERROR_ABORTED; 4210 default:; 4211 } 4212 } 4213 /* not reached */ 4214 } 4215 4216 #ifdef XML_DTD 4217 4218 /* The idea here is to avoid using stack for each IGNORE section when 4219 the whole file is parsed with one call. 4220 */ 4221 static enum XML_Error PTRCALL 4222 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, 4223 const char **endPtr) { 4224 enum XML_Error result 4225 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, 4226 (XML_Bool)! parser->m_parsingStatus.finalBuffer); 4227 if (result != XML_ERROR_NONE) 4228 return result; 4229 if (start) { 4230 parser->m_processor = prologProcessor; 4231 return prologProcessor(parser, start, end, endPtr); 4232 } 4233 return result; 4234 } 4235 4236 /* startPtr gets set to non-null is the section is closed, and to null 4237 if the section is not yet closed. 4238 */ 4239 static enum XML_Error 4240 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4241 const char *end, const char **nextPtr, XML_Bool haveMore) { 4242 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4243 int tok; 4244 const char *s = *startPtr; 4245 const char **eventPP; 4246 const char **eventEndPP; 4247 if (enc == parser->m_encoding) { 4248 eventPP = &parser->m_eventPtr; 4249 *eventPP = s; 4250 eventEndPP = &parser->m_eventEndPtr; 4251 } else { 4252 /* It's not entirely clear, but it seems the following two lines 4253 * of code cannot be executed. 
The only occasions on which 'enc' 4254 * is not 'encoding' are when this function is called 4255 * from the internal entity processing, and IGNORE sections are an 4256 * error in internal entities. 4257 * 4258 * Since it really isn't clear that this is true, we keep the code 4259 * and just remove it from our coverage tests. 4260 * 4261 * LCOV_EXCL_START 4262 */ 4263 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4264 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4265 /* LCOV_EXCL_STOP */ 4266 } 4267 *eventPP = s; 4268 *startPtr = NULL; 4269 tok = XmlIgnoreSectionTok(enc, s, end, &next); 4270 # if XML_GE == 1 4271 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, 4272 XML_ACCOUNT_DIRECT)) { 4273 accountingOnAbort(parser); 4274 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4275 } 4276 # endif 4277 *eventEndPP = next; 4278 switch (tok) { 4279 case XML_TOK_IGNORE_SECT: 4280 if (parser->m_defaultHandler) 4281 reportDefault(parser, enc, s, next); 4282 *startPtr = next; 4283 *nextPtr = next; 4284 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4285 return XML_ERROR_ABORTED; 4286 else 4287 return XML_ERROR_NONE; 4288 case XML_TOK_INVALID: 4289 *eventPP = next; 4290 return XML_ERROR_INVALID_TOKEN; 4291 case XML_TOK_PARTIAL_CHAR: 4292 if (haveMore) { 4293 *nextPtr = s; 4294 return XML_ERROR_NONE; 4295 } 4296 return XML_ERROR_PARTIAL_CHAR; 4297 case XML_TOK_PARTIAL: 4298 case XML_TOK_NONE: 4299 if (haveMore) { 4300 *nextPtr = s; 4301 return XML_ERROR_NONE; 4302 } 4303 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ 4304 default: 4305 /* All of the tokens that XmlIgnoreSectionTok() returns have 4306 * explicit cases to handle them, so this default case is never 4307 * executed. We keep it as a safety net anyway, and remove it 4308 * from our test coverage statistics. 
4309 * 4310 * LCOV_EXCL_START 4311 */ 4312 *eventPP = next; 4313 return XML_ERROR_UNEXPECTED_STATE; 4314 /* LCOV_EXCL_STOP */ 4315 } 4316 /* not reached */ 4317 } 4318 4319 #endif /* XML_DTD */ 4320 4321 static enum XML_Error 4322 initializeEncoding(XML_Parser parser) { 4323 const char *s; 4324 #ifdef XML_UNICODE 4325 char encodingBuf[128]; 4326 /* See comments about `protocolEncodingName` in parserInit() */ 4327 if (! parser->m_protocolEncodingName) 4328 s = NULL; 4329 else { 4330 int i; 4331 for (i = 0; parser->m_protocolEncodingName[i]; i++) { 4332 if (i == sizeof(encodingBuf) - 1 4333 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { 4334 encodingBuf[0] = '\0'; 4335 break; 4336 } 4337 encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; 4338 } 4339 encodingBuf[i] = '\0'; 4340 s = encodingBuf; 4341 } 4342 #else 4343 s = parser->m_protocolEncodingName; 4344 #endif 4345 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( 4346 &parser->m_initEncoding, &parser->m_encoding, s)) 4347 return XML_ERROR_NONE; 4348 return handleUnknownEncoding(parser, parser->m_protocolEncodingName); 4349 } 4350 4351 static enum XML_Error 4352 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, 4353 const char *next) { 4354 const char *encodingName = NULL; 4355 const XML_Char *storedEncName = NULL; 4356 const ENCODING *newEncoding = NULL; 4357 const char *version = NULL; 4358 const char *versionend = NULL; 4359 const XML_Char *storedversion = NULL; 4360 int standalone = -1; 4361 4362 #if XML_GE == 1 4363 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, 4364 XML_ACCOUNT_DIRECT)) { 4365 accountingOnAbort(parser); 4366 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4367 } 4368 #endif 4369 4370 if (! (parser->m_ns ? 
XmlParseXmlDeclNS : XmlParseXmlDecl)( 4371 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, 4372 &version, &versionend, &encodingName, &newEncoding, &standalone)) { 4373 if (isGeneralTextEntity) 4374 return XML_ERROR_TEXT_DECL; 4375 else 4376 return XML_ERROR_XML_DECL; 4377 } 4378 if (! isGeneralTextEntity && standalone == 1) { 4379 parser->m_dtd->standalone = XML_TRUE; 4380 #ifdef XML_DTD 4381 if (parser->m_paramEntityParsing 4382 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) 4383 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 4384 #endif /* XML_DTD */ 4385 } 4386 if (parser->m_xmlDeclHandler) { 4387 if (encodingName != NULL) { 4388 storedEncName = poolStoreString( 4389 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4390 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4391 if (! storedEncName) 4392 return XML_ERROR_NO_MEMORY; 4393 poolFinish(&parser->m_temp2Pool); 4394 } 4395 if (version) { 4396 storedversion 4397 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, 4398 versionend - parser->m_encoding->minBytesPerChar); 4399 if (! storedversion) 4400 return XML_ERROR_NO_MEMORY; 4401 } 4402 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, 4403 standalone); 4404 } else if (parser->m_defaultHandler) 4405 reportDefault(parser, parser->m_encoding, s, next); 4406 if (parser->m_protocolEncodingName == NULL) { 4407 if (newEncoding) { 4408 /* Check that the specified encoding does not conflict with what 4409 * the parser has already deduced. Do we have the same number 4410 * of bytes in the smallest representation of a character? If 4411 * this is UTF-16, is it the same endianness? 
4412 */ 4413 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar 4414 || (newEncoding->minBytesPerChar == 2 4415 && newEncoding != parser->m_encoding)) { 4416 parser->m_eventPtr = encodingName; 4417 return XML_ERROR_INCORRECT_ENCODING; 4418 } 4419 parser->m_encoding = newEncoding; 4420 } else if (encodingName) { 4421 enum XML_Error result; 4422 if (! storedEncName) { 4423 storedEncName = poolStoreString( 4424 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4425 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4426 if (! storedEncName) 4427 return XML_ERROR_NO_MEMORY; 4428 } 4429 result = handleUnknownEncoding(parser, storedEncName); 4430 poolClear(&parser->m_temp2Pool); 4431 if (result == XML_ERROR_UNKNOWN_ENCODING) 4432 parser->m_eventPtr = encodingName; 4433 return result; 4434 } 4435 } 4436 4437 if (storedEncName || storedversion) 4438 poolClear(&parser->m_temp2Pool); 4439 4440 return XML_ERROR_NONE; 4441 } 4442 4443 static enum XML_Error 4444 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { 4445 if (parser->m_unknownEncodingHandler) { 4446 XML_Encoding info; 4447 int i; 4448 for (i = 0; i < 256; i++) 4449 info.map[i] = -1; 4450 info.convert = NULL; 4451 info.data = NULL; 4452 info.release = NULL; 4453 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, 4454 encodingName, &info)) { 4455 ENCODING *enc; 4456 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); 4457 if (! parser->m_unknownEncodingMem) { 4458 if (info.release) 4459 info.release(info.data); 4460 return XML_ERROR_NO_MEMORY; 4461 } 4462 enc = (parser->m_ns ? 
XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( 4463 parser->m_unknownEncodingMem, info.map, info.convert, info.data); 4464 if (enc) { 4465 parser->m_unknownEncodingData = info.data; 4466 parser->m_unknownEncodingRelease = info.release; 4467 parser->m_encoding = enc; 4468 return XML_ERROR_NONE; 4469 } 4470 } 4471 if (info.release != NULL) 4472 info.release(info.data); 4473 } 4474 return XML_ERROR_UNKNOWN_ENCODING; 4475 } 4476 4477 static enum XML_Error PTRCALL 4478 prologInitProcessor(XML_Parser parser, const char *s, const char *end, 4479 const char **nextPtr) { 4480 enum XML_Error result = initializeEncoding(parser); 4481 if (result != XML_ERROR_NONE) 4482 return result; 4483 parser->m_processor = prologProcessor; 4484 return prologProcessor(parser, s, end, nextPtr); 4485 } 4486 4487 #ifdef XML_DTD 4488 4489 static enum XML_Error PTRCALL 4490 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, 4491 const char **nextPtr) { 4492 enum XML_Error result = initializeEncoding(parser); 4493 if (result != XML_ERROR_NONE) 4494 return result; 4495 4496 /* we know now that XML_Parse(Buffer) has been called, 4497 so we consider the external parameter entity read */ 4498 parser->m_dtd->paramEntityRead = XML_TRUE; 4499 4500 if (parser->m_prologState.inEntityValue) { 4501 parser->m_processor = entityValueInitProcessor; 4502 return entityValueInitProcessor(parser, s, end, nextPtr); 4503 } else { 4504 parser->m_processor = externalParEntProcessor; 4505 return externalParEntProcessor(parser, s, end, nextPtr); 4506 } 4507 } 4508 4509 static enum XML_Error PTRCALL 4510 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, 4511 const char **nextPtr) { 4512 int tok; 4513 const char *start = s; 4514 const char *next = start; 4515 parser->m_eventPtr = start; 4516 4517 for (;;) { 4518 tok = XmlPrologTok(parser->m_encoding, start, end, &next); 4519 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: 4520 - 
storeEntityValue 4521 - processXmlDecl 4522 */ 4523 parser->m_eventEndPtr = next; 4524 if (tok <= 0) { 4525 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4526 *nextPtr = s; 4527 return XML_ERROR_NONE; 4528 } 4529 switch (tok) { 4530 case XML_TOK_INVALID: 4531 return XML_ERROR_INVALID_TOKEN; 4532 case XML_TOK_PARTIAL: 4533 return XML_ERROR_UNCLOSED_TOKEN; 4534 case XML_TOK_PARTIAL_CHAR: 4535 return XML_ERROR_PARTIAL_CHAR; 4536 case XML_TOK_NONE: /* start == end */ 4537 default: 4538 break; 4539 } 4540 /* found end of entity value - can store it now */ 4541 return storeEntityValue(parser, parser->m_encoding, s, end, 4542 XML_ACCOUNT_DIRECT); 4543 } else if (tok == XML_TOK_XML_DECL) { 4544 enum XML_Error result; 4545 result = processXmlDecl(parser, 0, start, next); 4546 if (result != XML_ERROR_NONE) 4547 return result; 4548 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For 4549 * that to happen, a parameter entity parsing handler must have attempted 4550 * to suspend the parser, which fails and raises an error. The parser can 4551 * be aborted, but can't be suspended. 4552 */ 4553 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4554 return XML_ERROR_ABORTED; 4555 *nextPtr = next; 4556 /* stop scanning for text declaration - we found one */ 4557 parser->m_processor = entityValueProcessor; 4558 return entityValueProcessor(parser, next, end, nextPtr); 4559 } 4560 /* XmlPrologTok has now set the encoding based on the BOM it found, and we 4561 must move s and nextPtr forward to consume the BOM. 4562 4563 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we 4564 would leave the BOM in the buffer and return. On the next call to this 4565 function, our XmlPrologTok call would return XML_TOK_INVALID, since it 4566 is not valid to have multiple BOMs. 4567 */ 4568 else if (tok == XML_TOK_BOM) { 4569 # if XML_GE == 1 4570 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4571 XML_ACCOUNT_DIRECT)) { 4572 accountingOnAbort(parser); 4573 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4574 } 4575 # endif 4576 4577 *nextPtr = next; 4578 s = next; 4579 } 4580 /* If we get this token, we have the start of what might be a 4581 normal tag, but not a declaration (i.e. it doesn't begin with 4582 "<!"). In a DTD context, that isn't legal. 4583 */ 4584 else if (tok == XML_TOK_INSTANCE_START) { 4585 *nextPtr = next; 4586 return XML_ERROR_SYNTAX; 4587 } 4588 start = next; 4589 parser->m_eventPtr = start; 4590 } 4591 } 4592 4593 static enum XML_Error PTRCALL 4594 externalParEntProcessor(XML_Parser parser, const char *s, const char *end, 4595 const char **nextPtr) { 4596 const char *next = s; 4597 int tok; 4598 4599 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4600 if (tok <= 0) { 4601 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4602 *nextPtr = s; 4603 return XML_ERROR_NONE; 4604 } 4605 switch (tok) { 4606 case XML_TOK_INVALID: 4607 return XML_ERROR_INVALID_TOKEN; 4608 case XML_TOK_PARTIAL: 4609 return XML_ERROR_UNCLOSED_TOKEN; 4610 case XML_TOK_PARTIAL_CHAR: 4611 return XML_ERROR_PARTIAL_CHAR; 4612 case XML_TOK_NONE: /* start == end */ 4613 default: 4614 break; 4615 } 4616 } 4617 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. 4618 However, when parsing an external subset, doProlog will not accept a BOM 4619 as valid, and report a syntax error, so we have to skip the BOM, and 4620 account for the BOM bytes. 4621 */ 4622 else if (tok == XML_TOK_BOM) { 4623 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4624 XML_ACCOUNT_DIRECT)) { 4625 accountingOnAbort(parser); 4626 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4627 } 4628 4629 s = next; 4630 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4631 } 4632 4633 parser->m_processor = prologProcessor; 4634 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4635 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 4636 XML_ACCOUNT_DIRECT); 4637 } 4638 4639 static enum XML_Error PTRCALL 4640 entityValueProcessor(XML_Parser parser, const char *s, const char *end, 4641 const char **nextPtr) { 4642 const char *start = s; 4643 const char *next = s; 4644 const ENCODING *enc = parser->m_encoding; 4645 int tok; 4646 4647 for (;;) { 4648 tok = XmlPrologTok(enc, start, end, &next); 4649 /* Note: These bytes are accounted later in: 4650 - storeEntityValue 4651 */ 4652 if (tok <= 0) { 4653 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4654 *nextPtr = s; 4655 return XML_ERROR_NONE; 4656 } 4657 switch (tok) { 4658 case XML_TOK_INVALID: 4659 return XML_ERROR_INVALID_TOKEN; 4660 case XML_TOK_PARTIAL: 4661 return XML_ERROR_UNCLOSED_TOKEN; 4662 case XML_TOK_PARTIAL_CHAR: 4663 return XML_ERROR_PARTIAL_CHAR; 4664 case XML_TOK_NONE: /* start == end */ 4665 default: 4666 break; 4667 } 4668 /* found end of entity value - can store it now */ 4669 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); 4670 } 4671 start = next; 4672 } 4673 } 4674 4675 #endif /* XML_DTD */ 4676 4677 static enum XML_Error PTRCALL 4678 prologProcessor(XML_Parser parser, const char *s, const char *end, 4679 const char **nextPtr) { 4680 const char *next = s; 4681 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4682 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4683 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE,
                  XML_ACCOUNT_DIRECT);
}

/* Prolog / DTD token loop.
 *
 * Starting from a token that has already been scanned (tok, spanning
 * s..next), this function repeatedly
 *   1. asks XmlTokenRole() which role the token plays in the prolog state
 *      kept in parser->m_prologState,
 *   2. performs the action for that role (storing declaration data in the
 *      parser/DTD string pools, calling the registered declaration handlers,
 *      invoking the external entity ref handler, ...), and
 *   3. fetches the following token with XmlPrologTok().
 *
 * Parameters:
 *   parser   - parser whose prolog state, DTD and handlers are read/updated.
 *   enc      - encoding used to interpret s..end.  It is compared with
 *              parser->m_encoding to decide whether the parser's own event
 *              pointers or those of parser->m_openInternalEntities are
 *              updated.
 *   s        - start of the current token.
 *   end      - end of the available input.
 *   tok      - token code already obtained for s.  Values <= 0 are treated
 *              as error / partial / end-of-input conditions, or as negated
 *              tokens, before any role lookup takes place.
 *   next     - end of the current token.
 *   nextPtr  - out: position at which parsing stopped when XML_ERROR_NONE is
 *              returned from within this function.
 *   haveMore - when true, a non-positive token other than XML_TOK_INVALID
 *              makes the function return XML_ERROR_NONE with *nextPtr = s.
 *   allowClosingDoctype
 *            - must be XML_TRUE for XML_ROLE_DOCTYPE_CLOSE to be accepted;
 *              otherwise XML_ERROR_INVALID_TOKEN is returned for that role.
 *   account  - accounting category forwarded to accountingDiffTolerated()
 *              in builds with XML_GE == 1.
 *
 * Returns:
 *   - XML_ERROR_NONE when it stops at a non-positive token (see the
 *     tok <= 0 handling below) or when the parser is XML_SUSPENDED;
 *   - whatever contentProcessor() returns once XML_ROLE_INSTANCE_START is
 *     seen (parser->m_processor is switched to contentProcessor first);
 *   - XML_ERROR_ABORTED when the parsing status is XML_FINISHED;
 *   - otherwise the specific error code of the failing step.
 */
static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
         int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
         XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
  static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
  /* Attribute type names handed to the attlist declaration handler. */
  static const XML_Char atypeCDATA[]
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
  static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
  static const XML_Char atypeIDREF[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
  static const XML_Char atypeIDREFS[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
  static const XML_Char atypeENTITY[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
  static const XML_Char atypeENTITIES[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
         ASCII_I, ASCII_E, ASCII_S, '\0'};
  static const XML_Char atypeNMTOKEN[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
  static const XML_Char atypeNMTOKENS[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
         ASCII_E, ASCII_N, ASCII_S, '\0'};
  /* Pieces used to assemble the type string of enumerated / notation
     attributes in the temp pool (see XML_ROLE_ATTRIBUTE_ENUM_VALUE). */
  static const XML_Char notationPrefix[]
      = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
         ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
  static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
  static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};

#ifndef XML_DTD
  UNUSED_P(account);
#endif

  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  const char **eventPP;
  const char **eventEndPP;
  enum XML_Content_Quant quant;

  /* Select which pair of event pointers tracks the current token: the
     parser's own, or those of the open internal entity when the text is
     not in the parser's encoding. */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }

  for (;;) {
    int role;
    /* Cleared by any case below that consumes the token itself; tokens left
       with handleDefault set are passed to the default handler (if any) at
       the bottom of the loop. */
    XML_Bool handleDefault = XML_TRUE;
    *eventPP = s;
    *eventEndPP = next;
    if (tok <= 0) {
      /* More input may follow: stop here without an error and report s as
         the resume position (an invalid token is an error regardless). */
      if (haveMore && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        *eventPP = next;
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case -XML_TOK_PROLOG_S:
        /* negated token: flip the sign and process it normally */
        tok = -tok;
        break;
      case XML_TOK_NONE:
#ifdef XML_DTD
        /* for internal PE NOT referenced between declarations */
        if (enc != parser->m_encoding
            && ! parser->m_openInternalEntities->betweenDecl) {
          *nextPtr = s;
          return XML_ERROR_NONE;
        }
        /* WFC: PE Between Declarations - must check that PE contains
           complete markup, not only for external PEs, but also for
           internal PEs if the reference occurs between declarations.
        */
        if (parser->m_isParamEntity || enc != parser->m_encoding) {
          if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
              == XML_ROLE_ERROR)
            return XML_ERROR_INCOMPLETE_PE;
          *nextPtr = s;
          return XML_ERROR_NONE;
        }
#endif /* XML_DTD */
        return XML_ERROR_NO_ELEMENTS;
      default:
        /* any other negated token: flip the sign and treat the token as
           extending to the end of the input */
        tok = -tok;
        next = end;
        break;
      }
    }
    role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
#if XML_GE == 1
    /* Run the accounting check for this token, except for the roles whose
       bytes are accounted for elsewhere (see the per-case notes). */
    switch (role) {
    case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
    case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
# ifdef XML_DTD
    case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
# endif
      break;
    default:
      if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
        accountingOnAbort(parser);
        return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      }
    }
#endif
    switch (role) {
    case XML_ROLE_XML_DECL: {
      enum XML_Error result = processXmlDecl(parser, 0, s, next);
      if (result != XML_ERROR_NONE)
        return result;
      /* re-read the encoding from the parser after processXmlDecl() */
      enc = parser->m_encoding;
      handleDefault = XML_FALSE;
    } break;
    case XML_ROLE_DOCTYPE_NAME:
      if (parser->m_startDoctypeDeclHandler) {
        parser->m_doctypeName
            = poolStoreString(&parser->m_tempPool, enc, s, next);
        if (! parser->m_doctypeName)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        parser->m_doctypePubid = NULL;
        handleDefault = XML_FALSE;
      }
      parser->m_doctypeSysid = NULL; /* always initialize to NULL */
      break;
    case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
      if (parser->m_startDoctypeDeclHandler) {
        /* last argument 1: reported at the start of an internal subset */
        parser->m_startDoctypeDeclHandler(
            parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
            parser->m_doctypePubid, 1);
        parser->m_doctypeName = NULL;
        poolClear(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
#ifdef XML_DTD
    case XML_ROLE_TEXT_DECL: {
      enum XML_Error result = processXmlDecl(parser, 1, s, next);
      if (result != XML_ERROR_NONE)
        return result;
      enc = parser->m_encoding;
      handleDefault = XML_FALSE;
    } break;
#endif /* XML_DTD */
    case XML_ROLE_DOCTYPE_PUBLIC_ID:
#ifdef XML_DTD
      parser->m_useForeignDTD = XML_FALSE;
      parser->m_declEntity = (ENTITY *)lookup(
          parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
      if (! parser->m_declEntity)
        return XML_ERROR_NO_MEMORY;
#endif /* XML_DTD */
      dtd->hasParamEntityRefs = XML_TRUE;
      if (parser->m_startDoctypeDeclHandler) {
        XML_Char *pubId;
        if (! XmlIsPublicId(enc, s, next, eventPP))
          return XML_ERROR_PUBLICID;
        /* store the token without its first and last character */
        pubId = poolStoreString(&parser->m_tempPool, enc,
                                s + enc->minBytesPerChar,
                                next - enc->minBytesPerChar);
        if (! pubId)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(pubId);
        poolFinish(&parser->m_tempPool);
        parser->m_doctypePubid = pubId;
        handleDefault = XML_FALSE;
        /* XmlIsPublicId() has been run already; skip the second check */
        goto alreadyChecked;
      }
      /* fall through */
    case XML_ROLE_ENTITY_PUBLIC_ID:
      if (! XmlIsPublicId(enc, s, next, eventPP))
        return XML_ERROR_PUBLICID;
    alreadyChecked:
      if (dtd->keepProcessing && parser->m_declEntity) {
        XML_Char *tem
            = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                              next - enc->minBytesPerChar);
        if (! tem)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(tem);
        parser->m_declEntity->publicId = tem;
        poolFinish(&dtd->pool);
        /* Don't suppress the default handler if we fell through from
         * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
         */
        if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_DOCTYPE_CLOSE:
      if (allowClosingDoctype != XML_TRUE) {
        /* Must not close doctype from within expanded parameter entities */
        return XML_ERROR_INVALID_TOKEN;
      }

      /* m_doctypeName is still set here only if the start handler has not
         been called yet (the internal-subset case above resets it). */
      if (parser->m_doctypeName) {
        parser->m_startDoctypeDeclHandler(
            parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
            parser->m_doctypePubid, 0);
        poolClear(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      /* parser->m_doctypeSysid will be non-NULL in the case of a previous
         XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
         was not set, indicating an external subset
      */
#ifdef XML_DTD
      if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
        XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
        dtd->hasParamEntityRefs = XML_TRUE;
        if (parser->m_paramEntityParsing
            && parser->m_externalEntityRefHandler) {
          ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                            externalSubsetName, sizeof(ENTITY));
          if (! entity) {
            /* The external subset name "#" will have already been
             * inserted into the hash table at the start of the
             * external entity parsing, so no allocation will happen
             * and lookup() cannot fail.
             */
            return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
          }
          if (parser->m_useForeignDTD)
            entity->base = parser->m_curBase;
          dtd->paramEntityRead = XML_FALSE;
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId))
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          if (dtd->paramEntityRead) {
            if (! dtd->standalone && parser->m_notStandaloneHandler
                && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
              return XML_ERROR_NOT_STANDALONE;
          }
          /* if we didn't read the foreign DTD then this means that there
             is no external subset and we must reset dtd->hasParamEntityRefs
          */
          else if (! parser->m_doctypeSysid)
            dtd->hasParamEntityRefs = hadParamEntityRefs;
          /* end of DTD - no need to update dtd->keepProcessing */
        }
        parser->m_useForeignDTD = XML_FALSE;
      }
#endif /* XML_DTD */
      if (parser->m_endDoctypeDeclHandler) {
        parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_INSTANCE_START:
#ifdef XML_DTD
      /* if there is no DOCTYPE declaration then now is the
         last chance to read the foreign DTD
      */
      if (parser->m_useForeignDTD) {
        XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
        dtd->hasParamEntityRefs = XML_TRUE;
        if (parser->m_paramEntityParsing
            && parser->m_externalEntityRefHandler) {
          ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                            externalSubsetName, sizeof(ENTITY));
          if (! entity)
            return XML_ERROR_NO_MEMORY;
          entity->base = parser->m_curBase;
          dtd->paramEntityRead = XML_FALSE;
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId))
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          if (dtd->paramEntityRead) {
            if (! dtd->standalone && parser->m_notStandaloneHandler
                && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
              return XML_ERROR_NOT_STANDALONE;
          }
          /* if we didn't read the foreign DTD then this means that there
             is no external subset and we must reset dtd->hasParamEntityRefs
          */
          else
            dtd->hasParamEntityRefs = hadParamEntityRefs;
          /* end of DTD - no need to update dtd->keepProcessing */
        }
      }
#endif /* XML_DTD */
      /* Prolog is over: hand the current position to the content processor,
         which also becomes the parser's processor from now on. */
      parser->m_processor = contentProcessor;
      return contentProcessor(parser, s, end, nextPtr);
    case XML_ROLE_ATTLIST_ELEMENT_NAME:
      parser->m_declElementType = getElementType(parser, enc, s, next);
      if (! parser->m_declElementType)
        return XML_ERROR_NO_MEMORY;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_NAME:
      parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
      if (! parser->m_declAttributeId)
        return XML_ERROR_NO_MEMORY;
      /* reset the per-attribute declaration state */
      parser->m_declAttributeIsCdata = XML_FALSE;
      parser->m_declAttributeType = NULL;
      parser->m_declAttributeIsId = XML_FALSE;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
      parser->m_declAttributeIsCdata = XML_TRUE;
      parser->m_declAttributeType = atypeCDATA;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ID:
      parser->m_declAttributeIsId = XML_TRUE;
      parser->m_declAttributeType = atypeID;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
      parser->m_declAttributeType = atypeIDREF;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
      parser->m_declAttributeType = atypeIDREFS;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
      parser->m_declAttributeType = atypeENTITY;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
      parser->m_declAttributeType = atypeENTITIES;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
      parser->m_declAttributeType = atypeNMTOKEN;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
      parser->m_declAttributeType = atypeNMTOKENS;
    checkAttListDeclHandler:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
    case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
        /* The type string is accumulated in the temp pool.
           m_declAttributeType is reset to NULL by XML_ROLE_ATTRIBUTE_NAME
           and set below once a value has been appended, so a NULL type
           selects the opening prefix and a non-NULL one the separator. */
        const XML_Char *prefix;
        if (parser->m_declAttributeType) {
          prefix = enumValueSep;
        } else {
          prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
                                                              : enumValueStart);
        }
        if (! poolAppendString(&parser->m_tempPool, prefix))
          return XML_ERROR_NO_MEMORY;
        if (! poolAppend(&parser->m_tempPool, enc, s, next))
          return XML_ERROR_NO_MEMORY;
        parser->m_declAttributeType = parser->m_tempPool.start;
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
    case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
      if (dtd->keepProcessing) {
        if (! defineAttribute(parser->m_declElementType,
                              parser->m_declAttributeId,
                              parser->m_declAttributeIsCdata,
                              parser->m_declAttributeIsId, 0, parser))
          return XML_ERROR_NO_MEMORY;
        if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
          if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
              || (*parser->m_declAttributeType == XML_T(ASCII_N)
                  && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
            /* Enumerated or Notation type */
            if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
                || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
              return XML_ERROR_NO_MEMORY;
            parser->m_declAttributeType = parser->m_tempPool.start;
            poolFinish(&parser->m_tempPool);
          }
          *eventEndPP = s;
          parser->m_attlistDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name,
              parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
              role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
          handleDefault = XML_FALSE;
        }
      }
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
      if (dtd->keepProcessing) {
        const XML_Char *attVal;
        enum XML_Error result = storeAttributeValue(
            parser, enc, parser->m_declAttributeIsCdata,
            s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
            XML_ACCOUNT_NONE);
        if (result)
          return result;
        attVal = poolStart(&dtd->pool);
        poolFinish(&dtd->pool);
        /* ID attributes aren't allowed to have a default */
        if (! defineAttribute(
                parser->m_declElementType, parser->m_declAttributeId,
                parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
          return XML_ERROR_NO_MEMORY;
        if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
          if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
              || (*parser->m_declAttributeType == XML_T(ASCII_N)
                  && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
            /* Enumerated or Notation type */
            if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
                || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
              return XML_ERROR_NO_MEMORY;
            parser->m_declAttributeType = parser->m_tempPool.start;
            poolFinish(&parser->m_tempPool);
          }
          *eventEndPP = s;
          parser->m_attlistDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name,
              parser->m_declAttributeId->name, parser->m_declAttributeType,
              attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
          poolClear(&parser->m_tempPool);
          handleDefault = XML_FALSE;
        }
      }
      break;
    case XML_ROLE_ENTITY_VALUE:
      if (dtd->keepProcessing) {
#if XML_GE == 1
        // This will store the given replacement text in
        // parser->m_declEntity->textPtr.
        enum XML_Error result
            = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
                               next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
        /* The pool is finished or discarded before `result` is examined,
           so it is left in a defined state on the error path as well. */
        if (parser->m_declEntity) {
          parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
          parser->m_declEntity->textLen
              = (int)(poolLength(&dtd->entityValuePool));
          poolFinish(&dtd->entityValuePool);
          if (parser->m_entityDeclHandler) {
            *eventEndPP = s;
            parser->m_entityDeclHandler(
                parser->m_handlerArg, parser->m_declEntity->name,
                parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
                parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
            handleDefault = XML_FALSE;
          }
        } else
          poolDiscard(&dtd->entityValuePool);
        if (result != XML_ERROR_NONE)
          return result;
#else
        // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
        // to end up as "&entity123;" in the handler.
        if (parser->m_declEntity != NULL) {
          const enum XML_Error result
              = storeSelfEntityValue(parser, parser->m_declEntity);
          if (result != XML_ERROR_NONE)
            return result;

          if (parser->m_entityDeclHandler) {
            *eventEndPP = s;
            parser->m_entityDeclHandler(
                parser->m_handlerArg, parser->m_declEntity->name,
                parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
                parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
            handleDefault = XML_FALSE;
          }
        }
#endif
      }
      break;
    case XML_ROLE_DOCTYPE_SYSTEM_ID:
#ifdef XML_DTD
      parser->m_useForeignDTD = XML_FALSE;
#endif /* XML_DTD */
      dtd->hasParamEntityRefs = XML_TRUE;
      if (parser->m_startDoctypeDeclHandler) {
        parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
                                                 s + enc->minBytesPerChar,
                                                 next - enc->minBytesPerChar);
        if (parser->m_doctypeSysid == NULL)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
#ifdef XML_DTD
      else
        /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
           for the case where no parser->m_startDoctypeDeclHandler is set */
        parser->m_doctypeSysid = externalSubsetName;
#endif /* XML_DTD */
      if (! dtd->standalone
#ifdef XML_DTD
          && ! parser->m_paramEntityParsing
#endif /* XML_DTD */
          && parser->m_notStandaloneHandler
          && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
        return XML_ERROR_NOT_STANDALONE;
      /* Without XML_DTD this case ends here; with XML_DTD it continues into
         XML_ROLE_ENTITY_SYSTEM_ID below. */
#ifndef XML_DTD
      break;
#else  /* XML_DTD */
      if (! parser->m_declEntity) {
        parser->m_declEntity = (ENTITY *)lookup(
            parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity->publicId = NULL;
      }
#endif /* XML_DTD */
      /* fall through */
    case XML_ROLE_ENTITY_SYSTEM_ID:
      if (dtd->keepProcessing && parser->m_declEntity) {
        parser->m_declEntity->systemId
            = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                              next - enc->minBytesPerChar);
        if (! parser->m_declEntity->systemId)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity->base = parser->m_curBase;
        poolFinish(&dtd->pool);
        /* Don't suppress the default handler if we fell through from
         * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
         */
        if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_ENTITY_COMPLETE:
#if XML_GE == 0
      // This will store "&amp;entity123;" in entity->textPtr
      // to end up as "&entity123;" in the handler.
      if (parser->m_declEntity != NULL) {
        const enum XML_Error result
            = storeSelfEntityValue(parser, parser->m_declEntity);
        if (result != XML_ERROR_NONE)
          return result;
      }
#endif
      if (dtd->keepProcessing && parser->m_declEntity
          && parser->m_entityDeclHandler) {
        *eventEndPP = s;
        parser->m_entityDeclHandler(
            parser->m_handlerArg, parser->m_declEntity->name,
            parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
            parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_ENTITY_NOTATION_NAME:
      if (dtd->keepProcessing && parser->m_declEntity) {
        parser->m_declEntity->notation
            = poolStoreString(&dtd->pool, enc, s, next);
        if (! parser->m_declEntity->notation)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&dtd->pool);
        /* the unparsed-entity handler takes precedence over the general
           entity declaration handler */
        if (parser->m_unparsedEntityDeclHandler) {
          *eventEndPP = s;
          parser->m_unparsedEntityDeclHandler(
              parser->m_handlerArg, parser->m_declEntity->name,
              parser->m_declEntity->base, parser->m_declEntity->systemId,
              parser->m_declEntity->publicId, parser->m_declEntity->notation);
          handleDefault = XML_FALSE;
        } else if (parser->m_entityDeclHandler) {
          *eventEndPP = s;
          parser->m_entityDeclHandler(
              parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
              parser->m_declEntity->base, parser->m_declEntity->systemId,
              parser->m_declEntity->publicId, parser->m_declEntity->notation);
          handleDefault = XML_FALSE;
        }
      }
      break;
    case XML_ROLE_GENERAL_ENTITY_NAME: {
      /* a declaration naming a predefined entity is not recorded */
      if (XmlPredefinedEntityName(enc, s, next)) {
        parser->m_declEntity = NULL;
        break;
      }
      if (dtd->keepProcessing) {
        const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
                                                name, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        /* lookup() handed back an entry whose name is not the string just
           stored: drop the stored copy and do not record this declaration */
        if (parser->m_declEntity->name != name) {
          poolDiscard(&dtd->pool);
          parser->m_declEntity = NULL;
        } else {
          poolFinish(&dtd->pool);
          parser->m_declEntity->publicId = NULL;
          parser->m_declEntity->is_param = XML_FALSE;
          /* if we have a parent parser or are reading an internal parameter
             entity, then the entity declaration is not considered "internal"
          */
          parser->m_declEntity->is_internal
              = ! (parser->m_parentParser || parser->m_openInternalEntities);
          if (parser->m_entityDeclHandler)
            handleDefault = XML_FALSE;
        }
      } else {
        poolDiscard(&dtd->pool);
        parser->m_declEntity = NULL;
      }
    } break;
    case XML_ROLE_PARAM_ENTITY_NAME:
#ifdef XML_DTD
      if (dtd->keepProcessing) {
        const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                                name, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        /* same name check as in XML_ROLE_GENERAL_ENTITY_NAME */
        if (parser->m_declEntity->name != name) {
          poolDiscard(&dtd->pool);
          parser->m_declEntity = NULL;
        } else {
          poolFinish(&dtd->pool);
          parser->m_declEntity->publicId = NULL;
          parser->m_declEntity->is_param = XML_TRUE;
          /* if we have a parent parser or are reading an internal parameter
             entity, then the entity declaration is not considered "internal"
          */
          parser->m_declEntity->is_internal
              = ! (parser->m_parentParser || parser->m_openInternalEntities);
          if (parser->m_entityDeclHandler)
            handleDefault = XML_FALSE;
        }
      } else {
        poolDiscard(&dtd->pool);
        parser->m_declEntity = NULL;
      }
#else  /* not XML_DTD */
      parser->m_declEntity = NULL;
#endif /* XML_DTD */
      break;
    case XML_ROLE_NOTATION_NAME:
      parser->m_declNotationPublicId = NULL;
      parser->m_declNotationName = NULL;
      if (parser->m_notationDeclHandler) {
        parser->m_declNotationName
            = poolStoreString(&parser->m_tempPool, enc, s, next);
        if (! parser->m_declNotationName)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_NOTATION_PUBLIC_ID:
      if (! XmlIsPublicId(enc, s, next, eventPP))
        return XML_ERROR_PUBLICID;
      if (parser
              ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
        XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
                                        s + enc->minBytesPerChar,
                                        next - enc->minBytesPerChar);
        if (! tem)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(tem);
        parser->m_declNotationPublicId = tem;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_NOTATION_SYSTEM_ID:
      if (parser->m_declNotationName && parser->m_notationDeclHandler) {
        const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
                                                   s + enc->minBytesPerChar,
                                                   next - enc->minBytesPerChar);
        if (! systemId)
          return XML_ERROR_NO_MEMORY;
        *eventEndPP = s;
        parser->m_notationDeclHandler(
            parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
            systemId, parser->m_declNotationPublicId);
        handleDefault = XML_FALSE;
      }
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_NOTATION_NO_SYSTEM_ID:
      if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
        *eventEndPP = s;
        parser->m_notationDeclHandler(
            parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
            0, parser->m_declNotationPublicId);
        handleDefault = XML_FALSE;
      }
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_ERROR:
      switch (tok) {
      case XML_TOK_PARAM_ENTITY_REF:
        /* PE references in internal subset are
           not allowed within declarations. */
        return XML_ERROR_PARAM_ENTITY_REF;
      case XML_TOK_XML_DECL:
        return XML_ERROR_MISPLACED_XML_PI;
      default:
        return XML_ERROR_SYNTAX;
      }
#ifdef XML_DTD
    case XML_ROLE_IGNORE_SECT: {
      enum XML_Error result;
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      handleDefault = XML_FALSE;
      result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doIgnoreSection() left `next` NULL without reporting an error:
           continue later through ignoreSectionProcessor */
        parser->m_processor = ignoreSectionProcessor;
        return result;
      }
    } break;
#endif /* XML_DTD */
    case XML_ROLE_GROUP_OPEN:
      /* m_groupConnector is indexed by the prolog state's nesting level.
         Grow it when the level reaches m_groupSize: double an existing
         buffer, or allocate 32 bytes the first time. */
      if (parser->m_prologState.level >= parser->m_groupSize) {
        if (parser->m_groupSize) {
          {
            /* Detect and prevent integer overflow */
            if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
              return XML_ERROR_NO_MEMORY;
            }

            /* note: m_groupSize is doubled inside the argument list */
            char *const new_connector = (char *)REALLOC(
                parser, parser->m_groupConnector, parser->m_groupSize *= 2);
            if (new_connector == NULL) {
              /* undo the doubling so the size matches the old buffer */
              parser->m_groupSize /= 2;
              return XML_ERROR_NO_MEMORY;
            }
            parser->m_groupConnector = new_connector;
          }

          if (dtd->scaffIndex) {
            /* Detect and prevent integer overflow.
             * The preprocessor guard addresses the "always false" warning
             * from -Wtype-limits on platforms where
             * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
            if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
              return XML_ERROR_NO_MEMORY;
            }
#endif

            int *const new_scaff_index = (int *)REALLOC(
                parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
            if (new_scaff_index == NULL)
              return XML_ERROR_NO_MEMORY;
            dtd->scaffIndex = new_scaff_index;
          }
        } else {
          parser->m_groupConnector
              = (char *)MALLOC(parser, parser->m_groupSize = 32);
          if (! parser->m_groupConnector) {
            parser->m_groupSize = 0;
            return XML_ERROR_NO_MEMORY;
          }
        }
      }
      /* 0 = no connector seen yet at this level */
      parser->m_groupConnector[parser->m_prologState.level] = 0;
      if (dtd->in_eldecl) {
        int myindex = nextScaffoldPart(parser);
        if (myindex < 0)
          return XML_ERROR_NO_MEMORY;
        assert(dtd->scaffIndex != NULL);
        dtd->scaffIndex[dtd->scaffLevel] = myindex;
        dtd->scaffLevel++;
        /* start out as a sequence; XML_ROLE_GROUP_CHOICE and
           XML_ROLE_CONTENT_PCDATA overwrite the type later */
        dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_GROUP_SEQUENCE:
      /* ',' after '|' within the same group is a syntax error */
      if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
        return XML_ERROR_SYNTAX;
      parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
      if (dtd->in_eldecl && parser->m_elementDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_GROUP_CHOICE:
      /* '|' after ',' within the same group is a syntax error */
      if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
        return XML_ERROR_SYNTAX;
      /* first connector of a group that is not mixed content: the group is
         a choice */
      if (dtd->in_eldecl
          && ! parser->m_groupConnector[parser->m_prologState.level]
          && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
              != XML_CTYPE_MIXED)) {
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
            = XML_CTYPE_CHOICE;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
      break;
    case XML_ROLE_PARAM_ENTITY_REF:
#ifdef XML_DTD
    case XML_ROLE_INNER_PARAM_ENTITY_REF:
      dtd->hasParamEntityRefs = XML_TRUE;
      if (! parser->m_paramEntityParsing)
        dtd->keepProcessing = dtd->standalone;
      else {
        const XML_Char *name;
        ENTITY *entity;
        name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                               next - enc->minBytesPerChar);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        /* createSize 0: look up only, the name is discarded right after */
        entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
        poolDiscard(&dtd->pool);
        /* first, determine if a check for an existing declaration is needed;
           if yes, check that the entity exists, and that it is internal,
           otherwise call the skipped entity handler
        */
        if (parser->m_prologState.documentEntity
            && (dtd->standalone ? ! parser->m_openInternalEntities
                                : ! dtd->hasParamEntityRefs)) {
          if (! entity)
            return XML_ERROR_UNDEFINED_ENTITY;
          else if (! entity->is_internal) {
            /* It's hard to exhaustively search the code to be sure,
             * but there doesn't seem to be a way of executing the
             * following line.  There are two cases:
             *
             * If 'standalone' is false, the DTD must have no
             * parameter entities or we wouldn't have passed the outer
             * 'if' statement.  That means the only entity in the hash
             * table is the external subset name "#" which cannot be
             * given as a parameter entity name in XML syntax, so the
             * lookup must have returned NULL and we don't even reach
             * the test for an internal entity.
             *
             * If 'standalone' is true, it does not seem to be
             * possible to create entities taking this code path that
             * are not internal entities, so fail the test above.
             *
             * Because this analysis is very uncertain, the code is
             * being left in place and merely removed from the
             * coverage test statistics.
             */
            return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
          }
        } else if (! entity) {
          dtd->keepProcessing = dtd->standalone;
          /* cannot report skipped entities in declarations */
          if ((role == XML_ROLE_PARAM_ENTITY_REF)
              && parser->m_skippedEntityHandler) {
            parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
            handleDefault = XML_FALSE;
          }
          break;
        }
        if (entity->open)
          return XML_ERROR_RECURSIVE_ENTITY_REF;
        if (entity->textPtr) {
          /* entity with stored replacement text: process it as an internal
             entity */
          enum XML_Error result;
          XML_Bool betweenDecl
              = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
          result = processInternalEntity(parser, entity, betweenDecl);
          if (result != XML_ERROR_NONE)
            return result;
          handleDefault = XML_FALSE;
          break;
        }
        if (parser->m_externalEntityRefHandler) {
          dtd->paramEntityRead = XML_FALSE;
          /* keep the entity marked open for the duration of the callback */
          entity->open = XML_TRUE;
          entityTrackingOnOpen(parser, entity, __LINE__);
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId)) {
            entityTrackingOnClose(parser, entity, __LINE__);
            entity->open = XML_FALSE;
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          }
          entityTrackingOnClose(parser, entity, __LINE__);
          entity->open = XML_FALSE;
          handleDefault = XML_FALSE;
          if (! dtd->paramEntityRead) {
            dtd->keepProcessing = dtd->standalone;
            break;
          }
        } else {
          dtd->keepProcessing = dtd->standalone;
          break;
        }
      }
#endif /* XML_DTD */
      if (! dtd->standalone && parser->m_notStandaloneHandler
          && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
        return XML_ERROR_NOT_STANDALONE;
      break;

      /* Element declaration stuff */

    case XML_ROLE_ELEMENT_NAME:
      if (parser->m_elementDeclHandler) {
        parser->m_declElementType = getElementType(parser, enc, s, next);
        if (! parser->m_declElementType)
          return XML_ERROR_NO_MEMORY;
        dtd->scaffLevel = 0;
        dtd->scaffCount = 0;
        dtd->in_eldecl = XML_TRUE;
        handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_ANY:
    case XML_ROLE_CONTENT_EMPTY:
      if (dtd->in_eldecl) {
        if (parser->m_elementDeclHandler) {
          /* single childless node, allocated with the parser's MALLOC and
             passed to the element declaration handler */
          XML_Content *content
              = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
          if (! content)
            return XML_ERROR_NO_MEMORY;
          content->quant = XML_CQUANT_NONE;
          content->name = NULL;
          content->numchildren = 0;
          content->children = NULL;
          content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
                                                          : XML_CTYPE_EMPTY);
          *eventEndPP = s;
          parser->m_elementDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name, content);
          handleDefault = XML_FALSE;
        }
        dtd->in_eldecl = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_PCDATA:
      if (dtd->in_eldecl) {
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
            = XML_CTYPE_MIXED;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_ELEMENT:
      quant = XML_CQUANT_NONE;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_OPT:
      quant = XML_CQUANT_OPT;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_REP:
      quant = XML_CQUANT_REP;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_PLUS:
      quant = XML_CQUANT_PLUS;
    elementContent:
      if (dtd->in_eldecl) {
        ELEMENT_TYPE *el;
        const XML_Char *name;
        size_t nameLen;
        /* with a quantifier, the name ends one character before `next` */
        const char *nxt
            = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
        int myindex = nextScaffoldPart(parser);
        if (myindex < 0)
          return XML_ERROR_NO_MEMORY;
        dtd->scaffold[myindex].type = XML_CTYPE_NAME;
        dtd->scaffold[myindex].quant = quant;
        el = getElementType(parser, enc, s, nxt);
        if (! el)
          return XML_ERROR_NO_MEMORY;
        name = el->name;
        dtd->scaffold[myindex].name = name;
        /* count the characters of name, terminator included */
        nameLen = 0;
        for (; name[nameLen++];)
          ;

        /* Detect and prevent integer overflow */
        if (nameLen > UINT_MAX - dtd->contentStringLen) {
          return XML_ERROR_NO_MEMORY;
        }

        dtd->contentStringLen += (unsigned)nameLen;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_GROUP_CLOSE:
      quant = XML_CQUANT_NONE;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_OPT:
      quant = XML_CQUANT_OPT;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_REP:
      quant = XML_CQUANT_REP;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_PLUS:
      quant = XML_CQUANT_PLUS;
    closeGroup:
      if (dtd->in_eldecl) {
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
        dtd->scaffLevel--;
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
        /* outermost group closed: the element declaration is complete */
        if (dtd->scaffLevel == 0) {
          /* handleDefault was cleared just above iff a handler is set */
          if (! handleDefault) {
            XML_Content *model = build_model(parser);
            if (! model)
              return XML_ERROR_NO_MEMORY;
            *eventEndPP = s;
            parser->m_elementDeclHandler(
                parser->m_handlerArg, parser->m_declElementType->name, model);
          }
          dtd->in_eldecl = XML_FALSE;
          dtd->contentStringLen = 0;
        }
      }
      break;
      /* End element declaration stuff */

    case XML_ROLE_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      handleDefault = XML_FALSE;
      break;
    case XML_ROLE_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      handleDefault = XML_FALSE;
      break;
    case XML_ROLE_NONE:
      switch (tok) {
      case XML_TOK_BOM:
        /* the byte order mark is never passed to the default handler */
        handleDefault = XML_FALSE;
        break;
      }
      break;
    /* The *_NONE roles below only decide whether the token is passed to the
       default handler: it is not when the matching declaration handler is
       set (and, for entity/attlist, dtd->keepProcessing is true). */
    case XML_ROLE_DOCTYPE_NONE:
      if (parser->m_startDoctypeDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ENTITY_NONE:
      if (dtd->keepProcessing && parser->m_entityDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_NOTATION_NONE:
      if (parser->m_notationDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ATTLIST_NONE:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ELEMENT_NONE:
      if (parser->m_elementDeclHandler)
        handleDefault = XML_FALSE;
      break;
    } /* end of big switch */

    if (handleDefault && parser->m_defaultHandler)
      reportDefault(parser, enc, s, next);

    /* Re-check the parsing status after every token before continuing. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      /* resume position is just after the token that was processed */
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:
      s = next;
      tok = XmlPrologTok(enc, s, end, &next);
    }
  }
  /* not reached */
}

static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser, const char *s, const char *end,
                const char **nextPtr) {
  parser->m_processor = epilogProcessor;
  parser->m_eventPtr = s;
  for (;;) {
    const char *next = NULL;
    int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
#if XML_GE == 1
    if (!
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5762 XML_ACCOUNT_DIRECT)) { 5763 accountingOnAbort(parser); 5764 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5765 } 5766 #endif 5767 parser->m_eventEndPtr = next; 5768 switch (tok) { 5769 /* report partial linebreak - it might be the last token */ 5770 case -XML_TOK_PROLOG_S: 5771 if (parser->m_defaultHandler) { 5772 reportDefault(parser, parser->m_encoding, s, next); 5773 if (parser->m_parsingStatus.parsing == XML_FINISHED) 5774 return XML_ERROR_ABORTED; 5775 } 5776 *nextPtr = next; 5777 return XML_ERROR_NONE; 5778 case XML_TOK_NONE: 5779 *nextPtr = s; 5780 return XML_ERROR_NONE; 5781 case XML_TOK_PROLOG_S: 5782 if (parser->m_defaultHandler) 5783 reportDefault(parser, parser->m_encoding, s, next); 5784 break; 5785 case XML_TOK_PI: 5786 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) 5787 return XML_ERROR_NO_MEMORY; 5788 break; 5789 case XML_TOK_COMMENT: 5790 if (! reportComment(parser, parser->m_encoding, s, next)) 5791 return XML_ERROR_NO_MEMORY; 5792 break; 5793 case XML_TOK_INVALID: 5794 parser->m_eventPtr = next; 5795 return XML_ERROR_INVALID_TOKEN; 5796 case XML_TOK_PARTIAL: 5797 if (! parser->m_parsingStatus.finalBuffer) { 5798 *nextPtr = s; 5799 return XML_ERROR_NONE; 5800 } 5801 return XML_ERROR_UNCLOSED_TOKEN; 5802 case XML_TOK_PARTIAL_CHAR: 5803 if (! 
parser->m_parsingStatus.finalBuffer) { 5804 *nextPtr = s; 5805 return XML_ERROR_NONE; 5806 } 5807 return XML_ERROR_PARTIAL_CHAR; 5808 default: 5809 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 5810 } 5811 parser->m_eventPtr = s = next; 5812 switch (parser->m_parsingStatus.parsing) { 5813 case XML_SUSPENDED: 5814 *nextPtr = next; 5815 return XML_ERROR_NONE; 5816 case XML_FINISHED: 5817 return XML_ERROR_ABORTED; 5818 default:; 5819 } 5820 } 5821 } 5822 5823 static enum XML_Error 5824 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { 5825 const char *textStart, *textEnd; 5826 const char *next; 5827 enum XML_Error result; 5828 OPEN_INTERNAL_ENTITY *openEntity; 5829 5830 if (parser->m_freeInternalEntities) { 5831 openEntity = parser->m_freeInternalEntities; 5832 parser->m_freeInternalEntities = openEntity->next; 5833 } else { 5834 openEntity 5835 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); 5836 if (! openEntity) 5837 return XML_ERROR_NO_MEMORY; 5838 } 5839 entity->open = XML_TRUE; 5840 #if XML_GE == 1 5841 entityTrackingOnOpen(parser, entity, __LINE__); 5842 #endif 5843 entity->processed = 0; 5844 openEntity->next = parser->m_openInternalEntities; 5845 parser->m_openInternalEntities = openEntity; 5846 openEntity->entity = entity; 5847 openEntity->startTagLevel = parser->m_tagLevel; 5848 openEntity->betweenDecl = betweenDecl; 5849 openEntity->internalEventPtr = NULL; 5850 openEntity->internalEventEndPtr = NULL; 5851 textStart = (const char *)entity->textPtr; 5852 textEnd = (const char *)(entity->textPtr + entity->textLen); 5853 /* Set a safe default value in case 'next' does not get set */ 5854 next = textStart; 5855 5856 if (entity->is_param) { 5857 int tok 5858 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5859 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5860 tok, next, &next, XML_FALSE, XML_FALSE, 5861 XML_ACCOUNT_ENTITY_EXPANSION); 5862 } else { 5863 result = 
doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, 5864 textStart, textEnd, &next, XML_FALSE, 5865 XML_ACCOUNT_ENTITY_EXPANSION); 5866 } 5867 5868 if (result == XML_ERROR_NONE) { 5869 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5870 entity->processed = (int)(next - textStart); 5871 parser->m_processor = internalEntityProcessor; 5872 } else if (parser->m_openInternalEntities->entity == entity) { 5873 #if XML_GE == 1 5874 entityTrackingOnClose(parser, entity, __LINE__); 5875 #endif /* XML_GE == 1 */ 5876 entity->open = XML_FALSE; 5877 parser->m_openInternalEntities = openEntity->next; 5878 /* put openEntity back in list of free instances */ 5879 openEntity->next = parser->m_freeInternalEntities; 5880 parser->m_freeInternalEntities = openEntity; 5881 } 5882 } 5883 return result; 5884 } 5885 5886 static enum XML_Error PTRCALL 5887 internalEntityProcessor(XML_Parser parser, const char *s, const char *end, 5888 const char **nextPtr) { 5889 ENTITY *entity; 5890 const char *textStart, *textEnd; 5891 const char *next; 5892 enum XML_Error result; 5893 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; 5894 if (! 
openEntity) 5895 return XML_ERROR_UNEXPECTED_STATE; 5896 5897 entity = openEntity->entity; 5898 textStart = ((const char *)entity->textPtr) + entity->processed; 5899 textEnd = (const char *)(entity->textPtr + entity->textLen); 5900 /* Set a safe default value in case 'next' does not get set */ 5901 next = textStart; 5902 5903 if (entity->is_param) { 5904 int tok 5905 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5906 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5907 tok, next, &next, XML_FALSE, XML_TRUE, 5908 XML_ACCOUNT_ENTITY_EXPANSION); 5909 } else { 5910 result = doContent(parser, openEntity->startTagLevel, 5911 parser->m_internalEncoding, textStart, textEnd, &next, 5912 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); 5913 } 5914 5915 if (result != XML_ERROR_NONE) 5916 return result; 5917 5918 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5919 entity->processed = (int)(next - (const char *)entity->textPtr); 5920 return result; 5921 } 5922 5923 #if XML_GE == 1 5924 entityTrackingOnClose(parser, entity, __LINE__); 5925 #endif 5926 entity->open = XML_FALSE; 5927 parser->m_openInternalEntities = openEntity->next; 5928 /* put openEntity back in list of free instances */ 5929 openEntity->next = parser->m_freeInternalEntities; 5930 parser->m_freeInternalEntities = openEntity; 5931 5932 // If there are more open entities we want to stop right here and have the 5933 // upcoming call to XML_ResumeParser continue with entity content, or it would 5934 // be ignored altogether. 5935 if (parser->m_openInternalEntities != NULL 5936 && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5937 return XML_ERROR_NONE; 5938 } 5939 5940 if (entity->is_param) { 5941 int tok; 5942 parser->m_processor = prologProcessor; 5943 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5944 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5945 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, 5946 XML_ACCOUNT_DIRECT); 5947 } else { 5948 parser->m_processor = contentProcessor; 5949 /* see externalEntityContentProcessor vs contentProcessor */ 5950 result = doContent(parser, parser->m_parentParser ? 1 : 0, 5951 parser->m_encoding, s, end, nextPtr, 5952 (XML_Bool)! parser->m_parsingStatus.finalBuffer, 5953 XML_ACCOUNT_DIRECT); 5954 if (result == XML_ERROR_NONE) { 5955 if (! storeRawNames(parser)) 5956 return XML_ERROR_NO_MEMORY; 5957 } 5958 return result; 5959 } 5960 } 5961 5962 static enum XML_Error PTRCALL 5963 errorProcessor(XML_Parser parser, const char *s, const char *end, 5964 const char **nextPtr) { 5965 UNUSED_P(s); 5966 UNUSED_P(end); 5967 UNUSED_P(nextPtr); 5968 return parser->m_errorCode; 5969 } 5970 5971 static enum XML_Error 5972 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5973 const char *ptr, const char *end, STRING_POOL *pool, 5974 enum XML_Account account) { 5975 enum XML_Error result 5976 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); 5977 if (result) 5978 return result; 5979 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) 5980 poolChop(pool); 5981 if (! poolAppendChar(pool, XML_T('\0'))) 5982 return XML_ERROR_NO_MEMORY; 5983 return XML_ERROR_NONE; 5984 } 5985 5986 static enum XML_Error 5987 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5988 const char *ptr, const char *end, STRING_POOL *pool, 5989 enum XML_Account account) { 5990 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 5991 #ifndef XML_DTD 5992 UNUSED_P(account); 5993 #endif 5994 5995 for (;;) { 5996 const char *next 5997 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ 5998 int tok = XmlAttributeValueTok(enc, ptr, end, &next); 5999 #if XML_GE == 1 6000 if (! 
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { 6001 accountingOnAbort(parser); 6002 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6003 } 6004 #endif 6005 switch (tok) { 6006 case XML_TOK_NONE: 6007 return XML_ERROR_NONE; 6008 case XML_TOK_INVALID: 6009 if (enc == parser->m_encoding) 6010 parser->m_eventPtr = next; 6011 return XML_ERROR_INVALID_TOKEN; 6012 case XML_TOK_PARTIAL: 6013 if (enc == parser->m_encoding) 6014 parser->m_eventPtr = ptr; 6015 return XML_ERROR_INVALID_TOKEN; 6016 case XML_TOK_CHAR_REF: { 6017 XML_Char buf[XML_ENCODE_MAX]; 6018 int i; 6019 int n = XmlCharRefNumber(enc, ptr); 6020 if (n < 0) { 6021 if (enc == parser->m_encoding) 6022 parser->m_eventPtr = ptr; 6023 return XML_ERROR_BAD_CHAR_REF; 6024 } 6025 if (! isCdata && n == 0x20 /* space */ 6026 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6027 break; 6028 n = XmlEncode(n, (ICHAR *)buf); 6029 /* The XmlEncode() functions can never return 0 here. That 6030 * error return happens if the code point passed in is either 6031 * negative or greater than or equal to 0x110000. The 6032 * XmlCharRefNumber() functions will all return a number 6033 * strictly less than 0x110000 or a negative value if an error 6034 * occurred. The negative value is intercepted above, so 6035 * XmlEncode() is never passed a value it might return an 6036 * error for. 6037 */ 6038 for (i = 0; i < n; i++) { 6039 if (! poolAppendChar(pool, buf[i])) 6040 return XML_ERROR_NO_MEMORY; 6041 } 6042 } break; 6043 case XML_TOK_DATA_CHARS: 6044 if (! poolAppend(pool, enc, ptr, next)) 6045 return XML_ERROR_NO_MEMORY; 6046 break; 6047 case XML_TOK_TRAILING_CR: 6048 next = ptr + enc->minBytesPerChar; 6049 /* fall through */ 6050 case XML_TOK_ATTRIBUTE_VALUE_S: 6051 case XML_TOK_DATA_NEWLINE: 6052 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6053 break; 6054 if (! 
poolAppendChar(pool, 0x20)) 6055 return XML_ERROR_NO_MEMORY; 6056 break; 6057 case XML_TOK_ENTITY_REF: { 6058 const XML_Char *name; 6059 ENTITY *entity; 6060 char checkEntityDecl; 6061 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 6062 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 6063 if (ch) { 6064 #if XML_GE == 1 6065 /* NOTE: We are replacing 4-6 characters original input for 1 character 6066 * so there is no amplification and hence recording without 6067 * protection. */ 6068 accountingDiffTolerated(parser, tok, (char *)&ch, 6069 ((char *)&ch) + sizeof(XML_Char), __LINE__, 6070 XML_ACCOUNT_ENTITY_EXPANSION); 6071 #endif /* XML_GE == 1 */ 6072 if (! poolAppendChar(pool, ch)) 6073 return XML_ERROR_NO_MEMORY; 6074 break; 6075 } 6076 name = poolStoreString(&parser->m_temp2Pool, enc, 6077 ptr + enc->minBytesPerChar, 6078 next - enc->minBytesPerChar); 6079 if (! name) 6080 return XML_ERROR_NO_MEMORY; 6081 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 6082 poolDiscard(&parser->m_temp2Pool); 6083 /* First, determine if a check for an existing declaration is needed; 6084 if yes, check that the entity exists, and that it is internal. 6085 */ 6086 if (pool == &dtd->pool) /* are we called from prolog? */ 6087 checkEntityDecl = 6088 #ifdef XML_DTD 6089 parser->m_prologState.documentEntity && 6090 #endif /* XML_DTD */ 6091 (dtd->standalone ? ! parser->m_openInternalEntities 6092 : ! dtd->hasParamEntityRefs); 6093 else /* if (pool == &parser->m_tempPool): we are called from content */ 6094 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; 6095 if (checkEntityDecl) { 6096 if (! entity) 6097 return XML_ERROR_UNDEFINED_ENTITY; 6098 else if (! entity->is_internal) 6099 return XML_ERROR_ENTITY_DECLARED_IN_PE; 6100 } else if (! entity) { 6101 /* Cannot report skipped entity here - see comments on 6102 parser->m_skippedEntityHandler. 
6103 if (parser->m_skippedEntityHandler) 6104 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6105 */ 6106 /* Cannot call the default handler because this would be 6107 out of sync with the call to the startElementHandler. 6108 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) 6109 reportDefault(parser, enc, ptr, next); 6110 */ 6111 break; 6112 } 6113 if (entity->open) { 6114 if (enc == parser->m_encoding) { 6115 /* It does not appear that this line can be executed. 6116 * 6117 * The "if (entity->open)" check catches recursive entity 6118 * definitions. In order to be called with an open 6119 * entity, it must have gone through this code before and 6120 * been through the recursive call to 6121 * appendAttributeValue() some lines below. That call 6122 * sets the local encoding ("enc") to the parser's 6123 * internal encoding (internal_utf8 or internal_utf16), 6124 * which can never be the same as the principle encoding. 6125 * It doesn't appear there is another code path that gets 6126 * here with entity->open being TRUE. 6127 * 6128 * Since it is not certain that this logic is watertight, 6129 * we keep the line and merely exclude it from coverage 6130 * tests. 6131 */ 6132 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ 6133 } 6134 return XML_ERROR_RECURSIVE_ENTITY_REF; 6135 } 6136 if (entity->notation) { 6137 if (enc == parser->m_encoding) 6138 parser->m_eventPtr = ptr; 6139 return XML_ERROR_BINARY_ENTITY_REF; 6140 } 6141 if (! 
entity->textPtr) { 6142 if (enc == parser->m_encoding) 6143 parser->m_eventPtr = ptr; 6144 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 6145 } else { 6146 enum XML_Error result; 6147 const XML_Char *textEnd = entity->textPtr + entity->textLen; 6148 entity->open = XML_TRUE; 6149 #if XML_GE == 1 6150 entityTrackingOnOpen(parser, entity, __LINE__); 6151 #endif 6152 result = appendAttributeValue(parser, parser->m_internalEncoding, 6153 isCdata, (const char *)entity->textPtr, 6154 (const char *)textEnd, pool, 6155 XML_ACCOUNT_ENTITY_EXPANSION); 6156 #if XML_GE == 1 6157 entityTrackingOnClose(parser, entity, __LINE__); 6158 #endif 6159 entity->open = XML_FALSE; 6160 if (result) 6161 return result; 6162 } 6163 } break; 6164 default: 6165 /* The only token returned by XmlAttributeValueTok() that does 6166 * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6167 * Getting that would require an entity name to contain an 6168 * incomplete XML character (e.g. \xE2\x82); however previous 6169 * tokenisers will have already recognised and rejected such 6170 * names before XmlAttributeValueTok() gets a look-in. This 6171 * default case should be retained as a safety net, but the code 6172 * excluded from coverage tests. 
6173 * 6174 * LCOV_EXCL_START 6175 */ 6176 if (enc == parser->m_encoding) 6177 parser->m_eventPtr = ptr; 6178 return XML_ERROR_UNEXPECTED_STATE; 6179 /* LCOV_EXCL_STOP */ 6180 } 6181 ptr = next; 6182 } 6183 /* not reached */ 6184 } 6185 6186 #if XML_GE == 1 6187 static enum XML_Error 6188 storeEntityValue(XML_Parser parser, const ENCODING *enc, 6189 const char *entityTextPtr, const char *entityTextEnd, 6190 enum XML_Account account) { 6191 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6192 STRING_POOL *pool = &(dtd->entityValuePool); 6193 enum XML_Error result = XML_ERROR_NONE; 6194 # ifdef XML_DTD 6195 int oldInEntityValue = parser->m_prologState.inEntityValue; 6196 parser->m_prologState.inEntityValue = 1; 6197 # else 6198 UNUSED_P(account); 6199 # endif /* XML_DTD */ 6200 /* never return Null for the value argument in EntityDeclHandler, 6201 since this would indicate an external entity; therefore we 6202 have to make sure that entityValuePool.start is not null */ 6203 if (! pool->blocks) { 6204 if (! poolGrow(pool)) 6205 return XML_ERROR_NO_MEMORY; 6206 } 6207 6208 for (;;) { 6209 const char *next 6210 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ 6211 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); 6212 6213 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, 6214 account)) { 6215 accountingOnAbort(parser); 6216 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6217 goto endEntityValue; 6218 } 6219 6220 switch (tok) { 6221 case XML_TOK_PARAM_ENTITY_REF: 6222 # ifdef XML_DTD 6223 if (parser->m_isParamEntity || enc != parser->m_encoding) { 6224 const XML_Char *name; 6225 ENTITY *entity; 6226 name = poolStoreString(&parser->m_tempPool, enc, 6227 entityTextPtr + enc->minBytesPerChar, 6228 next - enc->minBytesPerChar); 6229 if (! 
name) { 6230 result = XML_ERROR_NO_MEMORY; 6231 goto endEntityValue; 6232 } 6233 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 6234 poolDiscard(&parser->m_tempPool); 6235 if (! entity) { 6236 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ 6237 /* cannot report skipped entity here - see comments on 6238 parser->m_skippedEntityHandler 6239 if (parser->m_skippedEntityHandler) 6240 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6241 */ 6242 dtd->keepProcessing = dtd->standalone; 6243 goto endEntityValue; 6244 } 6245 if (entity->open || (entity == parser->m_declEntity)) { 6246 if (enc == parser->m_encoding) 6247 parser->m_eventPtr = entityTextPtr; 6248 result = XML_ERROR_RECURSIVE_ENTITY_REF; 6249 goto endEntityValue; 6250 } 6251 if (entity->systemId) { 6252 if (parser->m_externalEntityRefHandler) { 6253 dtd->paramEntityRead = XML_FALSE; 6254 entity->open = XML_TRUE; 6255 entityTrackingOnOpen(parser, entity, __LINE__); 6256 if (! parser->m_externalEntityRefHandler( 6257 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6258 entity->systemId, entity->publicId)) { 6259 entityTrackingOnClose(parser, entity, __LINE__); 6260 entity->open = XML_FALSE; 6261 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6262 goto endEntityValue; 6263 } 6264 entityTrackingOnClose(parser, entity, __LINE__); 6265 entity->open = XML_FALSE; 6266 if (! 
dtd->paramEntityRead) 6267 dtd->keepProcessing = dtd->standalone; 6268 } else 6269 dtd->keepProcessing = dtd->standalone; 6270 } else { 6271 entity->open = XML_TRUE; 6272 entityTrackingOnOpen(parser, entity, __LINE__); 6273 result = storeEntityValue( 6274 parser, parser->m_internalEncoding, (const char *)entity->textPtr, 6275 (const char *)(entity->textPtr + entity->textLen), 6276 XML_ACCOUNT_ENTITY_EXPANSION); 6277 entityTrackingOnClose(parser, entity, __LINE__); 6278 entity->open = XML_FALSE; 6279 if (result) 6280 goto endEntityValue; 6281 } 6282 break; 6283 } 6284 # endif /* XML_DTD */ 6285 /* In the internal subset, PE references are not legal 6286 within markup declarations, e.g entity values in this case. */ 6287 parser->m_eventPtr = entityTextPtr; 6288 result = XML_ERROR_PARAM_ENTITY_REF; 6289 goto endEntityValue; 6290 case XML_TOK_NONE: 6291 result = XML_ERROR_NONE; 6292 goto endEntityValue; 6293 case XML_TOK_ENTITY_REF: 6294 case XML_TOK_DATA_CHARS: 6295 if (! poolAppend(pool, enc, entityTextPtr, next)) { 6296 result = XML_ERROR_NO_MEMORY; 6297 goto endEntityValue; 6298 } 6299 break; 6300 case XML_TOK_TRAILING_CR: 6301 next = entityTextPtr + enc->minBytesPerChar; 6302 /* fall through */ 6303 case XML_TOK_DATA_NEWLINE: 6304 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6305 result = XML_ERROR_NO_MEMORY; 6306 goto endEntityValue; 6307 } 6308 *(pool->ptr)++ = 0xA; 6309 break; 6310 case XML_TOK_CHAR_REF: { 6311 XML_Char buf[XML_ENCODE_MAX]; 6312 int i; 6313 int n = XmlCharRefNumber(enc, entityTextPtr); 6314 if (n < 0) { 6315 if (enc == parser->m_encoding) 6316 parser->m_eventPtr = entityTextPtr; 6317 result = XML_ERROR_BAD_CHAR_REF; 6318 goto endEntityValue; 6319 } 6320 n = XmlEncode(n, (ICHAR *)buf); 6321 /* The XmlEncode() functions can never return 0 here. That 6322 * error return happens if the code point passed in is either 6323 * negative or greater than or equal to 0x110000. 
The 6324 * XmlCharRefNumber() functions will all return a number 6325 * strictly less than 0x110000 or a negative value if an error 6326 * occurred. The negative value is intercepted above, so 6327 * XmlEncode() is never passed a value it might return an 6328 * error for. 6329 */ 6330 for (i = 0; i < n; i++) { 6331 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6332 result = XML_ERROR_NO_MEMORY; 6333 goto endEntityValue; 6334 } 6335 *(pool->ptr)++ = buf[i]; 6336 } 6337 } break; 6338 case XML_TOK_PARTIAL: 6339 if (enc == parser->m_encoding) 6340 parser->m_eventPtr = entityTextPtr; 6341 result = XML_ERROR_INVALID_TOKEN; 6342 goto endEntityValue; 6343 case XML_TOK_INVALID: 6344 if (enc == parser->m_encoding) 6345 parser->m_eventPtr = next; 6346 result = XML_ERROR_INVALID_TOKEN; 6347 goto endEntityValue; 6348 default: 6349 /* This default case should be unnecessary -- all the tokens 6350 * that XmlEntityValueTok() can return have their own explicit 6351 * cases -- but should be retained for safety. We do however 6352 * exclude it from the coverage statistics. 6353 * 6354 * LCOV_EXCL_START 6355 */ 6356 if (enc == parser->m_encoding) 6357 parser->m_eventPtr = entityTextPtr; 6358 result = XML_ERROR_UNEXPECTED_STATE; 6359 goto endEntityValue; 6360 /* LCOV_EXCL_STOP */ 6361 } 6362 entityTextPtr = next; 6363 } 6364 endEntityValue: 6365 # ifdef XML_DTD 6366 parser->m_prologState.inEntityValue = oldInEntityValue; 6367 # endif /* XML_DTD */ 6368 return result; 6369 } 6370 6371 #else /* XML_GE == 0 */ 6372 6373 static enum XML_Error 6374 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { 6375 // This will store "&entity123;" in entity->textPtr 6376 // to end up as "&entity123;" in the handler. 6377 const char *const entity_start = "&"; 6378 const char *const entity_end = ";"; 6379 6380 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); 6381 if (! poolAppendString(pool, entity_start) 6382 || ! poolAppendString(pool, entity->name) 6383 || ! 
poolAppendString(pool, entity_end)) { 6384 poolDiscard(pool); 6385 return XML_ERROR_NO_MEMORY; 6386 } 6387 6388 entity->textPtr = poolStart(pool); 6389 entity->textLen = (int)(poolLength(pool)); 6390 poolFinish(pool); 6391 6392 return XML_ERROR_NONE; 6393 } 6394 6395 #endif /* XML_GE == 0 */ 6396 6397 static void FASTCALL 6398 normalizeLines(XML_Char *s) { 6399 XML_Char *p; 6400 for (;; s++) { 6401 if (*s == XML_T('\0')) 6402 return; 6403 if (*s == 0xD) 6404 break; 6405 } 6406 p = s; 6407 do { 6408 if (*s == 0xD) { 6409 *p++ = 0xA; 6410 if (*++s == 0xA) 6411 s++; 6412 } else 6413 *p++ = *s++; 6414 } while (*s); 6415 *p = XML_T('\0'); 6416 } 6417 6418 static int 6419 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 6420 const char *start, const char *end) { 6421 const XML_Char *target; 6422 XML_Char *data; 6423 const char *tem; 6424 if (! parser->m_processingInstructionHandler) { 6425 if (parser->m_defaultHandler) 6426 reportDefault(parser, enc, start, end); 6427 return 1; 6428 } 6429 start += enc->minBytesPerChar * 2; 6430 tem = start + XmlNameLength(enc, start); 6431 target = poolStoreString(&parser->m_tempPool, enc, start, tem); 6432 if (! target) 6433 return 0; 6434 poolFinish(&parser->m_tempPool); 6435 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), 6436 end - enc->minBytesPerChar * 2); 6437 if (! data) 6438 return 0; 6439 normalizeLines(data); 6440 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 6441 poolClear(&parser->m_tempPool); 6442 return 1; 6443 } 6444 6445 static int 6446 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, 6447 const char *end) { 6448 XML_Char *data; 6449 if (! parser->m_commentHandler) { 6450 if (parser->m_defaultHandler) 6451 reportDefault(parser, enc, start, end); 6452 return 1; 6453 } 6454 data = poolStoreString(&parser->m_tempPool, enc, 6455 start + enc->minBytesPerChar * 4, 6456 end - enc->minBytesPerChar * 3); 6457 if (! 
data) 6458 return 0; 6459 normalizeLines(data); 6460 parser->m_commentHandler(parser->m_handlerArg, data); 6461 poolClear(&parser->m_tempPool); 6462 return 1; 6463 } 6464 6465 static void 6466 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, 6467 const char *end) { 6468 if (MUST_CONVERT(enc, s)) { 6469 enum XML_Convert_Result convert_res; 6470 const char **eventPP; 6471 const char **eventEndPP; 6472 if (enc == parser->m_encoding) { 6473 eventPP = &parser->m_eventPtr; 6474 eventEndPP = &parser->m_eventEndPtr; 6475 } else { 6476 /* To get here, two things must be true; the parser must be 6477 * using a character encoding that is not the same as the 6478 * encoding passed in, and the encoding passed in must need 6479 * conversion to the internal format (UTF-8 unless XML_UNICODE 6480 * is defined). The only occasions on which the encoding passed 6481 * in is not the same as the parser's encoding are when it is 6482 * the internal encoding (e.g. a previously defined parameter 6483 * entity, already converted to internal format). This by 6484 * definition doesn't need conversion, so the whole branch never 6485 * gets executed. 6486 * 6487 * For safety's sake we don't delete these lines and merely 6488 * exclude them from coverage statistics. 
6489 * 6490 * LCOV_EXCL_START 6491 */ 6492 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 6493 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 6494 /* LCOV_EXCL_STOP */ 6495 } 6496 do { 6497 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 6498 convert_res 6499 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 6500 *eventEndPP = s; 6501 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, 6502 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 6503 *eventPP = s; 6504 } while ((convert_res != XML_CONVERT_COMPLETED) 6505 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); 6506 } else 6507 parser->m_defaultHandler( 6508 parser->m_handlerArg, (const XML_Char *)s, 6509 (int)((const XML_Char *)end - (const XML_Char *)s)); 6510 } 6511 6512 static int 6513 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, 6514 XML_Bool isId, const XML_Char *value, XML_Parser parser) { 6515 DEFAULT_ATTRIBUTE *att; 6516 if (value || isId) { 6517 /* The handling of default attributes gets messed up if we have 6518 a default which duplicates a non-default. */ 6519 int i; 6520 for (i = 0; i < type->nDefaultAtts; i++) 6521 if (attId == type->defaultAtts[i].id) 6522 return 1; 6523 if (isId && ! type->idAtt && ! attId->xmlns) 6524 type->idAtt = attId; 6525 } 6526 if (type->nDefaultAtts == type->allocDefaultAtts) { 6527 if (type->allocDefaultAtts == 0) { 6528 type->allocDefaultAtts = 8; 6529 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( 6530 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 6531 if (! type->defaultAtts) { 6532 type->allocDefaultAtts = 0; 6533 return 0; 6534 } 6535 } else { 6536 DEFAULT_ATTRIBUTE *temp; 6537 6538 /* Detect and prevent integer overflow */ 6539 if (type->allocDefaultAtts > INT_MAX / 2) { 6540 return 0; 6541 } 6542 6543 int count = type->allocDefaultAtts * 2; 6544 6545 /* Detect and prevent integer overflow. 
6546 * The preprocessor guard addresses the "always false" warning 6547 * from -Wtype-limits on platforms where 6548 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 6549 #if UINT_MAX >= SIZE_MAX 6550 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { 6551 return 0; 6552 } 6553 #endif 6554 6555 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, 6556 (count * sizeof(DEFAULT_ATTRIBUTE))); 6557 if (temp == NULL) 6558 return 0; 6559 type->allocDefaultAtts = count; 6560 type->defaultAtts = temp; 6561 } 6562 } 6563 att = type->defaultAtts + type->nDefaultAtts; 6564 att->id = attId; 6565 att->value = value; 6566 att->isCdata = isCdata; 6567 if (! isCdata) 6568 attId->maybeTokenized = XML_TRUE; 6569 type->nDefaultAtts += 1; 6570 return 1; 6571 } 6572 6573 static int 6574 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { 6575 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6576 const XML_Char *name; 6577 for (name = elementType->name; *name; name++) { 6578 if (*name == XML_T(ASCII_COLON)) { 6579 PREFIX *prefix; 6580 const XML_Char *s; 6581 for (s = elementType->name; s != name; s++) { 6582 if (! poolAppendChar(&dtd->pool, *s)) 6583 return 0; 6584 } 6585 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6586 return 0; 6587 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), 6588 sizeof(PREFIX)); 6589 if (! prefix) 6590 return 0; 6591 if (prefix->name == poolStart(&dtd->pool)) 6592 poolFinish(&dtd->pool); 6593 else 6594 poolDiscard(&dtd->pool); 6595 elementType->prefix = prefix; 6596 break; 6597 } 6598 } 6599 return 1; 6600 } 6601 6602 static ATTRIBUTE_ID * 6603 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, 6604 const char *end) { 6605 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6606 ATTRIBUTE_ID *id; 6607 const XML_Char *name; 6608 if (! 
poolAppendChar(&dtd->pool, XML_T('\0'))) 6609 return NULL; 6610 name = poolStoreString(&dtd->pool, enc, start, end); 6611 if (! name) 6612 return NULL; 6613 /* skip quotation mark - its storage will be reused (like in name[-1]) */ 6614 ++name; 6615 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, 6616 sizeof(ATTRIBUTE_ID)); 6617 if (! id) 6618 return NULL; 6619 if (id->name != name) 6620 poolDiscard(&dtd->pool); 6621 else { 6622 poolFinish(&dtd->pool); 6623 if (! parser->m_ns) 6624 ; 6625 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) 6626 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) 6627 && name[4] == XML_T(ASCII_s) 6628 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 6629 if (name[5] == XML_T('\0')) 6630 id->prefix = &dtd->defaultPrefix; 6631 else 6632 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, 6633 sizeof(PREFIX)); 6634 id->xmlns = XML_TRUE; 6635 } else { 6636 int i; 6637 for (i = 0; name[i]; i++) { 6638 /* attributes without prefix are *not* in the default namespace */ 6639 if (name[i] == XML_T(ASCII_COLON)) { 6640 int j; 6641 for (j = 0; j < i; j++) { 6642 if (! poolAppendChar(&dtd->pool, name[j])) 6643 return NULL; 6644 } 6645 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6646 return NULL; 6647 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, 6648 poolStart(&dtd->pool), sizeof(PREFIX)); 6649 if (! id->prefix) 6650 return NULL; 6651 if (id->prefix->name == poolStart(&dtd->pool)) 6652 poolFinish(&dtd->pool); 6653 else 6654 poolDiscard(&dtd->pool); 6655 break; 6656 } 6657 } 6658 } 6659 } 6660 return id; 6661 } 6662 6663 #define CONTEXT_SEP XML_T(ASCII_FF) 6664 6665 static const XML_Char * 6666 getContext(XML_Parser parser) { 6667 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6668 HASH_TABLE_ITER iter; 6669 XML_Bool needSep = XML_FALSE; 6670 6671 if (dtd->defaultPrefix.binding) { 6672 int i; 6673 int len; 6674 if (! 
poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6675 return NULL; 6676 len = dtd->defaultPrefix.binding->uriLen; 6677 if (parser->m_namespaceSeparator) 6678 len--; 6679 for (i = 0; i < len; i++) { 6680 if (! poolAppendChar(&parser->m_tempPool, 6681 dtd->defaultPrefix.binding->uri[i])) { 6682 /* Because of memory caching, I don't believe this line can be 6683 * executed. 6684 * 6685 * This is part of a loop copying the default prefix binding 6686 * URI into the parser's temporary string pool. Previously, 6687 * that URI was copied into the same string pool, with a 6688 * terminating NUL character, as part of setContext(). When 6689 * the pool was cleared, that leaves a block definitely big 6690 * enough to hold the URI on the free block list of the pool. 6691 * The URI copy in getContext() therefore cannot run out of 6692 * memory. 6693 * 6694 * If the pool is used between the setContext() and 6695 * getContext() calls, the worst it can do is leave a bigger 6696 * block on the front of the free list. Given that this is 6697 * all somewhat inobvious and program logic can be changed, we 6698 * don't delete the line but we do exclude it from the test 6699 * coverage statistics. 6700 */ 6701 return NULL; /* LCOV_EXCL_LINE */ 6702 } 6703 } 6704 needSep = XML_TRUE; 6705 } 6706 6707 hashTableIterInit(&iter, &(dtd->prefixes)); 6708 for (;;) { 6709 int i; 6710 int len; 6711 const XML_Char *s; 6712 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); 6713 if (! prefix) 6714 break; 6715 if (! prefix->binding) { 6716 /* This test appears to be (justifiable) paranoia. There does 6717 * not seem to be a way of injecting a prefix without a binding 6718 * that doesn't get errored long before this function is called. 6719 * The test should remain for safety's sake, so we instead 6720 * exclude the following line from the coverage statistics. 6721 */ 6722 continue; /* LCOV_EXCL_LINE */ 6723 } 6724 if (needSep && ! 
poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6725 return NULL; 6726 for (s = prefix->name; *s; s++) 6727 if (! poolAppendChar(&parser->m_tempPool, *s)) 6728 return NULL; 6729 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6730 return NULL; 6731 len = prefix->binding->uriLen; 6732 if (parser->m_namespaceSeparator) 6733 len--; 6734 for (i = 0; i < len; i++) 6735 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) 6736 return NULL; 6737 needSep = XML_TRUE; 6738 } 6739 6740 hashTableIterInit(&iter, &(dtd->generalEntities)); 6741 for (;;) { 6742 const XML_Char *s; 6743 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 6744 if (! e) 6745 break; 6746 if (! e->open) 6747 continue; 6748 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6749 return NULL; 6750 for (s = e->name; *s; s++) 6751 if (! poolAppendChar(&parser->m_tempPool, *s)) 6752 return 0; 6753 needSep = XML_TRUE; 6754 } 6755 6756 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6757 return NULL; 6758 return parser->m_tempPool.start; 6759 } 6760 6761 static XML_Bool 6762 setContext(XML_Parser parser, const XML_Char *context) { 6763 if (context == NULL) { 6764 return XML_FALSE; 6765 } 6766 6767 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6768 const XML_Char *s = context; 6769 6770 while (*context != XML_T('\0')) { 6771 if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 6772 ENTITY *e; 6773 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6774 return XML_FALSE; 6775 e = (ENTITY *)lookup(parser, &dtd->generalEntities, 6776 poolStart(&parser->m_tempPool), 0); 6777 if (e) 6778 e->open = XML_TRUE; 6779 if (*s != XML_T('\0')) 6780 s++; 6781 context = s; 6782 poolDiscard(&parser->m_tempPool); 6783 } else if (*s == XML_T(ASCII_EQUALS)) { 6784 PREFIX *prefix; 6785 if (poolLength(&parser->m_tempPool) == 0) 6786 prefix = &dtd->defaultPrefix; 6787 else { 6788 if (! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6789 return XML_FALSE; 6790 prefix 6791 = (PREFIX *)lookup(parser, &dtd->prefixes, 6792 poolStart(&parser->m_tempPool), sizeof(PREFIX)); 6793 if (! prefix) 6794 return XML_FALSE; 6795 if (prefix->name == poolStart(&parser->m_tempPool)) { 6796 prefix->name = poolCopyString(&dtd->pool, prefix->name); 6797 if (! prefix->name) 6798 return XML_FALSE; 6799 } 6800 poolDiscard(&parser->m_tempPool); 6801 } 6802 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); 6803 context++) 6804 if (! poolAppendChar(&parser->m_tempPool, *context)) 6805 return XML_FALSE; 6806 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6807 return XML_FALSE; 6808 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), 6809 &parser->m_inheritedBindings) 6810 != XML_ERROR_NONE) 6811 return XML_FALSE; 6812 poolDiscard(&parser->m_tempPool); 6813 if (*context != XML_T('\0')) 6814 ++context; 6815 s = context; 6816 } else { 6817 if (! poolAppendChar(&parser->m_tempPool, *s)) 6818 return XML_FALSE; 6819 s++; 6820 } 6821 } 6822 return XML_TRUE; 6823 } 6824 6825 static void FASTCALL 6826 normalizePublicId(XML_Char *publicId) { 6827 XML_Char *p = publicId; 6828 XML_Char *s; 6829 for (s = publicId; *s; s++) { 6830 switch (*s) { 6831 case 0x20: 6832 case 0xD: 6833 case 0xA: 6834 if (p != publicId && p[-1] != 0x20) 6835 *p++ = 0x20; 6836 break; 6837 default: 6838 *p++ = *s; 6839 } 6840 } 6841 if (p != publicId && p[-1] == 0x20) 6842 --p; 6843 *p = XML_T('\0'); 6844 } 6845 6846 static DTD * 6847 dtdCreate(const XML_Memory_Handling_Suite *ms) { 6848 DTD *p = ms->malloc_fcn(sizeof(DTD)); 6849 if (p == NULL) 6850 return p; 6851 poolInit(&(p->pool), ms); 6852 poolInit(&(p->entityValuePool), ms); 6853 hashTableInit(&(p->generalEntities), ms); 6854 hashTableInit(&(p->elementTypes), ms); 6855 hashTableInit(&(p->attributeIds), ms); 6856 hashTableInit(&(p->prefixes), ms); 6857 #ifdef XML_DTD 6858 p->paramEntityRead = XML_FALSE; 6859 
hashTableInit(&(p->paramEntities), ms); 6860 #endif /* XML_DTD */ 6861 p->defaultPrefix.name = NULL; 6862 p->defaultPrefix.binding = NULL; 6863 6864 p->in_eldecl = XML_FALSE; 6865 p->scaffIndex = NULL; 6866 p->scaffold = NULL; 6867 p->scaffLevel = 0; 6868 p->scaffSize = 0; 6869 p->scaffCount = 0; 6870 p->contentStringLen = 0; 6871 6872 p->keepProcessing = XML_TRUE; 6873 p->hasParamEntityRefs = XML_FALSE; 6874 p->standalone = XML_FALSE; 6875 return p; 6876 } 6877 6878 static void 6879 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { 6880 HASH_TABLE_ITER iter; 6881 hashTableIterInit(&iter, &(p->elementTypes)); 6882 for (;;) { 6883 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6884 if (! e) 6885 break; 6886 if (e->allocDefaultAtts != 0) 6887 ms->free_fcn(e->defaultAtts); 6888 } 6889 hashTableClear(&(p->generalEntities)); 6890 #ifdef XML_DTD 6891 p->paramEntityRead = XML_FALSE; 6892 hashTableClear(&(p->paramEntities)); 6893 #endif /* XML_DTD */ 6894 hashTableClear(&(p->elementTypes)); 6895 hashTableClear(&(p->attributeIds)); 6896 hashTableClear(&(p->prefixes)); 6897 poolClear(&(p->pool)); 6898 poolClear(&(p->entityValuePool)); 6899 p->defaultPrefix.name = NULL; 6900 p->defaultPrefix.binding = NULL; 6901 6902 p->in_eldecl = XML_FALSE; 6903 6904 ms->free_fcn(p->scaffIndex); 6905 p->scaffIndex = NULL; 6906 ms->free_fcn(p->scaffold); 6907 p->scaffold = NULL; 6908 6909 p->scaffLevel = 0; 6910 p->scaffSize = 0; 6911 p->scaffCount = 0; 6912 p->contentStringLen = 0; 6913 6914 p->keepProcessing = XML_TRUE; 6915 p->hasParamEntityRefs = XML_FALSE; 6916 p->standalone = XML_FALSE; 6917 } 6918 6919 static void 6920 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { 6921 HASH_TABLE_ITER iter; 6922 hashTableIterInit(&iter, &(p->elementTypes)); 6923 for (;;) { 6924 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6925 if (! 
e) 6926 break; 6927 if (e->allocDefaultAtts != 0) 6928 ms->free_fcn(e->defaultAtts); 6929 } 6930 hashTableDestroy(&(p->generalEntities)); 6931 #ifdef XML_DTD 6932 hashTableDestroy(&(p->paramEntities)); 6933 #endif /* XML_DTD */ 6934 hashTableDestroy(&(p->elementTypes)); 6935 hashTableDestroy(&(p->attributeIds)); 6936 hashTableDestroy(&(p->prefixes)); 6937 poolDestroy(&(p->pool)); 6938 poolDestroy(&(p->entityValuePool)); 6939 if (isDocEntity) { 6940 ms->free_fcn(p->scaffIndex); 6941 ms->free_fcn(p->scaffold); 6942 } 6943 ms->free_fcn(p); 6944 } 6945 6946 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. 6947 The new DTD has already been initialized. 6948 */ 6949 static int 6950 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 6951 const XML_Memory_Handling_Suite *ms) { 6952 HASH_TABLE_ITER iter; 6953 6954 /* Copy the prefix table. */ 6955 6956 hashTableIterInit(&iter, &(oldDtd->prefixes)); 6957 for (;;) { 6958 const XML_Char *name; 6959 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); 6960 if (! oldP) 6961 break; 6962 name = poolCopyString(&(newDtd->pool), oldP->name); 6963 if (! name) 6964 return 0; 6965 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) 6966 return 0; 6967 } 6968 6969 hashTableIterInit(&iter, &(oldDtd->attributeIds)); 6970 6971 /* Copy the attribute id table. */ 6972 6973 for (;;) { 6974 ATTRIBUTE_ID *newA; 6975 const XML_Char *name; 6976 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); 6977 6978 if (! oldA) 6979 break; 6980 /* Remember to allocate the scratch byte before the name. */ 6981 if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) 6982 return 0; 6983 name = poolCopyString(&(newDtd->pool), oldA->name); 6984 if (! name) 6985 return 0; 6986 ++name; 6987 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, 6988 sizeof(ATTRIBUTE_ID)); 6989 if (! 
newA) 6990 return 0; 6991 newA->maybeTokenized = oldA->maybeTokenized; 6992 if (oldA->prefix) { 6993 newA->xmlns = oldA->xmlns; 6994 if (oldA->prefix == &oldDtd->defaultPrefix) 6995 newA->prefix = &newDtd->defaultPrefix; 6996 else 6997 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 6998 oldA->prefix->name, 0); 6999 } 7000 } 7001 7002 /* Copy the element type table. */ 7003 7004 hashTableIterInit(&iter, &(oldDtd->elementTypes)); 7005 7006 for (;;) { 7007 int i; 7008 ELEMENT_TYPE *newE; 7009 const XML_Char *name; 7010 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7011 if (! oldE) 7012 break; 7013 name = poolCopyString(&(newDtd->pool), oldE->name); 7014 if (! name) 7015 return 0; 7016 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, 7017 sizeof(ELEMENT_TYPE)); 7018 if (! newE) 7019 return 0; 7020 if (oldE->nDefaultAtts) { 7021 /* Detect and prevent integer overflow. 7022 * The preprocessor guard addresses the "always false" warning 7023 * from -Wtype-limits on platforms where 7024 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ 7025 #if UINT_MAX >= SIZE_MAX 7026 if ((size_t)oldE->nDefaultAtts 7027 > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { 7028 return 0; 7029 } 7030 #endif 7031 newE->defaultAtts 7032 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 7033 if (! 
newE->defaultAtts) { 7034 return 0; 7035 } 7036 } 7037 if (oldE->idAtt) 7038 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), 7039 oldE->idAtt->name, 0); 7040 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 7041 if (oldE->prefix) 7042 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7043 oldE->prefix->name, 0); 7044 for (i = 0; i < newE->nDefaultAtts; i++) { 7045 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( 7046 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 7047 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 7048 if (oldE->defaultAtts[i].value) { 7049 newE->defaultAtts[i].value 7050 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 7051 if (! newE->defaultAtts[i].value) 7052 return 0; 7053 } else 7054 newE->defaultAtts[i].value = NULL; 7055 } 7056 } 7057 7058 /* Copy the entity tables. */ 7059 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), 7060 &(oldDtd->generalEntities))) 7061 return 0; 7062 7063 #ifdef XML_DTD 7064 if (! 
copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), 7065 &(oldDtd->paramEntities))) 7066 return 0; 7067 newDtd->paramEntityRead = oldDtd->paramEntityRead; 7068 #endif /* XML_DTD */ 7069 7070 newDtd->keepProcessing = oldDtd->keepProcessing; 7071 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; 7072 newDtd->standalone = oldDtd->standalone; 7073 7074 /* Don't want deep copying for scaffolding */ 7075 newDtd->in_eldecl = oldDtd->in_eldecl; 7076 newDtd->scaffold = oldDtd->scaffold; 7077 newDtd->contentStringLen = oldDtd->contentStringLen; 7078 newDtd->scaffSize = oldDtd->scaffSize; 7079 newDtd->scaffLevel = oldDtd->scaffLevel; 7080 newDtd->scaffIndex = oldDtd->scaffIndex; 7081 7082 return 1; 7083 } /* End dtdCopy */ 7084 7085 static int 7086 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 7087 STRING_POOL *newPool, const HASH_TABLE *oldTable) { 7088 HASH_TABLE_ITER iter; 7089 const XML_Char *cachedOldBase = NULL; 7090 const XML_Char *cachedNewBase = NULL; 7091 7092 hashTableIterInit(&iter, oldTable); 7093 7094 for (;;) { 7095 ENTITY *newE; 7096 const XML_Char *name; 7097 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); 7098 if (! oldE) 7099 break; 7100 name = poolCopyString(newPool, oldE->name); 7101 if (! name) 7102 return 0; 7103 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); 7104 if (! newE) 7105 return 0; 7106 if (oldE->systemId) { 7107 const XML_Char *tem = poolCopyString(newPool, oldE->systemId); 7108 if (! tem) 7109 return 0; 7110 newE->systemId = tem; 7111 if (oldE->base) { 7112 if (oldE->base == cachedOldBase) 7113 newE->base = cachedNewBase; 7114 else { 7115 cachedOldBase = oldE->base; 7116 tem = poolCopyString(newPool, cachedOldBase); 7117 if (! tem) 7118 return 0; 7119 cachedNewBase = newE->base = tem; 7120 } 7121 } 7122 if (oldE->publicId) { 7123 tem = poolCopyString(newPool, oldE->publicId); 7124 if (! 
tem) 7125 return 0; 7126 newE->publicId = tem; 7127 } 7128 } else { 7129 const XML_Char *tem 7130 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 7131 if (! tem) 7132 return 0; 7133 newE->textPtr = tem; 7134 newE->textLen = oldE->textLen; 7135 } 7136 if (oldE->notation) { 7137 const XML_Char *tem = poolCopyString(newPool, oldE->notation); 7138 if (! tem) 7139 return 0; 7140 newE->notation = tem; 7141 } 7142 newE->is_param = oldE->is_param; 7143 newE->is_internal = oldE->is_internal; 7144 } 7145 return 1; 7146 } 7147 7148 #define INIT_POWER 6 7149 7150 static XML_Bool FASTCALL 7151 keyeq(KEY s1, KEY s2) { 7152 for (; *s1 == *s2; s1++, s2++) 7153 if (*s1 == 0) 7154 return XML_TRUE; 7155 return XML_FALSE; 7156 } 7157 7158 static size_t 7159 keylen(KEY s) { 7160 size_t len = 0; 7161 for (; *s; s++, len++) 7162 ; 7163 return len; 7164 } 7165 7166 static void 7167 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { 7168 key->k[0] = 0; 7169 key->k[1] = get_hash_secret_salt(parser); 7170 } 7171 7172 static unsigned long FASTCALL 7173 hash(XML_Parser parser, KEY s) { 7174 struct siphash state; 7175 struct sipkey key; 7176 (void)sip24_valid; 7177 copy_salt_to_sipkey(parser, &key); 7178 sip24_init(&state, &key); 7179 sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 7180 return (unsigned long)sip24_final(&state); 7181 } 7182 7183 static NAMED * 7184 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { 7185 size_t i; 7186 if (table->size == 0) { 7187 size_t tsize; 7188 if (! createSize) 7189 return NULL; 7190 table->power = INIT_POWER; 7191 /* table->size is a power of 2 */ 7192 table->size = (size_t)1 << INIT_POWER; 7193 tsize = table->size * sizeof(NAMED *); 7194 table->v = table->mem->malloc_fcn(tsize); 7195 if (! 
table->v) { 7196 table->size = 0; 7197 return NULL; 7198 } 7199 memset(table->v, 0, tsize); 7200 i = hash(parser, name) & ((unsigned long)table->size - 1); 7201 } else { 7202 unsigned long h = hash(parser, name); 7203 unsigned long mask = (unsigned long)table->size - 1; 7204 unsigned char step = 0; 7205 i = h & mask; 7206 while (table->v[i]) { 7207 if (keyeq(name, table->v[i]->name)) 7208 return table->v[i]; 7209 if (! step) 7210 step = PROBE_STEP(h, mask, table->power); 7211 i < step ? (i += table->size - step) : (i -= step); 7212 } 7213 if (! createSize) 7214 return NULL; 7215 7216 /* check for overflow (table is half full) */ 7217 if (table->used >> (table->power - 1)) { 7218 unsigned char newPower = table->power + 1; 7219 7220 /* Detect and prevent invalid shift */ 7221 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 7222 return NULL; 7223 } 7224 7225 size_t newSize = (size_t)1 << newPower; 7226 unsigned long newMask = (unsigned long)newSize - 1; 7227 7228 /* Detect and prevent integer overflow */ 7229 if (newSize > (size_t)(-1) / sizeof(NAMED *)) { 7230 return NULL; 7231 } 7232 7233 size_t tsize = newSize * sizeof(NAMED *); 7234 NAMED **newV = table->mem->malloc_fcn(tsize); 7235 if (! newV) 7236 return NULL; 7237 memset(newV, 0, tsize); 7238 for (i = 0; i < table->size; i++) 7239 if (table->v[i]) { 7240 unsigned long newHash = hash(parser, table->v[i]->name); 7241 size_t j = newHash & newMask; 7242 step = 0; 7243 while (newV[j]) { 7244 if (! step) 7245 step = PROBE_STEP(newHash, newMask, newPower); 7246 j < step ? (j += newSize - step) : (j -= step); 7247 } 7248 newV[j] = table->v[i]; 7249 } 7250 table->mem->free_fcn(table->v); 7251 table->v = newV; 7252 table->power = newPower; 7253 table->size = newSize; 7254 i = h & newMask; 7255 step = 0; 7256 while (table->v[i]) { 7257 if (! step) 7258 step = PROBE_STEP(h, newMask, newPower); 7259 i < step ? 
(i += newSize - step) : (i -= step); 7260 } 7261 } 7262 } 7263 table->v[i] = table->mem->malloc_fcn(createSize); 7264 if (! table->v[i]) 7265 return NULL; 7266 memset(table->v[i], 0, createSize); 7267 table->v[i]->name = name; 7268 (table->used)++; 7269 return table->v[i]; 7270 } 7271 7272 static void FASTCALL 7273 hashTableClear(HASH_TABLE *table) { 7274 size_t i; 7275 for (i = 0; i < table->size; i++) { 7276 table->mem->free_fcn(table->v[i]); 7277 table->v[i] = NULL; 7278 } 7279 table->used = 0; 7280 } 7281 7282 static void FASTCALL 7283 hashTableDestroy(HASH_TABLE *table) { 7284 size_t i; 7285 for (i = 0; i < table->size; i++) 7286 table->mem->free_fcn(table->v[i]); 7287 table->mem->free_fcn(table->v); 7288 } 7289 7290 static void FASTCALL 7291 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { 7292 p->power = 0; 7293 p->size = 0; 7294 p->used = 0; 7295 p->v = NULL; 7296 p->mem = ms; 7297 } 7298 7299 static void FASTCALL 7300 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { 7301 iter->p = table->v; 7302 iter->end = iter->p ? iter->p + table->size : NULL; 7303 } 7304 7305 static NAMED *FASTCALL 7306 hashTableIterNext(HASH_TABLE_ITER *iter) { 7307 while (iter->p != iter->end) { 7308 NAMED *tem = *(iter->p)++; 7309 if (tem) 7310 return tem; 7311 } 7312 return NULL; 7313 } 7314 7315 static void FASTCALL 7316 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { 7317 pool->blocks = NULL; 7318 pool->freeBlocks = NULL; 7319 pool->start = NULL; 7320 pool->ptr = NULL; 7321 pool->end = NULL; 7322 pool->mem = ms; 7323 } 7324 7325 static void FASTCALL 7326 poolClear(STRING_POOL *pool) { 7327 if (! 
pool->freeBlocks) 7328 pool->freeBlocks = pool->blocks; 7329 else { 7330 BLOCK *p = pool->blocks; 7331 while (p) { 7332 BLOCK *tem = p->next; 7333 p->next = pool->freeBlocks; 7334 pool->freeBlocks = p; 7335 p = tem; 7336 } 7337 } 7338 pool->blocks = NULL; 7339 pool->start = NULL; 7340 pool->ptr = NULL; 7341 pool->end = NULL; 7342 } 7343 7344 static void FASTCALL 7345 poolDestroy(STRING_POOL *pool) { 7346 BLOCK *p = pool->blocks; 7347 while (p) { 7348 BLOCK *tem = p->next; 7349 pool->mem->free_fcn(p); 7350 p = tem; 7351 } 7352 p = pool->freeBlocks; 7353 while (p) { 7354 BLOCK *tem = p->next; 7355 pool->mem->free_fcn(p); 7356 p = tem; 7357 } 7358 } 7359 7360 static XML_Char * 7361 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7362 const char *end) { 7363 if (! pool->ptr && ! poolGrow(pool)) 7364 return NULL; 7365 for (;;) { 7366 const enum XML_Convert_Result convert_res = XmlConvert( 7367 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); 7368 if ((convert_res == XML_CONVERT_COMPLETED) 7369 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 7370 break; 7371 if (! poolGrow(pool)) 7372 return NULL; 7373 } 7374 return pool->start; 7375 } 7376 7377 static const XML_Char *FASTCALL 7378 poolCopyString(STRING_POOL *pool, const XML_Char *s) { 7379 do { 7380 if (! poolAppendChar(pool, *s)) 7381 return NULL; 7382 } while (*s++); 7383 s = pool->start; 7384 poolFinish(pool); 7385 return s; 7386 } 7387 7388 static const XML_Char * 7389 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { 7390 if (! pool->ptr && ! poolGrow(pool)) { 7391 /* The following line is unreachable given the current usage of 7392 * poolCopyStringN(). Currently it is called from exactly one 7393 * place to copy the text of a simple general entity. By that 7394 * point, the name of the entity is already stored in the pool, so 7395 * pool->ptr cannot be NULL. 
7396 * 7397 * If poolCopyStringN() is used elsewhere as it well might be, 7398 * this line may well become executable again. Regardless, this 7399 * sort of check shouldn't be removed lightly, so we just exclude 7400 * it from the coverage statistics. 7401 */ 7402 return NULL; /* LCOV_EXCL_LINE */ 7403 } 7404 for (; n > 0; --n, s++) { 7405 if (! poolAppendChar(pool, *s)) 7406 return NULL; 7407 } 7408 s = pool->start; 7409 poolFinish(pool); 7410 return s; 7411 } 7412 7413 static const XML_Char *FASTCALL 7414 poolAppendString(STRING_POOL *pool, const XML_Char *s) { 7415 while (*s) { 7416 if (! poolAppendChar(pool, *s)) 7417 return NULL; 7418 s++; 7419 } 7420 return pool->start; 7421 } 7422 7423 static XML_Char * 7424 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7425 const char *end) { 7426 if (! poolAppend(pool, enc, ptr, end)) 7427 return NULL; 7428 if (pool->ptr == pool->end && ! poolGrow(pool)) 7429 return NULL; 7430 *(pool->ptr)++ = 0; 7431 return pool->start; 7432 } 7433 7434 static size_t 7435 poolBytesToAllocateFor(int blockSize) { 7436 /* Unprotected math would be: 7437 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 7438 ** 7439 ** Detect overflow, avoiding _signed_ overflow undefined behavior 7440 ** For a + b * c we check b * c in isolation first, so that addition of a 7441 ** on top has no chance of making us accept a small non-negative number 7442 */ 7443 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 7444 7445 if (blockSize <= 0) 7446 return 0; 7447 7448 if (blockSize > (int)(INT_MAX / stretch)) 7449 return 0; 7450 7451 { 7452 const int stretchedBlockSize = blockSize * (int)stretch; 7453 const int bytesToAllocate 7454 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 7455 if (bytesToAllocate < 0) 7456 return 0; 7457 7458 return (size_t)bytesToAllocate; 7459 } 7460 } 7461 7462 static XML_Bool FASTCALL 7463 poolGrow(STRING_POOL *pool) { 7464 if (pool->freeBlocks) { 7465 if (pool->start == 
0) { 7466 pool->blocks = pool->freeBlocks; 7467 pool->freeBlocks = pool->freeBlocks->next; 7468 pool->blocks->next = NULL; 7469 pool->start = pool->blocks->s; 7470 pool->end = pool->start + pool->blocks->size; 7471 pool->ptr = pool->start; 7472 return XML_TRUE; 7473 } 7474 if (pool->end - pool->start < pool->freeBlocks->size) { 7475 BLOCK *tem = pool->freeBlocks->next; 7476 pool->freeBlocks->next = pool->blocks; 7477 pool->blocks = pool->freeBlocks; 7478 pool->freeBlocks = tem; 7479 memcpy(pool->blocks->s, pool->start, 7480 (pool->end - pool->start) * sizeof(XML_Char)); 7481 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 7482 pool->start = pool->blocks->s; 7483 pool->end = pool->start + pool->blocks->size; 7484 return XML_TRUE; 7485 } 7486 } 7487 if (pool->blocks && pool->start == pool->blocks->s) { 7488 BLOCK *temp; 7489 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 7490 size_t bytesToAllocate; 7491 7492 /* NOTE: Needs to be calculated prior to calling `realloc` 7493 to avoid dangling pointers: */ 7494 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 7495 7496 if (blockSize < 0) { 7497 /* This condition traps a situation where either more than 7498 * INT_MAX/2 bytes have already been allocated. This isn't 7499 * readily testable, since it is unlikely that an average 7500 * machine will have that much memory, so we exclude it from the 7501 * coverage statistics. 
7502 */ 7503 return XML_FALSE; /* LCOV_EXCL_LINE */ 7504 } 7505 7506 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7507 if (bytesToAllocate == 0) 7508 return XML_FALSE; 7509 7510 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, 7511 (unsigned)bytesToAllocate); 7512 if (temp == NULL) 7513 return XML_FALSE; 7514 pool->blocks = temp; 7515 pool->blocks->size = blockSize; 7516 pool->ptr = pool->blocks->s + offsetInsideBlock; 7517 pool->start = pool->blocks->s; 7518 pool->end = pool->start + blockSize; 7519 } else { 7520 BLOCK *tem; 7521 int blockSize = (int)(pool->end - pool->start); 7522 size_t bytesToAllocate; 7523 7524 if (blockSize < 0) { 7525 /* This condition traps a situation where either more than 7526 * INT_MAX bytes have already been allocated (which is prevented 7527 * by various pieces of program logic, not least this one, never 7528 * mind the unlikelihood of actually having that much memory) or 7529 * the pool control fields have been corrupted (which could 7530 * conceivably happen in an extremely buggy user handler 7531 * function). Either way it isn't readily testable, so we 7532 * exclude it from the coverage statistics. 7533 */ 7534 return XML_FALSE; /* LCOV_EXCL_LINE */ 7535 } 7536 7537 if (blockSize < INIT_BLOCK_SIZE) 7538 blockSize = INIT_BLOCK_SIZE; 7539 else { 7540 /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 7541 if ((int)((unsigned)blockSize * 2U) < 0) { 7542 return XML_FALSE; 7543 } 7544 blockSize *= 2; 7545 } 7546 7547 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7548 if (bytesToAllocate == 0) 7549 return XML_FALSE; 7550 7551 tem = pool->mem->malloc_fcn(bytesToAllocate); 7552 if (! 
tem) 7553 return XML_FALSE; 7554 tem->size = blockSize; 7555 tem->next = pool->blocks; 7556 pool->blocks = tem; 7557 if (pool->ptr != pool->start) 7558 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 7559 pool->ptr = tem->s + (pool->ptr - pool->start); 7560 pool->start = tem->s; 7561 pool->end = tem->s + blockSize; 7562 } 7563 return XML_TRUE; 7564 } 7565 7566 static int FASTCALL 7567 nextScaffoldPart(XML_Parser parser) { 7568 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7569 CONTENT_SCAFFOLD *me; 7570 int next; 7571 7572 if (! dtd->scaffIndex) { 7573 /* Detect and prevent integer overflow. 7574 * The preprocessor guard addresses the "always false" warning 7575 * from -Wtype-limits on platforms where 7576 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 7577 #if UINT_MAX >= SIZE_MAX 7578 if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { 7579 return -1; 7580 } 7581 #endif 7582 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); 7583 if (! dtd->scaffIndex) 7584 return -1; 7585 dtd->scaffIndex[0] = 0; 7586 } 7587 7588 if (dtd->scaffCount >= dtd->scaffSize) { 7589 CONTENT_SCAFFOLD *temp; 7590 if (dtd->scaffold) { 7591 /* Detect and prevent integer overflow */ 7592 if (dtd->scaffSize > UINT_MAX / 2u) { 7593 return -1; 7594 } 7595 /* Detect and prevent integer overflow. 7596 * The preprocessor guard addresses the "always false" warning 7597 * from -Wtype-limits on platforms where 7598 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 7599 #if UINT_MAX >= SIZE_MAX 7600 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { 7601 return -1; 7602 } 7603 #endif 7604 7605 temp = (CONTENT_SCAFFOLD *)REALLOC( 7606 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); 7607 if (temp == NULL) 7608 return -1; 7609 dtd->scaffSize *= 2; 7610 } else { 7611 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS 7612 * sizeof(CONTENT_SCAFFOLD)); 7613 if (temp == NULL) 7614 return -1; 7615 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; 7616 } 7617 dtd->scaffold = temp; 7618 } 7619 next = dtd->scaffCount++; 7620 me = &dtd->scaffold[next]; 7621 if (dtd->scaffLevel) { 7622 CONTENT_SCAFFOLD *parent 7623 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; 7624 if (parent->lastchild) { 7625 dtd->scaffold[parent->lastchild].nextsib = next; 7626 } 7627 if (! parent->childcnt) 7628 parent->firstchild = next; 7629 parent->lastchild = next; 7630 parent->childcnt++; 7631 } 7632 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; 7633 return next; 7634 } 7635 7636 static XML_Content * 7637 build_model(XML_Parser parser) { 7638 /* Function build_model transforms the existing parser->m_dtd->scaffold 7639 * array of CONTENT_SCAFFOLD tree nodes into a new array of 7640 * XML_Content tree nodes followed by a gapless list of zero-terminated 7641 * strings. */ 7642 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7643 XML_Content *ret; 7644 XML_Char *str; /* the current string writing location */ 7645 7646 /* Detect and prevent integer overflow. 7647 * The preprocessor guard addresses the "always false" warning 7648 * from -Wtype-limits on platforms where 7649 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 7650 #if UINT_MAX >= SIZE_MAX 7651 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { 7652 return NULL; 7653 } 7654 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { 7655 return NULL; 7656 } 7657 #endif 7658 if (dtd->scaffCount * sizeof(XML_Content) 7659 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { 7660 return NULL; 7661 } 7662 7663 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) 7664 + (dtd->contentStringLen * sizeof(XML_Char))); 7665 7666 ret = (XML_Content *)MALLOC(parser, allocsize); 7667 if (! ret) 7668 return NULL; 7669 7670 /* What follows is an iterative implementation (of what was previously done 7671 * recursively in a dedicated function called "build_node". The old recursive 7672 * build_node could be forced into stack exhaustion from input as small as a 7673 * few megabyte, and so that was a security issue. Hence, a function call 7674 * stack is avoided now by resolving recursion.) 7675 * 7676 * The iterative approach works as follows: 7677 * 7678 * - We have two writing pointers, both walking up the result array; one does 7679 * the work, the other creates "jobs" for its colleague to do, and leads 7680 * the way: 7681 * 7682 * - The faster one, pointer jobDest, always leads and writes "what job 7683 * to do" by the other, once they reach that place in the 7684 * array: leader "jobDest" stores the source node array index (relative 7685 * to array dtd->scaffold) in field "numchildren". 7686 * 7687 * - The slower one, pointer dest, looks at the value stored in the 7688 * "numchildren" field (which actually holds a source node array index 7689 * at that time) and puts the real data from dtd->scaffold in. 7690 * 7691 * - Before the loop starts, jobDest writes source array index 0 7692 * (where the root node is located) so that dest will have something to do 7693 * when it starts operation. 7694 * 7695 * - Whenever nodes with children are encountered, jobDest appends 7696 * them as new jobs, in order. 
As a result, tree node siblings are 7697 * adjacent in the resulting array, for example: 7698 * 7699 * [0] root, has two children 7700 * [1] first child of 0, has three children 7701 * [3] first child of 1, does not have children 7702 * [4] second child of 1, does not have children 7703 * [5] third child of 1, does not have children 7704 * [2] second child of 0, does not have children 7705 * 7706 * Or (the same data) presented in flat array view: 7707 * 7708 * [0] root, has two children 7709 * 7710 * [1] first child of 0, has three children 7711 * [2] second child of 0, does not have children 7712 * 7713 * [3] first child of 1, does not have children 7714 * [4] second child of 1, does not have children 7715 * [5] third child of 1, does not have children 7716 * 7717 * - The algorithm repeats until all target array indices have been processed. 7718 */ 7719 XML_Content *dest = ret; /* tree node writing location, moves upwards */ 7720 XML_Content *const destLimit = &ret[dtd->scaffCount]; 7721 XML_Content *jobDest = ret; /* next free writing location in target array */ 7722 str = (XML_Char *)&ret[dtd->scaffCount]; 7723 7724 /* Add the starting job, the root node (index 0) of the source tree */ 7725 (jobDest++)->numchildren = 0; 7726 7727 for (; dest < destLimit; dest++) { 7728 /* Retrieve source tree array index from job storage */ 7729 const int src_node = (int)dest->numchildren; 7730 7731 /* Convert item */ 7732 dest->type = dtd->scaffold[src_node].type; 7733 dest->quant = dtd->scaffold[src_node].quant; 7734 if (dest->type == XML_CTYPE_NAME) { 7735 const XML_Char *src; 7736 dest->name = str; 7737 src = dtd->scaffold[src_node].name; 7738 for (;;) { 7739 *str++ = *src; 7740 if (! 
*src) 7741 break; 7742 src++; 7743 } 7744 dest->numchildren = 0; 7745 dest->children = NULL; 7746 } else { 7747 unsigned int i; 7748 int cn; 7749 dest->name = NULL; 7750 dest->numchildren = dtd->scaffold[src_node].childcnt; 7751 dest->children = jobDest; 7752 7753 /* Append scaffold indices of children to array */ 7754 for (i = 0, cn = dtd->scaffold[src_node].firstchild; 7755 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) 7756 (jobDest++)->numchildren = (unsigned int)cn; 7757 } 7758 } 7759 7760 return ret; 7761 } 7762 7763 static ELEMENT_TYPE * 7764 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, 7765 const char *end) { 7766 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7767 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); 7768 ELEMENT_TYPE *ret; 7769 7770 if (! name) 7771 return NULL; 7772 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 7773 sizeof(ELEMENT_TYPE)); 7774 if (! ret) 7775 return NULL; 7776 if (ret->name != name) 7777 poolDiscard(&dtd->pool); 7778 else { 7779 poolFinish(&dtd->pool); 7780 if (! 
setElementTypePrefix(parser, ret)) 7781 return NULL; 7782 } 7783 return ret; 7784 } 7785 7786 static XML_Char * 7787 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { 7788 size_t charsRequired = 0; 7789 XML_Char *result; 7790 7791 /* First determine how long the string is */ 7792 while (s[charsRequired] != 0) { 7793 charsRequired++; 7794 } 7795 /* Include the terminator */ 7796 charsRequired++; 7797 7798 /* Now allocate space for the copy */ 7799 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); 7800 if (result == NULL) 7801 return NULL; 7802 /* Copy the original into place */ 7803 memcpy(result, s, charsRequired * sizeof(XML_Char)); 7804 return result; 7805 } 7806 7807 #if XML_GE == 1 7808 7809 static float 7810 accountingGetCurrentAmplification(XML_Parser rootParser) { 7811 // 1.........1.........12 => 22 7812 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1; 7813 const XmlBigCount countBytesOutput 7814 = rootParser->m_accounting.countBytesDirect 7815 + rootParser->m_accounting.countBytesIndirect; 7816 const float amplificationFactor 7817 = rootParser->m_accounting.countBytesDirect 7818 ? (countBytesOutput 7819 / (float)(rootParser->m_accounting.countBytesDirect)) 7820 : ((lenOfShortestInclude 7821 + rootParser->m_accounting.countBytesIndirect) 7822 / (float)lenOfShortestInclude); 7823 assert(! rootParser->m_parentParser); 7824 return amplificationFactor; 7825 } 7826 7827 static void 7828 accountingReportStats(XML_Parser originParser, const char *epilog) { 7829 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7830 assert(! 
rootParser->m_parentParser); 7831 7832 if (rootParser->m_accounting.debugLevel == 0u) { 7833 return; 7834 } 7835 7836 const float amplificationFactor 7837 = accountingGetCurrentAmplification(rootParser); 7838 fprintf(stderr, 7839 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( 7840 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 7841 (void *)rootParser, rootParser->m_accounting.countBytesDirect, 7842 rootParser->m_accounting.countBytesIndirect, 7843 (double)amplificationFactor, epilog); 7844 } 7845 7846 static void 7847 accountingOnAbort(XML_Parser originParser) { 7848 accountingReportStats(originParser, " ABORTING\n"); 7849 } 7850 7851 static void 7852 accountingReportDiff(XML_Parser rootParser, 7853 unsigned int levelsAwayFromRootParser, const char *before, 7854 const char *after, ptrdiff_t bytesMore, int source_line, 7855 enum XML_Account account) { 7856 assert(! rootParser->m_parentParser); 7857 7858 fprintf(stderr, 7859 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", 7860 bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", 7861 levelsAwayFromRootParser, source_line, 10, ""); 7862 7863 const char ellipis[] = "[..]"; 7864 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 7865 const unsigned int contextLength = 10; 7866 7867 /* Note: Performance is of no concern here */ 7868 const char *walker = before; 7869 if ((rootParser->m_accounting.debugLevel >= 3u) 7870 || (after - before) 7871 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 7872 for (; walker < after; walker++) { 7873 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7874 } 7875 } else { 7876 for (; walker < before + contextLength; walker++) { 7877 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7878 } 7879 fprintf(stderr, ellipis); 7880 walker = after - contextLength; 7881 for (; walker < after; walker++) { 7882 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7883 } 7884 } 7885 fprintf(stderr, "\"\n"); 7886 } 7887 7888 static XML_Bool 7889 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, 7890 const char *after, int source_line, 7891 enum XML_Account account) { 7892 /* Note: We need to check the token type *first* to be sure that 7893 * we can even access variable <after>, safely. 7894 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ 7895 switch (tok) { 7896 case XML_TOK_INVALID: 7897 case XML_TOK_PARTIAL: 7898 case XML_TOK_PARTIAL_CHAR: 7899 case XML_TOK_NONE: 7900 return XML_TRUE; 7901 } 7902 7903 if (account == XML_ACCOUNT_NONE) 7904 return XML_TRUE; /* because these bytes have been accounted for, already */ 7905 7906 unsigned int levelsAwayFromRootParser; 7907 const XML_Parser rootParser 7908 = getRootParserOf(originParser, &levelsAwayFromRootParser); 7909 assert(! 
rootParser->m_parentParser); 7910 7911 const int isDirect 7912 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 7913 const ptrdiff_t bytesMore = after - before; 7914 7915 XmlBigCount *const additionTarget 7916 = isDirect ? &rootParser->m_accounting.countBytesDirect 7917 : &rootParser->m_accounting.countBytesIndirect; 7918 7919 /* Detect and avoid integer overflow */ 7920 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) 7921 return XML_FALSE; 7922 *additionTarget += bytesMore; 7923 7924 const XmlBigCount countBytesOutput 7925 = rootParser->m_accounting.countBytesDirect 7926 + rootParser->m_accounting.countBytesIndirect; 7927 const float amplificationFactor 7928 = accountingGetCurrentAmplification(rootParser); 7929 const XML_Bool tolerated 7930 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) 7931 || (amplificationFactor 7932 <= rootParser->m_accounting.maximumAmplificationFactor); 7933 7934 if (rootParser->m_accounting.debugLevel >= 2u) { 7935 accountingReportStats(rootParser, ""); 7936 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, 7937 bytesMore, source_line, account); 7938 } 7939 7940 return tolerated; 7941 } 7942 7943 unsigned long long 7944 testingAccountingGetCountBytesDirect(XML_Parser parser) { 7945 if (! parser) 7946 return 0; 7947 return parser->m_accounting.countBytesDirect; 7948 } 7949 7950 unsigned long long 7951 testingAccountingGetCountBytesIndirect(XML_Parser parser) { 7952 if (! parser) 7953 return 0; 7954 return parser->m_accounting.countBytesIndirect; 7955 } 7956 7957 static void 7958 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, 7959 const char *action, int sourceLine) { 7960 assert(! 
rootParser->m_parentParser); 7961 if (rootParser->m_entity_stats.debugLevel == 0u) 7962 return; 7963 7964 # if defined(XML_UNICODE) 7965 const char *const entityName = "[..]"; 7966 # else 7967 const char *const entityName = entity->name; 7968 # endif 7969 7970 fprintf( 7971 stderr, 7972 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", 7973 (void *)rootParser, rootParser->m_entity_stats.countEverOpened, 7974 rootParser->m_entity_stats.currentDepth, 7975 rootParser->m_entity_stats.maximumDepthSeen, 7976 (rootParser->m_entity_stats.currentDepth - 1) * 2, "", 7977 entity->is_param ? "%" : "&", entityName, action, entity->textLen, 7978 sourceLine); 7979 } 7980 7981 static void 7982 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { 7983 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7984 assert(! rootParser->m_parentParser); 7985 7986 rootParser->m_entity_stats.countEverOpened++; 7987 rootParser->m_entity_stats.currentDepth++; 7988 if (rootParser->m_entity_stats.currentDepth 7989 > rootParser->m_entity_stats.maximumDepthSeen) { 7990 rootParser->m_entity_stats.maximumDepthSeen++; 7991 } 7992 7993 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 7994 } 7995 7996 static void 7997 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { 7998 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7999 assert(! rootParser->m_parentParser); 8000 8001 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 8002 rootParser->m_entity_stats.currentDepth--; 8003 } 8004 8005 static XML_Parser 8006 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { 8007 XML_Parser rootParser = parser; 8008 unsigned int stepsTakenUpwards = 0; 8009 while (rootParser->m_parentParser) { 8010 rootParser = rootParser->m_parentParser; 8011 stepsTakenUpwards++; 8012 } 8013 assert(! 
rootParser->m_parentParser); 8014 if (outLevelDiff != NULL) { 8015 *outLevelDiff = stepsTakenUpwards; 8016 } 8017 return rootParser; 8018 } 8019 8020 const char * 8021 unsignedCharToPrintable(unsigned char c) { 8022 switch (c) { 8023 case 0: 8024 return "\\0"; 8025 case 1: 8026 return "\\x1"; 8027 case 2: 8028 return "\\x2"; 8029 case 3: 8030 return "\\x3"; 8031 case 4: 8032 return "\\x4"; 8033 case 5: 8034 return "\\x5"; 8035 case 6: 8036 return "\\x6"; 8037 case 7: 8038 return "\\x7"; 8039 case 8: 8040 return "\\x8"; 8041 case 9: 8042 return "\\t"; 8043 case 10: 8044 return "\\n"; 8045 case 11: 8046 return "\\xB"; 8047 case 12: 8048 return "\\xC"; 8049 case 13: 8050 return "\\r"; 8051 case 14: 8052 return "\\xE"; 8053 case 15: 8054 return "\\xF"; 8055 case 16: 8056 return "\\x10"; 8057 case 17: 8058 return "\\x11"; 8059 case 18: 8060 return "\\x12"; 8061 case 19: 8062 return "\\x13"; 8063 case 20: 8064 return "\\x14"; 8065 case 21: 8066 return "\\x15"; 8067 case 22: 8068 return "\\x16"; 8069 case 23: 8070 return "\\x17"; 8071 case 24: 8072 return "\\x18"; 8073 case 25: 8074 return "\\x19"; 8075 case 26: 8076 return "\\x1A"; 8077 case 27: 8078 return "\\x1B"; 8079 case 28: 8080 return "\\x1C"; 8081 case 29: 8082 return "\\x1D"; 8083 case 30: 8084 return "\\x1E"; 8085 case 31: 8086 return "\\x1F"; 8087 case 32: 8088 return " "; 8089 case 33: 8090 return "!"; 8091 case 34: 8092 return "\\\""; 8093 case 35: 8094 return "#"; 8095 case 36: 8096 return "$"; 8097 case 37: 8098 return "%"; 8099 case 38: 8100 return "&"; 8101 case 39: 8102 return "'"; 8103 case 40: 8104 return "("; 8105 case 41: 8106 return ")"; 8107 case 42: 8108 return "*"; 8109 case 43: 8110 return "+"; 8111 case 44: 8112 return ","; 8113 case 45: 8114 return "-"; 8115 case 46: 8116 return "."; 8117 case 47: 8118 return "/"; 8119 case 48: 8120 return "0"; 8121 case 49: 8122 return "1"; 8123 case 50: 8124 return "2"; 8125 case 51: 8126 return "3"; 8127 case 52: 8128 return "4"; 8129 case 53: 8130 return 
"5"; 8131 case 54: 8132 return "6"; 8133 case 55: 8134 return "7"; 8135 case 56: 8136 return "8"; 8137 case 57: 8138 return "9"; 8139 case 58: 8140 return ":"; 8141 case 59: 8142 return ";"; 8143 case 60: 8144 return "<"; 8145 case 61: 8146 return "="; 8147 case 62: 8148 return ">"; 8149 case 63: 8150 return "?"; 8151 case 64: 8152 return "@"; 8153 case 65: 8154 return "A"; 8155 case 66: 8156 return "B"; 8157 case 67: 8158 return "C"; 8159 case 68: 8160 return "D"; 8161 case 69: 8162 return "E"; 8163 case 70: 8164 return "F"; 8165 case 71: 8166 return "G"; 8167 case 72: 8168 return "H"; 8169 case 73: 8170 return "I"; 8171 case 74: 8172 return "J"; 8173 case 75: 8174 return "K"; 8175 case 76: 8176 return "L"; 8177 case 77: 8178 return "M"; 8179 case 78: 8180 return "N"; 8181 case 79: 8182 return "O"; 8183 case 80: 8184 return "P"; 8185 case 81: 8186 return "Q"; 8187 case 82: 8188 return "R"; 8189 case 83: 8190 return "S"; 8191 case 84: 8192 return "T"; 8193 case 85: 8194 return "U"; 8195 case 86: 8196 return "V"; 8197 case 87: 8198 return "W"; 8199 case 88: 8200 return "X"; 8201 case 89: 8202 return "Y"; 8203 case 90: 8204 return "Z"; 8205 case 91: 8206 return "["; 8207 case 92: 8208 return "\\\\"; 8209 case 93: 8210 return "]"; 8211 case 94: 8212 return "^"; 8213 case 95: 8214 return "_"; 8215 case 96: 8216 return "`"; 8217 case 97: 8218 return "a"; 8219 case 98: 8220 return "b"; 8221 case 99: 8222 return "c"; 8223 case 100: 8224 return "d"; 8225 case 101: 8226 return "e"; 8227 case 102: 8228 return "f"; 8229 case 103: 8230 return "g"; 8231 case 104: 8232 return "h"; 8233 case 105: 8234 return "i"; 8235 case 106: 8236 return "j"; 8237 case 107: 8238 return "k"; 8239 case 108: 8240 return "l"; 8241 case 109: 8242 return "m"; 8243 case 110: 8244 return "n"; 8245 case 111: 8246 return "o"; 8247 case 112: 8248 return "p"; 8249 case 113: 8250 return "q"; 8251 case 114: 8252 return "r"; 8253 case 115: 8254 return "s"; 8255 case 116: 8256 return "t"; 8257 case 117: 8258 
return "u"; 8259 case 118: 8260 return "v"; 8261 case 119: 8262 return "w"; 8263 case 120: 8264 return "x"; 8265 case 121: 8266 return "y"; 8267 case 122: 8268 return "z"; 8269 case 123: 8270 return "{"; 8271 case 124: 8272 return "|"; 8273 case 125: 8274 return "}"; 8275 case 126: 8276 return "~"; 8277 case 127: 8278 return "\\x7F"; 8279 case 128: 8280 return "\\x80"; 8281 case 129: 8282 return "\\x81"; 8283 case 130: 8284 return "\\x82"; 8285 case 131: 8286 return "\\x83"; 8287 case 132: 8288 return "\\x84"; 8289 case 133: 8290 return "\\x85"; 8291 case 134: 8292 return "\\x86"; 8293 case 135: 8294 return "\\x87"; 8295 case 136: 8296 return "\\x88"; 8297 case 137: 8298 return "\\x89"; 8299 case 138: 8300 return "\\x8A"; 8301 case 139: 8302 return "\\x8B"; 8303 case 140: 8304 return "\\x8C"; 8305 case 141: 8306 return "\\x8D"; 8307 case 142: 8308 return "\\x8E"; 8309 case 143: 8310 return "\\x8F"; 8311 case 144: 8312 return "\\x90"; 8313 case 145: 8314 return "\\x91"; 8315 case 146: 8316 return "\\x92"; 8317 case 147: 8318 return "\\x93"; 8319 case 148: 8320 return "\\x94"; 8321 case 149: 8322 return "\\x95"; 8323 case 150: 8324 return "\\x96"; 8325 case 151: 8326 return "\\x97"; 8327 case 152: 8328 return "\\x98"; 8329 case 153: 8330 return "\\x99"; 8331 case 154: 8332 return "\\x9A"; 8333 case 155: 8334 return "\\x9B"; 8335 case 156: 8336 return "\\x9C"; 8337 case 157: 8338 return "\\x9D"; 8339 case 158: 8340 return "\\x9E"; 8341 case 159: 8342 return "\\x9F"; 8343 case 160: 8344 return "\\xA0"; 8345 case 161: 8346 return "\\xA1"; 8347 case 162: 8348 return "\\xA2"; 8349 case 163: 8350 return "\\xA3"; 8351 case 164: 8352 return "\\xA4"; 8353 case 165: 8354 return "\\xA5"; 8355 case 166: 8356 return "\\xA6"; 8357 case 167: 8358 return "\\xA7"; 8359 case 168: 8360 return "\\xA8"; 8361 case 169: 8362 return "\\xA9"; 8363 case 170: 8364 return "\\xAA"; 8365 case 171: 8366 return "\\xAB"; 8367 case 172: 8368 return "\\xAC"; 8369 case 173: 8370 return "\\xAD"; 8371 
case 174: 8372 return "\\xAE"; 8373 case 175: 8374 return "\\xAF"; 8375 case 176: 8376 return "\\xB0"; 8377 case 177: 8378 return "\\xB1"; 8379 case 178: 8380 return "\\xB2"; 8381 case 179: 8382 return "\\xB3"; 8383 case 180: 8384 return "\\xB4"; 8385 case 181: 8386 return "\\xB5"; 8387 case 182: 8388 return "\\xB6"; 8389 case 183: 8390 return "\\xB7"; 8391 case 184: 8392 return "\\xB8"; 8393 case 185: 8394 return "\\xB9"; 8395 case 186: 8396 return "\\xBA"; 8397 case 187: 8398 return "\\xBB"; 8399 case 188: 8400 return "\\xBC"; 8401 case 189: 8402 return "\\xBD"; 8403 case 190: 8404 return "\\xBE"; 8405 case 191: 8406 return "\\xBF"; 8407 case 192: 8408 return "\\xC0"; 8409 case 193: 8410 return "\\xC1"; 8411 case 194: 8412 return "\\xC2"; 8413 case 195: 8414 return "\\xC3"; 8415 case 196: 8416 return "\\xC4"; 8417 case 197: 8418 return "\\xC5"; 8419 case 198: 8420 return "\\xC6"; 8421 case 199: 8422 return "\\xC7"; 8423 case 200: 8424 return "\\xC8"; 8425 case 201: 8426 return "\\xC9"; 8427 case 202: 8428 return "\\xCA"; 8429 case 203: 8430 return "\\xCB"; 8431 case 204: 8432 return "\\xCC"; 8433 case 205: 8434 return "\\xCD"; 8435 case 206: 8436 return "\\xCE"; 8437 case 207: 8438 return "\\xCF"; 8439 case 208: 8440 return "\\xD0"; 8441 case 209: 8442 return "\\xD1"; 8443 case 210: 8444 return "\\xD2"; 8445 case 211: 8446 return "\\xD3"; 8447 case 212: 8448 return "\\xD4"; 8449 case 213: 8450 return "\\xD5"; 8451 case 214: 8452 return "\\xD6"; 8453 case 215: 8454 return "\\xD7"; 8455 case 216: 8456 return "\\xD8"; 8457 case 217: 8458 return "\\xD9"; 8459 case 218: 8460 return "\\xDA"; 8461 case 219: 8462 return "\\xDB"; 8463 case 220: 8464 return "\\xDC"; 8465 case 221: 8466 return "\\xDD"; 8467 case 222: 8468 return "\\xDE"; 8469 case 223: 8470 return "\\xDF"; 8471 case 224: 8472 return "\\xE0"; 8473 case 225: 8474 return "\\xE1"; 8475 case 226: 8476 return "\\xE2"; 8477 case 227: 8478 return "\\xE3"; 8479 case 228: 8480 return "\\xE4"; 8481 case 229: 8482 
return "\\xE5"; 8483 case 230: 8484 return "\\xE6"; 8485 case 231: 8486 return "\\xE7"; 8487 case 232: 8488 return "\\xE8"; 8489 case 233: 8490 return "\\xE9"; 8491 case 234: 8492 return "\\xEA"; 8493 case 235: 8494 return "\\xEB"; 8495 case 236: 8496 return "\\xEC"; 8497 case 237: 8498 return "\\xED"; 8499 case 238: 8500 return "\\xEE"; 8501 case 239: 8502 return "\\xEF"; 8503 case 240: 8504 return "\\xF0"; 8505 case 241: 8506 return "\\xF1"; 8507 case 242: 8508 return "\\xF2"; 8509 case 243: 8510 return "\\xF3"; 8511 case 244: 8512 return "\\xF4"; 8513 case 245: 8514 return "\\xF5"; 8515 case 246: 8516 return "\\xF6"; 8517 case 247: 8518 return "\\xF7"; 8519 case 248: 8520 return "\\xF8"; 8521 case 249: 8522 return "\\xF9"; 8523 case 250: 8524 return "\\xFA"; 8525 case 251: 8526 return "\\xFB"; 8527 case 252: 8528 return "\\xFC"; 8529 case 253: 8530 return "\\xFD"; 8531 case 254: 8532 return "\\xFE"; 8533 case 255: 8534 return "\\xFF"; 8535 default: 8536 assert(0); /* never gets here */ 8537 return "dead code"; 8538 } 8539 assert(0); /* never gets here */ 8540 } 8541 8542 #endif /* XML_GE == 1 */ 8543 8544 static unsigned long 8545 getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { 8546 const char *const valueOrNull = getenv(variableName); 8547 if (valueOrNull == NULL) { 8548 return defaultDebugLevel; 8549 } 8550 const char *const value = valueOrNull; 8551 8552 errno = 0; 8553 char *afterValue = NULL; 8554 unsigned long debugLevel = strtoul(value, &afterValue, 10); 8555 if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 8556 errno = 0; 8557 return defaultDebugLevel; 8558 } 8559 8560 return debugLevel; 8561 } 8562