1 /* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> 13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> 16 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com> 18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> 19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> 20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> 21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl> 22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io> 24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me> 25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com> 26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de> 27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org> 28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org> 32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> 34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> 35 Copyright (c) 2021 Donghee Na <donghee.na@python.org> 36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> 37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> 38 Copyright (c) 2022 Jann Horn 
<jannh@google.com> 39 Copyright (c) 2022 Sean McBride <sean@rogue-research.com> 40 Copyright (c) 2023 Owain Davies <owaind@bath.edu> 41 Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> 42 Licensed under the MIT license: 43 44 Permission is hereby granted, free of charge, to any person obtaining 45 a copy of this software and associated documentation files (the 46 "Software"), to deal in the Software without restriction, including 47 without limitation the rights to use, copy, modify, merge, publish, 48 distribute, sublicense, and/or sell copies of the Software, and to permit 49 persons to whom the Software is furnished to do so, subject to the 50 following conditions: 51 52 The above copyright notice and this permission notice shall be included 53 in all copies or substantial portions of the Software. 54 55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 56 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 57 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 58 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 59 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 60 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 61 USE OR OTHER DEALINGS IN THE SOFTWARE. 62 */ 63 64 #define XML_BUILDING_EXPAT 1 65 66 #include "expat_config.h" 67 68 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) 69 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) 70 #endif 71 72 #if defined(XML_DTD) && XML_GE == 0 73 # error Either undefine XML_DTD or define XML_GE to 1. 74 #endif 75 76 #if ! 
defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ 77 || (XML_CONTEXT_BYTES + 0 < 0) 78 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) 79 #endif 80 81 #if defined(HAVE_SYSCALL_GETRANDOM) 82 # if ! defined(_GNU_SOURCE) 83 # define _GNU_SOURCE 1 /* syscall prototype */ 84 # endif 85 #endif 86 87 #ifdef _WIN32 88 /* force stdlib to define rand_s() */ 89 # if ! defined(_CRT_RAND_S) 90 # define _CRT_RAND_S 91 # endif 92 #endif 93 94 #include <stdbool.h> 95 #include <stddef.h> 96 #include <string.h> /* memset(), memcpy() */ 97 #include <assert.h> 98 #include <limits.h> /* UINT_MAX */ 99 #include <stdio.h> /* fprintf */ 100 #include <stdlib.h> /* getenv, rand_s */ 101 #include <stdint.h> /* uintptr_t */ 102 #include <math.h> /* isnan */ 103 104 #ifdef _WIN32 105 # define getpid GetCurrentProcessId 106 #else 107 # include <sys/time.h> /* gettimeofday() */ 108 # include <sys/types.h> /* getpid() */ 109 # include <unistd.h> /* getpid() */ 110 # include <fcntl.h> /* O_RDONLY */ 111 # include <errno.h> 112 #endif 113 114 #ifdef _WIN32 115 # include "winconfig.h" 116 #endif 117 118 #include "ascii.h" 119 #include "expat.h" 120 #include "siphash.h" 121 122 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 123 # if defined(HAVE_GETRANDOM) 124 # include <sys/random.h> /* getrandom */ 125 # else 126 # include <unistd.h> /* syscall */ 127 # include <sys/syscall.h> /* SYS_getrandom */ 128 # endif 129 # if ! defined(GRND_NONBLOCK) 130 # define GRND_NONBLOCK 0x0001 131 # endif /* defined(GRND_NONBLOCK) */ 132 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 133 134 #if defined(HAVE_LIBBSD) \ 135 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) 136 # include <bsd/stdlib.h> 137 #endif 138 139 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) 140 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 141 #endif 142 143 #if ! 
defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ 144 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ 145 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ 146 && ! defined(XML_POOR_ENTROPY) 147 # error You do not have support for any sources of high quality entropy \ 148 enabled. For end user security, that is probably not what you want. \ 149 \ 150 Your options include: \ 151 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ 152 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ 153 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ 154 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ 155 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ 156 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ 157 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ 158 * Windows >=Vista (rand_s): _WIN32. \ 159 \ 160 If insist on not using any of these, bypass this error by defining \ 161 XML_POOR_ENTROPY; you have been warned. \ 162 \ 163 If you have reasons to patch this detection code away or need changes \ 164 to the build system, please open a bug. Thank you! 165 #endif 166 167 #ifdef XML_UNICODE 168 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 169 # define XmlConvert XmlUtf16Convert 170 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 171 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS 172 # define XmlEncode XmlUtf16Encode 173 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) 174 typedef unsigned short ICHAR; 175 #else 176 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 177 # define XmlConvert XmlUtf8Convert 178 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 179 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS 180 # define XmlEncode XmlUtf8Encode 181 # define MUST_CONVERT(enc, s) (! 
(enc)->isUtf8) 182 typedef char ICHAR; 183 #endif 184 185 #ifndef XML_NS 186 187 # define XmlInitEncodingNS XmlInitEncoding 188 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding 189 # undef XmlGetInternalEncodingNS 190 # define XmlGetInternalEncodingNS XmlGetInternalEncoding 191 # define XmlParseXmlDeclNS XmlParseXmlDecl 192 193 #endif 194 195 #ifdef XML_UNICODE 196 197 # ifdef XML_UNICODE_WCHAR_T 198 # define XML_T(x) (const wchar_t) x 199 # define XML_L(x) L##x 200 # else 201 # define XML_T(x) (const unsigned short)x 202 # define XML_L(x) x 203 # endif 204 205 #else 206 207 # define XML_T(x) x 208 # define XML_L(x) x 209 210 #endif 211 212 /* Round up n to be a multiple of sz, where sz is a power of 2. */ 213 #define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) 214 215 /* Do safe (NULL-aware) pointer arithmetic */ 216 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) 217 218 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) 219 220 #include "internal.h" 221 #include "xmltok.h" 222 #include "xmlrole.h" 223 224 typedef const XML_Char *KEY; 225 226 typedef struct { 227 KEY name; 228 } NAMED; 229 230 typedef struct { 231 NAMED **v; 232 unsigned char power; 233 size_t size; 234 size_t used; 235 const XML_Memory_Handling_Suite *mem; 236 } HASH_TABLE; 237 238 static size_t keylen(KEY s); 239 240 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); 241 242 /* For probing (after a collision) we need a step size relative prime 243 to the hash table size, which is a power of 2. We use double-hashing, 244 since we can calculate a second hash value cheaply by taking those bits 245 of the first hash value that were discarded (masked out) when the table 246 index was calculated: index = hash & mask, where mask = table->size - 1. 247 We limit the maximum step size to table->size / 4 (mask >> 2) and make 248 it odd, since odd numbers are always relative prime to a power of 2. 
249 */ 250 #define SECOND_HASH(hash, mask, power) \ 251 ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) 252 #define PROBE_STEP(hash, mask, power) \ 253 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) 254 255 typedef struct { 256 NAMED **p; 257 NAMED **end; 258 } HASH_TABLE_ITER; 259 260 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 261 #define INIT_DATA_BUF_SIZE 1024 262 #define INIT_ATTS_SIZE 16 263 #define INIT_ATTS_VERSION 0xFFFFFFFF 264 #define INIT_BLOCK_SIZE 1024 265 #define INIT_BUFFER_SIZE 1024 266 267 #define EXPAND_SPARE 24 268 269 typedef struct binding { 270 struct prefix *prefix; 271 struct binding *nextTagBinding; 272 struct binding *prevPrefixBinding; 273 const struct attribute_id *attId; 274 XML_Char *uri; 275 int uriLen; 276 int uriAlloc; 277 } BINDING; 278 279 typedef struct prefix { 280 const XML_Char *name; 281 BINDING *binding; 282 } PREFIX; 283 284 typedef struct { 285 const XML_Char *str; 286 const XML_Char *localPart; 287 const XML_Char *prefix; 288 int strLen; 289 int uriLen; 290 int prefixLen; 291 } TAG_NAME; 292 293 /* TAG represents an open element. 294 The name of the element is stored in both the document and API 295 encodings. The memory buffer 'buf' is a separately-allocated 296 memory area which stores the name. During the XML_Parse()/ 297 XMLParseBuffer() when the element is open, the memory for the 'raw' 298 version of the name (in the document encoding) is shared with the 299 document buffer. If the element is open across calls to 300 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to 301 contain the 'raw' name as well. 302 303 A parser reuses these structures, maintaining a list of allocated 304 TAG objects in a free list. 
305 */ 306 typedef struct tag { 307 struct tag *parent; /* parent of this element */ 308 const char *rawName; /* tagName in the original encoding */ 309 int rawNameLength; 310 TAG_NAME name; /* tagName in the API encoding */ 311 char *buf; /* buffer for name components */ 312 char *bufEnd; /* end of the buffer */ 313 BINDING *bindings; 314 } TAG; 315 316 typedef struct { 317 const XML_Char *name; 318 const XML_Char *textPtr; 319 int textLen; /* length in XML_Chars */ 320 int processed; /* # of processed bytes - when suspended */ 321 const XML_Char *systemId; 322 const XML_Char *base; 323 const XML_Char *publicId; 324 const XML_Char *notation; 325 XML_Bool open; 326 XML_Bool is_param; 327 XML_Bool is_internal; /* true if declared in internal subset outside PE */ 328 } ENTITY; 329 330 typedef struct { 331 enum XML_Content_Type type; 332 enum XML_Content_Quant quant; 333 const XML_Char *name; 334 int firstchild; 335 int lastchild; 336 int childcnt; 337 int nextsib; 338 } CONTENT_SCAFFOLD; 339 340 #define INIT_SCAFFOLD_ELEMENTS 32 341 342 typedef struct block { 343 struct block *next; 344 int size; 345 XML_Char s[1]; 346 } BLOCK; 347 348 typedef struct { 349 BLOCK *blocks; 350 BLOCK *freeBlocks; 351 const XML_Char *end; 352 XML_Char *ptr; 353 XML_Char *start; 354 const XML_Memory_Handling_Suite *mem; 355 } STRING_POOL; 356 357 /* The XML_Char before the name is used to determine whether 358 an attribute has been specified. 
*/ 359 typedef struct attribute_id { 360 XML_Char *name; 361 PREFIX *prefix; 362 XML_Bool maybeTokenized; 363 XML_Bool xmlns; 364 } ATTRIBUTE_ID; 365 366 typedef struct { 367 const ATTRIBUTE_ID *id; 368 XML_Bool isCdata; 369 const XML_Char *value; 370 } DEFAULT_ATTRIBUTE; 371 372 typedef struct { 373 unsigned long version; 374 unsigned long hash; 375 const XML_Char *uriName; 376 } NS_ATT; 377 378 typedef struct { 379 const XML_Char *name; 380 PREFIX *prefix; 381 const ATTRIBUTE_ID *idAtt; 382 int nDefaultAtts; 383 int allocDefaultAtts; 384 DEFAULT_ATTRIBUTE *defaultAtts; 385 } ELEMENT_TYPE; 386 387 typedef struct { 388 HASH_TABLE generalEntities; 389 HASH_TABLE elementTypes; 390 HASH_TABLE attributeIds; 391 HASH_TABLE prefixes; 392 STRING_POOL pool; 393 STRING_POOL entityValuePool; 394 /* false once a parameter entity reference has been skipped */ 395 XML_Bool keepProcessing; 396 /* true once an internal or external PE reference has been encountered; 397 this includes the reference to an external subset */ 398 XML_Bool hasParamEntityRefs; 399 XML_Bool standalone; 400 #ifdef XML_DTD 401 /* indicates if external PE has been read */ 402 XML_Bool paramEntityRead; 403 HASH_TABLE paramEntities; 404 #endif /* XML_DTD */ 405 PREFIX defaultPrefix; 406 /* === scaffolding for building content model === */ 407 XML_Bool in_eldecl; 408 CONTENT_SCAFFOLD *scaffold; 409 unsigned contentStringLen; 410 unsigned scaffSize; 411 unsigned scaffCount; 412 int scaffLevel; 413 int *scaffIndex; 414 } DTD; 415 416 typedef struct open_internal_entity { 417 const char *internalEventPtr; 418 const char *internalEventEndPtr; 419 struct open_internal_entity *next; 420 ENTITY *entity; 421 int startTagLevel; 422 XML_Bool betweenDecl; /* WFC: PE Between Declarations */ 423 } OPEN_INTERNAL_ENTITY; 424 425 enum XML_Account { 426 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ 427 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity 428 expansion */ 429 
XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ 430 }; 431 432 #if XML_GE == 1 433 typedef unsigned long long XmlBigCount; 434 typedef struct accounting { 435 XmlBigCount countBytesDirect; 436 XmlBigCount countBytesIndirect; 437 unsigned long debugLevel; 438 float maximumAmplificationFactor; // >=1.0 439 unsigned long long activationThresholdBytes; 440 } ACCOUNTING; 441 442 typedef struct entity_stats { 443 unsigned int countEverOpened; 444 unsigned int currentDepth; 445 unsigned int maximumDepthSeen; 446 unsigned long debugLevel; 447 } ENTITY_STATS; 448 #endif /* XML_GE == 1 */ 449 450 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, 451 const char *end, const char **endPtr); 452 453 static Processor prologProcessor; 454 static Processor prologInitProcessor; 455 static Processor contentProcessor; 456 static Processor cdataSectionProcessor; 457 #ifdef XML_DTD 458 static Processor ignoreSectionProcessor; 459 static Processor externalParEntProcessor; 460 static Processor externalParEntInitProcessor; 461 static Processor entityValueProcessor; 462 static Processor entityValueInitProcessor; 463 #endif /* XML_DTD */ 464 static Processor epilogProcessor; 465 static Processor errorProcessor; 466 static Processor externalEntityInitProcessor; 467 static Processor externalEntityInitProcessor2; 468 static Processor externalEntityInitProcessor3; 469 static Processor externalEntityContentProcessor; 470 static Processor internalEntityProcessor; 471 472 static enum XML_Error handleUnknownEncoding(XML_Parser parser, 473 const XML_Char *encodingName); 474 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 475 const char *s, const char *next); 476 static enum XML_Error initializeEncoding(XML_Parser parser); 477 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, 478 const char *s, const char *end, int tok, 479 const char *next, const char **nextPtr, 480 XML_Bool haveMore, XML_Bool 
allowClosingDoctype, 481 enum XML_Account account); 482 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, 483 XML_Bool betweenDecl); 484 static enum XML_Error doContent(XML_Parser parser, int startTagLevel, 485 const ENCODING *enc, const char *start, 486 const char *end, const char **endPtr, 487 XML_Bool haveMore, enum XML_Account account); 488 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, 489 const char **startPtr, const char *end, 490 const char **nextPtr, XML_Bool haveMore, 491 enum XML_Account account); 492 #ifdef XML_DTD 493 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, 494 const char **startPtr, const char *end, 495 const char **nextPtr, XML_Bool haveMore); 496 #endif /* XML_DTD */ 497 498 static void freeBindings(XML_Parser parser, BINDING *bindings); 499 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 500 const char *attStr, TAG_NAME *tagNamePtr, 501 BINDING **bindingsPtr, 502 enum XML_Account account); 503 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, 504 const ATTRIBUTE_ID *attId, const XML_Char *uri, 505 BINDING **bindingsPtr); 506 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, 507 XML_Bool isCdata, XML_Bool isId, 508 const XML_Char *value, XML_Parser parser); 509 static enum XML_Error storeAttributeValue(XML_Parser parser, 510 const ENCODING *enc, XML_Bool isCdata, 511 const char *ptr, const char *end, 512 STRING_POOL *pool, 513 enum XML_Account account); 514 static enum XML_Error appendAttributeValue(XML_Parser parser, 515 const ENCODING *enc, 516 XML_Bool isCdata, const char *ptr, 517 const char *end, STRING_POOL *pool, 518 enum XML_Account account); 519 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, 520 const char *start, const char *end); 521 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); 522 #if XML_GE == 1 523 static enum XML_Error 
storeEntityValue(XML_Parser parser, const ENCODING *enc, 524 const char *start, const char *end, 525 enum XML_Account account); 526 #else 527 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); 528 #endif 529 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 530 const char *start, const char *end); 531 static int reportComment(XML_Parser parser, const ENCODING *enc, 532 const char *start, const char *end); 533 static void reportDefault(XML_Parser parser, const ENCODING *enc, 534 const char *start, const char *end); 535 536 static const XML_Char *getContext(XML_Parser parser); 537 static XML_Bool setContext(XML_Parser parser, const XML_Char *context); 538 539 static void FASTCALL normalizePublicId(XML_Char *s); 540 541 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); 542 /* do not call if m_parentParser != NULL */ 543 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); 544 static void dtdDestroy(DTD *p, XML_Bool isDocEntity, 545 const XML_Memory_Handling_Suite *ms); 546 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 547 const XML_Memory_Handling_Suite *ms); 548 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 549 STRING_POOL *newPool, const HASH_TABLE *oldTable); 550 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, 551 size_t createSize); 552 static void FASTCALL hashTableInit(HASH_TABLE *table, 553 const XML_Memory_Handling_Suite *ms); 554 static void FASTCALL hashTableClear(HASH_TABLE *table); 555 static void FASTCALL hashTableDestroy(HASH_TABLE *table); 556 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, 557 const HASH_TABLE *table); 558 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); 559 560 static void FASTCALL poolInit(STRING_POOL *pool, 561 const XML_Memory_Handling_Suite *ms); 562 static void FASTCALL poolClear(STRING_POOL *pool); 563 static void FASTCALL poolDestroy(STRING_POOL 
*pool); 564 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 565 const char *ptr, const char *end); 566 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 567 const char *ptr, const char *end); 568 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); 569 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, 570 const XML_Char *s); 571 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, 572 int n); 573 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, 574 const XML_Char *s); 575 576 static int FASTCALL nextScaffoldPart(XML_Parser parser); 577 static XML_Content *build_model(XML_Parser parser); 578 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, 579 const char *ptr, const char *end); 580 581 static XML_Char *copyString(const XML_Char *s, 582 const XML_Memory_Handling_Suite *memsuite); 583 584 static unsigned long generate_hash_secret_salt(XML_Parser parser); 585 static XML_Bool startParsing(XML_Parser parser); 586 587 static XML_Parser parserCreate(const XML_Char *encodingName, 588 const XML_Memory_Handling_Suite *memsuite, 589 const XML_Char *nameSep, DTD *dtd); 590 591 static void parserInit(XML_Parser parser, const XML_Char *encodingName); 592 593 #if XML_GE == 1 594 static float accountingGetCurrentAmplification(XML_Parser rootParser); 595 static void accountingReportStats(XML_Parser originParser, const char *epilog); 596 static void accountingOnAbort(XML_Parser originParser); 597 static void accountingReportDiff(XML_Parser rootParser, 598 unsigned int levelsAwayFromRootParser, 599 const char *before, const char *after, 600 ptrdiff_t bytesMore, int source_line, 601 enum XML_Account account); 602 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, 603 const char *before, const char *after, 604 int source_line, 605 enum XML_Account account); 606 607 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, 
608 const char *action, int sourceLine); 609 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, 610 int sourceLine); 611 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, 612 int sourceLine); 613 614 static XML_Parser getRootParserOf(XML_Parser parser, 615 unsigned int *outLevelDiff); 616 #endif /* XML_GE == 1 */ 617 618 static unsigned long getDebugLevel(const char *variableName, 619 unsigned long defaultDebugLevel); 620 621 #define poolStart(pool) ((pool)->start) 622 #define poolLength(pool) ((pool)->ptr - (pool)->start) 623 #define poolChop(pool) ((void)--(pool->ptr)) 624 #define poolLastChar(pool) (((pool)->ptr)[-1]) 625 #define poolDiscard(pool) ((pool)->ptr = (pool)->start) 626 #define poolFinish(pool) ((pool)->start = (pool)->ptr) 627 #define poolAppendChar(pool, c) \ 628 (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ 629 ? 0 \ 630 : ((*((pool)->ptr)++ = c), 1)) 631 632 XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c 633 unsigned int g_parseAttempts = 0; // used for testing only 634 635 struct XML_ParserStruct { 636 /* The first member must be m_userData so that the XML_GetUserData 637 macro works. 
*/ 638 void *m_userData; 639 void *m_handlerArg; 640 641 // How the four parse buffer pointers below relate in time and space: 642 // 643 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim 644 // | | | | 645 // <--parsed-->| | | 646 // <---parsing--->| | 647 // <--unoccupied-->| 648 // <---------total-malloced/realloced-------->| 649 650 char *m_buffer; // malloc/realloc base pointer of parse buffer 651 const XML_Memory_Handling_Suite m_mem; 652 const char *m_bufferPtr; // first character to be parsed 653 char *m_bufferEnd; // past last character to be parsed 654 const char *m_bufferLim; // allocated end of m_buffer 655 656 XML_Index m_parseEndByteIndex; 657 const char *m_parseEndPtr; 658 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ 659 XML_Bool m_reparseDeferralEnabled; 660 int m_lastBufferRequestSize; 661 XML_Char *m_dataBuf; 662 XML_Char *m_dataBufEnd; 663 XML_StartElementHandler m_startElementHandler; 664 XML_EndElementHandler m_endElementHandler; 665 XML_CharacterDataHandler m_characterDataHandler; 666 XML_ProcessingInstructionHandler m_processingInstructionHandler; 667 XML_CommentHandler m_commentHandler; 668 XML_StartCdataSectionHandler m_startCdataSectionHandler; 669 XML_EndCdataSectionHandler m_endCdataSectionHandler; 670 XML_DefaultHandler m_defaultHandler; 671 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; 672 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; 673 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; 674 XML_NotationDeclHandler m_notationDeclHandler; 675 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; 676 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; 677 XML_NotStandaloneHandler m_notStandaloneHandler; 678 XML_ExternalEntityRefHandler m_externalEntityRefHandler; 679 XML_Parser m_externalEntityRefHandlerArg; 680 XML_SkippedEntityHandler m_skippedEntityHandler; 681 XML_UnknownEncodingHandler m_unknownEncodingHandler; 682 XML_ElementDeclHandler m_elementDeclHandler; 683 
XML_AttlistDeclHandler m_attlistDeclHandler; 684 XML_EntityDeclHandler m_entityDeclHandler; 685 XML_XmlDeclHandler m_xmlDeclHandler; 686 const ENCODING *m_encoding; 687 INIT_ENCODING m_initEncoding; 688 const ENCODING *m_internalEncoding; 689 const XML_Char *m_protocolEncodingName; 690 XML_Bool m_ns; 691 XML_Bool m_ns_triplets; 692 void *m_unknownEncodingMem; 693 void *m_unknownEncodingData; 694 void *m_unknownEncodingHandlerData; 695 void(XMLCALL *m_unknownEncodingRelease)(void *); 696 PROLOG_STATE m_prologState; 697 Processor *m_processor; 698 enum XML_Error m_errorCode; 699 const char *m_eventPtr; 700 const char *m_eventEndPtr; 701 const char *m_positionPtr; 702 OPEN_INTERNAL_ENTITY *m_openInternalEntities; 703 OPEN_INTERNAL_ENTITY *m_freeInternalEntities; 704 XML_Bool m_defaultExpandInternalEntities; 705 int m_tagLevel; 706 ENTITY *m_declEntity; 707 const XML_Char *m_doctypeName; 708 const XML_Char *m_doctypeSysid; 709 const XML_Char *m_doctypePubid; 710 const XML_Char *m_declAttributeType; 711 const XML_Char *m_declNotationName; 712 const XML_Char *m_declNotationPublicId; 713 ELEMENT_TYPE *m_declElementType; 714 ATTRIBUTE_ID *m_declAttributeId; 715 XML_Bool m_declAttributeIsCdata; 716 XML_Bool m_declAttributeIsId; 717 DTD *m_dtd; 718 const XML_Char *m_curBase; 719 TAG *m_tagStack; 720 TAG *m_freeTagList; 721 BINDING *m_inheritedBindings; 722 BINDING *m_freeBindingList; 723 int m_attsSize; 724 int m_nSpecifiedAtts; 725 int m_idAttIndex; 726 ATTRIBUTE *m_atts; 727 NS_ATT *m_nsAtts; 728 unsigned long m_nsAttsVersion; 729 unsigned char m_nsAttsPower; 730 #ifdef XML_ATTR_INFO 731 XML_AttrInfo *m_attInfo; 732 #endif 733 POSITION m_position; 734 STRING_POOL m_tempPool; 735 STRING_POOL m_temp2Pool; 736 char *m_groupConnector; 737 unsigned int m_groupSize; 738 XML_Char m_namespaceSeparator; 739 XML_Parser m_parentParser; 740 XML_ParsingStatus m_parsingStatus; 741 #ifdef XML_DTD 742 XML_Bool m_isParamEntity; 743 XML_Bool m_useForeignDTD; 744 enum XML_ParamEntityParsing 
m_paramEntityParsing; 745 #endif 746 unsigned long m_hash_secret_salt; 747 #if XML_GE == 1 748 ACCOUNTING m_accounting; 749 ENTITY_STATS m_entity_stats; 750 #endif 751 }; 752 753 #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) 754 #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) 755 #define FREE(parser, p) (parser->m_mem.free_fcn((p))) 756 757 XML_Parser XMLCALL 758 XML_ParserCreate(const XML_Char *encodingName) { 759 return XML_ParserCreate_MM(encodingName, NULL, NULL); 760 } 761 762 XML_Parser XMLCALL 763 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { 764 XML_Char tmp[2] = {nsSep, 0}; 765 return XML_ParserCreate_MM(encodingName, NULL, tmp); 766 } 767 768 // "xml=http://www.w3.org/XML/1998/namespace" 769 static const XML_Char implicitContext[] 770 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, 771 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 772 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, 773 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, 774 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, 775 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, 776 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, 777 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, 778 '\0'}; 779 780 /* To avoid warnings about unused functions: */ 781 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) 782 783 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 784 785 /* Obtain entropy on Linux 3.17+ */ 786 static int 787 writeRandomBytes_getrandom_nonblock(void *target, size_t count) { 788 int success = 0; /* full count bytes written? 
*/ 789 size_t bytesWrittenTotal = 0; 790 const unsigned int getrandomFlags = GRND_NONBLOCK; 791 792 do { 793 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 794 const size_t bytesToWrite = count - bytesWrittenTotal; 795 796 const int bytesWrittenMore = 797 # if defined(HAVE_GETRANDOM) 798 getrandom(currentTarget, bytesToWrite, getrandomFlags); 799 # else 800 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); 801 # endif 802 803 if (bytesWrittenMore > 0) { 804 bytesWrittenTotal += bytesWrittenMore; 805 if (bytesWrittenTotal >= count) 806 success = 1; 807 } 808 } while (! success && (errno == EINTR)); 809 810 return success; 811 } 812 813 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ 814 815 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) 816 817 /* Extract entropy from /dev/urandom */ 818 static int 819 writeRandomBytes_dev_urandom(void *target, size_t count) { 820 int success = 0; /* full count bytes written? */ 821 size_t bytesWrittenTotal = 0; 822 823 const int fd = open("/dev/urandom", O_RDONLY); 824 if (fd < 0) { 825 return 0; 826 } 827 828 do { 829 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); 830 const size_t bytesToWrite = count - bytesWrittenTotal; 831 832 const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); 833 834 if (bytesWrittenMore > 0) { 835 bytesWrittenTotal += bytesWrittenMore; 836 if (bytesWrittenTotal >= count) 837 success = 1; 838 } 839 } while (! success && (errno == EINTR)); 840 841 close(fd); 842 return success; 843 } 844 845 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 846 847 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 848 849 #if defined(HAVE_ARC4RANDOM) && ! 
defined(HAVE_ARC4RANDOM_BUF) 850 851 static void 852 writeRandomBytes_arc4random(void *target, size_t count) { 853 size_t bytesWrittenTotal = 0; 854 855 while (bytesWrittenTotal < count) { 856 const uint32_t random32 = arc4random(); 857 size_t i = 0; 858 859 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 860 i++, bytesWrittenTotal++) { 861 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 862 ((uint8_t *)target)[bytesWrittenTotal] = random8; 863 } 864 } 865 } 866 867 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ 868 869 #ifdef _WIN32 870 871 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), 872 as it didn't declare it in its header prior to version 5.3.0 of its 873 runtime package (mingwrt, containing stdlib.h). The upstream fix 874 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ 875 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ 876 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) 877 __declspec(dllimport) int rand_s(unsigned int *); 878 # endif 879 880 /* Obtain entropy on Windows using the rand_s() function which 881 * generates cryptographically secure random numbers. Internally it 882 * uses RtlGenRandom API which is present in Windows XP and later. 883 */ 884 static int 885 writeRandomBytes_rand_s(void *target, size_t count) { 886 size_t bytesWrittenTotal = 0; 887 888 while (bytesWrittenTotal < count) { 889 unsigned int random32 = 0; 890 size_t i = 0; 891 892 if (rand_s(&random32)) 893 return 0; /* failure */ 894 895 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); 896 i++, bytesWrittenTotal++) { 897 const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); 898 ((uint8_t *)target)[bytesWrittenTotal] = random8; 899 } 900 } 901 return 1; /* success */ 902 } 903 904 #endif /* _WIN32 */ 905 906 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! 
defined(HAVE_ARC4RANDOM) 907 908 static unsigned long 909 gather_time_entropy(void) { 910 # ifdef _WIN32 911 FILETIME ft; 912 GetSystemTimeAsFileTime(&ft); /* never fails */ 913 return ft.dwHighDateTime ^ ft.dwLowDateTime; 914 # else 915 struct timeval tv; 916 int gettimeofday_res; 917 918 gettimeofday_res = gettimeofday(&tv, NULL); 919 920 # if defined(NDEBUG) 921 (void)gettimeofday_res; 922 # else 923 assert(gettimeofday_res == 0); 924 # endif /* defined(NDEBUG) */ 925 926 /* Microseconds time is <20 bits entropy */ 927 return tv.tv_usec; 928 # endif 929 } 930 931 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ 932 933 static unsigned long 934 ENTROPY_DEBUG(const char *label, unsigned long entropy) { 935 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { 936 fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, 937 (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); 938 } 939 return entropy; 940 } 941 942 static unsigned long 943 generate_hash_secret_salt(XML_Parser parser) { 944 unsigned long entropy; 945 (void)parser; 946 947 /* "Failproof" high quality providers: */ 948 #if defined(HAVE_ARC4RANDOM_BUF) 949 arc4random_buf(&entropy, sizeof(entropy)); 950 return ENTROPY_DEBUG("arc4random_buf", entropy); 951 #elif defined(HAVE_ARC4RANDOM) 952 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); 953 return ENTROPY_DEBUG("arc4random", entropy); 954 #else 955 /* Try high quality providers first .. */ 956 # ifdef _WIN32 957 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { 958 return ENTROPY_DEBUG("rand_s", entropy); 959 } 960 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) 961 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { 962 return ENTROPY_DEBUG("getrandom", entropy); 963 } 964 # endif 965 # if ! 
defined(_WIN32) && defined(XML_DEV_URANDOM) 966 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { 967 return ENTROPY_DEBUG("/dev/urandom", entropy); 968 } 969 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ 970 /* .. and self-made low quality for backup: */ 971 972 /* Process ID is 0 bits entropy if attacker has local access */ 973 entropy = gather_time_entropy() ^ getpid(); 974 975 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ 976 if (sizeof(unsigned long) == 4) { 977 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); 978 } else { 979 return ENTROPY_DEBUG("fallback(8)", 980 entropy * (unsigned long)2305843009213693951ULL); 981 } 982 #endif 983 } 984 985 static unsigned long 986 get_hash_secret_salt(XML_Parser parser) { 987 if (parser->m_parentParser != NULL) 988 return get_hash_secret_salt(parser->m_parentParser); 989 return parser->m_hash_secret_salt; 990 } 991 992 static enum XML_Error 993 callProcessor(XML_Parser parser, const char *start, const char *end, 994 const char **endPtr) { 995 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); 996 997 if (parser->m_reparseDeferralEnabled 998 && ! parser->m_parsingStatus.finalBuffer) { 999 // Heuristic: don't try to parse a partial token again until the amount of 1000 // available data has increased significantly. 1001 const size_t had_before = parser->m_partialTokenBytesBefore; 1002 // ...but *do* try anyway if we're close to causing a reallocation. 
1003 size_t available_buffer 1004 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 1005 #if XML_CONTEXT_BYTES > 0 1006 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); 1007 #endif 1008 available_buffer 1009 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); 1010 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok 1011 const bool enough 1012 = (have_now >= 2 * had_before) 1013 || ((size_t)parser->m_lastBufferRequestSize > available_buffer); 1014 1015 if (! enough) { 1016 *endPtr = start; // callers may expect this to be set 1017 return XML_ERROR_NONE; 1018 } 1019 } 1020 g_parseAttempts += 1; 1021 const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); 1022 if (ret == XML_ERROR_NONE) { 1023 // if we consumed nothing, remember what we had on this parse attempt. 1024 if (*endPtr == start) { 1025 parser->m_partialTokenBytesBefore = have_now; 1026 } else { 1027 parser->m_partialTokenBytesBefore = 0; 1028 } 1029 } 1030 return ret; 1031 } 1032 1033 static XML_Bool /* only valid for root parser */ 1034 startParsing(XML_Parser parser) { 1035 /* hash functions must be initialized before setContext() is called */ 1036 if (parser->m_hash_secret_salt == 0) 1037 parser->m_hash_secret_salt = generate_hash_secret_salt(parser); 1038 if (parser->m_ns) { 1039 /* implicit context only set for root parser, since child 1040 parsers (i.e. 
external entity parsers) will inherit it 1041 */ 1042 return setContext(parser, implicitContext); 1043 } 1044 return XML_TRUE; 1045 } 1046 1047 XML_Parser XMLCALL 1048 XML_ParserCreate_MM(const XML_Char *encodingName, 1049 const XML_Memory_Handling_Suite *memsuite, 1050 const XML_Char *nameSep) { 1051 return parserCreate(encodingName, memsuite, nameSep, NULL); 1052 } 1053 1054 static XML_Parser 1055 parserCreate(const XML_Char *encodingName, 1056 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, 1057 DTD *dtd) { 1058 XML_Parser parser; 1059 1060 if (memsuite) { 1061 XML_Memory_Handling_Suite *mtemp; 1062 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); 1063 if (parser != NULL) { 1064 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1065 mtemp->malloc_fcn = memsuite->malloc_fcn; 1066 mtemp->realloc_fcn = memsuite->realloc_fcn; 1067 mtemp->free_fcn = memsuite->free_fcn; 1068 } 1069 } else { 1070 XML_Memory_Handling_Suite *mtemp; 1071 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); 1072 if (parser != NULL) { 1073 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); 1074 mtemp->malloc_fcn = malloc; 1075 mtemp->realloc_fcn = realloc; 1076 mtemp->free_fcn = free; 1077 } 1078 } 1079 1080 if (! 
parser) 1081 return parser; 1082 1083 parser->m_buffer = NULL; 1084 parser->m_bufferLim = NULL; 1085 1086 parser->m_attsSize = INIT_ATTS_SIZE; 1087 parser->m_atts 1088 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); 1089 if (parser->m_atts == NULL) { 1090 FREE(parser, parser); 1091 return NULL; 1092 } 1093 #ifdef XML_ATTR_INFO 1094 parser->m_attInfo = (XML_AttrInfo *)MALLOC( 1095 parser, parser->m_attsSize * sizeof(XML_AttrInfo)); 1096 if (parser->m_attInfo == NULL) { 1097 FREE(parser, parser->m_atts); 1098 FREE(parser, parser); 1099 return NULL; 1100 } 1101 #endif 1102 parser->m_dataBuf 1103 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); 1104 if (parser->m_dataBuf == NULL) { 1105 FREE(parser, parser->m_atts); 1106 #ifdef XML_ATTR_INFO 1107 FREE(parser, parser->m_attInfo); 1108 #endif 1109 FREE(parser, parser); 1110 return NULL; 1111 } 1112 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; 1113 1114 if (dtd) 1115 parser->m_dtd = dtd; 1116 else { 1117 parser->m_dtd = dtdCreate(&parser->m_mem); 1118 if (parser->m_dtd == NULL) { 1119 FREE(parser, parser->m_dataBuf); 1120 FREE(parser, parser->m_atts); 1121 #ifdef XML_ATTR_INFO 1122 FREE(parser, parser->m_attInfo); 1123 #endif 1124 FREE(parser, parser); 1125 return NULL; 1126 } 1127 } 1128 1129 parser->m_freeBindingList = NULL; 1130 parser->m_freeTagList = NULL; 1131 parser->m_freeInternalEntities = NULL; 1132 1133 parser->m_groupSize = 0; 1134 parser->m_groupConnector = NULL; 1135 1136 parser->m_unknownEncodingHandler = NULL; 1137 parser->m_unknownEncodingHandlerData = NULL; 1138 1139 parser->m_namespaceSeparator = ASCII_EXCL; 1140 parser->m_ns = XML_FALSE; 1141 parser->m_ns_triplets = XML_FALSE; 1142 1143 parser->m_nsAtts = NULL; 1144 parser->m_nsAttsVersion = 0; 1145 parser->m_nsAttsPower = 0; 1146 1147 parser->m_protocolEncodingName = NULL; 1148 1149 poolInit(&parser->m_tempPool, &(parser->m_mem)); 1150 poolInit(&parser->m_temp2Pool, &(parser->m_mem)); 1151 
parserInit(parser, encodingName); 1152 1153 if (encodingName && ! parser->m_protocolEncodingName) { 1154 if (dtd) { 1155 // We need to stop the upcoming call to XML_ParserFree from happily 1156 // destroying parser->m_dtd because the DTD is shared with the parent 1157 // parser and the only guard that keeps XML_ParserFree from destroying 1158 // parser->m_dtd is parser->m_isParamEntity but it will be set to 1159 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). 1160 parser->m_dtd = NULL; 1161 } 1162 XML_ParserFree(parser); 1163 return NULL; 1164 } 1165 1166 if (nameSep) { 1167 parser->m_ns = XML_TRUE; 1168 parser->m_internalEncoding = XmlGetInternalEncodingNS(); 1169 parser->m_namespaceSeparator = *nameSep; 1170 } else { 1171 parser->m_internalEncoding = XmlGetInternalEncoding(); 1172 } 1173 1174 return parser; 1175 } 1176 1177 static void 1178 parserInit(XML_Parser parser, const XML_Char *encodingName) { 1179 parser->m_processor = prologInitProcessor; 1180 XmlPrologStateInit(&parser->m_prologState); 1181 if (encodingName != NULL) { 1182 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1183 } 1184 parser->m_curBase = NULL; 1185 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); 1186 parser->m_userData = NULL; 1187 parser->m_handlerArg = NULL; 1188 parser->m_startElementHandler = NULL; 1189 parser->m_endElementHandler = NULL; 1190 parser->m_characterDataHandler = NULL; 1191 parser->m_processingInstructionHandler = NULL; 1192 parser->m_commentHandler = NULL; 1193 parser->m_startCdataSectionHandler = NULL; 1194 parser->m_endCdataSectionHandler = NULL; 1195 parser->m_defaultHandler = NULL; 1196 parser->m_startDoctypeDeclHandler = NULL; 1197 parser->m_endDoctypeDeclHandler = NULL; 1198 parser->m_unparsedEntityDeclHandler = NULL; 1199 parser->m_notationDeclHandler = NULL; 1200 parser->m_startNamespaceDeclHandler = NULL; 1201 parser->m_endNamespaceDeclHandler = NULL; 1202 parser->m_notStandaloneHandler = 
NULL; 1203 parser->m_externalEntityRefHandler = NULL; 1204 parser->m_externalEntityRefHandlerArg = parser; 1205 parser->m_skippedEntityHandler = NULL; 1206 parser->m_elementDeclHandler = NULL; 1207 parser->m_attlistDeclHandler = NULL; 1208 parser->m_entityDeclHandler = NULL; 1209 parser->m_xmlDeclHandler = NULL; 1210 parser->m_bufferPtr = parser->m_buffer; 1211 parser->m_bufferEnd = parser->m_buffer; 1212 parser->m_parseEndByteIndex = 0; 1213 parser->m_parseEndPtr = NULL; 1214 parser->m_partialTokenBytesBefore = 0; 1215 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; 1216 parser->m_lastBufferRequestSize = 0; 1217 parser->m_declElementType = NULL; 1218 parser->m_declAttributeId = NULL; 1219 parser->m_declEntity = NULL; 1220 parser->m_doctypeName = NULL; 1221 parser->m_doctypeSysid = NULL; 1222 parser->m_doctypePubid = NULL; 1223 parser->m_declAttributeType = NULL; 1224 parser->m_declNotationName = NULL; 1225 parser->m_declNotationPublicId = NULL; 1226 parser->m_declAttributeIsCdata = XML_FALSE; 1227 parser->m_declAttributeIsId = XML_FALSE; 1228 memset(&parser->m_position, 0, sizeof(POSITION)); 1229 parser->m_errorCode = XML_ERROR_NONE; 1230 parser->m_eventPtr = NULL; 1231 parser->m_eventEndPtr = NULL; 1232 parser->m_positionPtr = NULL; 1233 parser->m_openInternalEntities = NULL; 1234 parser->m_defaultExpandInternalEntities = XML_TRUE; 1235 parser->m_tagLevel = 0; 1236 parser->m_tagStack = NULL; 1237 parser->m_inheritedBindings = NULL; 1238 parser->m_nSpecifiedAtts = 0; 1239 parser->m_unknownEncodingMem = NULL; 1240 parser->m_unknownEncodingRelease = NULL; 1241 parser->m_unknownEncodingData = NULL; 1242 parser->m_parentParser = NULL; 1243 parser->m_parsingStatus.parsing = XML_INITIALIZED; 1244 #ifdef XML_DTD 1245 parser->m_isParamEntity = XML_FALSE; 1246 parser->m_useForeignDTD = XML_FALSE; 1247 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 1248 #endif 1249 parser->m_hash_secret_salt = 0; 1250 1251 #if XML_GE == 1 1252 
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); 1253 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); 1254 parser->m_accounting.maximumAmplificationFactor 1255 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; 1256 parser->m_accounting.activationThresholdBytes 1257 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; 1258 1259 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); 1260 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); 1261 #endif 1262 } 1263 1264 /* moves list of bindings to m_freeBindingList */ 1265 static void FASTCALL 1266 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { 1267 while (bindings) { 1268 BINDING *b = bindings; 1269 bindings = bindings->nextTagBinding; 1270 b->nextTagBinding = parser->m_freeBindingList; 1271 parser->m_freeBindingList = b; 1272 } 1273 } 1274 1275 XML_Bool XMLCALL 1276 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { 1277 TAG *tStk; 1278 OPEN_INTERNAL_ENTITY *openEntityList; 1279 1280 if (parser == NULL) 1281 return XML_FALSE; 1282 1283 if (parser->m_parentParser) 1284 return XML_FALSE; 1285 /* move m_tagStack to m_freeTagList */ 1286 tStk = parser->m_tagStack; 1287 while (tStk) { 1288 TAG *tag = tStk; 1289 tStk = tStk->parent; 1290 tag->parent = parser->m_freeTagList; 1291 moveToFreeBindingList(parser, tag->bindings); 1292 tag->bindings = NULL; 1293 parser->m_freeTagList = tag; 1294 } 1295 /* move m_openInternalEntities to m_freeInternalEntities */ 1296 openEntityList = parser->m_openInternalEntities; 1297 while (openEntityList) { 1298 OPEN_INTERNAL_ENTITY *openEntity = openEntityList; 1299 openEntityList = openEntity->next; 1300 openEntity->next = parser->m_freeInternalEntities; 1301 parser->m_freeInternalEntities = openEntity; 1302 } 1303 moveToFreeBindingList(parser, parser->m_inheritedBindings); 1304 FREE(parser, parser->m_unknownEncodingMem); 1305 if 
(parser->m_unknownEncodingRelease) 1306 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1307 poolClear(&parser->m_tempPool); 1308 poolClear(&parser->m_temp2Pool); 1309 FREE(parser, (void *)parser->m_protocolEncodingName); 1310 parser->m_protocolEncodingName = NULL; 1311 parserInit(parser, encodingName); 1312 dtdReset(parser->m_dtd, &parser->m_mem); 1313 return XML_TRUE; 1314 } 1315 1316 enum XML_Status XMLCALL 1317 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { 1318 if (parser == NULL) 1319 return XML_STATUS_ERROR; 1320 /* Block after XML_Parse()/XML_ParseBuffer() has been called. 1321 XXX There's no way for the caller to determine which of the 1322 XXX possible error cases caused the XML_STATUS_ERROR return. 1323 */ 1324 if (parser->m_parsingStatus.parsing == XML_PARSING 1325 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1326 return XML_STATUS_ERROR; 1327 1328 /* Get rid of any previous encoding name */ 1329 FREE(parser, (void *)parser->m_protocolEncodingName); 1330 1331 if (encodingName == NULL) 1332 /* No new encoding name */ 1333 parser->m_protocolEncodingName = NULL; 1334 else { 1335 /* Copy the new encoding name into allocated memory */ 1336 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); 1337 if (! 
parser->m_protocolEncodingName) 1338 return XML_STATUS_ERROR; 1339 } 1340 return XML_STATUS_OK; 1341 } 1342 1343 XML_Parser XMLCALL 1344 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, 1345 const XML_Char *encodingName) { 1346 XML_Parser parser = oldParser; 1347 DTD *newDtd = NULL; 1348 DTD *oldDtd; 1349 XML_StartElementHandler oldStartElementHandler; 1350 XML_EndElementHandler oldEndElementHandler; 1351 XML_CharacterDataHandler oldCharacterDataHandler; 1352 XML_ProcessingInstructionHandler oldProcessingInstructionHandler; 1353 XML_CommentHandler oldCommentHandler; 1354 XML_StartCdataSectionHandler oldStartCdataSectionHandler; 1355 XML_EndCdataSectionHandler oldEndCdataSectionHandler; 1356 XML_DefaultHandler oldDefaultHandler; 1357 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; 1358 XML_NotationDeclHandler oldNotationDeclHandler; 1359 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; 1360 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; 1361 XML_NotStandaloneHandler oldNotStandaloneHandler; 1362 XML_ExternalEntityRefHandler oldExternalEntityRefHandler; 1363 XML_SkippedEntityHandler oldSkippedEntityHandler; 1364 XML_UnknownEncodingHandler oldUnknownEncodingHandler; 1365 XML_ElementDeclHandler oldElementDeclHandler; 1366 XML_AttlistDeclHandler oldAttlistDeclHandler; 1367 XML_EntityDeclHandler oldEntityDeclHandler; 1368 XML_XmlDeclHandler oldXmlDeclHandler; 1369 ELEMENT_TYPE *oldDeclElementType; 1370 1371 void *oldUserData; 1372 void *oldHandlerArg; 1373 XML_Bool oldDefaultExpandInternalEntities; 1374 XML_Parser oldExternalEntityRefHandlerArg; 1375 #ifdef XML_DTD 1376 enum XML_ParamEntityParsing oldParamEntityParsing; 1377 int oldInEntityValue; 1378 #endif 1379 XML_Bool oldns_triplets; 1380 /* Note that the new parser shares the same hash secret as the old 1381 parser, so that dtdCopy and copyEntityTable can lookup values 1382 from hash tables associated with either parser without us having 1383 to worry 
which hash secrets each table has. 1384 */ 1385 unsigned long oldhash_secret_salt; 1386 XML_Bool oldReparseDeferralEnabled; 1387 1388 /* Validate the oldParser parameter before we pull everything out of it */ 1389 if (oldParser == NULL) 1390 return NULL; 1391 1392 /* Stash the original parser contents on the stack */ 1393 oldDtd = parser->m_dtd; 1394 oldStartElementHandler = parser->m_startElementHandler; 1395 oldEndElementHandler = parser->m_endElementHandler; 1396 oldCharacterDataHandler = parser->m_characterDataHandler; 1397 oldProcessingInstructionHandler = parser->m_processingInstructionHandler; 1398 oldCommentHandler = parser->m_commentHandler; 1399 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; 1400 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; 1401 oldDefaultHandler = parser->m_defaultHandler; 1402 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; 1403 oldNotationDeclHandler = parser->m_notationDeclHandler; 1404 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; 1405 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; 1406 oldNotStandaloneHandler = parser->m_notStandaloneHandler; 1407 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; 1408 oldSkippedEntityHandler = parser->m_skippedEntityHandler; 1409 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; 1410 oldElementDeclHandler = parser->m_elementDeclHandler; 1411 oldAttlistDeclHandler = parser->m_attlistDeclHandler; 1412 oldEntityDeclHandler = parser->m_entityDeclHandler; 1413 oldXmlDeclHandler = parser->m_xmlDeclHandler; 1414 oldDeclElementType = parser->m_declElementType; 1415 1416 oldUserData = parser->m_userData; 1417 oldHandlerArg = parser->m_handlerArg; 1418 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; 1419 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; 1420 #ifdef XML_DTD 1421 oldParamEntityParsing = parser->m_paramEntityParsing; 1422 
oldInEntityValue = parser->m_prologState.inEntityValue; 1423 #endif 1424 oldns_triplets = parser->m_ns_triplets; 1425 /* Note that the new parser shares the same hash secret as the old 1426 parser, so that dtdCopy and copyEntityTable can lookup values 1427 from hash tables associated with either parser without us having 1428 to worry which hash secrets each table has. 1429 */ 1430 oldhash_secret_salt = parser->m_hash_secret_salt; 1431 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; 1432 1433 #ifdef XML_DTD 1434 if (! context) 1435 newDtd = oldDtd; 1436 #endif /* XML_DTD */ 1437 1438 /* Note that the magical uses of the pre-processor to make field 1439 access look more like C++ require that `parser' be overwritten 1440 here. This makes this function more painful to follow than it 1441 would be otherwise. 1442 */ 1443 if (parser->m_ns) { 1444 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; 1445 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); 1446 } else { 1447 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); 1448 } 1449 1450 if (! 
parser) 1451 return NULL; 1452 1453 parser->m_startElementHandler = oldStartElementHandler; 1454 parser->m_endElementHandler = oldEndElementHandler; 1455 parser->m_characterDataHandler = oldCharacterDataHandler; 1456 parser->m_processingInstructionHandler = oldProcessingInstructionHandler; 1457 parser->m_commentHandler = oldCommentHandler; 1458 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; 1459 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; 1460 parser->m_defaultHandler = oldDefaultHandler; 1461 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; 1462 parser->m_notationDeclHandler = oldNotationDeclHandler; 1463 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; 1464 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; 1465 parser->m_notStandaloneHandler = oldNotStandaloneHandler; 1466 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; 1467 parser->m_skippedEntityHandler = oldSkippedEntityHandler; 1468 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; 1469 parser->m_elementDeclHandler = oldElementDeclHandler; 1470 parser->m_attlistDeclHandler = oldAttlistDeclHandler; 1471 parser->m_entityDeclHandler = oldEntityDeclHandler; 1472 parser->m_xmlDeclHandler = oldXmlDeclHandler; 1473 parser->m_declElementType = oldDeclElementType; 1474 parser->m_userData = oldUserData; 1475 if (oldUserData == oldHandlerArg) 1476 parser->m_handlerArg = parser->m_userData; 1477 else 1478 parser->m_handlerArg = parser; 1479 if (oldExternalEntityRefHandlerArg != oldParser) 1480 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; 1481 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; 1482 parser->m_ns_triplets = oldns_triplets; 1483 parser->m_hash_secret_salt = oldhash_secret_salt; 1484 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; 1485 parser->m_parentParser = oldParser; 1486 #ifdef XML_DTD 1487 parser->m_paramEntityParsing = 
oldParamEntityParsing; 1488 parser->m_prologState.inEntityValue = oldInEntityValue; 1489 if (context) { 1490 #endif /* XML_DTD */ 1491 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) 1492 || ! setContext(parser, context)) { 1493 XML_ParserFree(parser); 1494 return NULL; 1495 } 1496 parser->m_processor = externalEntityInitProcessor; 1497 #ifdef XML_DTD 1498 } else { 1499 /* The DTD instance referenced by parser->m_dtd is shared between the 1500 document's root parser and external PE parsers, therefore one does not 1501 need to call setContext. In addition, one also *must* not call 1502 setContext, because this would overwrite existing prefix->binding 1503 pointers in parser->m_dtd with ones that get destroyed with the external 1504 PE parser. This would leave those prefixes with dangling pointers. 1505 */ 1506 parser->m_isParamEntity = XML_TRUE; 1507 XmlPrologStateInitExternalEntity(&parser->m_prologState); 1508 parser->m_processor = externalParEntInitProcessor; 1509 } 1510 #endif /* XML_DTD */ 1511 return parser; 1512 } 1513 1514 static void FASTCALL 1515 destroyBindings(BINDING *bindings, XML_Parser parser) { 1516 for (;;) { 1517 BINDING *b = bindings; 1518 if (! 
b) 1519 break; 1520 bindings = b->nextTagBinding; 1521 FREE(parser, b->uri); 1522 FREE(parser, b); 1523 } 1524 } 1525 1526 void XMLCALL 1527 XML_ParserFree(XML_Parser parser) { 1528 TAG *tagList; 1529 OPEN_INTERNAL_ENTITY *entityList; 1530 if (parser == NULL) 1531 return; 1532 /* free m_tagStack and m_freeTagList */ 1533 tagList = parser->m_tagStack; 1534 for (;;) { 1535 TAG *p; 1536 if (tagList == NULL) { 1537 if (parser->m_freeTagList == NULL) 1538 break; 1539 tagList = parser->m_freeTagList; 1540 parser->m_freeTagList = NULL; 1541 } 1542 p = tagList; 1543 tagList = tagList->parent; 1544 FREE(parser, p->buf); 1545 destroyBindings(p->bindings, parser); 1546 FREE(parser, p); 1547 } 1548 /* free m_openInternalEntities and m_freeInternalEntities */ 1549 entityList = parser->m_openInternalEntities; 1550 for (;;) { 1551 OPEN_INTERNAL_ENTITY *openEntity; 1552 if (entityList == NULL) { 1553 if (parser->m_freeInternalEntities == NULL) 1554 break; 1555 entityList = parser->m_freeInternalEntities; 1556 parser->m_freeInternalEntities = NULL; 1557 } 1558 openEntity = entityList; 1559 entityList = entityList->next; 1560 FREE(parser, openEntity); 1561 } 1562 1563 destroyBindings(parser->m_freeBindingList, parser); 1564 destroyBindings(parser->m_inheritedBindings, parser); 1565 poolDestroy(&parser->m_tempPool); 1566 poolDestroy(&parser->m_temp2Pool); 1567 FREE(parser, (void *)parser->m_protocolEncodingName); 1568 #ifdef XML_DTD 1569 /* external parameter entity parsers share the DTD structure 1570 parser->m_dtd with the root parser, so we must not destroy it 1571 */ 1572 if (! parser->m_isParamEntity && parser->m_dtd) 1573 #else 1574 if (parser->m_dtd) 1575 #endif /* XML_DTD */ 1576 dtdDestroy(parser->m_dtd, (XML_Bool)! 
parser->m_parentParser, 1577 &parser->m_mem); 1578 FREE(parser, (void *)parser->m_atts); 1579 #ifdef XML_ATTR_INFO 1580 FREE(parser, (void *)parser->m_attInfo); 1581 #endif 1582 FREE(parser, parser->m_groupConnector); 1583 FREE(parser, parser->m_buffer); 1584 FREE(parser, parser->m_dataBuf); 1585 FREE(parser, parser->m_nsAtts); 1586 FREE(parser, parser->m_unknownEncodingMem); 1587 if (parser->m_unknownEncodingRelease) 1588 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); 1589 FREE(parser, parser); 1590 } 1591 1592 void XMLCALL 1593 XML_UseParserAsHandlerArg(XML_Parser parser) { 1594 if (parser != NULL) 1595 parser->m_handlerArg = parser; 1596 } 1597 1598 enum XML_Error XMLCALL 1599 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { 1600 if (parser == NULL) 1601 return XML_ERROR_INVALID_ARGUMENT; 1602 #ifdef XML_DTD 1603 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1604 if (parser->m_parsingStatus.parsing == XML_PARSING 1605 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1606 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; 1607 parser->m_useForeignDTD = useDTD; 1608 return XML_ERROR_NONE; 1609 #else 1610 UNUSED_P(useDTD); 1611 return XML_ERROR_FEATURE_REQUIRES_XML_DTD; 1612 #endif 1613 } 1614 1615 void XMLCALL 1616 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { 1617 if (parser == NULL) 1618 return; 1619 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1620 if (parser->m_parsingStatus.parsing == XML_PARSING 1621 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1622 return; 1623 parser->m_ns_triplets = do_nst ? 
XML_TRUE : XML_FALSE; 1624 } 1625 1626 void XMLCALL 1627 XML_SetUserData(XML_Parser parser, void *p) { 1628 if (parser == NULL) 1629 return; 1630 if (parser->m_handlerArg == parser->m_userData) 1631 parser->m_handlerArg = parser->m_userData = p; 1632 else 1633 parser->m_userData = p; 1634 } 1635 1636 enum XML_Status XMLCALL 1637 XML_SetBase(XML_Parser parser, const XML_Char *p) { 1638 if (parser == NULL) 1639 return XML_STATUS_ERROR; 1640 if (p) { 1641 p = poolCopyString(&parser->m_dtd->pool, p); 1642 if (! p) 1643 return XML_STATUS_ERROR; 1644 parser->m_curBase = p; 1645 } else 1646 parser->m_curBase = NULL; 1647 return XML_STATUS_OK; 1648 } 1649 1650 const XML_Char *XMLCALL 1651 XML_GetBase(XML_Parser parser) { 1652 if (parser == NULL) 1653 return NULL; 1654 return parser->m_curBase; 1655 } 1656 1657 int XMLCALL 1658 XML_GetSpecifiedAttributeCount(XML_Parser parser) { 1659 if (parser == NULL) 1660 return -1; 1661 return parser->m_nSpecifiedAtts; 1662 } 1663 1664 int XMLCALL 1665 XML_GetIdAttributeIndex(XML_Parser parser) { 1666 if (parser == NULL) 1667 return -1; 1668 return parser->m_idAttIndex; 1669 } 1670 1671 #ifdef XML_ATTR_INFO 1672 const XML_AttrInfo *XMLCALL 1673 XML_GetAttributeInfo(XML_Parser parser) { 1674 if (parser == NULL) 1675 return NULL; 1676 return parser->m_attInfo; 1677 } 1678 #endif 1679 1680 void XMLCALL 1681 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 1682 XML_EndElementHandler end) { 1683 if (parser == NULL) 1684 return; 1685 parser->m_startElementHandler = start; 1686 parser->m_endElementHandler = end; 1687 } 1688 1689 void XMLCALL 1690 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { 1691 if (parser != NULL) 1692 parser->m_startElementHandler = start; 1693 } 1694 1695 void XMLCALL 1696 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { 1697 if (parser != NULL) 1698 parser->m_endElementHandler = end; 1699 } 1700 1701 void XMLCALL 1702 
XML_SetCharacterDataHandler(XML_Parser parser, 1703 XML_CharacterDataHandler handler) { 1704 if (parser != NULL) 1705 parser->m_characterDataHandler = handler; 1706 } 1707 1708 void XMLCALL 1709 XML_SetProcessingInstructionHandler(XML_Parser parser, 1710 XML_ProcessingInstructionHandler handler) { 1711 if (parser != NULL) 1712 parser->m_processingInstructionHandler = handler; 1713 } 1714 1715 void XMLCALL 1716 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { 1717 if (parser != NULL) 1718 parser->m_commentHandler = handler; 1719 } 1720 1721 void XMLCALL 1722 XML_SetCdataSectionHandler(XML_Parser parser, 1723 XML_StartCdataSectionHandler start, 1724 XML_EndCdataSectionHandler end) { 1725 if (parser == NULL) 1726 return; 1727 parser->m_startCdataSectionHandler = start; 1728 parser->m_endCdataSectionHandler = end; 1729 } 1730 1731 void XMLCALL 1732 XML_SetStartCdataSectionHandler(XML_Parser parser, 1733 XML_StartCdataSectionHandler start) { 1734 if (parser != NULL) 1735 parser->m_startCdataSectionHandler = start; 1736 } 1737 1738 void XMLCALL 1739 XML_SetEndCdataSectionHandler(XML_Parser parser, 1740 XML_EndCdataSectionHandler end) { 1741 if (parser != NULL) 1742 parser->m_endCdataSectionHandler = end; 1743 } 1744 1745 void XMLCALL 1746 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { 1747 if (parser == NULL) 1748 return; 1749 parser->m_defaultHandler = handler; 1750 parser->m_defaultExpandInternalEntities = XML_FALSE; 1751 } 1752 1753 void XMLCALL 1754 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { 1755 if (parser == NULL) 1756 return; 1757 parser->m_defaultHandler = handler; 1758 parser->m_defaultExpandInternalEntities = XML_TRUE; 1759 } 1760 1761 void XMLCALL 1762 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 1763 XML_EndDoctypeDeclHandler end) { 1764 if (parser == NULL) 1765 return; 1766 parser->m_startDoctypeDeclHandler = start; 1767 
parser->m_endDoctypeDeclHandler = end; 1768 } 1769 1770 void XMLCALL 1771 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 1772 XML_StartDoctypeDeclHandler start) { 1773 if (parser != NULL) 1774 parser->m_startDoctypeDeclHandler = start; 1775 } 1776 1777 void XMLCALL 1778 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { 1779 if (parser != NULL) 1780 parser->m_endDoctypeDeclHandler = end; 1781 } 1782 1783 void XMLCALL 1784 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 1785 XML_UnparsedEntityDeclHandler handler) { 1786 if (parser != NULL) 1787 parser->m_unparsedEntityDeclHandler = handler; 1788 } 1789 1790 void XMLCALL 1791 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { 1792 if (parser != NULL) 1793 parser->m_notationDeclHandler = handler; 1794 } 1795 1796 void XMLCALL 1797 XML_SetNamespaceDeclHandler(XML_Parser parser, 1798 XML_StartNamespaceDeclHandler start, 1799 XML_EndNamespaceDeclHandler end) { 1800 if (parser == NULL) 1801 return; 1802 parser->m_startNamespaceDeclHandler = start; 1803 parser->m_endNamespaceDeclHandler = end; 1804 } 1805 1806 void XMLCALL 1807 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 1808 XML_StartNamespaceDeclHandler start) { 1809 if (parser != NULL) 1810 parser->m_startNamespaceDeclHandler = start; 1811 } 1812 1813 void XMLCALL 1814 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 1815 XML_EndNamespaceDeclHandler end) { 1816 if (parser != NULL) 1817 parser->m_endNamespaceDeclHandler = end; 1818 } 1819 1820 void XMLCALL 1821 XML_SetNotStandaloneHandler(XML_Parser parser, 1822 XML_NotStandaloneHandler handler) { 1823 if (parser != NULL) 1824 parser->m_notStandaloneHandler = handler; 1825 } 1826 1827 void XMLCALL 1828 XML_SetExternalEntityRefHandler(XML_Parser parser, 1829 XML_ExternalEntityRefHandler handler) { 1830 if (parser != NULL) 1831 parser->m_externalEntityRefHandler = handler; 1832 } 1833 1834 void XMLCALL 1835 
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { 1836 if (parser == NULL) 1837 return; 1838 if (arg) 1839 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; 1840 else 1841 parser->m_externalEntityRefHandlerArg = parser; 1842 } 1843 1844 void XMLCALL 1845 XML_SetSkippedEntityHandler(XML_Parser parser, 1846 XML_SkippedEntityHandler handler) { 1847 if (parser != NULL) 1848 parser->m_skippedEntityHandler = handler; 1849 } 1850 1851 void XMLCALL 1852 XML_SetUnknownEncodingHandler(XML_Parser parser, 1853 XML_UnknownEncodingHandler handler, void *data) { 1854 if (parser == NULL) 1855 return; 1856 parser->m_unknownEncodingHandler = handler; 1857 parser->m_unknownEncodingHandlerData = data; 1858 } 1859 1860 void XMLCALL 1861 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { 1862 if (parser != NULL) 1863 parser->m_elementDeclHandler = eldecl; 1864 } 1865 1866 void XMLCALL 1867 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { 1868 if (parser != NULL) 1869 parser->m_attlistDeclHandler = attdecl; 1870 } 1871 1872 void XMLCALL 1873 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { 1874 if (parser != NULL) 1875 parser->m_entityDeclHandler = handler; 1876 } 1877 1878 void XMLCALL 1879 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { 1880 if (parser != NULL) 1881 parser->m_xmlDeclHandler = handler; 1882 } 1883 1884 int XMLCALL 1885 XML_SetParamEntityParsing(XML_Parser parser, 1886 enum XML_ParamEntityParsing peParsing) { 1887 if (parser == NULL) 1888 return 0; 1889 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1890 if (parser->m_parsingStatus.parsing == XML_PARSING 1891 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1892 return 0; 1893 #ifdef XML_DTD 1894 parser->m_paramEntityParsing = peParsing; 1895 return 1; 1896 #else 1897 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; 1898 #endif 1899 } 1900 1901 int XMLCALL 1902 
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { 1903 if (parser == NULL) 1904 return 0; 1905 if (parser->m_parentParser) 1906 return XML_SetHashSalt(parser->m_parentParser, hash_salt); 1907 /* block after XML_Parse()/XML_ParseBuffer() has been called */ 1908 if (parser->m_parsingStatus.parsing == XML_PARSING 1909 || parser->m_parsingStatus.parsing == XML_SUSPENDED) 1910 return 0; 1911 parser->m_hash_secret_salt = hash_salt; 1912 return 1; 1913 } 1914 1915 enum XML_Status XMLCALL 1916 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { 1917 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { 1918 if (parser != NULL) 1919 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; 1920 return XML_STATUS_ERROR; 1921 } 1922 switch (parser->m_parsingStatus.parsing) { 1923 case XML_SUSPENDED: 1924 parser->m_errorCode = XML_ERROR_SUSPENDED; 1925 return XML_STATUS_ERROR; 1926 case XML_FINISHED: 1927 parser->m_errorCode = XML_ERROR_FINISHED; 1928 return XML_STATUS_ERROR; 1929 case XML_INITIALIZED: 1930 if (parser->m_parentParser == NULL && ! startParsing(parser)) { 1931 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1932 return XML_STATUS_ERROR; 1933 } 1934 /* fall through */ 1935 default: 1936 parser->m_parsingStatus.parsing = XML_PARSING; 1937 } 1938 1939 #if XML_CONTEXT_BYTES == 0 1940 if (parser->m_bufferPtr == parser->m_bufferEnd) { 1941 const char *end; 1942 int nLeftOver; 1943 enum XML_Status result; 1944 /* Detect overflow (a+b > MAX <==> b > MAX-a) */ 1945 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { 1946 parser->m_errorCode = XML_ERROR_NO_MEMORY; 1947 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 1948 parser->m_processor = errorProcessor; 1949 return XML_STATUS_ERROR; 1950 } 1951 // though this isn't a buffer request, we assume that `len` is the app's 1952 // preferred buffer fill size, and therefore save it here. 
1953 parser->m_lastBufferRequestSize = len; 1954 parser->m_parseEndByteIndex += len; 1955 parser->m_positionPtr = s; 1956 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 1957 1958 parser->m_errorCode 1959 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); 1960 1961 if (parser->m_errorCode != XML_ERROR_NONE) { 1962 parser->m_eventEndPtr = parser->m_eventPtr; 1963 parser->m_processor = errorProcessor; 1964 return XML_STATUS_ERROR; 1965 } else { 1966 switch (parser->m_parsingStatus.parsing) { 1967 case XML_SUSPENDED: 1968 result = XML_STATUS_SUSPENDED; 1969 break; 1970 case XML_INITIALIZED: 1971 case XML_PARSING: 1972 if (isFinal) { 1973 parser->m_parsingStatus.parsing = XML_FINISHED; 1974 return XML_STATUS_OK; 1975 } 1976 /* fall through */ 1977 default: 1978 result = XML_STATUS_OK; 1979 } 1980 } 1981 1982 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, 1983 &parser->m_position); 1984 nLeftOver = s + len - end; 1985 if (nLeftOver) { 1986 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED 1987 // (and XML_ERROR_FINISHED) from XML_GetBuffer. 1988 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; 1989 parser->m_parsingStatus.parsing = XML_PARSING; 1990 void *const temp = XML_GetBuffer(parser, nLeftOver); 1991 parser->m_parsingStatus.parsing = originalStatus; 1992 // GetBuffer may have overwritten this, but we want to remember what the 1993 // app requested, not how many bytes were left over after parsing. 1994 parser->m_lastBufferRequestSize = len; 1995 if (temp == NULL) { 1996 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
1997 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 1998 parser->m_processor = errorProcessor; 1999 return XML_STATUS_ERROR; 2000 } 2001 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we 2002 // don't have any data to preserve, and can copy straight into the start 2003 // of the buffer rather than the GetBuffer return pointer (which may be 2004 // pointing further into the allocated buffer). 2005 memcpy(parser->m_buffer, end, nLeftOver); 2006 } 2007 parser->m_bufferPtr = parser->m_buffer; 2008 parser->m_bufferEnd = parser->m_buffer + nLeftOver; 2009 parser->m_positionPtr = parser->m_bufferPtr; 2010 parser->m_parseEndPtr = parser->m_bufferEnd; 2011 parser->m_eventPtr = parser->m_bufferPtr; 2012 parser->m_eventEndPtr = parser->m_bufferPtr; 2013 return result; 2014 } 2015 #endif /* XML_CONTEXT_BYTES == 0 */ 2016 void *buff = XML_GetBuffer(parser, len); 2017 if (buff == NULL) 2018 return XML_STATUS_ERROR; 2019 if (len > 0) { 2020 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above 2021 memcpy(buff, s, len); 2022 } 2023 return XML_ParseBuffer(parser, len, isFinal); 2024 } 2025 2026 enum XML_Status XMLCALL 2027 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { 2028 const char *start; 2029 enum XML_Status result = XML_STATUS_OK; 2030 2031 if (parser == NULL) 2032 return XML_STATUS_ERROR; 2033 switch (parser->m_parsingStatus.parsing) { 2034 case XML_SUSPENDED: 2035 parser->m_errorCode = XML_ERROR_SUSPENDED; 2036 return XML_STATUS_ERROR; 2037 case XML_FINISHED: 2038 parser->m_errorCode = XML_ERROR_FINISHED; 2039 return XML_STATUS_ERROR; 2040 case XML_INITIALIZED: 2041 /* Has someone called XML_GetBuffer successfully before? */ 2042 if (! parser->m_bufferPtr) { 2043 parser->m_errorCode = XML_ERROR_NO_BUFFER; 2044 return XML_STATUS_ERROR; 2045 } 2046 2047 if (parser->m_parentParser == NULL && ! 
startParsing(parser)) { 2048 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2049 return XML_STATUS_ERROR; 2050 } 2051 /* fall through */ 2052 default: 2053 parser->m_parsingStatus.parsing = XML_PARSING; 2054 } 2055 2056 start = parser->m_bufferPtr; 2057 parser->m_positionPtr = start; 2058 parser->m_bufferEnd += len; 2059 parser->m_parseEndPtr = parser->m_bufferEnd; 2060 parser->m_parseEndByteIndex += len; 2061 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; 2062 2063 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, 2064 &parser->m_bufferPtr); 2065 2066 if (parser->m_errorCode != XML_ERROR_NONE) { 2067 parser->m_eventEndPtr = parser->m_eventPtr; 2068 parser->m_processor = errorProcessor; 2069 return XML_STATUS_ERROR; 2070 } else { 2071 switch (parser->m_parsingStatus.parsing) { 2072 case XML_SUSPENDED: 2073 result = XML_STATUS_SUSPENDED; 2074 break; 2075 case XML_INITIALIZED: 2076 case XML_PARSING: 2077 if (isFinal) { 2078 parser->m_parsingStatus.parsing = XML_FINISHED; 2079 return result; 2080 } 2081 default:; /* should not happen */ 2082 } 2083 } 2084 2085 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2086 parser->m_bufferPtr, &parser->m_position); 2087 parser->m_positionPtr = parser->m_bufferPtr; 2088 return result; 2089 } 2090 2091 void *XMLCALL 2092 XML_GetBuffer(XML_Parser parser, int len) { 2093 if (parser == NULL) 2094 return NULL; 2095 if (len < 0) { 2096 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2097 return NULL; 2098 } 2099 switch (parser->m_parsingStatus.parsing) { 2100 case XML_SUSPENDED: 2101 parser->m_errorCode = XML_ERROR_SUSPENDED; 2102 return NULL; 2103 case XML_FINISHED: 2104 parser->m_errorCode = XML_ERROR_FINISHED; 2105 return NULL; 2106 default:; 2107 } 2108 2109 // whether or not the request succeeds, `len` seems to be the app's preferred 2110 // buffer fill size; remember it. 
2111 parser->m_lastBufferRequestSize = len; 2112 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) 2113 || parser->m_buffer == NULL) { 2114 #if XML_CONTEXT_BYTES > 0 2115 int keep; 2116 #endif /* XML_CONTEXT_BYTES > 0 */ 2117 /* Do not invoke signed arithmetic overflow: */ 2118 int neededSize = (int)((unsigned)len 2119 + (unsigned)EXPAT_SAFE_PTR_DIFF( 2120 parser->m_bufferEnd, parser->m_bufferPtr)); 2121 if (neededSize < 0) { 2122 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2123 return NULL; 2124 } 2125 #if XML_CONTEXT_BYTES > 0 2126 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); 2127 if (keep > XML_CONTEXT_BYTES) 2128 keep = XML_CONTEXT_BYTES; 2129 /* Detect and prevent integer overflow */ 2130 if (keep > INT_MAX - neededSize) { 2131 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2132 return NULL; 2133 } 2134 neededSize += keep; 2135 #endif /* XML_CONTEXT_BYTES > 0 */ 2136 if (parser->m_buffer && parser->m_bufferPtr 2137 && neededSize 2138 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { 2139 #if XML_CONTEXT_BYTES > 0 2140 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { 2141 int offset 2142 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) 2143 - keep; 2144 /* The buffer pointers cannot be NULL here; we have at least some bytes 2145 * in the buffer */ 2146 memmove(parser->m_buffer, &parser->m_buffer[offset], 2147 parser->m_bufferEnd - parser->m_bufferPtr + keep); 2148 parser->m_bufferEnd -= offset; 2149 parser->m_bufferPtr -= offset; 2150 } 2151 #else 2152 memmove(parser->m_buffer, parser->m_bufferPtr, 2153 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2154 parser->m_bufferEnd 2155 = parser->m_buffer 2156 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2157 parser->m_bufferPtr = parser->m_buffer; 2158 #endif /* XML_CONTEXT_BYTES > 0 */ 2159 } else { 2160 char *newBuf; 2161 int bufferSize 2162 = 
(int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); 2163 if (bufferSize == 0) 2164 bufferSize = INIT_BUFFER_SIZE; 2165 do { 2166 /* Do not invoke signed arithmetic overflow: */ 2167 bufferSize = (int)(2U * (unsigned)bufferSize); 2168 } while (bufferSize < neededSize && bufferSize > 0); 2169 if (bufferSize <= 0) { 2170 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2171 return NULL; 2172 } 2173 newBuf = (char *)MALLOC(parser, bufferSize); 2174 if (newBuf == 0) { 2175 parser->m_errorCode = XML_ERROR_NO_MEMORY; 2176 return NULL; 2177 } 2178 parser->m_bufferLim = newBuf + bufferSize; 2179 #if XML_CONTEXT_BYTES > 0 2180 if (parser->m_bufferPtr) { 2181 memcpy(newBuf, &parser->m_bufferPtr[-keep], 2182 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2183 + keep); 2184 FREE(parser, parser->m_buffer); 2185 parser->m_buffer = newBuf; 2186 parser->m_bufferEnd 2187 = parser->m_buffer 2188 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) 2189 + keep; 2190 parser->m_bufferPtr = parser->m_buffer + keep; 2191 } else { 2192 /* This must be a brand new buffer with no data in it yet */ 2193 parser->m_bufferEnd = newBuf; 2194 parser->m_bufferPtr = parser->m_buffer = newBuf; 2195 } 2196 #else 2197 if (parser->m_bufferPtr) { 2198 memcpy(newBuf, parser->m_bufferPtr, 2199 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); 2200 FREE(parser, parser->m_buffer); 2201 parser->m_bufferEnd 2202 = newBuf 2203 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); 2204 } else { 2205 /* This must be a brand new buffer with no data in it yet */ 2206 parser->m_bufferEnd = newBuf; 2207 } 2208 parser->m_bufferPtr = parser->m_buffer = newBuf; 2209 #endif /* XML_CONTEXT_BYTES > 0 */ 2210 } 2211 parser->m_eventPtr = parser->m_eventEndPtr = NULL; 2212 parser->m_positionPtr = NULL; 2213 } 2214 return parser->m_bufferEnd; 2215 } 2216 2217 enum XML_Status XMLCALL 2218 XML_StopParser(XML_Parser parser, XML_Bool resumable) { 2219 if (parser == NULL) 
2220 return XML_STATUS_ERROR; 2221 switch (parser->m_parsingStatus.parsing) { 2222 case XML_SUSPENDED: 2223 if (resumable) { 2224 parser->m_errorCode = XML_ERROR_SUSPENDED; 2225 return XML_STATUS_ERROR; 2226 } 2227 parser->m_parsingStatus.parsing = XML_FINISHED; 2228 break; 2229 case XML_FINISHED: 2230 parser->m_errorCode = XML_ERROR_FINISHED; 2231 return XML_STATUS_ERROR; 2232 default: 2233 if (resumable) { 2234 #ifdef XML_DTD 2235 if (parser->m_isParamEntity) { 2236 parser->m_errorCode = XML_ERROR_SUSPEND_PE; 2237 return XML_STATUS_ERROR; 2238 } 2239 #endif 2240 parser->m_parsingStatus.parsing = XML_SUSPENDED; 2241 } else 2242 parser->m_parsingStatus.parsing = XML_FINISHED; 2243 } 2244 return XML_STATUS_OK; 2245 } 2246 2247 enum XML_Status XMLCALL 2248 XML_ResumeParser(XML_Parser parser) { 2249 enum XML_Status result = XML_STATUS_OK; 2250 2251 if (parser == NULL) 2252 return XML_STATUS_ERROR; 2253 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { 2254 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; 2255 return XML_STATUS_ERROR; 2256 } 2257 parser->m_parsingStatus.parsing = XML_PARSING; 2258 2259 parser->m_errorCode = callProcessor( 2260 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); 2261 2262 if (parser->m_errorCode != XML_ERROR_NONE) { 2263 parser->m_eventEndPtr = parser->m_eventPtr; 2264 parser->m_processor = errorProcessor; 2265 return XML_STATUS_ERROR; 2266 } else { 2267 switch (parser->m_parsingStatus.parsing) { 2268 case XML_SUSPENDED: 2269 result = XML_STATUS_SUSPENDED; 2270 break; 2271 case XML_INITIALIZED: 2272 case XML_PARSING: 2273 if (parser->m_parsingStatus.finalBuffer) { 2274 parser->m_parsingStatus.parsing = XML_FINISHED; 2275 return result; 2276 } 2277 default:; 2278 } 2279 } 2280 2281 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2282 parser->m_bufferPtr, &parser->m_position); 2283 parser->m_positionPtr = parser->m_bufferPtr; 2284 return result; 2285 } 2286 2287 void XMLCALL 2288 
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { 2289 if (parser == NULL) 2290 return; 2291 assert(status != NULL); 2292 *status = parser->m_parsingStatus; 2293 } 2294 2295 enum XML_Error XMLCALL 2296 XML_GetErrorCode(XML_Parser parser) { 2297 if (parser == NULL) 2298 return XML_ERROR_INVALID_ARGUMENT; 2299 return parser->m_errorCode; 2300 } 2301 2302 XML_Index XMLCALL 2303 XML_GetCurrentByteIndex(XML_Parser parser) { 2304 if (parser == NULL) 2305 return -1; 2306 if (parser->m_eventPtr) 2307 return (XML_Index)(parser->m_parseEndByteIndex 2308 - (parser->m_parseEndPtr - parser->m_eventPtr)); 2309 return -1; 2310 } 2311 2312 int XMLCALL 2313 XML_GetCurrentByteCount(XML_Parser parser) { 2314 if (parser == NULL) 2315 return 0; 2316 if (parser->m_eventEndPtr && parser->m_eventPtr) 2317 return (int)(parser->m_eventEndPtr - parser->m_eventPtr); 2318 return 0; 2319 } 2320 2321 const char *XMLCALL 2322 XML_GetInputContext(XML_Parser parser, int *offset, int *size) { 2323 #if XML_CONTEXT_BYTES > 0 2324 if (parser == NULL) 2325 return NULL; 2326 if (parser->m_eventPtr && parser->m_buffer) { 2327 if (offset != NULL) 2328 *offset = (int)(parser->m_eventPtr - parser->m_buffer); 2329 if (size != NULL) 2330 *size = (int)(parser->m_bufferEnd - parser->m_buffer); 2331 return parser->m_buffer; 2332 } 2333 #else 2334 (void)parser; 2335 (void)offset; 2336 (void)size; 2337 #endif /* XML_CONTEXT_BYTES > 0 */ 2338 return (const char *)0; 2339 } 2340 2341 XML_Size XMLCALL 2342 XML_GetCurrentLineNumber(XML_Parser parser) { 2343 if (parser == NULL) 2344 return 0; 2345 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2346 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2347 parser->m_eventPtr, &parser->m_position); 2348 parser->m_positionPtr = parser->m_eventPtr; 2349 } 2350 return parser->m_position.lineNumber + 1; 2351 } 2352 2353 XML_Size XMLCALL 2354 XML_GetCurrentColumnNumber(XML_Parser parser) { 2355 if (parser == NULL) 2356 
return 0; 2357 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { 2358 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, 2359 parser->m_eventPtr, &parser->m_position); 2360 parser->m_positionPtr = parser->m_eventPtr; 2361 } 2362 return parser->m_position.columnNumber; 2363 } 2364 2365 void XMLCALL 2366 XML_FreeContentModel(XML_Parser parser, XML_Content *model) { 2367 if (parser != NULL) 2368 FREE(parser, model); 2369 } 2370 2371 void *XMLCALL 2372 XML_MemMalloc(XML_Parser parser, size_t size) { 2373 if (parser == NULL) 2374 return NULL; 2375 return MALLOC(parser, size); 2376 } 2377 2378 void *XMLCALL 2379 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { 2380 if (parser == NULL) 2381 return NULL; 2382 return REALLOC(parser, ptr, size); 2383 } 2384 2385 void XMLCALL 2386 XML_MemFree(XML_Parser parser, void *ptr) { 2387 if (parser != NULL) 2388 FREE(parser, ptr); 2389 } 2390 2391 void XMLCALL 2392 XML_DefaultCurrent(XML_Parser parser) { 2393 if (parser == NULL) 2394 return; 2395 if (parser->m_defaultHandler) { 2396 if (parser->m_openInternalEntities) 2397 reportDefault(parser, parser->m_internalEncoding, 2398 parser->m_openInternalEntities->internalEventPtr, 2399 parser->m_openInternalEntities->internalEventEndPtr); 2400 else 2401 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, 2402 parser->m_eventEndPtr); 2403 } 2404 } 2405 2406 const XML_LChar *XMLCALL 2407 XML_ErrorString(enum XML_Error code) { 2408 switch (code) { 2409 case XML_ERROR_NONE: 2410 return NULL; 2411 case XML_ERROR_NO_MEMORY: 2412 return XML_L("out of memory"); 2413 case XML_ERROR_SYNTAX: 2414 return XML_L("syntax error"); 2415 case XML_ERROR_NO_ELEMENTS: 2416 return XML_L("no element found"); 2417 case XML_ERROR_INVALID_TOKEN: 2418 return XML_L("not well-formed (invalid token)"); 2419 case XML_ERROR_UNCLOSED_TOKEN: 2420 return XML_L("unclosed token"); 2421 case XML_ERROR_PARTIAL_CHAR: 2422 return XML_L("partial character"); 2423 case 
XML_ERROR_TAG_MISMATCH: 2424 return XML_L("mismatched tag"); 2425 case XML_ERROR_DUPLICATE_ATTRIBUTE: 2426 return XML_L("duplicate attribute"); 2427 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: 2428 return XML_L("junk after document element"); 2429 case XML_ERROR_PARAM_ENTITY_REF: 2430 return XML_L("illegal parameter entity reference"); 2431 case XML_ERROR_UNDEFINED_ENTITY: 2432 return XML_L("undefined entity"); 2433 case XML_ERROR_RECURSIVE_ENTITY_REF: 2434 return XML_L("recursive entity reference"); 2435 case XML_ERROR_ASYNC_ENTITY: 2436 return XML_L("asynchronous entity"); 2437 case XML_ERROR_BAD_CHAR_REF: 2438 return XML_L("reference to invalid character number"); 2439 case XML_ERROR_BINARY_ENTITY_REF: 2440 return XML_L("reference to binary entity"); 2441 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: 2442 return XML_L("reference to external entity in attribute"); 2443 case XML_ERROR_MISPLACED_XML_PI: 2444 return XML_L("XML or text declaration not at start of entity"); 2445 case XML_ERROR_UNKNOWN_ENCODING: 2446 return XML_L("unknown encoding"); 2447 case XML_ERROR_INCORRECT_ENCODING: 2448 return XML_L("encoding specified in XML declaration is incorrect"); 2449 case XML_ERROR_UNCLOSED_CDATA_SECTION: 2450 return XML_L("unclosed CDATA section"); 2451 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: 2452 return XML_L("error in processing external entity reference"); 2453 case XML_ERROR_NOT_STANDALONE: 2454 return XML_L("document is not standalone"); 2455 case XML_ERROR_UNEXPECTED_STATE: 2456 return XML_L("unexpected parser state - please send a bug report"); 2457 case XML_ERROR_ENTITY_DECLARED_IN_PE: 2458 return XML_L("entity declared in parameter entity"); 2459 case XML_ERROR_FEATURE_REQUIRES_XML_DTD: 2460 return XML_L("requested feature requires XML_DTD support in Expat"); 2461 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: 2462 return XML_L("cannot change setting once parsing has begun"); 2463 /* Added in 1.95.7. 
*/ 2464 case XML_ERROR_UNBOUND_PREFIX: 2465 return XML_L("unbound prefix"); 2466 /* Added in 1.95.8. */ 2467 case XML_ERROR_UNDECLARING_PREFIX: 2468 return XML_L("must not undeclare prefix"); 2469 case XML_ERROR_INCOMPLETE_PE: 2470 return XML_L("incomplete markup in parameter entity"); 2471 case XML_ERROR_XML_DECL: 2472 return XML_L("XML declaration not well-formed"); 2473 case XML_ERROR_TEXT_DECL: 2474 return XML_L("text declaration not well-formed"); 2475 case XML_ERROR_PUBLICID: 2476 return XML_L("illegal character(s) in public id"); 2477 case XML_ERROR_SUSPENDED: 2478 return XML_L("parser suspended"); 2479 case XML_ERROR_NOT_SUSPENDED: 2480 return XML_L("parser not suspended"); 2481 case XML_ERROR_ABORTED: 2482 return XML_L("parsing aborted"); 2483 case XML_ERROR_FINISHED: 2484 return XML_L("parsing finished"); 2485 case XML_ERROR_SUSPEND_PE: 2486 return XML_L("cannot suspend in external parameter entity"); 2487 /* Added in 2.0.0. */ 2488 case XML_ERROR_RESERVED_PREFIX_XML: 2489 return XML_L( 2490 "reserved prefix (xml) must not be undeclared or bound to another namespace name"); 2491 case XML_ERROR_RESERVED_PREFIX_XMLNS: 2492 return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); 2493 case XML_ERROR_RESERVED_NAMESPACE_URI: 2494 return XML_L( 2495 "prefix must not be bound to one of the reserved namespace names"); 2496 /* Added in 2.2.5. */ 2497 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ 2498 return XML_L("invalid argument"); 2499 /* Added in 2.3.0. */ 2500 case XML_ERROR_NO_BUFFER: 2501 return XML_L( 2502 "a successful prior call to function XML_GetBuffer is required"); 2503 /* Added in 2.4.0. */ 2504 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: 2505 return XML_L( 2506 "limit on input amplification factor (from DTD and entities) breached"); 2507 } 2508 return NULL; 2509 } 2510 2511 const XML_LChar *XMLCALL 2512 XML_ExpatVersion(void) { 2513 /* V1 is used to string-ize the version number. 
However, it would 2514 string-ize the actual version macro *names* unless we get them 2515 substituted before being passed to V1. CPP is defined to expand 2516 a macro, then rescan for more expansions. Thus, we use V2 to expand 2517 the version macros, then CPP will expand the resulting V1() macro 2518 with the correct numerals. */ 2519 /* ### I'm assuming cpp is portable in this respect... */ 2520 2521 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) 2522 #define V2(a, b, c) XML_L("expat_") V1(a, b, c) 2523 2524 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); 2525 2526 #undef V1 2527 #undef V2 2528 } 2529 2530 XML_Expat_Version XMLCALL 2531 XML_ExpatVersionInfo(void) { 2532 XML_Expat_Version version; 2533 2534 version.major = XML_MAJOR_VERSION; 2535 version.minor = XML_MINOR_VERSION; 2536 version.micro = XML_MICRO_VERSION; 2537 2538 return version; 2539 } 2540 2541 const XML_Feature *XMLCALL 2542 XML_GetFeatureList(void) { 2543 static const XML_Feature features[] = { 2544 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), 2545 sizeof(XML_Char)}, 2546 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), 2547 sizeof(XML_LChar)}, 2548 #ifdef XML_UNICODE 2549 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, 2550 #endif 2551 #ifdef XML_UNICODE_WCHAR_T 2552 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, 2553 #endif 2554 #ifdef XML_DTD 2555 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, 2556 #endif 2557 #if XML_CONTEXT_BYTES > 0 2558 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), 2559 XML_CONTEXT_BYTES}, 2560 #endif 2561 #ifdef XML_MIN_SIZE 2562 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, 2563 #endif 2564 #ifdef XML_NS 2565 {XML_FEATURE_NS, XML_L("XML_NS"), 0}, 2566 #endif 2567 #ifdef XML_LARGE_SIZE 2568 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, 2569 #endif 2570 #ifdef XML_ATTR_INFO 2571 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, 2572 #endif 2573 #if XML_GE == 1 2574 /* 
Added in Expat 2.4.0 for XML_DTD defined and 2575 * added in Expat 2.6.0 for XML_GE == 1. */ 2576 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, 2577 XML_L("XML_BLAP_MAX_AMP"), 2578 (long int) 2579 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, 2580 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, 2581 XML_L("XML_BLAP_ACT_THRES"), 2582 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, 2583 /* Added in Expat 2.6.0. */ 2584 {XML_FEATURE_GE, XML_L("XML_GE"), 0}, 2585 #endif 2586 {XML_FEATURE_END, NULL, 0}}; 2587 2588 return features; 2589 } 2590 2591 #if XML_GE == 1 2592 XML_Bool XMLCALL 2593 XML_SetBillionLaughsAttackProtectionMaximumAmplification( 2594 XML_Parser parser, float maximumAmplificationFactor) { 2595 if ((parser == NULL) || (parser->m_parentParser != NULL) 2596 || isnan(maximumAmplificationFactor) 2597 || (maximumAmplificationFactor < 1.0f)) { 2598 return XML_FALSE; 2599 } 2600 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; 2601 return XML_TRUE; 2602 } 2603 2604 XML_Bool XMLCALL 2605 XML_SetBillionLaughsAttackProtectionActivationThreshold( 2606 XML_Parser parser, unsigned long long activationThresholdBytes) { 2607 if ((parser == NULL) || (parser->m_parentParser != NULL)) { 2608 return XML_FALSE; 2609 } 2610 parser->m_accounting.activationThresholdBytes = activationThresholdBytes; 2611 return XML_TRUE; 2612 } 2613 #endif /* XML_GE == 1 */ 2614 2615 XML_Bool XMLCALL 2616 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { 2617 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { 2618 parser->m_reparseDeferralEnabled = enabled; 2619 return XML_TRUE; 2620 } 2621 return XML_FALSE; 2622 } 2623 2624 /* Initially tag->rawName always points into the parse buffer; 2625 for those TAG instances opened while the current parse buffer was 2626 processed, and not yet closed, we need to store tag->rawName in 
a more
   permanent location, since the parse buffer is about to be discarded.
*/
/* Walks the open-tag stack from the top and copies each tag's raw name
   into that tag's own buffer, directly behind the stored name, growing the
   buffer with REALLOC when it is too small.  Returns XML_FALSE on integer
   overflow or allocation failure, XML_TRUE otherwise. */
static XML_Bool
storeRawNames(XML_Parser parser) {
  TAG *tag = parser->m_tagStack;
  while (tag) {
    int bufSize;
    /* Bytes occupied by the name plus one extra XML_Char. */
    int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
    size_t rawNameLen;
    /* Destination for the raw name: right behind the name in tag->buf. */
    char *rawNameBuf = tag->buf + nameLen;
    /* Stop if already stored.  Since m_tagStack is a stack, we can stop
       at the first entry that has already been copied; everything
       below it in the stack has already been accounted for in a
       previous call to this function.
    */
    if (tag->rawName == rawNameBuf)
      break;
    /* For reuse purposes we need to ensure that the
       size of tag->buf is a multiple of sizeof(XML_Char).
    */
    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
    /* Detect and prevent integer overflow. */
    if (rawNameLen > (size_t)INT_MAX - nameLen)
      return XML_FALSE;
    bufSize = nameLen + (int)rawNameLen;
    if (bufSize > tag->bufEnd - tag->buf) {
      char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
      if (temp == NULL)
        return XML_FALSE;
      /* if tag->name.str points to tag->buf (only when namespace
         processing is off) then we have to update it
      */
      if (tag->name.str == (XML_Char *)tag->buf)
        tag->name.str = (XML_Char *)temp;
      /* if tag->name.localPart is set (when namespace processing is on)
         then update it as well, since it will always point into tag->buf
      */
      if (tag->name.localPart)
        tag->name.localPart
            = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
      tag->buf = temp;
      tag->bufEnd = temp + bufSize;
      rawNameBuf = temp + nameLen;
    }
    memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
    tag->rawName = rawNameBuf;
    tag = tag->parent;
  }
  return XML_TRUE;
}

/* Processor for document content: runs doContent() with start tag level 0
   and XML_ACCOUNT_DIRECT accounting, then, on success, saves the raw names
   of the tags on the tag stack via storeRawNames().  Returns
   XML_ERROR_NO_MEMORY if that fails. */
static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
                 const char **endPtr) {
  enum XML_Error result = doContent(
      parser, 0, parser->m_encoding, start, end, endPtr,
      (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
  if (result == XML_ERROR_NONE) {
    if (! storeRawNames(parser))
      return XML_ERROR_NO_MEMORY;
  }
  return result;
}

/* First stage for an external entity: initialize the encoding, then
   install and immediately run the second stage. */
static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser, const char *start,
                            const char *end, const char **endPtr) {
  enum XML_Error result = initializeEncoding(parser);
  if (result != XML_ERROR_NONE)
    return result;
  parser->m_processor = externalEntityInitProcessor2;
  return externalEntityInitProcessor2(parser, start, end, endPtr);
}

/* Second stage for an external entity: consume a leading byte order mark
   if present, report partial tokens (waiting for more input unless this is
   the final buffer), then install and run the third stage. */
static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser, const char *start,
                             const char *end, const char **endPtr) {
  const char *next = start; /* XmlContentTok doesn't always set the last arg */
  int tok = XmlContentTok(parser->m_encoding, start, end, &next);
  switch (tok) {
  case XML_TOK_BOM:
#if XML_GE == 1
    if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
                                  XML_ACCOUNT_DIRECT)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif /* XML_GE == 1 */

    /* If we are at the end of the buffer, this would cause the next stage,
       i.e. externalEntityInitProcessor3, to pass control directly to
       doContent (by detecting XML_TOK_NONE) without processing any xml text
       declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
    */
    if (next == end && ! parser->m_parsingStatus.finalBuffer) {
      *endPtr = next;
      return XML_ERROR_NONE;
    }
    start = next;
    break;
  case XML_TOK_PARTIAL:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    parser->m_eventPtr = start;
    return XML_ERROR_UNCLOSED_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    parser->m_eventPtr = start;
    return XML_ERROR_PARTIAL_CHAR;
  }
  parser->m_processor = externalEntityInitProcessor3;
  return externalEntityInitProcessor3(parser, start, end, endPtr);
}

/* Third stage for an external entity: process a leading declaration token
   (XML_TOK_XML_DECL) via processXmlDecl() if present, report partial
   tokens (waiting for more input unless this is the final buffer), then
   set the tag level to 1 and install and run
   externalEntityContentProcessor. */
static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser, const char *start,
                             const char *end, const char **endPtr) {
  int tok;
  const char *next = start; /* XmlContentTok doesn't always set the last arg */
  parser->m_eventPtr = start;
  tok = XmlContentTok(parser->m_encoding, start, end, &next);
  /* Note: These bytes are accounted later in:
           - processXmlDecl
           - externalEntityContentProcessor
  */
  parser->m_eventEndPtr = next;

  switch (tok) {
  case XML_TOK_XML_DECL: {
    enum XML_Error result;
    result = processXmlDecl(parser, 1, start, next);
    if (result != XML_ERROR_NONE)
      return result;
    /* The parsing state is re-checked after processXmlDecl() returns. */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *endPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:
      start = next;
    }
  } break;
  case XML_TOK_PARTIAL:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_UNCLOSED_TOKEN;
  case XML_TOK_PARTIAL_CHAR:
    if (! parser->m_parsingStatus.finalBuffer) {
      *endPtr = start;
      return XML_ERROR_NONE;
    }
    return XML_ERROR_PARTIAL_CHAR;
  }
  parser->m_processor = externalEntityContentProcessor;
  parser->m_tagLevel = 1;
  return externalEntityContentProcessor(parser, start, end, endPtr);
}

/* Processor for the content of an external entity: runs doContent() with
   start tag level 1 and XML_ACCOUNT_ENTITY_EXPANSION accounting, then, on
   success, saves the raw names of the tags on the tag stack via
   storeRawNames().  Returns XML_ERROR_NO_MEMORY if that fails. */
static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser, const char *start,
                               const char *end, const char **endPtr) {
  enum XML_Error result
      = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
                  (XML_Bool)! parser->m_parsingStatus.finalBuffer,
                  XML_ACCOUNT_ENTITY_EXPANSION);
  if (result == XML_ERROR_NONE) {
    if (! storeRawNames(parser))
      return XML_ERROR_NO_MEMORY;
  }
  return result;
}

static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#if XML_GE == 1
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (!
accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, 2834 account)) { 2835 accountingOnAbort(parser); 2836 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 2837 } 2838 #endif 2839 *eventEndPP = next; 2840 switch (tok) { 2841 case XML_TOK_TRAILING_CR: 2842 if (haveMore) { 2843 *nextPtr = s; 2844 return XML_ERROR_NONE; 2845 } 2846 *eventEndPP = end; 2847 if (parser->m_characterDataHandler) { 2848 XML_Char c = 0xA; 2849 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 2850 } else if (parser->m_defaultHandler) 2851 reportDefault(parser, enc, s, end); 2852 /* We are at the end of the final buffer, should we check for 2853 XML_SUSPENDED, XML_FINISHED? 2854 */ 2855 if (startTagLevel == 0) 2856 return XML_ERROR_NO_ELEMENTS; 2857 if (parser->m_tagLevel != startTagLevel) 2858 return XML_ERROR_ASYNC_ENTITY; 2859 *nextPtr = end; 2860 return XML_ERROR_NONE; 2861 case XML_TOK_NONE: 2862 if (haveMore) { 2863 *nextPtr = s; 2864 return XML_ERROR_NONE; 2865 } 2866 if (startTagLevel > 0) { 2867 if (parser->m_tagLevel != startTagLevel) 2868 return XML_ERROR_ASYNC_ENTITY; 2869 *nextPtr = s; 2870 return XML_ERROR_NONE; 2871 } 2872 return XML_ERROR_NO_ELEMENTS; 2873 case XML_TOK_INVALID: 2874 *eventPP = next; 2875 return XML_ERROR_INVALID_TOKEN; 2876 case XML_TOK_PARTIAL: 2877 if (haveMore) { 2878 *nextPtr = s; 2879 return XML_ERROR_NONE; 2880 } 2881 return XML_ERROR_UNCLOSED_TOKEN; 2882 case XML_TOK_PARTIAL_CHAR: 2883 if (haveMore) { 2884 *nextPtr = s; 2885 return XML_ERROR_NONE; 2886 } 2887 return XML_ERROR_PARTIAL_CHAR; 2888 case XML_TOK_ENTITY_REF: { 2889 const XML_Char *name; 2890 ENTITY *entity; 2891 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 2892 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); 2893 if (ch) { 2894 #if XML_GE == 1 2895 /* NOTE: We are replacing 4-6 characters original input for 1 character 2896 * so there is no amplification and hence recording without 2897 * protection. 
*/ 2898 accountingDiffTolerated(parser, tok, (char *)&ch, 2899 ((char *)&ch) + sizeof(XML_Char), __LINE__, 2900 XML_ACCOUNT_ENTITY_EXPANSION); 2901 #endif /* XML_GE == 1 */ 2902 if (parser->m_characterDataHandler) 2903 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); 2904 else if (parser->m_defaultHandler) 2905 reportDefault(parser, enc, s, next); 2906 break; 2907 } 2908 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, 2909 next - enc->minBytesPerChar); 2910 if (! name) 2911 return XML_ERROR_NO_MEMORY; 2912 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 2913 poolDiscard(&dtd->pool); 2914 /* First, determine if a check for an existing declaration is needed; 2915 if yes, check that the entity exists, and that it is internal, 2916 otherwise call the skipped entity or default handler. 2917 */ 2918 if (! dtd->hasParamEntityRefs || dtd->standalone) { 2919 if (! entity) 2920 return XML_ERROR_UNDEFINED_ENTITY; 2921 else if (! entity->is_internal) 2922 return XML_ERROR_ENTITY_DECLARED_IN_PE; 2923 } else if (! entity) { 2924 if (parser->m_skippedEntityHandler) 2925 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 2926 else if (parser->m_defaultHandler) 2927 reportDefault(parser, enc, s, next); 2928 break; 2929 } 2930 if (entity->open) 2931 return XML_ERROR_RECURSIVE_ENTITY_REF; 2932 if (entity->notation) 2933 return XML_ERROR_BINARY_ENTITY_REF; 2934 if (entity->textPtr) { 2935 enum XML_Error result; 2936 if (! 
parser->m_defaultExpandInternalEntities) { 2937 if (parser->m_skippedEntityHandler) 2938 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 2939 0); 2940 else if (parser->m_defaultHandler) 2941 reportDefault(parser, enc, s, next); 2942 break; 2943 } 2944 result = processInternalEntity(parser, entity, XML_FALSE); 2945 if (result != XML_ERROR_NONE) 2946 return result; 2947 } else if (parser->m_externalEntityRefHandler) { 2948 const XML_Char *context; 2949 entity->open = XML_TRUE; 2950 context = getContext(parser); 2951 entity->open = XML_FALSE; 2952 if (! context) 2953 return XML_ERROR_NO_MEMORY; 2954 if (! parser->m_externalEntityRefHandler( 2955 parser->m_externalEntityRefHandlerArg, context, entity->base, 2956 entity->systemId, entity->publicId)) 2957 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 2958 poolDiscard(&parser->m_tempPool); 2959 } else if (parser->m_defaultHandler) 2960 reportDefault(parser, enc, s, next); 2961 break; 2962 } 2963 case XML_TOK_START_TAG_NO_ATTS: 2964 /* fall through */ 2965 case XML_TOK_START_TAG_WITH_ATTS: { 2966 TAG *tag; 2967 enum XML_Error result; 2968 XML_Char *toPtr; 2969 if (parser->m_freeTagList) { 2970 tag = parser->m_freeTagList; 2971 parser->m_freeTagList = parser->m_freeTagList->parent; 2972 } else { 2973 tag = (TAG *)MALLOC(parser, sizeof(TAG)); 2974 if (! tag) 2975 return XML_ERROR_NO_MEMORY; 2976 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); 2977 if (! 
tag->buf) { 2978 FREE(parser, tag); 2979 return XML_ERROR_NO_MEMORY; 2980 } 2981 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; 2982 } 2983 tag->bindings = NULL; 2984 tag->parent = parser->m_tagStack; 2985 parser->m_tagStack = tag; 2986 tag->name.localPart = NULL; 2987 tag->name.prefix = NULL; 2988 tag->rawName = s + enc->minBytesPerChar; 2989 tag->rawNameLength = XmlNameLength(enc, tag->rawName); 2990 ++parser->m_tagLevel; 2991 { 2992 const char *rawNameEnd = tag->rawName + tag->rawNameLength; 2993 const char *fromPtr = tag->rawName; 2994 toPtr = (XML_Char *)tag->buf; 2995 for (;;) { 2996 int bufSize; 2997 int convLen; 2998 const enum XML_Convert_Result convert_res 2999 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, 3000 (ICHAR *)tag->bufEnd - 1); 3001 convLen = (int)(toPtr - (XML_Char *)tag->buf); 3002 if ((fromPtr >= rawNameEnd) 3003 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { 3004 tag->name.strLen = convLen; 3005 break; 3006 } 3007 bufSize = (int)(tag->bufEnd - tag->buf) << 1; 3008 { 3009 char *temp = (char *)REALLOC(parser, tag->buf, bufSize); 3010 if (temp == NULL) 3011 return XML_ERROR_NO_MEMORY; 3012 tag->buf = temp; 3013 tag->bufEnd = temp + bufSize; 3014 toPtr = (XML_Char *)temp + convLen; 3015 } 3016 } 3017 } 3018 tag->name.str = (XML_Char *)tag->buf; 3019 *toPtr = XML_T('\0'); 3020 result 3021 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); 3022 if (result) 3023 return result; 3024 if (parser->m_startElementHandler) 3025 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, 3026 (const XML_Char **)parser->m_atts); 3027 else if (parser->m_defaultHandler) 3028 reportDefault(parser, enc, s, next); 3029 poolClear(&parser->m_tempPool); 3030 break; 3031 } 3032 case XML_TOK_EMPTY_ELEMENT_NO_ATTS: 3033 /* fall through */ 3034 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { 3035 const char *rawName = s + enc->minBytesPerChar; 3036 enum XML_Error result; 3037 BINDING *bindings = NULL; 3038 XML_Bool noElmHandlers = 
XML_TRUE; 3039 TAG_NAME name; 3040 name.str = poolStoreString(&parser->m_tempPool, enc, rawName, 3041 rawName + XmlNameLength(enc, rawName)); 3042 if (! name.str) 3043 return XML_ERROR_NO_MEMORY; 3044 poolFinish(&parser->m_tempPool); 3045 result = storeAtts(parser, enc, s, &name, &bindings, 3046 XML_ACCOUNT_NONE /* token spans whole start tag */); 3047 if (result != XML_ERROR_NONE) { 3048 freeBindings(parser, bindings); 3049 return result; 3050 } 3051 poolFinish(&parser->m_tempPool); 3052 if (parser->m_startElementHandler) { 3053 parser->m_startElementHandler(parser->m_handlerArg, name.str, 3054 (const XML_Char **)parser->m_atts); 3055 noElmHandlers = XML_FALSE; 3056 } 3057 if (parser->m_endElementHandler) { 3058 if (parser->m_startElementHandler) 3059 *eventPP = *eventEndPP; 3060 parser->m_endElementHandler(parser->m_handlerArg, name.str); 3061 noElmHandlers = XML_FALSE; 3062 } 3063 if (noElmHandlers && parser->m_defaultHandler) 3064 reportDefault(parser, enc, s, next); 3065 poolClear(&parser->m_tempPool); 3066 freeBindings(parser, bindings); 3067 } 3068 if ((parser->m_tagLevel == 0) 3069 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3070 if (parser->m_parsingStatus.parsing == XML_SUSPENDED) 3071 parser->m_processor = epilogProcessor; 3072 else 3073 return epilogProcessor(parser, next, end, nextPtr); 3074 } 3075 break; 3076 case XML_TOK_END_TAG: 3077 if (parser->m_tagLevel == startTagLevel) 3078 return XML_ERROR_ASYNC_ENTITY; 3079 else { 3080 int len; 3081 const char *rawName; 3082 TAG *tag = parser->m_tagStack; 3083 rawName = s + enc->minBytesPerChar * 2; 3084 len = XmlNameLength(enc, rawName); 3085 if (len != tag->rawNameLength 3086 || memcmp(tag->rawName, rawName, len) != 0) { 3087 *eventPP = rawName; 3088 return XML_ERROR_TAG_MISMATCH; 3089 } 3090 parser->m_tagStack = tag->parent; 3091 tag->parent = parser->m_freeTagList; 3092 parser->m_freeTagList = tag; 3093 --parser->m_tagLevel; 3094 if (parser->m_endElementHandler) { 3095 const XML_Char 
*localPart; 3096 const XML_Char *prefix; 3097 XML_Char *uri; 3098 localPart = tag->name.localPart; 3099 if (parser->m_ns && localPart) { 3100 /* localPart and prefix may have been overwritten in 3101 tag->name.str, since this points to the binding->uri 3102 buffer which gets reused; so we have to add them again 3103 */ 3104 uri = (XML_Char *)tag->name.str + tag->name.uriLen; 3105 /* don't need to check for space - already done in storeAtts() */ 3106 while (*localPart) 3107 *uri++ = *localPart++; 3108 prefix = tag->name.prefix; 3109 if (parser->m_ns_triplets && prefix) { 3110 *uri++ = parser->m_namespaceSeparator; 3111 while (*prefix) 3112 *uri++ = *prefix++; 3113 } 3114 *uri = XML_T('\0'); 3115 } 3116 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); 3117 } else if (parser->m_defaultHandler) 3118 reportDefault(parser, enc, s, next); 3119 while (tag->bindings) { 3120 BINDING *b = tag->bindings; 3121 if (parser->m_endNamespaceDeclHandler) 3122 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, 3123 b->prefix->name); 3124 tag->bindings = tag->bindings->nextTagBinding; 3125 b->nextTagBinding = parser->m_freeBindingList; 3126 parser->m_freeBindingList = b; 3127 b->prefix->binding = b->prevPrefixBinding; 3128 } 3129 if ((parser->m_tagLevel == 0) 3130 && (parser->m_parsingStatus.parsing != XML_FINISHED)) { 3131 if (parser->m_parsingStatus.parsing == XML_SUSPENDED) 3132 parser->m_processor = epilogProcessor; 3133 else 3134 return epilogProcessor(parser, next, end, nextPtr); 3135 } 3136 } 3137 break; 3138 case XML_TOK_CHAR_REF: { 3139 int n = XmlCharRefNumber(enc, s); 3140 if (n < 0) 3141 return XML_ERROR_BAD_CHAR_REF; 3142 if (parser->m_characterDataHandler) { 3143 XML_Char buf[XML_ENCODE_MAX]; 3144 parser->m_characterDataHandler(parser->m_handlerArg, buf, 3145 XmlEncode(n, (ICHAR *)buf)); 3146 } else if (parser->m_defaultHandler) 3147 reportDefault(parser, enc, s, next); 3148 } break; 3149 case XML_TOK_XML_DECL: 3150 return XML_ERROR_MISPLACED_XML_PI; 
3151 case XML_TOK_DATA_NEWLINE: 3152 if (parser->m_characterDataHandler) { 3153 XML_Char c = 0xA; 3154 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 3155 } else if (parser->m_defaultHandler) 3156 reportDefault(parser, enc, s, next); 3157 break; 3158 case XML_TOK_CDATA_SECT_OPEN: { 3159 enum XML_Error result; 3160 if (parser->m_startCdataSectionHandler) 3161 parser->m_startCdataSectionHandler(parser->m_handlerArg); 3162 /* BEGIN disabled code */ 3163 /* Suppose you doing a transformation on a document that involves 3164 changing only the character data. You set up a defaultHandler 3165 and a characterDataHandler. The defaultHandler simply copies 3166 characters through. The characterDataHandler does the 3167 transformation and writes the characters out escaping them as 3168 necessary. This case will fail to work if we leave out the 3169 following two lines (because & and < inside CDATA sections will 3170 be incorrectly escaped). 3171 3172 However, now we have a start/endCdataSectionHandler, so it seems 3173 easier to let the user deal with this. 3174 */ 3175 else if ((0) && parser->m_characterDataHandler) 3176 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3177 0); 3178 /* END disabled code */ 3179 else if (parser->m_defaultHandler) 3180 reportDefault(parser, enc, s, next); 3181 result 3182 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); 3183 if (result != XML_ERROR_NONE) 3184 return result; 3185 else if (! 
next) { 3186 parser->m_processor = cdataSectionProcessor; 3187 return result; 3188 } 3189 } break; 3190 case XML_TOK_TRAILING_RSQB: 3191 if (haveMore) { 3192 *nextPtr = s; 3193 return XML_ERROR_NONE; 3194 } 3195 if (parser->m_characterDataHandler) { 3196 if (MUST_CONVERT(enc, s)) { 3197 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3198 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3199 parser->m_characterDataHandler( 3200 parser->m_handlerArg, parser->m_dataBuf, 3201 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3202 } else 3203 parser->m_characterDataHandler( 3204 parser->m_handlerArg, (const XML_Char *)s, 3205 (int)((const XML_Char *)end - (const XML_Char *)s)); 3206 } else if (parser->m_defaultHandler) 3207 reportDefault(parser, enc, s, end); 3208 /* We are at the end of the final buffer, should we check for 3209 XML_SUSPENDED, XML_FINISHED? 3210 */ 3211 if (startTagLevel == 0) { 3212 *eventPP = end; 3213 return XML_ERROR_NO_ELEMENTS; 3214 } 3215 if (parser->m_tagLevel != startTagLevel) { 3216 *eventPP = end; 3217 return XML_ERROR_ASYNC_ENTITY; 3218 } 3219 *nextPtr = end; 3220 return XML_ERROR_NONE; 3221 case XML_TOK_DATA_CHARS: { 3222 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 3223 if (charDataHandler) { 3224 if (MUST_CONVERT(enc, s)) { 3225 for (;;) { 3226 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 3227 const enum XML_Convert_Result convert_res = XmlConvert( 3228 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 3229 *eventEndPP = s; 3230 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 3231 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 3232 if ((convert_res == XML_CONVERT_COMPLETED) 3233 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 3234 break; 3235 *eventPP = s; 3236 } 3237 } else 3238 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 3239 (int)((const XML_Char *)next - (const XML_Char *)s)); 3240 } else if (parser->m_defaultHandler) 3241 reportDefault(parser, enc, s, next); 
3242 } break; 3243 case XML_TOK_PI: 3244 if (! reportProcessingInstruction(parser, enc, s, next)) 3245 return XML_ERROR_NO_MEMORY; 3246 break; 3247 case XML_TOK_COMMENT: 3248 if (! reportComment(parser, enc, s, next)) 3249 return XML_ERROR_NO_MEMORY; 3250 break; 3251 default: 3252 /* All of the tokens produced by XmlContentTok() have their own 3253 * explicit cases, so this default is not strictly necessary. 3254 * However it is a useful safety net, so we retain the code and 3255 * simply exclude it from the coverage tests. 3256 * 3257 * LCOV_EXCL_START 3258 */ 3259 if (parser->m_defaultHandler) 3260 reportDefault(parser, enc, s, next); 3261 break; 3262 /* LCOV_EXCL_STOP */ 3263 } 3264 *eventPP = s = next; 3265 switch (parser->m_parsingStatus.parsing) { 3266 case XML_SUSPENDED: 3267 *nextPtr = next; 3268 return XML_ERROR_NONE; 3269 case XML_FINISHED: 3270 return XML_ERROR_ABORTED; 3271 default:; 3272 } 3273 } 3274 /* not reached */ 3275 } 3276 3277 /* This function does not call free() on the allocated memory, merely 3278 * moving it to the parser's m_freeBindingList where it can be freed or 3279 * reused as appropriate. 3280 */ 3281 static void 3282 freeBindings(XML_Parser parser, BINDING *bindings) { 3283 while (bindings) { 3284 BINDING *b = bindings; 3285 3286 /* m_startNamespaceDeclHandler will have been called for this 3287 * binding in addBindings(), so call the end handler now. 
3288 */ 3289 if (parser->m_endNamespaceDeclHandler) 3290 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); 3291 3292 bindings = bindings->nextTagBinding; 3293 b->nextTagBinding = parser->m_freeBindingList; 3294 parser->m_freeBindingList = b; 3295 b->prefix->binding = b->prevPrefixBinding; 3296 } 3297 } 3298 3299 /* Precondition: all arguments must be non-NULL; 3300 Purpose: 3301 - normalize attributes 3302 - check attributes for well-formedness 3303 - generate namespace aware attribute names (URI, prefix) 3304 - build list of attributes for startElementHandler 3305 - default attributes 3306 - process namespace declarations (check and report them) 3307 - generate namespace aware element name (URI, prefix) 3308 */ 3309 static enum XML_Error 3310 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, 3311 TAG_NAME *tagNamePtr, BINDING **bindingsPtr, 3312 enum XML_Account account) { 3313 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 3314 ELEMENT_TYPE *elementType; 3315 int nDefaultAtts; 3316 const XML_Char **appAtts; /* the attribute list for the application */ 3317 int attIndex = 0; 3318 int prefixLen; 3319 int i; 3320 int n; 3321 XML_Char *uri; 3322 int nPrefixes = 0; 3323 BINDING *binding; 3324 const XML_Char *localPart; 3325 3326 /* lookup the element type name */ 3327 elementType 3328 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); 3329 if (! elementType) { 3330 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); 3331 if (! name) 3332 return XML_ERROR_NO_MEMORY; 3333 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 3334 sizeof(ELEMENT_TYPE)); 3335 if (! elementType) 3336 return XML_ERROR_NO_MEMORY; 3337 if (parser->m_ns && ! 
setElementTypePrefix(parser, elementType)) 3338 return XML_ERROR_NO_MEMORY; 3339 } 3340 nDefaultAtts = elementType->nDefaultAtts; 3341 3342 /* get the attributes from the tokenizer */ 3343 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); 3344 3345 /* Detect and prevent integer overflow */ 3346 if (n > INT_MAX - nDefaultAtts) { 3347 return XML_ERROR_NO_MEMORY; 3348 } 3349 3350 if (n + nDefaultAtts > parser->m_attsSize) { 3351 int oldAttsSize = parser->m_attsSize; 3352 ATTRIBUTE *temp; 3353 #ifdef XML_ATTR_INFO 3354 XML_AttrInfo *temp2; 3355 #endif 3356 3357 /* Detect and prevent integer overflow */ 3358 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) 3359 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { 3360 return XML_ERROR_NO_MEMORY; 3361 } 3362 3363 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; 3364 3365 /* Detect and prevent integer overflow. 3366 * The preprocessor guard addresses the "always false" warning 3367 * from -Wtype-limits on platforms where 3368 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3369 #if UINT_MAX >= SIZE_MAX 3370 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { 3371 parser->m_attsSize = oldAttsSize; 3372 return XML_ERROR_NO_MEMORY; 3373 } 3374 #endif 3375 3376 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, 3377 parser->m_attsSize * sizeof(ATTRIBUTE)); 3378 if (temp == NULL) { 3379 parser->m_attsSize = oldAttsSize; 3380 return XML_ERROR_NO_MEMORY; 3381 } 3382 parser->m_atts = temp; 3383 #ifdef XML_ATTR_INFO 3384 /* Detect and prevent integer overflow. 3385 * The preprocessor guard addresses the "always false" warning 3386 * from -Wtype-limits on platforms where 3387 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 3388 # if UINT_MAX >= SIZE_MAX 3389 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { 3390 parser->m_attsSize = oldAttsSize; 3391 return XML_ERROR_NO_MEMORY; 3392 } 3393 # endif 3394 3395 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, 3396 parser->m_attsSize * sizeof(XML_AttrInfo)); 3397 if (temp2 == NULL) { 3398 parser->m_attsSize = oldAttsSize; 3399 return XML_ERROR_NO_MEMORY; 3400 } 3401 parser->m_attInfo = temp2; 3402 #endif 3403 if (n > oldAttsSize) 3404 XmlGetAttributes(enc, attStr, n, parser->m_atts); 3405 } 3406 3407 appAtts = (const XML_Char **)parser->m_atts; 3408 for (i = 0; i < n; i++) { 3409 ATTRIBUTE *currAtt = &parser->m_atts[i]; 3410 #ifdef XML_ATTR_INFO 3411 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; 3412 #endif 3413 /* add the name and value to the attribute list */ 3414 ATTRIBUTE_ID *attId 3415 = getAttributeId(parser, enc, currAtt->name, 3416 currAtt->name + XmlNameLength(enc, currAtt->name)); 3417 if (! attId) 3418 return XML_ERROR_NO_MEMORY; 3419 #ifdef XML_ATTR_INFO 3420 currAttInfo->nameStart 3421 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); 3422 currAttInfo->nameEnd 3423 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); 3424 currAttInfo->valueStart = parser->m_parseEndByteIndex 3425 - (parser->m_parseEndPtr - currAtt->valuePtr); 3426 currAttInfo->valueEnd = parser->m_parseEndByteIndex 3427 - (parser->m_parseEndPtr - currAtt->valueEnd); 3428 #endif 3429 /* Detect duplicate attributes by their QNames. This does not work when 3430 namespace processing is turned on and different prefixes for the same 3431 namespace are used. For this case we have a check further down. 3432 */ 3433 if ((attId->name)[-1]) { 3434 if (enc == parser->m_encoding) 3435 parser->m_eventPtr = parser->m_atts[i].name; 3436 return XML_ERROR_DUPLICATE_ATTRIBUTE; 3437 } 3438 (attId->name)[-1] = 1; 3439 appAtts[attIndex++] = attId->name; 3440 if (! 
parser->m_atts[i].normalized) { 3441 enum XML_Error result; 3442 XML_Bool isCdata = XML_TRUE; 3443 3444 /* figure out whether declared as other than CDATA */ 3445 if (attId->maybeTokenized) { 3446 int j; 3447 for (j = 0; j < nDefaultAtts; j++) { 3448 if (attId == elementType->defaultAtts[j].id) { 3449 isCdata = elementType->defaultAtts[j].isCdata; 3450 break; 3451 } 3452 } 3453 } 3454 3455 /* normalize the attribute value */ 3456 result = storeAttributeValue( 3457 parser, enc, isCdata, parser->m_atts[i].valuePtr, 3458 parser->m_atts[i].valueEnd, &parser->m_tempPool, account); 3459 if (result) 3460 return result; 3461 appAtts[attIndex] = poolStart(&parser->m_tempPool); 3462 poolFinish(&parser->m_tempPool); 3463 } else { 3464 /* the value did not need normalizing */ 3465 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, 3466 parser->m_atts[i].valuePtr, 3467 parser->m_atts[i].valueEnd); 3468 if (appAtts[attIndex] == 0) 3469 return XML_ERROR_NO_MEMORY; 3470 poolFinish(&parser->m_tempPool); 3471 } 3472 /* handle prefixed attribute names */ 3473 if (attId->prefix) { 3474 if (attId->xmlns) { 3475 /* deal with namespace declarations here */ 3476 enum XML_Error result = addBinding(parser, attId->prefix, attId, 3477 appAtts[attIndex], bindingsPtr); 3478 if (result) 3479 return result; 3480 --attIndex; 3481 } else { 3482 /* deal with other prefixed names later */ 3483 attIndex++; 3484 nPrefixes++; 3485 (attId->name)[-1] = 2; 3486 } 3487 } else 3488 attIndex++; 3489 } 3490 3491 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ 3492 parser->m_nSpecifiedAtts = attIndex; 3493 if (elementType->idAtt && (elementType->idAtt->name)[-1]) { 3494 for (i = 0; i < attIndex; i += 2) 3495 if (appAtts[i] == elementType->idAtt->name) { 3496 parser->m_idAttIndex = i; 3497 break; 3498 } 3499 } else 3500 parser->m_idAttIndex = -1; 3501 3502 /* do attribute defaulting */ 3503 for (i = 0; i < nDefaultAtts; i++) { 3504 const DEFAULT_ATTRIBUTE *da = 
elementType->defaultAtts + i; 3505 if (! (da->id->name)[-1] && da->value) { 3506 if (da->id->prefix) { 3507 if (da->id->xmlns) { 3508 enum XML_Error result = addBinding(parser, da->id->prefix, da->id, 3509 da->value, bindingsPtr); 3510 if (result) 3511 return result; 3512 } else { 3513 (da->id->name)[-1] = 2; 3514 nPrefixes++; 3515 appAtts[attIndex++] = da->id->name; 3516 appAtts[attIndex++] = da->value; 3517 } 3518 } else { 3519 (da->id->name)[-1] = 1; 3520 appAtts[attIndex++] = da->id->name; 3521 appAtts[attIndex++] = da->value; 3522 } 3523 } 3524 } 3525 appAtts[attIndex] = 0; 3526 3527 /* expand prefixed attribute names, check for duplicates, 3528 and clear flags that say whether attributes were specified */ 3529 i = 0; 3530 if (nPrefixes) { 3531 int j; /* hash table index */ 3532 unsigned long version = parser->m_nsAttsVersion; 3533 3534 /* Detect and prevent invalid shift */ 3535 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { 3536 return XML_ERROR_NO_MEMORY; 3537 } 3538 3539 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; 3540 unsigned char oldNsAttsPower = parser->m_nsAttsPower; 3541 /* size of hash table must be at least 2 * (# of prefixed attributes) */ 3542 if ((nPrefixes << 1) 3543 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ 3544 NS_ATT *temp; 3545 /* hash table size must also be a power of 2 and >= 8 */ 3546 while (nPrefixes >> parser->m_nsAttsPower++) 3547 ; 3548 if (parser->m_nsAttsPower < 3) 3549 parser->m_nsAttsPower = 3; 3550 3551 /* Detect and prevent invalid shift */ 3552 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { 3553 /* Restore actual size of memory in m_nsAtts */ 3554 parser->m_nsAttsPower = oldNsAttsPower; 3555 return XML_ERROR_NO_MEMORY; 3556 } 3557 3558 nsAttsSize = 1u << parser->m_nsAttsPower; 3559 3560 /* Detect and prevent integer overflow. 
3561 * The preprocessor guard addresses the "always false" warning 3562 * from -Wtype-limits on platforms where 3563 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3564 #if UINT_MAX >= SIZE_MAX 3565 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { 3566 /* Restore actual size of memory in m_nsAtts */ 3567 parser->m_nsAttsPower = oldNsAttsPower; 3568 return XML_ERROR_NO_MEMORY; 3569 } 3570 #endif 3571 3572 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, 3573 nsAttsSize * sizeof(NS_ATT)); 3574 if (! temp) { 3575 /* Restore actual size of memory in m_nsAtts */ 3576 parser->m_nsAttsPower = oldNsAttsPower; 3577 return XML_ERROR_NO_MEMORY; 3578 } 3579 parser->m_nsAtts = temp; 3580 version = 0; /* force re-initialization of m_nsAtts hash table */ 3581 } 3582 /* using a version flag saves us from initializing m_nsAtts every time */ 3583 if (! version) { /* initialize version flags when version wraps around */ 3584 version = INIT_ATTS_VERSION; 3585 for (j = nsAttsSize; j != 0;) 3586 parser->m_nsAtts[--j].version = version; 3587 } 3588 parser->m_nsAttsVersion = --version; 3589 3590 /* expand prefixed names and check for duplicates */ 3591 for (; i < attIndex; i += 2) { 3592 const XML_Char *s = appAtts[i]; 3593 if (s[-1] == 2) { /* prefixed */ 3594 ATTRIBUTE_ID *id; 3595 const BINDING *b; 3596 unsigned long uriHash; 3597 struct siphash sip_state; 3598 struct sipkey sip_key; 3599 3600 copy_salt_to_sipkey(parser, &sip_key); 3601 sip24_init(&sip_state, &sip_key); 3602 3603 ((XML_Char *)s)[-1] = 0; /* clear flag */ 3604 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); 3605 if (! id || ! id->prefix) { 3606 /* This code is walking through the appAtts array, dealing 3607 * with (in this case) a prefixed attribute name. To be in 3608 * the array, the attribute must have already been bound, so 3609 * has to have passed through the hash table lookup once 3610 * already. 
That implies that an entry for it already 3611 * exists, so the lookup above will return a pointer to 3612 * already allocated memory. There is no opportunaity for 3613 * the allocator to fail, so the condition above cannot be 3614 * fulfilled. 3615 * 3616 * Since it is difficult to be certain that the above 3617 * analysis is complete, we retain the test and merely 3618 * remove the code from coverage tests. 3619 */ 3620 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ 3621 } 3622 b = id->prefix->binding; 3623 if (! b) 3624 return XML_ERROR_UNBOUND_PREFIX; 3625 3626 for (j = 0; j < b->uriLen; j++) { 3627 const XML_Char c = b->uri[j]; 3628 if (! poolAppendChar(&parser->m_tempPool, c)) 3629 return XML_ERROR_NO_MEMORY; 3630 } 3631 3632 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); 3633 3634 while (*s++ != XML_T(ASCII_COLON)) 3635 ; 3636 3637 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); 3638 3639 do { /* copies null terminator */ 3640 if (! poolAppendChar(&parser->m_tempPool, *s)) 3641 return XML_ERROR_NO_MEMORY; 3642 } while (*s++); 3643 3644 uriHash = (unsigned long)sip24_final(&sip_state); 3645 3646 { /* Check hash table for duplicate of expanded name (uriName). 3647 Derived from code in lookup(parser, HASH_TABLE *table, ...). 3648 */ 3649 unsigned char step = 0; 3650 unsigned long mask = nsAttsSize - 1; 3651 j = uriHash & mask; /* index into hash table */ 3652 while (parser->m_nsAtts[j].version == version) { 3653 /* for speed we compare stored hash values first */ 3654 if (uriHash == parser->m_nsAtts[j].hash) { 3655 const XML_Char *s1 = poolStart(&parser->m_tempPool); 3656 const XML_Char *s2 = parser->m_nsAtts[j].uriName; 3657 /* s1 is null terminated, but not s2 */ 3658 for (; *s1 == *s2 && *s1 != 0; s1++, s2++) 3659 ; 3660 if (*s1 == 0) 3661 return XML_ERROR_DUPLICATE_ATTRIBUTE; 3662 } 3663 if (! step) 3664 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); 3665 j < step ? 
(j += nsAttsSize - step) : (j -= step); 3666 } 3667 } 3668 3669 if (parser->m_ns_triplets) { /* append namespace separator and prefix */ 3670 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; 3671 s = b->prefix->name; 3672 do { 3673 if (! poolAppendChar(&parser->m_tempPool, *s)) 3674 return XML_ERROR_NO_MEMORY; 3675 } while (*s++); 3676 } 3677 3678 /* store expanded name in attribute list */ 3679 s = poolStart(&parser->m_tempPool); 3680 poolFinish(&parser->m_tempPool); 3681 appAtts[i] = s; 3682 3683 /* fill empty slot with new version, uriName and hash value */ 3684 parser->m_nsAtts[j].version = version; 3685 parser->m_nsAtts[j].hash = uriHash; 3686 parser->m_nsAtts[j].uriName = s; 3687 3688 if (! --nPrefixes) { 3689 i += 2; 3690 break; 3691 } 3692 } else /* not prefixed */ 3693 ((XML_Char *)s)[-1] = 0; /* clear flag */ 3694 } 3695 } 3696 /* clear flags for the remaining attributes */ 3697 for (; i < attIndex; i += 2) 3698 ((XML_Char *)(appAtts[i]))[-1] = 0; 3699 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) 3700 binding->attId->name[-1] = 0; 3701 3702 if (! parser->m_ns) 3703 return XML_ERROR_NONE; 3704 3705 /* expand the element type name */ 3706 if (elementType->prefix) { 3707 binding = elementType->prefix->binding; 3708 if (! 
binding) 3709 return XML_ERROR_UNBOUND_PREFIX; 3710 localPart = tagNamePtr->str; 3711 while (*localPart++ != XML_T(ASCII_COLON)) 3712 ; 3713 } else if (dtd->defaultPrefix.binding) { 3714 binding = dtd->defaultPrefix.binding; 3715 localPart = tagNamePtr->str; 3716 } else 3717 return XML_ERROR_NONE; 3718 prefixLen = 0; 3719 if (parser->m_ns_triplets && binding->prefix->name) { 3720 for (; binding->prefix->name[prefixLen++];) 3721 ; /* prefixLen includes null terminator */ 3722 } 3723 tagNamePtr->localPart = localPart; 3724 tagNamePtr->uriLen = binding->uriLen; 3725 tagNamePtr->prefix = binding->prefix->name; 3726 tagNamePtr->prefixLen = prefixLen; 3727 for (i = 0; localPart[i++];) 3728 ; /* i includes null terminator */ 3729 3730 /* Detect and prevent integer overflow */ 3731 if (binding->uriLen > INT_MAX - prefixLen 3732 || i > INT_MAX - (binding->uriLen + prefixLen)) { 3733 return XML_ERROR_NO_MEMORY; 3734 } 3735 3736 n = i + binding->uriLen + prefixLen; 3737 if (n > binding->uriAlloc) { 3738 TAG *p; 3739 3740 /* Detect and prevent integer overflow */ 3741 if (n > INT_MAX - EXPAND_SPARE) { 3742 return XML_ERROR_NO_MEMORY; 3743 } 3744 /* Detect and prevent integer overflow. 3745 * The preprocessor guard addresses the "always false" warning 3746 * from -Wtype-limits on platforms where 3747 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 3748 #if UINT_MAX >= SIZE_MAX 3749 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 3750 return XML_ERROR_NO_MEMORY; 3751 } 3752 #endif 3753 3754 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); 3755 if (! 
uri) 3756 return XML_ERROR_NO_MEMORY; 3757 binding->uriAlloc = n + EXPAND_SPARE; 3758 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); 3759 for (p = parser->m_tagStack; p; p = p->parent) 3760 if (p->name.str == binding->uri) 3761 p->name.str = uri; 3762 FREE(parser, binding->uri); 3763 binding->uri = uri; 3764 } 3765 /* if m_namespaceSeparator != '\0' then uri includes it already */ 3766 uri = binding->uri + binding->uriLen; 3767 memcpy(uri, localPart, i * sizeof(XML_Char)); 3768 /* we always have a namespace separator between localPart and prefix */ 3769 if (prefixLen) { 3770 uri += i - 1; 3771 *uri = parser->m_namespaceSeparator; /* replace null terminator */ 3772 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); 3773 } 3774 tagNamePtr->str = binding->uri; 3775 return XML_ERROR_NONE; 3776 } 3777 3778 static XML_Bool 3779 is_rfc3986_uri_char(XML_Char candidate) { 3780 // For the RFC 3986 ANBF grammar see 3781 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A 3782 3783 switch (candidate) { 3784 // From rule "ALPHA" (uppercase half) 3785 case 'A': 3786 case 'B': 3787 case 'C': 3788 case 'D': 3789 case 'E': 3790 case 'F': 3791 case 'G': 3792 case 'H': 3793 case 'I': 3794 case 'J': 3795 case 'K': 3796 case 'L': 3797 case 'M': 3798 case 'N': 3799 case 'O': 3800 case 'P': 3801 case 'Q': 3802 case 'R': 3803 case 'S': 3804 case 'T': 3805 case 'U': 3806 case 'V': 3807 case 'W': 3808 case 'X': 3809 case 'Y': 3810 case 'Z': 3811 3812 // From rule "ALPHA" (lowercase half) 3813 case 'a': 3814 case 'b': 3815 case 'c': 3816 case 'd': 3817 case 'e': 3818 case 'f': 3819 case 'g': 3820 case 'h': 3821 case 'i': 3822 case 'j': 3823 case 'k': 3824 case 'l': 3825 case 'm': 3826 case 'n': 3827 case 'o': 3828 case 'p': 3829 case 'q': 3830 case 'r': 3831 case 's': 3832 case 't': 3833 case 'u': 3834 case 'v': 3835 case 'w': 3836 case 'x': 3837 case 'y': 3838 case 'z': 3839 3840 // From rule "DIGIT" 3841 case '0': 3842 case '1': 3843 case '2': 
3844 case '3': 3845 case '4': 3846 case '5': 3847 case '6': 3848 case '7': 3849 case '8': 3850 case '9': 3851 3852 // From rule "pct-encoded" 3853 case '%': 3854 3855 // From rule "unreserved" 3856 case '-': 3857 case '.': 3858 case '_': 3859 case '~': 3860 3861 // From rule "gen-delims" 3862 case ':': 3863 case '/': 3864 case '?': 3865 case '#': 3866 case '[': 3867 case ']': 3868 case '@': 3869 3870 // From rule "sub-delims" 3871 case '!': 3872 case '$': 3873 case '&': 3874 case '\'': 3875 case '(': 3876 case ')': 3877 case '*': 3878 case '+': 3879 case ',': 3880 case ';': 3881 case '=': 3882 return XML_TRUE; 3883 3884 default: 3885 return XML_FALSE; 3886 } 3887 } 3888 3889 /* addBinding() overwrites the value of prefix->binding without checking. 3890 Therefore one must keep track of the old value outside of addBinding(). 3891 */ 3892 static enum XML_Error 3893 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, 3894 const XML_Char *uri, BINDING **bindingsPtr) { 3895 // "http://www.w3.org/XML/1998/namespace" 3896 static const XML_Char xmlNamespace[] 3897 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, 3898 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, 3899 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, 3900 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, 3901 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, 3902 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, 3903 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, 3904 ASCII_e, '\0'}; 3905 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; 3906 // "http://www.w3.org/2000/xmlns/" 3907 static const XML_Char xmlnsNamespace[] 3908 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, 3909 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, 3910 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, 3911 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, 3912 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; 3913 
static const int xmlnsLen 3914 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; 3915 3916 XML_Bool mustBeXML = XML_FALSE; 3917 XML_Bool isXML = XML_TRUE; 3918 XML_Bool isXMLNS = XML_TRUE; 3919 3920 BINDING *b; 3921 int len; 3922 3923 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ 3924 if (*uri == XML_T('\0') && prefix->name) 3925 return XML_ERROR_UNDECLARING_PREFIX; 3926 3927 if (prefix->name && prefix->name[0] == XML_T(ASCII_x) 3928 && prefix->name[1] == XML_T(ASCII_m) 3929 && prefix->name[2] == XML_T(ASCII_l)) { 3930 /* Not allowed to bind xmlns */ 3931 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) 3932 && prefix->name[5] == XML_T('\0')) 3933 return XML_ERROR_RESERVED_PREFIX_XMLNS; 3934 3935 if (prefix->name[3] == XML_T('\0')) 3936 mustBeXML = XML_TRUE; 3937 } 3938 3939 for (len = 0; uri[len]; len++) { 3940 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) 3941 isXML = XML_FALSE; 3942 3943 if (! mustBeXML && isXMLNS 3944 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) 3945 isXMLNS = XML_FALSE; 3946 3947 // NOTE: While Expat does not validate namespace URIs against RFC 3986 3948 // today (and is not REQUIRED to do so with regard to the XML 1.0 3949 // namespaces specification) we have to at least make sure, that 3950 // the application on top of Expat (that is likely splitting expanded 3951 // element names ("qualified names") of form 3952 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces 3953 // in its element handler code) cannot be confused by an attacker 3954 // putting additional namespace separator characters into namespace 3955 // declarations. That would be ambiguous and not to be expected. 
3956 // 3957 // While the HTML API docs of function XML_ParserCreateNS have been 3958 // advising against use of a namespace separator character that can 3959 // appear in a URI for >20 years now, some widespread applications 3960 // are using URI characters (':' (colon) in particular) for a 3961 // namespace separator, in practice. To keep these applications 3962 // functional, we only reject namespaces URIs containing the 3963 // application-chosen namespace separator if the chosen separator 3964 // is a non-URI character with regard to RFC 3986. 3965 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) 3966 && ! is_rfc3986_uri_char(uri[len])) { 3967 return XML_ERROR_SYNTAX; 3968 } 3969 } 3970 isXML = isXML && len == xmlLen; 3971 isXMLNS = isXMLNS && len == xmlnsLen; 3972 3973 if (mustBeXML != isXML) 3974 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML 3975 : XML_ERROR_RESERVED_NAMESPACE_URI; 3976 3977 if (isXMLNS) 3978 return XML_ERROR_RESERVED_NAMESPACE_URI; 3979 3980 if (parser->m_namespaceSeparator) 3981 len++; 3982 if (parser->m_freeBindingList) { 3983 b = parser->m_freeBindingList; 3984 if (len > b->uriAlloc) { 3985 /* Detect and prevent integer overflow */ 3986 if (len > INT_MAX - EXPAND_SPARE) { 3987 return XML_ERROR_NO_MEMORY; 3988 } 3989 3990 /* Detect and prevent integer overflow. 3991 * The preprocessor guard addresses the "always false" warning 3992 * from -Wtype-limits on platforms where 3993 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 3994 #if UINT_MAX >= SIZE_MAX 3995 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 3996 return XML_ERROR_NO_MEMORY; 3997 } 3998 #endif 3999 4000 XML_Char *temp = (XML_Char *)REALLOC( 4001 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4002 if (temp == NULL) 4003 return XML_ERROR_NO_MEMORY; 4004 b->uri = temp; 4005 b->uriAlloc = len + EXPAND_SPARE; 4006 } 4007 parser->m_freeBindingList = b->nextTagBinding; 4008 } else { 4009 b = (BINDING *)MALLOC(parser, sizeof(BINDING)); 4010 if (! b) 4011 return XML_ERROR_NO_MEMORY; 4012 4013 /* Detect and prevent integer overflow */ 4014 if (len > INT_MAX - EXPAND_SPARE) { 4015 return XML_ERROR_NO_MEMORY; 4016 } 4017 /* Detect and prevent integer overflow. 4018 * The preprocessor guard addresses the "always false" warning 4019 * from -Wtype-limits on platforms where 4020 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 4021 #if UINT_MAX >= SIZE_MAX 4022 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { 4023 return XML_ERROR_NO_MEMORY; 4024 } 4025 #endif 4026 4027 b->uri 4028 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); 4029 if (! 
b->uri) { 4030 FREE(parser, b); 4031 return XML_ERROR_NO_MEMORY; 4032 } 4033 b->uriAlloc = len + EXPAND_SPARE; 4034 } 4035 b->uriLen = len; 4036 memcpy(b->uri, uri, len * sizeof(XML_Char)); 4037 if (parser->m_namespaceSeparator) 4038 b->uri[len - 1] = parser->m_namespaceSeparator; 4039 b->prefix = prefix; 4040 b->attId = attId; 4041 b->prevPrefixBinding = prefix->binding; 4042 /* NULL binding when default namespace undeclared */ 4043 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) 4044 prefix->binding = NULL; 4045 else 4046 prefix->binding = b; 4047 b->nextTagBinding = *bindingsPtr; 4048 *bindingsPtr = b; 4049 /* if attId == NULL then we are not starting a namespace scope */ 4050 if (attId && parser->m_startNamespaceDeclHandler) 4051 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, 4052 prefix->binding ? uri : 0); 4053 return XML_ERROR_NONE; 4054 } 4055 4056 /* The idea here is to avoid using stack for each CDATA section when 4057 the whole file is parsed with one call. 4058 */ 4059 static enum XML_Error PTRCALL 4060 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, 4061 const char **endPtr) { 4062 enum XML_Error result = doCdataSection( 4063 parser, parser->m_encoding, &start, end, endPtr, 4064 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); 4065 if (result != XML_ERROR_NONE) 4066 return result; 4067 if (start) { 4068 if (parser->m_parentParser) { /* we are parsing an external entity */ 4069 parser->m_processor = externalEntityContentProcessor; 4070 return externalEntityContentProcessor(parser, start, end, endPtr); 4071 } else { 4072 parser->m_processor = contentProcessor; 4073 return contentProcessor(parser, start, end, endPtr); 4074 } 4075 } 4076 return result; 4077 } 4078 4079 /* startPtr gets set to non-null if the section is closed, and to null if 4080 the section is not yet closed. 
4081 */ 4082 static enum XML_Error 4083 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4084 const char *end, const char **nextPtr, XML_Bool haveMore, 4085 enum XML_Account account) { 4086 const char *s = *startPtr; 4087 const char **eventPP; 4088 const char **eventEndPP; 4089 if (enc == parser->m_encoding) { 4090 eventPP = &parser->m_eventPtr; 4091 *eventPP = s; 4092 eventEndPP = &parser->m_eventEndPtr; 4093 } else { 4094 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4095 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4096 } 4097 *eventPP = s; 4098 *startPtr = NULL; 4099 4100 for (;;) { 4101 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4102 int tok = XmlCdataSectionTok(enc, s, end, &next); 4103 #if XML_GE == 1 4104 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { 4105 accountingOnAbort(parser); 4106 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4107 } 4108 #else 4109 UNUSED_P(account); 4110 #endif 4111 *eventEndPP = next; 4112 switch (tok) { 4113 case XML_TOK_CDATA_SECT_CLOSE: 4114 if (parser->m_endCdataSectionHandler) 4115 parser->m_endCdataSectionHandler(parser->m_handlerArg); 4116 /* BEGIN disabled code */ 4117 /* see comment under XML_TOK_CDATA_SECT_OPEN */ 4118 else if ((0) && parser->m_characterDataHandler) 4119 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4120 0); 4121 /* END disabled code */ 4122 else if (parser->m_defaultHandler) 4123 reportDefault(parser, enc, s, next); 4124 *startPtr = next; 4125 *nextPtr = next; 4126 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4127 return XML_ERROR_ABORTED; 4128 else 4129 return XML_ERROR_NONE; 4130 case XML_TOK_DATA_NEWLINE: 4131 if (parser->m_characterDataHandler) { 4132 XML_Char c = 0xA; 4133 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); 4134 } else if (parser->m_defaultHandler) 4135 reportDefault(parser, enc, s, next); 4136 break; 4137 case 
XML_TOK_DATA_CHARS: { 4138 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; 4139 if (charDataHandler) { 4140 if (MUST_CONVERT(enc, s)) { 4141 for (;;) { 4142 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 4143 const enum XML_Convert_Result convert_res = XmlConvert( 4144 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 4145 *eventEndPP = next; 4146 charDataHandler(parser->m_handlerArg, parser->m_dataBuf, 4147 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 4148 if ((convert_res == XML_CONVERT_COMPLETED) 4149 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 4150 break; 4151 *eventPP = s; 4152 } 4153 } else 4154 charDataHandler(parser->m_handlerArg, (const XML_Char *)s, 4155 (int)((const XML_Char *)next - (const XML_Char *)s)); 4156 } else if (parser->m_defaultHandler) 4157 reportDefault(parser, enc, s, next); 4158 } break; 4159 case XML_TOK_INVALID: 4160 *eventPP = next; 4161 return XML_ERROR_INVALID_TOKEN; 4162 case XML_TOK_PARTIAL_CHAR: 4163 if (haveMore) { 4164 *nextPtr = s; 4165 return XML_ERROR_NONE; 4166 } 4167 return XML_ERROR_PARTIAL_CHAR; 4168 case XML_TOK_PARTIAL: 4169 case XML_TOK_NONE: 4170 if (haveMore) { 4171 *nextPtr = s; 4172 return XML_ERROR_NONE; 4173 } 4174 return XML_ERROR_UNCLOSED_CDATA_SECTION; 4175 default: 4176 /* Every token returned by XmlCdataSectionTok() has its own 4177 * explicit case, so this default case will never be executed. 4178 * We retain it as a safety net and exclude it from the coverage 4179 * statistics. 
4180 * 4181 * LCOV_EXCL_START 4182 */ 4183 *eventPP = next; 4184 return XML_ERROR_UNEXPECTED_STATE; 4185 /* LCOV_EXCL_STOP */ 4186 } 4187 4188 *eventPP = s = next; 4189 switch (parser->m_parsingStatus.parsing) { 4190 case XML_SUSPENDED: 4191 *nextPtr = next; 4192 return XML_ERROR_NONE; 4193 case XML_FINISHED: 4194 return XML_ERROR_ABORTED; 4195 default:; 4196 } 4197 } 4198 /* not reached */ 4199 } 4200 4201 #ifdef XML_DTD 4202 4203 /* The idea here is to avoid using stack for each IGNORE section when 4204 the whole file is parsed with one call. 4205 */ 4206 static enum XML_Error PTRCALL 4207 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, 4208 const char **endPtr) { 4209 enum XML_Error result 4210 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, 4211 (XML_Bool)! parser->m_parsingStatus.finalBuffer); 4212 if (result != XML_ERROR_NONE) 4213 return result; 4214 if (start) { 4215 parser->m_processor = prologProcessor; 4216 return prologProcessor(parser, start, end, endPtr); 4217 } 4218 return result; 4219 } 4220 4221 /* startPtr gets set to non-null is the section is closed, and to null 4222 if the section is not yet closed. 4223 */ 4224 static enum XML_Error 4225 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, 4226 const char *end, const char **nextPtr, XML_Bool haveMore) { 4227 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ 4228 int tok; 4229 const char *s = *startPtr; 4230 const char **eventPP; 4231 const char **eventEndPP; 4232 if (enc == parser->m_encoding) { 4233 eventPP = &parser->m_eventPtr; 4234 *eventPP = s; 4235 eventEndPP = &parser->m_eventEndPtr; 4236 } else { 4237 /* It's not entirely clear, but it seems the following two lines 4238 * of code cannot be executed. 
The only occasions on which 'enc' 4239 * is not 'encoding' are when this function is called 4240 * from the internal entity processing, and IGNORE sections are an 4241 * error in internal entities. 4242 * 4243 * Since it really isn't clear that this is true, we keep the code 4244 * and just remove it from our coverage tests. 4245 * 4246 * LCOV_EXCL_START 4247 */ 4248 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 4249 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 4250 /* LCOV_EXCL_STOP */ 4251 } 4252 *eventPP = s; 4253 *startPtr = NULL; 4254 tok = XmlIgnoreSectionTok(enc, s, end, &next); 4255 # if XML_GE == 1 4256 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, 4257 XML_ACCOUNT_DIRECT)) { 4258 accountingOnAbort(parser); 4259 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4260 } 4261 # endif 4262 *eventEndPP = next; 4263 switch (tok) { 4264 case XML_TOK_IGNORE_SECT: 4265 if (parser->m_defaultHandler) 4266 reportDefault(parser, enc, s, next); 4267 *startPtr = next; 4268 *nextPtr = next; 4269 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4270 return XML_ERROR_ABORTED; 4271 else 4272 return XML_ERROR_NONE; 4273 case XML_TOK_INVALID: 4274 *eventPP = next; 4275 return XML_ERROR_INVALID_TOKEN; 4276 case XML_TOK_PARTIAL_CHAR: 4277 if (haveMore) { 4278 *nextPtr = s; 4279 return XML_ERROR_NONE; 4280 } 4281 return XML_ERROR_PARTIAL_CHAR; 4282 case XML_TOK_PARTIAL: 4283 case XML_TOK_NONE: 4284 if (haveMore) { 4285 *nextPtr = s; 4286 return XML_ERROR_NONE; 4287 } 4288 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ 4289 default: 4290 /* All of the tokens that XmlIgnoreSectionTok() returns have 4291 * explicit cases to handle them, so this default case is never 4292 * executed. We keep it as a safety net anyway, and remove it 4293 * from our test coverage statistics. 
4294 * 4295 * LCOV_EXCL_START 4296 */ 4297 *eventPP = next; 4298 return XML_ERROR_UNEXPECTED_STATE; 4299 /* LCOV_EXCL_STOP */ 4300 } 4301 /* not reached */ 4302 } 4303 4304 #endif /* XML_DTD */ 4305 4306 static enum XML_Error 4307 initializeEncoding(XML_Parser parser) { 4308 const char *s; 4309 #ifdef XML_UNICODE 4310 char encodingBuf[128]; 4311 /* See comments about `protocolEncodingName` in parserInit() */ 4312 if (! parser->m_protocolEncodingName) 4313 s = NULL; 4314 else { 4315 int i; 4316 for (i = 0; parser->m_protocolEncodingName[i]; i++) { 4317 if (i == sizeof(encodingBuf) - 1 4318 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { 4319 encodingBuf[0] = '\0'; 4320 break; 4321 } 4322 encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; 4323 } 4324 encodingBuf[i] = '\0'; 4325 s = encodingBuf; 4326 } 4327 #else 4328 s = parser->m_protocolEncodingName; 4329 #endif 4330 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( 4331 &parser->m_initEncoding, &parser->m_encoding, s)) 4332 return XML_ERROR_NONE; 4333 return handleUnknownEncoding(parser, parser->m_protocolEncodingName); 4334 } 4335 4336 static enum XML_Error 4337 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, 4338 const char *next) { 4339 const char *encodingName = NULL; 4340 const XML_Char *storedEncName = NULL; 4341 const ENCODING *newEncoding = NULL; 4342 const char *version = NULL; 4343 const char *versionend = NULL; 4344 const XML_Char *storedversion = NULL; 4345 int standalone = -1; 4346 4347 #if XML_GE == 1 4348 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, 4349 XML_ACCOUNT_DIRECT)) { 4350 accountingOnAbort(parser); 4351 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4352 } 4353 #endif 4354 4355 if (! (parser->m_ns ? 
XmlParseXmlDeclNS : XmlParseXmlDecl)( 4356 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, 4357 &version, &versionend, &encodingName, &newEncoding, &standalone)) { 4358 if (isGeneralTextEntity) 4359 return XML_ERROR_TEXT_DECL; 4360 else 4361 return XML_ERROR_XML_DECL; 4362 } 4363 if (! isGeneralTextEntity && standalone == 1) { 4364 parser->m_dtd->standalone = XML_TRUE; 4365 #ifdef XML_DTD 4366 if (parser->m_paramEntityParsing 4367 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) 4368 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; 4369 #endif /* XML_DTD */ 4370 } 4371 if (parser->m_xmlDeclHandler) { 4372 if (encodingName != NULL) { 4373 storedEncName = poolStoreString( 4374 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4375 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4376 if (! storedEncName) 4377 return XML_ERROR_NO_MEMORY; 4378 poolFinish(&parser->m_temp2Pool); 4379 } 4380 if (version) { 4381 storedversion 4382 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, 4383 versionend - parser->m_encoding->minBytesPerChar); 4384 if (! storedversion) 4385 return XML_ERROR_NO_MEMORY; 4386 } 4387 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, 4388 standalone); 4389 } else if (parser->m_defaultHandler) 4390 reportDefault(parser, parser->m_encoding, s, next); 4391 if (parser->m_protocolEncodingName == NULL) { 4392 if (newEncoding) { 4393 /* Check that the specified encoding does not conflict with what 4394 * the parser has already deduced. Do we have the same number 4395 * of bytes in the smallest representation of a character? If 4396 * this is UTF-16, is it the same endianness? 
4397 */ 4398 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar 4399 || (newEncoding->minBytesPerChar == 2 4400 && newEncoding != parser->m_encoding)) { 4401 parser->m_eventPtr = encodingName; 4402 return XML_ERROR_INCORRECT_ENCODING; 4403 } 4404 parser->m_encoding = newEncoding; 4405 } else if (encodingName) { 4406 enum XML_Error result; 4407 if (! storedEncName) { 4408 storedEncName = poolStoreString( 4409 &parser->m_temp2Pool, parser->m_encoding, encodingName, 4410 encodingName + XmlNameLength(parser->m_encoding, encodingName)); 4411 if (! storedEncName) 4412 return XML_ERROR_NO_MEMORY; 4413 } 4414 result = handleUnknownEncoding(parser, storedEncName); 4415 poolClear(&parser->m_temp2Pool); 4416 if (result == XML_ERROR_UNKNOWN_ENCODING) 4417 parser->m_eventPtr = encodingName; 4418 return result; 4419 } 4420 } 4421 4422 if (storedEncName || storedversion) 4423 poolClear(&parser->m_temp2Pool); 4424 4425 return XML_ERROR_NONE; 4426 } 4427 4428 static enum XML_Error 4429 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { 4430 if (parser->m_unknownEncodingHandler) { 4431 XML_Encoding info; 4432 int i; 4433 for (i = 0; i < 256; i++) 4434 info.map[i] = -1; 4435 info.convert = NULL; 4436 info.data = NULL; 4437 info.release = NULL; 4438 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, 4439 encodingName, &info)) { 4440 ENCODING *enc; 4441 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); 4442 if (! parser->m_unknownEncodingMem) { 4443 if (info.release) 4444 info.release(info.data); 4445 return XML_ERROR_NO_MEMORY; 4446 } 4447 enc = (parser->m_ns ? 
XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( 4448 parser->m_unknownEncodingMem, info.map, info.convert, info.data); 4449 if (enc) { 4450 parser->m_unknownEncodingData = info.data; 4451 parser->m_unknownEncodingRelease = info.release; 4452 parser->m_encoding = enc; 4453 return XML_ERROR_NONE; 4454 } 4455 } 4456 if (info.release != NULL) 4457 info.release(info.data); 4458 } 4459 return XML_ERROR_UNKNOWN_ENCODING; 4460 } 4461 4462 static enum XML_Error PTRCALL 4463 prologInitProcessor(XML_Parser parser, const char *s, const char *end, 4464 const char **nextPtr) { 4465 enum XML_Error result = initializeEncoding(parser); 4466 if (result != XML_ERROR_NONE) 4467 return result; 4468 parser->m_processor = prologProcessor; 4469 return prologProcessor(parser, s, end, nextPtr); 4470 } 4471 4472 #ifdef XML_DTD 4473 4474 static enum XML_Error PTRCALL 4475 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, 4476 const char **nextPtr) { 4477 enum XML_Error result = initializeEncoding(parser); 4478 if (result != XML_ERROR_NONE) 4479 return result; 4480 4481 /* we know now that XML_Parse(Buffer) has been called, 4482 so we consider the external parameter entity read */ 4483 parser->m_dtd->paramEntityRead = XML_TRUE; 4484 4485 if (parser->m_prologState.inEntityValue) { 4486 parser->m_processor = entityValueInitProcessor; 4487 return entityValueInitProcessor(parser, s, end, nextPtr); 4488 } else { 4489 parser->m_processor = externalParEntProcessor; 4490 return externalParEntProcessor(parser, s, end, nextPtr); 4491 } 4492 } 4493 4494 static enum XML_Error PTRCALL 4495 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, 4496 const char **nextPtr) { 4497 int tok; 4498 const char *start = s; 4499 const char *next = start; 4500 parser->m_eventPtr = start; 4501 4502 for (;;) { 4503 tok = XmlPrologTok(parser->m_encoding, start, end, &next); 4504 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: 4505 - 
storeEntityValue 4506 - processXmlDecl 4507 */ 4508 parser->m_eventEndPtr = next; 4509 if (tok <= 0) { 4510 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4511 *nextPtr = s; 4512 return XML_ERROR_NONE; 4513 } 4514 switch (tok) { 4515 case XML_TOK_INVALID: 4516 return XML_ERROR_INVALID_TOKEN; 4517 case XML_TOK_PARTIAL: 4518 return XML_ERROR_UNCLOSED_TOKEN; 4519 case XML_TOK_PARTIAL_CHAR: 4520 return XML_ERROR_PARTIAL_CHAR; 4521 case XML_TOK_NONE: /* start == end */ 4522 default: 4523 break; 4524 } 4525 /* found end of entity value - can store it now */ 4526 return storeEntityValue(parser, parser->m_encoding, s, end, 4527 XML_ACCOUNT_DIRECT); 4528 } else if (tok == XML_TOK_XML_DECL) { 4529 enum XML_Error result; 4530 result = processXmlDecl(parser, 0, start, next); 4531 if (result != XML_ERROR_NONE) 4532 return result; 4533 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For 4534 * that to happen, a parameter entity parsing handler must have attempted 4535 * to suspend the parser, which fails and raises an error. The parser can 4536 * be aborted, but can't be suspended. 4537 */ 4538 if (parser->m_parsingStatus.parsing == XML_FINISHED) 4539 return XML_ERROR_ABORTED; 4540 *nextPtr = next; 4541 /* stop scanning for text declaration - we found one */ 4542 parser->m_processor = entityValueProcessor; 4543 return entityValueProcessor(parser, next, end, nextPtr); 4544 } 4545 /* XmlPrologTok has now set the encoding based on the BOM it found, and we 4546 must move s and nextPtr forward to consume the BOM. 4547 4548 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we 4549 would leave the BOM in the buffer and return. On the next call to this 4550 function, our XmlPrologTok call would return XML_TOK_INVALID, since it 4551 is not valid to have multiple BOMs. 4552 */ 4553 else if (tok == XML_TOK_BOM) { 4554 # if XML_GE == 1 4555 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4556 XML_ACCOUNT_DIRECT)) { 4557 accountingOnAbort(parser); 4558 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4559 } 4560 # endif 4561 4562 *nextPtr = next; 4563 s = next; 4564 } 4565 /* If we get this token, we have the start of what might be a 4566 normal tag, but not a declaration (i.e. it doesn't begin with 4567 "<!"). In a DTD context, that isn't legal. 4568 */ 4569 else if (tok == XML_TOK_INSTANCE_START) { 4570 *nextPtr = next; 4571 return XML_ERROR_SYNTAX; 4572 } 4573 start = next; 4574 parser->m_eventPtr = start; 4575 } 4576 } 4577 4578 static enum XML_Error PTRCALL 4579 externalParEntProcessor(XML_Parser parser, const char *s, const char *end, 4580 const char **nextPtr) { 4581 const char *next = s; 4582 int tok; 4583 4584 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4585 if (tok <= 0) { 4586 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4587 *nextPtr = s; 4588 return XML_ERROR_NONE; 4589 } 4590 switch (tok) { 4591 case XML_TOK_INVALID: 4592 return XML_ERROR_INVALID_TOKEN; 4593 case XML_TOK_PARTIAL: 4594 return XML_ERROR_UNCLOSED_TOKEN; 4595 case XML_TOK_PARTIAL_CHAR: 4596 return XML_ERROR_PARTIAL_CHAR; 4597 case XML_TOK_NONE: /* start == end */ 4598 default: 4599 break; 4600 } 4601 } 4602 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. 4603 However, when parsing an external subset, doProlog will not accept a BOM 4604 as valid, and report a syntax error, so we have to skip the BOM, and 4605 account for the BOM bytes. 4606 */ 4607 else if (tok == XML_TOK_BOM) { 4608 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, 4609 XML_ACCOUNT_DIRECT)) { 4610 accountingOnAbort(parser); 4611 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 4612 } 4613 4614 s = next; 4615 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4616 } 4617 4618 parser->m_processor = prologProcessor; 4619 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4620 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 4621 XML_ACCOUNT_DIRECT); 4622 } 4623 4624 static enum XML_Error PTRCALL 4625 entityValueProcessor(XML_Parser parser, const char *s, const char *end, 4626 const char **nextPtr) { 4627 const char *start = s; 4628 const char *next = s; 4629 const ENCODING *enc = parser->m_encoding; 4630 int tok; 4631 4632 for (;;) { 4633 tok = XmlPrologTok(enc, start, end, &next); 4634 /* Note: These bytes are accounted later in: 4635 - storeEntityValue 4636 */ 4637 if (tok <= 0) { 4638 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { 4639 *nextPtr = s; 4640 return XML_ERROR_NONE; 4641 } 4642 switch (tok) { 4643 case XML_TOK_INVALID: 4644 return XML_ERROR_INVALID_TOKEN; 4645 case XML_TOK_PARTIAL: 4646 return XML_ERROR_UNCLOSED_TOKEN; 4647 case XML_TOK_PARTIAL_CHAR: 4648 return XML_ERROR_PARTIAL_CHAR; 4649 case XML_TOK_NONE: /* start == end */ 4650 default: 4651 break; 4652 } 4653 /* found end of entity value - can store it now */ 4654 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); 4655 } 4656 start = next; 4657 } 4658 } 4659 4660 #endif /* XML_DTD */ 4661 4662 static enum XML_Error PTRCALL 4663 prologProcessor(XML_Parser parser, const char *s, const char *end, 4664 const char **nextPtr) { 4665 const char *next = s; 4666 int tok = XmlPrologTok(parser->m_encoding, s, end, &next); 4667 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 4668 (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE,
                  XML_ACCOUNT_DIRECT);
}

/* Role-driven processing loop for prolog / DTD tokens.
 *
 * The caller supplies the first token (`tok`, spanning [s, next)).  For each
 * token the loop
 *   1. handles non-positive token codes (invalid, partial, end of input),
 *   2. asks XmlTokenRole() which role the token plays given
 *      parser->m_prologState,
 *   3. when XML_GE == 1, runs accountingDiffTolerated() for every role
 *      except those noted below as accounted elsewhere,
 *   4. acts on the role (stores DTD data, invokes user handlers),
 *   5. passes the token to the default handler unless step 4 cleared
 *      `handleDefault`,
 *   6. scans the next token with XmlPrologTok(), unless the parsing status
 *      is XML_SUSPENDED or XML_FINISHED.
 *
 * Parameters:
 *   parser   - parser whose state (m_dtd, m_prologState, pools, handlers)
 *              is read and updated.
 *   enc      - encoding used to scan the input.  When it differs from
 *              parser->m_encoding the event pointers of
 *              parser->m_openInternalEntities are updated instead of the
 *              parser's own.
 *   s, end   - start of the current token and end of the available input.
 *   tok      - token code of the first token; values <= 0 are special.
 *   next     - end of the first token.
 *   nextPtr  - out: set to the resume position on the XML_ERROR_NONE early
 *              returns below; also handed on to contentProcessor() and
 *              doIgnoreSection().
 *   haveMore - if true, a non-positive token other than XML_TOK_INVALID
 *              makes the function return XML_ERROR_NONE with *nextPtr = s.
 *   allowClosingDoctype - XML_ROLE_DOCTYPE_CLOSE yields
 *              XML_ERROR_INVALID_TOKEN unless this is XML_TRUE.
 *   account  - forwarded to accountingDiffTolerated() when XML_GE == 1.
 *
 * Returns XML_ERROR_NONE or the specific XML_Error describing the failure.
 * On XML_ROLE_INSTANCE_START the result of contentProcessor() is returned.
 */
static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
         int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
         XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
  /* Name under which the external subset is kept in dtd->paramEntities */
  static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
  /* Attribute type names assigned to parser->m_declAttributeType and later
     passed to the attlist declaration handler */
  static const XML_Char atypeCDATA[]
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
  static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
  static const XML_Char atypeIDREF[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
  static const XML_Char atypeIDREFS[]
      = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
  static const XML_Char atypeENTITY[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
  static const XML_Char atypeENTITIES[]
      = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
         ASCII_I, ASCII_E, ASCII_S, '\0'};
  static const XML_Char atypeNMTOKEN[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
  static const XML_Char atypeNMTOKENS[]
      = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
         ASCII_E, ASCII_N, ASCII_S, '\0'};
  /* Pieces used to assemble the textual form of enumerated and NOTATION
     attribute types in parser->m_tempPool */
  static const XML_Char notationPrefix[]
      = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
         ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
  static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
  static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};

#ifndef XML_DTD
  UNUSED_P(account);
#endif

  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  const char **eventPP;
  const char **eventEndPP;
  /* Set by the CONTENT_ELEMENT* / GROUP_CLOSE* cases before they jump to
     their shared labels (elementContent / closeGroup) */
  enum XML_Content_Quant quant;

  /* Pick which pair of event pointers tracks the current token: the
     parser's own when scanning with the parser's encoding, otherwise those
     of the open internal entity */
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }

  for (;;) {
    int role;
    /* Cleared by any case that consumes the token on behalf of a handler */
    XML_Bool handleDefault = XML_TRUE;
    *eventPP = s;
    *eventEndPP = next;
    if (tok <= 0) {
      /* Non-positive token: hand control back if the caller indicated more
         input may follow, otherwise map the condition to an error or to a
         positive token code */
      if (haveMore && tok != XML_TOK_INVALID) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      switch (tok) {
      case XML_TOK_INVALID:
        *eventPP = next;
        return XML_ERROR_INVALID_TOKEN;
      case XML_TOK_PARTIAL:
        return XML_ERROR_UNCLOSED_TOKEN;
      case XML_TOK_PARTIAL_CHAR:
        return XML_ERROR_PARTIAL_CHAR;
      case -XML_TOK_PROLOG_S:
        tok = -tok;
        break;
      case XML_TOK_NONE:
#ifdef XML_DTD
        /* for internal PE NOT referenced between declarations */
        if (enc != parser->m_encoding
            && ! parser->m_openInternalEntities->betweenDecl) {
          *nextPtr = s;
          return XML_ERROR_NONE;
        }
        /* WFC: PE Between Declarations - must check that PE contains
           complete markup, not only for external PEs, but also for
           internal PEs if the reference occurs between declarations.
        */
        if (parser->m_isParamEntity || enc != parser->m_encoding) {
          if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
              == XML_ROLE_ERROR)
            return XML_ERROR_INCOMPLETE_PE;
          *nextPtr = s;
          return XML_ERROR_NONE;
        }
#endif /* XML_DTD */
        return XML_ERROR_NO_ELEMENTS;
      default:
        /* Any other negative code: use its positive counterpart and treat
           the token as extending to the end of the input */
        tok = -tok;
        next = end;
        break;
      }
    }
    role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
#if XML_GE == 1
    /* Account for the token's bytes here unless another function does so */
    switch (role) {
    case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
    case XML_ROLE_XML_DECL:       // bytes accounted in processXmlDecl
#  ifdef XML_DTD
    case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
#  endif
      break;
    default:
      if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
        accountingOnAbort(parser);
        return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
      }
    }
#endif
    switch (role) {
    case XML_ROLE_XML_DECL: {
      enum XML_Error result = processXmlDecl(parser, 0, s, next);
      if (result != XML_ERROR_NONE)
        return result;
      /* Reload the encoding from the parser after processing the
         declaration */
      enc = parser->m_encoding;
      handleDefault = XML_FALSE;
    } break;
    case XML_ROLE_DOCTYPE_NAME:
      /* The name is only stored when a start-doctype handler will need it */
      if (parser->m_startDoctypeDeclHandler) {
        parser->m_doctypeName
            = poolStoreString(&parser->m_tempPool, enc, s, next);
        if (! parser->m_doctypeName)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        parser->m_doctypePubid = NULL;
        handleDefault = XML_FALSE;
      }
      parser->m_doctypeSysid = NULL; /* always initialize to NULL */
      break;
    case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
      /* Report the doctype start with final argument 1, then drop the
         stored name so XML_ROLE_DOCTYPE_CLOSE does not report it again */
      if (parser->m_startDoctypeDeclHandler) {
        parser->m_startDoctypeDeclHandler(
            parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
            parser->m_doctypePubid, 1);
        parser->m_doctypeName = NULL;
        poolClear(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
#ifdef XML_DTD
    case XML_ROLE_TEXT_DECL: {
      enum XML_Error result = processXmlDecl(parser, 1, s, next);
      if (result != XML_ERROR_NONE)
        return result;
      enc = parser->m_encoding;
      handleDefault = XML_FALSE;
    } break;
#endif /* XML_DTD */
    case XML_ROLE_DOCTYPE_PUBLIC_ID:
#ifdef XML_DTD
      parser->m_useForeignDTD = XML_FALSE;
      /* The external subset becomes the entity currently being declared */
      parser->m_declEntity = (ENTITY *)lookup(
          parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
      if (! parser->m_declEntity)
        return XML_ERROR_NO_MEMORY;
#endif /* XML_DTD */
      dtd->hasParamEntityRefs = XML_TRUE;
      if (parser->m_startDoctypeDeclHandler) {
        XML_Char *pubId;
        if (! XmlIsPublicId(enc, s, next, eventPP))
          return XML_ERROR_PUBLICID;
        /* Store the literal without its first and last character (one
           enc->minBytesPerChar unit is skipped at each end) */
        pubId = poolStoreString(&parser->m_tempPool, enc,
                                s + enc->minBytesPerChar,
                                next - enc->minBytesPerChar);
        if (! pubId)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(pubId);
        poolFinish(&parser->m_tempPool);
        parser->m_doctypePubid = pubId;
        handleDefault = XML_FALSE;
        /* The public id was validated above; skip the repeated check */
        goto alreadyChecked;
      }
      /* fall through */
    case XML_ROLE_ENTITY_PUBLIC_ID:
      if (! XmlIsPublicId(enc, s, next, eventPP))
        return XML_ERROR_PUBLICID;
    alreadyChecked:
      if (dtd->keepProcessing && parser->m_declEntity) {
        XML_Char *tem
            = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                              next - enc->minBytesPerChar);
        if (! tem)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(tem);
        parser->m_declEntity->publicId = tem;
        poolFinish(&dtd->pool);
        /* Don't suppress the default handler if we fell through from
         * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
         */
        if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_DOCTYPE_CLOSE:
      if (allowClosingDoctype != XML_TRUE) {
        /* Must not close doctype from within expanded parameter entities */
        return XML_ERROR_INVALID_TOKEN;
      }

      /* Within this function m_doctypeName is only stored when a
         start-doctype handler is set and is reset to NULL once
         XML_ROLE_DOCTYPE_INTERNAL_SUBSET has reported it; non-NULL here
         means the doctype start is still unreported (final argument 0) */
      if (parser->m_doctypeName) {
        parser->m_startDoctypeDeclHandler(
            parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
            parser->m_doctypePubid, 0);
        poolClear(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      /* parser->m_doctypeSysid will be non-NULL in the case of a previous
         XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
         was not set, indicating an external subset
      */
#ifdef XML_DTD
      if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
        XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
        dtd->hasParamEntityRefs = XML_TRUE;
        if (parser->m_paramEntityParsing
            && parser->m_externalEntityRefHandler) {
          ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                            externalSubsetName, sizeof(ENTITY));
          if (! entity) {
            /* The external subset name "#" will have already been
             * inserted into the hash table at the start of the
             * external entity parsing, so no allocation will happen
             * and lookup() cannot fail.
             */
            return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
          }
          if (parser->m_useForeignDTD)
            entity->base = parser->m_curBase;
          /* paramEntityRead is cleared before the callback and inspected
             afterwards to learn whether the external subset was read */
          dtd->paramEntityRead = XML_FALSE;
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId))
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          if (dtd->paramEntityRead) {
            if (! dtd->standalone && parser->m_notStandaloneHandler
                && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
              return XML_ERROR_NOT_STANDALONE;
          }
          /* if we didn't read the foreign DTD then this means that there
             is no external subset and we must reset dtd->hasParamEntityRefs
          */
          else if (! parser->m_doctypeSysid)
            dtd->hasParamEntityRefs = hadParamEntityRefs;
          /* end of DTD - no need to update dtd->keepProcessing */
        }
        parser->m_useForeignDTD = XML_FALSE;
      }
#endif /* XML_DTD */
      if (parser->m_endDoctypeDeclHandler) {
        parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_INSTANCE_START:
#ifdef XML_DTD
      /* if there is no DOCTYPE declaration then now is the
         last chance to read the foreign DTD
      */
      if (parser->m_useForeignDTD) {
        XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
        dtd->hasParamEntityRefs = XML_TRUE;
        if (parser->m_paramEntityParsing
            && parser->m_externalEntityRefHandler) {
          ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                            externalSubsetName, sizeof(ENTITY));
          if (! entity)
            return XML_ERROR_NO_MEMORY;
          entity->base = parser->m_curBase;
          dtd->paramEntityRead = XML_FALSE;
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId))
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          if (dtd->paramEntityRead) {
            if (! dtd->standalone && parser->m_notStandaloneHandler
                && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
              return XML_ERROR_NOT_STANDALONE;
          }
          /* if we didn't read the foreign DTD then this means that there
             is no external subset and we must reset dtd->hasParamEntityRefs
          */
          else
            dtd->hasParamEntityRefs = hadParamEntityRefs;
          /* end of DTD - no need to update dtd->keepProcessing */
        }
      }
#endif /* XML_DTD */
      /* The prolog is over: switch processors and let contentProcessor
         handle the input starting at this token */
      parser->m_processor = contentProcessor;
      return contentProcessor(parser, s, end, nextPtr);
    case XML_ROLE_ATTLIST_ELEMENT_NAME:
      parser->m_declElementType = getElementType(parser, enc, s, next);
      if (! parser->m_declElementType)
        return XML_ERROR_NO_MEMORY;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_NAME:
      parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
      if (! parser->m_declAttributeId)
        return XML_ERROR_NO_MEMORY;
      /* Reset the per-attribute state for the new attribute definition */
      parser->m_declAttributeIsCdata = XML_FALSE;
      parser->m_declAttributeType = NULL;
      parser->m_declAttributeIsId = XML_FALSE;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
      parser->m_declAttributeIsCdata = XML_TRUE;
      parser->m_declAttributeType = atypeCDATA;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ID:
      parser->m_declAttributeIsId = XML_TRUE;
      parser->m_declAttributeType = atypeID;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
      parser->m_declAttributeType = atypeIDREF;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
      parser->m_declAttributeType = atypeIDREFS;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
      parser->m_declAttributeType = atypeENTITY;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
      parser->m_declAttributeType = atypeENTITIES;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
      parser->m_declAttributeType = atypeNMTOKEN;
      goto checkAttListDeclHandler;
    case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
      parser->m_declAttributeType = atypeNMTOKENS;
    /* Shared tail of all attlist name/type cases above */
    checkAttListDeclHandler:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
    case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
        /* Grow the type string in m_tempPool one value at a time: the first
           value is preceded by the opening prefix, every later value by the
           separator.  The closing parenthesis is appended when the
           attribute's default declaration is processed. */
        const XML_Char *prefix;
        if (parser->m_declAttributeType) {
          prefix = enumValueSep;
        } else {
          prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
                                                              : enumValueStart);
        }
        if (! poolAppendString(&parser->m_tempPool, prefix))
          return XML_ERROR_NO_MEMORY;
        if (! poolAppend(&parser->m_tempPool, enc, s, next))
          return XML_ERROR_NO_MEMORY;
        parser->m_declAttributeType = parser->m_tempPool.start;
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
    case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
      if (dtd->keepProcessing) {
        /* No default value (0) for these two declaration kinds */
        if (! defineAttribute(parser->m_declElementType,
                              parser->m_declAttributeId,
                              parser->m_declAttributeIsCdata,
                              parser->m_declAttributeIsId, 0, parser))
          return XML_ERROR_NO_MEMORY;
        if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
          /* A type string starting with '(' or with "NO" is one that was
             assembled in m_tempPool above and still lacks its ')' */
          if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
              || (*parser->m_declAttributeType == XML_T(ASCII_N)
                  && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
            /* Enumerated or Notation type */
            if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
                || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
              return XML_ERROR_NO_MEMORY;
            parser->m_declAttributeType = parser->m_tempPool.start;
            poolFinish(&parser->m_tempPool);
          }
          *eventEndPP = s;
          parser->m_attlistDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name,
              parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
              role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
          handleDefault = XML_FALSE;
        }
      }
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
      if (dtd->keepProcessing) {
        const XML_Char *attVal;
        /* Store the value, minus its first and last character, in
           dtd->pool */
        enum XML_Error result = storeAttributeValue(
            parser, enc, parser->m_declAttributeIsCdata,
            s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
            XML_ACCOUNT_NONE);
        if (result)
          return result;
        attVal = poolStart(&dtd->pool);
        poolFinish(&dtd->pool);
        /* ID attributes aren't allowed to have a default */
        if (! defineAttribute(
                parser->m_declElementType, parser->m_declAttributeId,
                parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
          return XML_ERROR_NO_MEMORY;
        if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
          if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
              || (*parser->m_declAttributeType == XML_T(ASCII_N)
                  && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
            /* Enumerated or Notation type */
            if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
                || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
              return XML_ERROR_NO_MEMORY;
            parser->m_declAttributeType = parser->m_tempPool.start;
            poolFinish(&parser->m_tempPool);
          }
          *eventEndPP = s;
          parser->m_attlistDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name,
              parser->m_declAttributeId->name, parser->m_declAttributeType,
              attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
          poolClear(&parser->m_tempPool);
          handleDefault = XML_FALSE;
        }
      }
      break;
    case XML_ROLE_ENTITY_VALUE:
      if (dtd->keepProcessing) {
#if XML_GE == 1
        // This will store the given replacement text in
        // parser->m_declEntity->textPtr.
        enum XML_Error result
            = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
                               next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
        /* Note: `result` is only checked after the pool contents have been
           either attached to the entity or discarded */
        if (parser->m_declEntity) {
          parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
          parser->m_declEntity->textLen
              = (int)(poolLength(&dtd->entityValuePool));
          poolFinish(&dtd->entityValuePool);
          if (parser->m_entityDeclHandler) {
            *eventEndPP = s;
            parser->m_entityDeclHandler(
                parser->m_handlerArg, parser->m_declEntity->name,
                parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
                parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
            handleDefault = XML_FALSE;
          }
        } else
          poolDiscard(&dtd->entityValuePool);
        if (result != XML_ERROR_NONE)
          return result;
#else
        // This will store "&entity123;" in parser->m_declEntity->textPtr
        // to end up as "&entity123;" in the handler.
        if (parser->m_declEntity != NULL) {
          const enum XML_Error result
              = storeSelfEntityValue(parser, parser->m_declEntity);
          if (result != XML_ERROR_NONE)
            return result;

          if (parser->m_entityDeclHandler) {
            *eventEndPP = s;
            parser->m_entityDeclHandler(
                parser->m_handlerArg, parser->m_declEntity->name,
                parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
                parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
            handleDefault = XML_FALSE;
          }
        }
#endif
      }
      break;
    case XML_ROLE_DOCTYPE_SYSTEM_ID:
#ifdef XML_DTD
      parser->m_useForeignDTD = XML_FALSE;
#endif /* XML_DTD */
      dtd->hasParamEntityRefs = XML_TRUE;
      if (parser->m_startDoctypeDeclHandler) {
        parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
                                                 s + enc->minBytesPerChar,
                                                 next - enc->minBytesPerChar);
        if (parser->m_doctypeSysid == NULL)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
#ifdef XML_DTD
      else
        /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
           for the case where no parser->m_startDoctypeDeclHandler is set */
        parser->m_doctypeSysid = externalSubsetName;
#endif /* XML_DTD */
      if (! dtd->standalone
#ifdef XML_DTD
          && ! parser->m_paramEntityParsing
#endif /* XML_DTD */
          && parser->m_notStandaloneHandler
          && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
        return XML_ERROR_NOT_STANDALONE;
      /* Without XML_DTD this case ends here; with XML_DTD it continues into
         XML_ROLE_ENTITY_SYSTEM_ID to record the system id on the external
         subset entity */
#ifndef XML_DTD
      break;
#else  /* XML_DTD */
      if (! parser->m_declEntity) {
        parser->m_declEntity = (ENTITY *)lookup(
            parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity->publicId = NULL;
      }
#endif /* XML_DTD */
      /* fall through */
    case XML_ROLE_ENTITY_SYSTEM_ID:
      if (dtd->keepProcessing && parser->m_declEntity) {
        parser->m_declEntity->systemId
            = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                              next - enc->minBytesPerChar);
        if (! parser->m_declEntity->systemId)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity->base = parser->m_curBase;
        poolFinish(&dtd->pool);
        /* Don't suppress the default handler if we fell through from
         * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
         */
        if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_ENTITY_COMPLETE:
#if XML_GE == 0
      // This will store "&entity123;" in entity->textPtr
      // to end up as "&entity123;" in the handler.
      if (parser->m_declEntity != NULL) {
        const enum XML_Error result
            = storeSelfEntityValue(parser, parser->m_declEntity);
        if (result != XML_ERROR_NONE)
          return result;
      }
#endif
      /* Report the entity with no value (0, 0) and with its base,
         system id and public id */
      if (dtd->keepProcessing && parser->m_declEntity
          && parser->m_entityDeclHandler) {
        *eventEndPP = s;
        parser->m_entityDeclHandler(
            parser->m_handlerArg, parser->m_declEntity->name,
            parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
            parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_ENTITY_NOTATION_NAME:
      if (dtd->keepProcessing && parser->m_declEntity) {
        parser->m_declEntity->notation
            = poolStoreString(&dtd->pool, enc, s, next);
        if (! parser->m_declEntity->notation)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&dtd->pool);
        /* The unparsed-entity handler takes precedence; the general entity
           declaration handler is used only when the former is not set */
        if (parser->m_unparsedEntityDeclHandler) {
          *eventEndPP = s;
          parser->m_unparsedEntityDeclHandler(
              parser->m_handlerArg, parser->m_declEntity->name,
              parser->m_declEntity->base, parser->m_declEntity->systemId,
              parser->m_declEntity->publicId, parser->m_declEntity->notation);
          handleDefault = XML_FALSE;
        } else if (parser->m_entityDeclHandler) {
          *eventEndPP = s;
          parser->m_entityDeclHandler(
              parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
              parser->m_declEntity->base, parser->m_declEntity->systemId,
              parser->m_declEntity->publicId, parser->m_declEntity->notation);
          handleDefault = XML_FALSE;
        }
      }
      break;
    case XML_ROLE_GENERAL_ENTITY_NAME: {
      /* A name recognised by XmlPredefinedEntityName() gets no entity
         record: m_declEntity stays NULL for the rest of the declaration */
      if (XmlPredefinedEntityName(enc, s, next)) {
        parser->m_declEntity = NULL;
        break;
      }
      if (dtd->keepProcessing) {
        const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
                                                name, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        /* If the returned entry does not carry the pointer just stored,
           the name was already in the table: discard the copy and treat
           this declaration as having no entity to fill in */
        if (parser->m_declEntity->name != name) {
          poolDiscard(&dtd->pool);
          parser->m_declEntity = NULL;
        } else {
          poolFinish(&dtd->pool);
          parser->m_declEntity->publicId = NULL;
          parser->m_declEntity->is_param = XML_FALSE;
          /* if we have a parent parser or are reading an internal parameter
             entity, then the entity declaration is not considered "internal"
          */
          parser->m_declEntity->is_internal
              = ! (parser->m_parentParser || parser->m_openInternalEntities);
          if (parser->m_entityDeclHandler)
            handleDefault = XML_FALSE;
        }
      } else {
        poolDiscard(&dtd->pool);
        parser->m_declEntity = NULL;
      }
    } break;
    case XML_ROLE_PARAM_ENTITY_NAME:
#ifdef XML_DTD
      /* Same procedure as XML_ROLE_GENERAL_ENTITY_NAME, but against
         dtd->paramEntities and with is_param set */
      if (dtd->keepProcessing) {
        const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
                                                name, sizeof(ENTITY));
        if (! parser->m_declEntity)
          return XML_ERROR_NO_MEMORY;
        if (parser->m_declEntity->name != name) {
          poolDiscard(&dtd->pool);
          parser->m_declEntity = NULL;
        } else {
          poolFinish(&dtd->pool);
          parser->m_declEntity->publicId = NULL;
          parser->m_declEntity->is_param = XML_TRUE;
          /* if we have a parent parser or are reading an internal parameter
             entity, then the entity declaration is not considered "internal"
          */
          parser->m_declEntity->is_internal
              = ! (parser->m_parentParser || parser->m_openInternalEntities);
          if (parser->m_entityDeclHandler)
            handleDefault = XML_FALSE;
        }
      } else {
        poolDiscard(&dtd->pool);
        parser->m_declEntity = NULL;
      }
#else  /* not XML_DTD */
      parser->m_declEntity = NULL;
#endif /* XML_DTD */
      break;
    case XML_ROLE_NOTATION_NAME:
      parser->m_declNotationPublicId = NULL;
      parser->m_declNotationName = NULL;
      if (parser->m_notationDeclHandler) {
        parser->m_declNotationName
            = poolStoreString(&parser->m_tempPool, enc, s, next);
        if (! parser->m_declNotationName)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_NOTATION_PUBLIC_ID:
      if (! XmlIsPublicId(enc, s, next, eventPP))
        return XML_ERROR_PUBLICID;
      if (parser
              ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
        XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
                                        s + enc->minBytesPerChar,
                                        next - enc->minBytesPerChar);
        if (! tem)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(tem);
        parser->m_declNotationPublicId = tem;
        poolFinish(&parser->m_tempPool);
        handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_NOTATION_SYSTEM_ID:
      if (parser->m_declNotationName && parser->m_notationDeclHandler) {
        const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
                                                   s + enc->minBytesPerChar,
                                                   next - enc->minBytesPerChar);
        if (! systemId)
          return XML_ERROR_NO_MEMORY;
        *eventEndPP = s;
        parser->m_notationDeclHandler(
            parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
            systemId, parser->m_declNotationPublicId);
        handleDefault = XML_FALSE;
      }
      /* The notation strings kept in m_tempPool are no longer needed */
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_NOTATION_NO_SYSTEM_ID:
      /* Notation declared with a public id only: report system id as 0 */
      if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
        *eventEndPP = s;
        parser->m_notationDeclHandler(
            parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
            0, parser->m_declNotationPublicId);
        handleDefault = XML_FALSE;
      }
      poolClear(&parser->m_tempPool);
      break;
    case XML_ROLE_ERROR:
      /* Choose the most specific error code for the offending token */
      switch (tok) {
      case XML_TOK_PARAM_ENTITY_REF:
        /* PE references in internal subset are
           not allowed within declarations. */
        return XML_ERROR_PARAM_ENTITY_REF;
      case XML_TOK_XML_DECL:
        return XML_ERROR_MISPLACED_XML_PI;
      default:
        return XML_ERROR_SYNTAX;
      }
#ifdef XML_DTD
    case XML_ROLE_IGNORE_SECT: {
      enum XML_Error result;
      /* The token is forwarded to the default handler here, before
         doIgnoreSection() advances `next` */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      handleDefault = XML_FALSE;
      result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        /* doIgnoreSection() left `next` NULL: continue later via
           ignoreSectionProcessor */
        parser->m_processor = ignoreSectionProcessor;
        return result;
      }
    } break;
#endif /* XML_DTD */
    case XML_ROLE_GROUP_OPEN:
      /* Ensure m_groupConnector (and dtd->scaffIndex, if allocated) have a
         slot for the current nesting level: start at 32 entries, double
         afterwards */
      if (parser->m_prologState.level >= parser->m_groupSize) {
        if (parser->m_groupSize) {
          {
            /* Detect and prevent integer overflow */
            if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
              return XML_ERROR_NO_MEMORY;
            }

            char *const new_connector = (char *)REALLOC(
                parser, parser->m_groupConnector, parser->m_groupSize *= 2);
            if (new_connector == NULL) {
              /* Undo the doubling; the old buffer is still in place */
              parser->m_groupSize /= 2;
              return XML_ERROR_NO_MEMORY;
            }
            parser->m_groupConnector = new_connector;
          }

          if (dtd->scaffIndex) {
            /* Detect and prevent integer overflow.
             * The preprocessor guard addresses the "always false" warning
             * from -Wtype-limits on platforms where
             * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
            if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
              return XML_ERROR_NO_MEMORY;
            }
#endif

            int *const new_scaff_index = (int *)REALLOC(
                parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
            if (new_scaff_index == NULL)
              return XML_ERROR_NO_MEMORY;
            dtd->scaffIndex = new_scaff_index;
          }
        } else {
          parser->m_groupConnector
              = (char *)MALLOC(parser, parser->m_groupSize = 32);
          if (! parser->m_groupConnector) {
            parser->m_groupSize = 0;
            return XML_ERROR_NO_MEMORY;
          }
        }
      }
      /* 0 = no connector seen yet at this nesting level */
      parser->m_groupConnector[parser->m_prologState.level] = 0;
      if (dtd->in_eldecl) {
        /* Open a new scaffold node for the group and push it on the
           scaffIndex stack; it starts out as a sequence */
        int myindex = nextScaffoldPart(parser);
        if (myindex < 0)
          return XML_ERROR_NO_MEMORY;
        assert(dtd->scaffIndex != NULL);
        dtd->scaffIndex[dtd->scaffLevel] = myindex;
        dtd->scaffLevel++;
        dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;
    case XML_ROLE_GROUP_SEQUENCE:
      /* Mixing ',' and '|' within one group is a syntax error */
      if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
        return XML_ERROR_SYNTAX;
      parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
      if (dtd->in_eldecl && parser->m_elementDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_GROUP_CHOICE:
      if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
        return XML_ERROR_SYNTAX;
      /* On the first '|' of a group, retype the enclosing scaffold node as
         a choice unless it is already marked as mixed content */
      if (dtd->in_eldecl
          && ! parser->m_groupConnector[parser->m_prologState.level]
          && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
              != XML_CTYPE_MIXED)) {
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
            = XML_CTYPE_CHOICE;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
      break;
    case XML_ROLE_PARAM_ENTITY_REF:
#ifdef XML_DTD
    case XML_ROLE_INNER_PARAM_ENTITY_REF:
      dtd->hasParamEntityRefs = XML_TRUE;
      if (! parser->m_paramEntityParsing)
        dtd->keepProcessing = dtd->standalone;
      else {
        const XML_Char *name;
        ENTITY *entity;
        /* Look the name up without creating an entry (size 0); the
           temporary copy of the name is discarded straight away */
        name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                               next - enc->minBytesPerChar);
        if (! name)
          return XML_ERROR_NO_MEMORY;
        entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
        poolDiscard(&dtd->pool);
        /* first, determine if a check for an existing declaration is needed;
           if yes, check that the entity exists, and that it is internal,
           otherwise call the skipped entity handler
        */
        if (parser->m_prologState.documentEntity
            && (dtd->standalone ? ! parser->m_openInternalEntities
                                : ! dtd->hasParamEntityRefs)) {
          if (! entity)
            return XML_ERROR_UNDEFINED_ENTITY;
          else if (! entity->is_internal) {
            /* It's hard to exhaustively search the code to be sure,
             * but there doesn't seem to be a way of executing the
             * following line.  There are two cases:
             *
             * If 'standalone' is false, the DTD must have no
             * parameter entities or we wouldn't have passed the outer
             * 'if' statement.  That means the only entity in the hash
             * table is the external subset name "#" which cannot be
             * given as a parameter entity name in XML syntax, so the
             * lookup must have returned NULL and we don't even reach
             * the test for an internal entity.
             *
             * If 'standalone' is true, it does not seem to be
             * possible to create entities taking this code path that
             * are not internal entities, so fail the test above.
             *
             * Because this analysis is very uncertain, the code is
             * being left in place and merely removed from the
             * coverage test statistics.
             */
            return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
          }
        } else if (! entity) {
          dtd->keepProcessing = dtd->standalone;
          /* cannot report skipped entities in declarations */
          if ((role == XML_ROLE_PARAM_ENTITY_REF)
              && parser->m_skippedEntityHandler) {
            parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
            handleDefault = XML_FALSE;
          }
          break;
        }
        if (entity->open)
          return XML_ERROR_RECURSIVE_ENTITY_REF;
        /* Entity with stored replacement text: expand it in place */
        if (entity->textPtr) {
          enum XML_Error result;
          XML_Bool betweenDecl
              = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
          result = processInternalEntity(parser, entity, betweenDecl);
          if (result != XML_ERROR_NONE)
            return result;
          handleDefault = XML_FALSE;
          break;
        }
        /* No stored text: let the application load the entity through the
           external entity reference handler, if one is set.  The entity is
           marked open for the duration of the callback. */
        if (parser->m_externalEntityRefHandler) {
          dtd->paramEntityRead = XML_FALSE;
          entity->open = XML_TRUE;
          entityTrackingOnOpen(parser, entity, __LINE__);
          if (! parser->m_externalEntityRefHandler(
                  parser->m_externalEntityRefHandlerArg, 0, entity->base,
                  entity->systemId, entity->publicId)) {
            entityTrackingOnClose(parser, entity, __LINE__);
            entity->open = XML_FALSE;
            return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
          }
          entityTrackingOnClose(parser, entity, __LINE__);
          entity->open = XML_FALSE;
          handleDefault = XML_FALSE;
          if (! dtd->paramEntityRead) {
            dtd->keepProcessing = dtd->standalone;
            break;
          }
        } else {
          dtd->keepProcessing = dtd->standalone;
          break;
        }
      }
#endif /* XML_DTD */
      if (! dtd->standalone && parser->m_notStandaloneHandler
          && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
        return XML_ERROR_NOT_STANDALONE;
      break;

      /* Element declaration stuff */

    case XML_ROLE_ELEMENT_NAME:
      /* Content models are only built when an element declaration handler
         is set; in_eldecl gates the scaffold work in the cases below */
      if (parser->m_elementDeclHandler) {
        parser->m_declElementType = getElementType(parser, enc, s, next);
        if (! parser->m_declElementType)
          return XML_ERROR_NO_MEMORY;
        dtd->scaffLevel = 0;
        dtd->scaffCount = 0;
        dtd->in_eldecl = XML_TRUE;
        handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_ANY:
    case XML_ROLE_CONTENT_EMPTY:
      if (dtd->in_eldecl) {
        if (parser->m_elementDeclHandler) {
          /* Build a single childless XML_Content node and hand it to the
             handler; it is not freed in this function */
          XML_Content *content
              = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
          if (! content)
            return XML_ERROR_NO_MEMORY;
          content->quant = XML_CQUANT_NONE;
          content->name = NULL;
          content->numchildren = 0;
          content->children = NULL;
          content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
                                                          : XML_CTYPE_EMPTY);
          *eventEndPP = s;
          parser->m_elementDeclHandler(
              parser->m_handlerArg, parser->m_declElementType->name, content);
          handleDefault = XML_FALSE;
        }
        dtd->in_eldecl = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_PCDATA:
      if (dtd->in_eldecl) {
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
            = XML_CTYPE_MIXED;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_CONTENT_ELEMENT:
      quant = XML_CQUANT_NONE;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_OPT:
      quant = XML_CQUANT_OPT;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_REP:
      quant = XML_CQUANT_REP;
      goto elementContent;
    case XML_ROLE_CONTENT_ELEMENT_PLUS:
      quant = XML_CQUANT_PLUS;
    elementContent:
      if (dtd->in_eldecl) {
        ELEMENT_TYPE *el;
        const XML_Char *name;
        size_t nameLen;
        /* For quantified names the token's last character is excluded from
           the element name */
        const char *nxt
            = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
        int myindex = nextScaffoldPart(parser);
        if (myindex < 0)
          return XML_ERROR_NO_MEMORY;
        dtd->scaffold[myindex].type = XML_CTYPE_NAME;
        dtd->scaffold[myindex].quant = quant;
        el = getElementType(parser, enc, s, nxt);
        if (! el)
          return XML_ERROR_NO_MEMORY;
        name = el->name;
        dtd->scaffold[myindex].name = name;
        /* Count the characters of the name including its terminator (the
           post-increment also runs for the terminating zero) */
        nameLen = 0;
        for (; name[nameLen++];)
          ;

        /* Detect and prevent integer overflow */
        if (nameLen > UINT_MAX - dtd->contentStringLen) {
          return XML_ERROR_NO_MEMORY;
        }

        dtd->contentStringLen += (unsigned)nameLen;
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
      }
      break;

    case XML_ROLE_GROUP_CLOSE:
      quant = XML_CQUANT_NONE;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_OPT:
      quant = XML_CQUANT_OPT;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_REP:
      quant = XML_CQUANT_REP;
      goto closeGroup;
    case XML_ROLE_GROUP_CLOSE_PLUS:
      quant = XML_CQUANT_PLUS;
    closeGroup:
      if (dtd->in_eldecl) {
        if (parser->m_elementDeclHandler)
          handleDefault = XML_FALSE;
        /* Pop the group and record its quantifier */
        dtd->scaffLevel--;
        dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
        if (dtd->scaffLevel == 0) {
          /* Outermost group closed.  handleDefault is false here exactly
             when an element declaration handler is set (see above), so the
             model is only built when there is a handler to receive it */
          if (! handleDefault) {
            XML_Content *model = build_model(parser);
            if (! model)
              return XML_ERROR_NO_MEMORY;
            *eventEndPP = s;
            parser->m_elementDeclHandler(
                parser->m_handlerArg, parser->m_declElementType->name, model);
          }
          dtd->in_eldecl = XML_FALSE;
          dtd->contentStringLen = 0;
        }
      }
      break;
      /* End element declaration stuff */

    case XML_ROLE_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      handleDefault = XML_FALSE;
      break;
    case XML_ROLE_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      handleDefault = XML_FALSE;
      break;
    case XML_ROLE_NONE:
      /* Among role-less tokens only the byte order mark is withheld from
         the default handler */
      switch (tok) {
      case XML_TOK_BOM:
        handleDefault = XML_FALSE;
        break;
      }
      break;
    /* The *_NONE roles below carry no data of their own; they only decide
       whether the token is withheld from the default handler because a
       more specific handler is set for the enclosing declaration */
    case XML_ROLE_DOCTYPE_NONE:
      if (parser->m_startDoctypeDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ENTITY_NONE:
      if (dtd->keepProcessing && parser->m_entityDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_NOTATION_NONE:
      if (parser->m_notationDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ATTLIST_NONE:
      if (dtd->keepProcessing && parser->m_attlistDeclHandler)
        handleDefault = XML_FALSE;
      break;
    case XML_ROLE_ELEMENT_NONE:
      if (parser->m_elementDeclHandler)
        handleDefault = XML_FALSE;
      break;
    } /* end of big switch */

    if (handleDefault && parser->m_defaultHandler)
      reportDefault(parser, enc, s, next);

    /* Check the parsing status before moving on to the next token */
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:
      s = next;
      tok = XmlPrologTok(enc, s, end, &next);
    }
  }
  /* not reached */
}

static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser, const char *s, const char *end,
                const char **nextPtr) {
  parser->m_processor = epilogProcessor;
  parser->m_eventPtr = s;
  for (;;) {
    const char *next = NULL;
    int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
#if XML_GE == 1
    if (!
accountingDiffTolerated(parser, tok, s, next, __LINE__, 5747 XML_ACCOUNT_DIRECT)) { 5748 accountingOnAbort(parser); 5749 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5750 } 5751 #endif 5752 parser->m_eventEndPtr = next; 5753 switch (tok) { 5754 /* report partial linebreak - it might be the last token */ 5755 case -XML_TOK_PROLOG_S: 5756 if (parser->m_defaultHandler) { 5757 reportDefault(parser, parser->m_encoding, s, next); 5758 if (parser->m_parsingStatus.parsing == XML_FINISHED) 5759 return XML_ERROR_ABORTED; 5760 } 5761 *nextPtr = next; 5762 return XML_ERROR_NONE; 5763 case XML_TOK_NONE: 5764 *nextPtr = s; 5765 return XML_ERROR_NONE; 5766 case XML_TOK_PROLOG_S: 5767 if (parser->m_defaultHandler) 5768 reportDefault(parser, parser->m_encoding, s, next); 5769 break; 5770 case XML_TOK_PI: 5771 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) 5772 return XML_ERROR_NO_MEMORY; 5773 break; 5774 case XML_TOK_COMMENT: 5775 if (! reportComment(parser, parser->m_encoding, s, next)) 5776 return XML_ERROR_NO_MEMORY; 5777 break; 5778 case XML_TOK_INVALID: 5779 parser->m_eventPtr = next; 5780 return XML_ERROR_INVALID_TOKEN; 5781 case XML_TOK_PARTIAL: 5782 if (! parser->m_parsingStatus.finalBuffer) { 5783 *nextPtr = s; 5784 return XML_ERROR_NONE; 5785 } 5786 return XML_ERROR_UNCLOSED_TOKEN; 5787 case XML_TOK_PARTIAL_CHAR: 5788 if (! 
parser->m_parsingStatus.finalBuffer) { 5789 *nextPtr = s; 5790 return XML_ERROR_NONE; 5791 } 5792 return XML_ERROR_PARTIAL_CHAR; 5793 default: 5794 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 5795 } 5796 parser->m_eventPtr = s = next; 5797 switch (parser->m_parsingStatus.parsing) { 5798 case XML_SUSPENDED: 5799 *nextPtr = next; 5800 return XML_ERROR_NONE; 5801 case XML_FINISHED: 5802 return XML_ERROR_ABORTED; 5803 default:; 5804 } 5805 } 5806 } 5807 5808 static enum XML_Error 5809 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { 5810 const char *textStart, *textEnd; 5811 const char *next; 5812 enum XML_Error result; 5813 OPEN_INTERNAL_ENTITY *openEntity; 5814 5815 if (parser->m_freeInternalEntities) { 5816 openEntity = parser->m_freeInternalEntities; 5817 parser->m_freeInternalEntities = openEntity->next; 5818 } else { 5819 openEntity 5820 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); 5821 if (! openEntity) 5822 return XML_ERROR_NO_MEMORY; 5823 } 5824 entity->open = XML_TRUE; 5825 #if XML_GE == 1 5826 entityTrackingOnOpen(parser, entity, __LINE__); 5827 #endif 5828 entity->processed = 0; 5829 openEntity->next = parser->m_openInternalEntities; 5830 parser->m_openInternalEntities = openEntity; 5831 openEntity->entity = entity; 5832 openEntity->startTagLevel = parser->m_tagLevel; 5833 openEntity->betweenDecl = betweenDecl; 5834 openEntity->internalEventPtr = NULL; 5835 openEntity->internalEventEndPtr = NULL; 5836 textStart = (const char *)entity->textPtr; 5837 textEnd = (const char *)(entity->textPtr + entity->textLen); 5838 /* Set a safe default value in case 'next' does not get set */ 5839 next = textStart; 5840 5841 #ifdef XML_DTD 5842 if (entity->is_param) { 5843 int tok 5844 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5845 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5846 tok, next, &next, XML_FALSE, XML_FALSE, 5847 XML_ACCOUNT_ENTITY_EXPANSION); 5848 } 
else 5849 #endif /* XML_DTD */ 5850 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, 5851 textStart, textEnd, &next, XML_FALSE, 5852 XML_ACCOUNT_ENTITY_EXPANSION); 5853 5854 if (result == XML_ERROR_NONE) { 5855 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5856 entity->processed = (int)(next - textStart); 5857 parser->m_processor = internalEntityProcessor; 5858 } else if (parser->m_openInternalEntities->entity == entity) { 5859 #if XML_GE == 1 5860 entityTrackingOnClose(parser, entity, __LINE__); 5861 #endif /* XML_GE == 1 */ 5862 entity->open = XML_FALSE; 5863 parser->m_openInternalEntities = openEntity->next; 5864 /* put openEntity back in list of free instances */ 5865 openEntity->next = parser->m_freeInternalEntities; 5866 parser->m_freeInternalEntities = openEntity; 5867 } 5868 } 5869 return result; 5870 } 5871 5872 static enum XML_Error PTRCALL 5873 internalEntityProcessor(XML_Parser parser, const char *s, const char *end, 5874 const char **nextPtr) { 5875 ENTITY *entity; 5876 const char *textStart, *textEnd; 5877 const char *next; 5878 enum XML_Error result; 5879 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; 5880 if (! 
openEntity) 5881 return XML_ERROR_UNEXPECTED_STATE; 5882 5883 entity = openEntity->entity; 5884 textStart = ((const char *)entity->textPtr) + entity->processed; 5885 textEnd = (const char *)(entity->textPtr + entity->textLen); 5886 /* Set a safe default value in case 'next' does not get set */ 5887 next = textStart; 5888 5889 #ifdef XML_DTD 5890 if (entity->is_param) { 5891 int tok 5892 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); 5893 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, 5894 tok, next, &next, XML_FALSE, XML_TRUE, 5895 XML_ACCOUNT_ENTITY_EXPANSION); 5896 } else 5897 #endif /* XML_DTD */ 5898 result = doContent(parser, openEntity->startTagLevel, 5899 parser->m_internalEncoding, textStart, textEnd, &next, 5900 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); 5901 5902 if (result != XML_ERROR_NONE) 5903 return result; 5904 5905 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5906 entity->processed = (int)(next - (const char *)entity->textPtr); 5907 return result; 5908 } 5909 5910 #if XML_GE == 1 5911 entityTrackingOnClose(parser, entity, __LINE__); 5912 #endif 5913 entity->open = XML_FALSE; 5914 parser->m_openInternalEntities = openEntity->next; 5915 /* put openEntity back in list of free instances */ 5916 openEntity->next = parser->m_freeInternalEntities; 5917 parser->m_freeInternalEntities = openEntity; 5918 5919 // If there are more open entities we want to stop right here and have the 5920 // upcoming call to XML_ResumeParser continue with entity content, or it would 5921 // be ignored altogether. 
5922 if (parser->m_openInternalEntities != NULL 5923 && parser->m_parsingStatus.parsing == XML_SUSPENDED) { 5924 return XML_ERROR_NONE; 5925 } 5926 5927 #ifdef XML_DTD 5928 if (entity->is_param) { 5929 int tok; 5930 parser->m_processor = prologProcessor; 5931 tok = XmlPrologTok(parser->m_encoding, s, end, &next); 5932 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, 5933 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, 5934 XML_ACCOUNT_DIRECT); 5935 } else 5936 #endif /* XML_DTD */ 5937 { 5938 parser->m_processor = contentProcessor; 5939 /* see externalEntityContentProcessor vs contentProcessor */ 5940 result = doContent(parser, parser->m_parentParser ? 1 : 0, 5941 parser->m_encoding, s, end, nextPtr, 5942 (XML_Bool)! parser->m_parsingStatus.finalBuffer, 5943 XML_ACCOUNT_DIRECT); 5944 if (result == XML_ERROR_NONE) { 5945 if (! storeRawNames(parser)) 5946 return XML_ERROR_NO_MEMORY; 5947 } 5948 return result; 5949 } 5950 } 5951 5952 static enum XML_Error PTRCALL 5953 errorProcessor(XML_Parser parser, const char *s, const char *end, 5954 const char **nextPtr) { 5955 UNUSED_P(s); 5956 UNUSED_P(end); 5957 UNUSED_P(nextPtr); 5958 return parser->m_errorCode; 5959 } 5960 5961 static enum XML_Error 5962 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5963 const char *ptr, const char *end, STRING_POOL *pool, 5964 enum XML_Account account) { 5965 enum XML_Error result 5966 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); 5967 if (result) 5968 return result; 5969 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) 5970 poolChop(pool); 5971 if (! 
poolAppendChar(pool, XML_T('\0'))) 5972 return XML_ERROR_NO_MEMORY; 5973 return XML_ERROR_NONE; 5974 } 5975 5976 static enum XML_Error 5977 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, 5978 const char *ptr, const char *end, STRING_POOL *pool, 5979 enum XML_Account account) { 5980 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 5981 #ifndef XML_DTD 5982 UNUSED_P(account); 5983 #endif 5984 5985 for (;;) { 5986 const char *next 5987 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ 5988 int tok = XmlAttributeValueTok(enc, ptr, end, &next); 5989 #if XML_GE == 1 5990 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { 5991 accountingOnAbort(parser); 5992 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 5993 } 5994 #endif 5995 switch (tok) { 5996 case XML_TOK_NONE: 5997 return XML_ERROR_NONE; 5998 case XML_TOK_INVALID: 5999 if (enc == parser->m_encoding) 6000 parser->m_eventPtr = next; 6001 return XML_ERROR_INVALID_TOKEN; 6002 case XML_TOK_PARTIAL: 6003 if (enc == parser->m_encoding) 6004 parser->m_eventPtr = ptr; 6005 return XML_ERROR_INVALID_TOKEN; 6006 case XML_TOK_CHAR_REF: { 6007 XML_Char buf[XML_ENCODE_MAX]; 6008 int i; 6009 int n = XmlCharRefNumber(enc, ptr); 6010 if (n < 0) { 6011 if (enc == parser->m_encoding) 6012 parser->m_eventPtr = ptr; 6013 return XML_ERROR_BAD_CHAR_REF; 6014 } 6015 if (! isCdata && n == 0x20 /* space */ 6016 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6017 break; 6018 n = XmlEncode(n, (ICHAR *)buf); 6019 /* The XmlEncode() functions can never return 0 here. That 6020 * error return happens if the code point passed in is either 6021 * negative or greater than or equal to 0x110000. The 6022 * XmlCharRefNumber() functions will all return a number 6023 * strictly less than 0x110000 or a negative value if an error 6024 * occurred. 
The negative value is intercepted above, so 6025 * XmlEncode() is never passed a value it might return an 6026 * error for. 6027 */ 6028 for (i = 0; i < n; i++) { 6029 if (! poolAppendChar(pool, buf[i])) 6030 return XML_ERROR_NO_MEMORY; 6031 } 6032 } break; 6033 case XML_TOK_DATA_CHARS: 6034 if (! poolAppend(pool, enc, ptr, next)) 6035 return XML_ERROR_NO_MEMORY; 6036 break; 6037 case XML_TOK_TRAILING_CR: 6038 next = ptr + enc->minBytesPerChar; 6039 /* fall through */ 6040 case XML_TOK_ATTRIBUTE_VALUE_S: 6041 case XML_TOK_DATA_NEWLINE: 6042 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) 6043 break; 6044 if (! poolAppendChar(pool, 0x20)) 6045 return XML_ERROR_NO_MEMORY; 6046 break; 6047 case XML_TOK_ENTITY_REF: { 6048 const XML_Char *name; 6049 ENTITY *entity; 6050 char checkEntityDecl; 6051 XML_Char ch = (XML_Char)XmlPredefinedEntityName( 6052 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); 6053 if (ch) { 6054 #if XML_GE == 1 6055 /* NOTE: We are replacing 4-6 characters original input for 1 character 6056 * so there is no amplification and hence recording without 6057 * protection. */ 6058 accountingDiffTolerated(parser, tok, (char *)&ch, 6059 ((char *)&ch) + sizeof(XML_Char), __LINE__, 6060 XML_ACCOUNT_ENTITY_EXPANSION); 6061 #endif /* XML_GE == 1 */ 6062 if (! poolAppendChar(pool, ch)) 6063 return XML_ERROR_NO_MEMORY; 6064 break; 6065 } 6066 name = poolStoreString(&parser->m_temp2Pool, enc, 6067 ptr + enc->minBytesPerChar, 6068 next - enc->minBytesPerChar); 6069 if (! name) 6070 return XML_ERROR_NO_MEMORY; 6071 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 6072 poolDiscard(&parser->m_temp2Pool); 6073 /* First, determine if a check for an existing declaration is needed; 6074 if yes, check that the entity exists, and that it is internal. 6075 */ 6076 if (pool == &dtd->pool) /* are we called from prolog? 
*/ 6077 checkEntityDecl = 6078 #ifdef XML_DTD 6079 parser->m_prologState.documentEntity && 6080 #endif /* XML_DTD */ 6081 (dtd->standalone ? ! parser->m_openInternalEntities 6082 : ! dtd->hasParamEntityRefs); 6083 else /* if (pool == &parser->m_tempPool): we are called from content */ 6084 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; 6085 if (checkEntityDecl) { 6086 if (! entity) 6087 return XML_ERROR_UNDEFINED_ENTITY; 6088 else if (! entity->is_internal) 6089 return XML_ERROR_ENTITY_DECLARED_IN_PE; 6090 } else if (! entity) { 6091 /* Cannot report skipped entity here - see comments on 6092 parser->m_skippedEntityHandler. 6093 if (parser->m_skippedEntityHandler) 6094 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6095 */ 6096 /* Cannot call the default handler because this would be 6097 out of sync with the call to the startElementHandler. 6098 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) 6099 reportDefault(parser, enc, ptr, next); 6100 */ 6101 break; 6102 } 6103 if (entity->open) { 6104 if (enc == parser->m_encoding) { 6105 /* It does not appear that this line can be executed. 6106 * 6107 * The "if (entity->open)" check catches recursive entity 6108 * definitions. In order to be called with an open 6109 * entity, it must have gone through this code before and 6110 * been through the recursive call to 6111 * appendAttributeValue() some lines below. That call 6112 * sets the local encoding ("enc") to the parser's 6113 * internal encoding (internal_utf8 or internal_utf16), 6114 * which can never be the same as the principle encoding. 6115 * It doesn't appear there is another code path that gets 6116 * here with entity->open being TRUE. 6117 * 6118 * Since it is not certain that this logic is watertight, 6119 * we keep the line and merely exclude it from coverage 6120 * tests. 
6121 */ 6122 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ 6123 } 6124 return XML_ERROR_RECURSIVE_ENTITY_REF; 6125 } 6126 if (entity->notation) { 6127 if (enc == parser->m_encoding) 6128 parser->m_eventPtr = ptr; 6129 return XML_ERROR_BINARY_ENTITY_REF; 6130 } 6131 if (! entity->textPtr) { 6132 if (enc == parser->m_encoding) 6133 parser->m_eventPtr = ptr; 6134 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 6135 } else { 6136 enum XML_Error result; 6137 const XML_Char *textEnd = entity->textPtr + entity->textLen; 6138 entity->open = XML_TRUE; 6139 #if XML_GE == 1 6140 entityTrackingOnOpen(parser, entity, __LINE__); 6141 #endif 6142 result = appendAttributeValue(parser, parser->m_internalEncoding, 6143 isCdata, (const char *)entity->textPtr, 6144 (const char *)textEnd, pool, 6145 XML_ACCOUNT_ENTITY_EXPANSION); 6146 #if XML_GE == 1 6147 entityTrackingOnClose(parser, entity, __LINE__); 6148 #endif 6149 entity->open = XML_FALSE; 6150 if (result) 6151 return result; 6152 } 6153 } break; 6154 default: 6155 /* The only token returned by XmlAttributeValueTok() that does 6156 * not have an explicit case here is XML_TOK_PARTIAL_CHAR. 6157 * Getting that would require an entity name to contain an 6158 * incomplete XML character (e.g. \xE2\x82); however previous 6159 * tokenisers will have already recognised and rejected such 6160 * names before XmlAttributeValueTok() gets a look-in. This 6161 * default case should be retained as a safety net, but the code 6162 * excluded from coverage tests. 
6163 * 6164 * LCOV_EXCL_START 6165 */ 6166 if (enc == parser->m_encoding) 6167 parser->m_eventPtr = ptr; 6168 return XML_ERROR_UNEXPECTED_STATE; 6169 /* LCOV_EXCL_STOP */ 6170 } 6171 ptr = next; 6172 } 6173 /* not reached */ 6174 } 6175 6176 #if XML_GE == 1 6177 static enum XML_Error 6178 storeEntityValue(XML_Parser parser, const ENCODING *enc, 6179 const char *entityTextPtr, const char *entityTextEnd, 6180 enum XML_Account account) { 6181 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6182 STRING_POOL *pool = &(dtd->entityValuePool); 6183 enum XML_Error result = XML_ERROR_NONE; 6184 # ifdef XML_DTD 6185 int oldInEntityValue = parser->m_prologState.inEntityValue; 6186 parser->m_prologState.inEntityValue = 1; 6187 # else 6188 UNUSED_P(account); 6189 # endif /* XML_DTD */ 6190 /* never return Null for the value argument in EntityDeclHandler, 6191 since this would indicate an external entity; therefore we 6192 have to make sure that entityValuePool.start is not null */ 6193 if (! pool->blocks) { 6194 if (! poolGrow(pool)) 6195 return XML_ERROR_NO_MEMORY; 6196 } 6197 6198 for (;;) { 6199 const char *next 6200 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ 6201 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); 6202 6203 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, 6204 account)) { 6205 accountingOnAbort(parser); 6206 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; 6207 goto endEntityValue; 6208 } 6209 6210 switch (tok) { 6211 case XML_TOK_PARAM_ENTITY_REF: 6212 # ifdef XML_DTD 6213 if (parser->m_isParamEntity || enc != parser->m_encoding) { 6214 const XML_Char *name; 6215 ENTITY *entity; 6216 name = poolStoreString(&parser->m_tempPool, enc, 6217 entityTextPtr + enc->minBytesPerChar, 6218 next - enc->minBytesPerChar); 6219 if (! 
name) { 6220 result = XML_ERROR_NO_MEMORY; 6221 goto endEntityValue; 6222 } 6223 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); 6224 poolDiscard(&parser->m_tempPool); 6225 if (! entity) { 6226 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ 6227 /* cannot report skipped entity here - see comments on 6228 parser->m_skippedEntityHandler 6229 if (parser->m_skippedEntityHandler) 6230 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); 6231 */ 6232 dtd->keepProcessing = dtd->standalone; 6233 goto endEntityValue; 6234 } 6235 if (entity->open) { 6236 if (enc == parser->m_encoding) 6237 parser->m_eventPtr = entityTextPtr; 6238 result = XML_ERROR_RECURSIVE_ENTITY_REF; 6239 goto endEntityValue; 6240 } 6241 if (entity->systemId) { 6242 if (parser->m_externalEntityRefHandler) { 6243 dtd->paramEntityRead = XML_FALSE; 6244 entity->open = XML_TRUE; 6245 entityTrackingOnOpen(parser, entity, __LINE__); 6246 if (! parser->m_externalEntityRefHandler( 6247 parser->m_externalEntityRefHandlerArg, 0, entity->base, 6248 entity->systemId, entity->publicId)) { 6249 entityTrackingOnClose(parser, entity, __LINE__); 6250 entity->open = XML_FALSE; 6251 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; 6252 goto endEntityValue; 6253 } 6254 entityTrackingOnClose(parser, entity, __LINE__); 6255 entity->open = XML_FALSE; 6256 if (! 
dtd->paramEntityRead) 6257 dtd->keepProcessing = dtd->standalone; 6258 } else 6259 dtd->keepProcessing = dtd->standalone; 6260 } else { 6261 entity->open = XML_TRUE; 6262 entityTrackingOnOpen(parser, entity, __LINE__); 6263 result = storeEntityValue( 6264 parser, parser->m_internalEncoding, (const char *)entity->textPtr, 6265 (const char *)(entity->textPtr + entity->textLen), 6266 XML_ACCOUNT_ENTITY_EXPANSION); 6267 entityTrackingOnClose(parser, entity, __LINE__); 6268 entity->open = XML_FALSE; 6269 if (result) 6270 goto endEntityValue; 6271 } 6272 break; 6273 } 6274 # endif /* XML_DTD */ 6275 /* In the internal subset, PE references are not legal 6276 within markup declarations, e.g entity values in this case. */ 6277 parser->m_eventPtr = entityTextPtr; 6278 result = XML_ERROR_PARAM_ENTITY_REF; 6279 goto endEntityValue; 6280 case XML_TOK_NONE: 6281 result = XML_ERROR_NONE; 6282 goto endEntityValue; 6283 case XML_TOK_ENTITY_REF: 6284 case XML_TOK_DATA_CHARS: 6285 if (! poolAppend(pool, enc, entityTextPtr, next)) { 6286 result = XML_ERROR_NO_MEMORY; 6287 goto endEntityValue; 6288 } 6289 break; 6290 case XML_TOK_TRAILING_CR: 6291 next = entityTextPtr + enc->minBytesPerChar; 6292 /* fall through */ 6293 case XML_TOK_DATA_NEWLINE: 6294 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6295 result = XML_ERROR_NO_MEMORY; 6296 goto endEntityValue; 6297 } 6298 *(pool->ptr)++ = 0xA; 6299 break; 6300 case XML_TOK_CHAR_REF: { 6301 XML_Char buf[XML_ENCODE_MAX]; 6302 int i; 6303 int n = XmlCharRefNumber(enc, entityTextPtr); 6304 if (n < 0) { 6305 if (enc == parser->m_encoding) 6306 parser->m_eventPtr = entityTextPtr; 6307 result = XML_ERROR_BAD_CHAR_REF; 6308 goto endEntityValue; 6309 } 6310 n = XmlEncode(n, (ICHAR *)buf); 6311 /* The XmlEncode() functions can never return 0 here. That 6312 * error return happens if the code point passed in is either 6313 * negative or greater than or equal to 0x110000. 
The 6314 * XmlCharRefNumber() functions will all return a number 6315 * strictly less than 0x110000 or a negative value if an error 6316 * occurred. The negative value is intercepted above, so 6317 * XmlEncode() is never passed a value it might return an 6318 * error for. 6319 */ 6320 for (i = 0; i < n; i++) { 6321 if (pool->end == pool->ptr && ! poolGrow(pool)) { 6322 result = XML_ERROR_NO_MEMORY; 6323 goto endEntityValue; 6324 } 6325 *(pool->ptr)++ = buf[i]; 6326 } 6327 } break; 6328 case XML_TOK_PARTIAL: 6329 if (enc == parser->m_encoding) 6330 parser->m_eventPtr = entityTextPtr; 6331 result = XML_ERROR_INVALID_TOKEN; 6332 goto endEntityValue; 6333 case XML_TOK_INVALID: 6334 if (enc == parser->m_encoding) 6335 parser->m_eventPtr = next; 6336 result = XML_ERROR_INVALID_TOKEN; 6337 goto endEntityValue; 6338 default: 6339 /* This default case should be unnecessary -- all the tokens 6340 * that XmlEntityValueTok() can return have their own explicit 6341 * cases -- but should be retained for safety. We do however 6342 * exclude it from the coverage statistics. 6343 * 6344 * LCOV_EXCL_START 6345 */ 6346 if (enc == parser->m_encoding) 6347 parser->m_eventPtr = entityTextPtr; 6348 result = XML_ERROR_UNEXPECTED_STATE; 6349 goto endEntityValue; 6350 /* LCOV_EXCL_STOP */ 6351 } 6352 entityTextPtr = next; 6353 } 6354 endEntityValue: 6355 # ifdef XML_DTD 6356 parser->m_prologState.inEntityValue = oldInEntityValue; 6357 # endif /* XML_DTD */ 6358 return result; 6359 } 6360 6361 #else /* XML_GE == 0 */ 6362 6363 static enum XML_Error 6364 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { 6365 // This will store "&entity123;" in entity->textPtr 6366 // to end up as "&entity123;" in the handler. 6367 const char *const entity_start = "&"; 6368 const char *const entity_end = ";"; 6369 6370 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); 6371 if (! poolAppendString(pool, entity_start) 6372 || ! poolAppendString(pool, entity->name) 6373 || ! 
poolAppendString(pool, entity_end)) { 6374 poolDiscard(pool); 6375 return XML_ERROR_NO_MEMORY; 6376 } 6377 6378 entity->textPtr = poolStart(pool); 6379 entity->textLen = (int)(poolLength(pool)); 6380 poolFinish(pool); 6381 6382 return XML_ERROR_NONE; 6383 } 6384 6385 #endif /* XML_GE == 0 */ 6386 6387 static void FASTCALL 6388 normalizeLines(XML_Char *s) { 6389 XML_Char *p; 6390 for (;; s++) { 6391 if (*s == XML_T('\0')) 6392 return; 6393 if (*s == 0xD) 6394 break; 6395 } 6396 p = s; 6397 do { 6398 if (*s == 0xD) { 6399 *p++ = 0xA; 6400 if (*++s == 0xA) 6401 s++; 6402 } else 6403 *p++ = *s++; 6404 } while (*s); 6405 *p = XML_T('\0'); 6406 } 6407 6408 static int 6409 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, 6410 const char *start, const char *end) { 6411 const XML_Char *target; 6412 XML_Char *data; 6413 const char *tem; 6414 if (! parser->m_processingInstructionHandler) { 6415 if (parser->m_defaultHandler) 6416 reportDefault(parser, enc, start, end); 6417 return 1; 6418 } 6419 start += enc->minBytesPerChar * 2; 6420 tem = start + XmlNameLength(enc, start); 6421 target = poolStoreString(&parser->m_tempPool, enc, start, tem); 6422 if (! target) 6423 return 0; 6424 poolFinish(&parser->m_tempPool); 6425 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), 6426 end - enc->minBytesPerChar * 2); 6427 if (! data) 6428 return 0; 6429 normalizeLines(data); 6430 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); 6431 poolClear(&parser->m_tempPool); 6432 return 1; 6433 } 6434 6435 static int 6436 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, 6437 const char *end) { 6438 XML_Char *data; 6439 if (! parser->m_commentHandler) { 6440 if (parser->m_defaultHandler) 6441 reportDefault(parser, enc, start, end); 6442 return 1; 6443 } 6444 data = poolStoreString(&parser->m_tempPool, enc, 6445 start + enc->minBytesPerChar * 4, 6446 end - enc->minBytesPerChar * 3); 6447 if (! 
data) 6448 return 0; 6449 normalizeLines(data); 6450 parser->m_commentHandler(parser->m_handlerArg, data); 6451 poolClear(&parser->m_tempPool); 6452 return 1; 6453 } 6454 6455 static void 6456 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, 6457 const char *end) { 6458 if (MUST_CONVERT(enc, s)) { 6459 enum XML_Convert_Result convert_res; 6460 const char **eventPP; 6461 const char **eventEndPP; 6462 if (enc == parser->m_encoding) { 6463 eventPP = &parser->m_eventPtr; 6464 eventEndPP = &parser->m_eventEndPtr; 6465 } else { 6466 /* To get here, two things must be true; the parser must be 6467 * using a character encoding that is not the same as the 6468 * encoding passed in, and the encoding passed in must need 6469 * conversion to the internal format (UTF-8 unless XML_UNICODE 6470 * is defined). The only occasions on which the encoding passed 6471 * in is not the same as the parser's encoding are when it is 6472 * the internal encoding (e.g. a previously defined parameter 6473 * entity, already converted to internal format). This by 6474 * definition doesn't need conversion, so the whole branch never 6475 * gets executed. 6476 * 6477 * For safety's sake we don't delete these lines and merely 6478 * exclude them from coverage statistics. 
6479 * 6480 * LCOV_EXCL_START 6481 */ 6482 eventPP = &(parser->m_openInternalEntities->internalEventPtr); 6483 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); 6484 /* LCOV_EXCL_STOP */ 6485 } 6486 do { 6487 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; 6488 convert_res 6489 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); 6490 *eventEndPP = s; 6491 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, 6492 (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); 6493 *eventPP = s; 6494 } while ((convert_res != XML_CONVERT_COMPLETED) 6495 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); 6496 } else 6497 parser->m_defaultHandler( 6498 parser->m_handlerArg, (const XML_Char *)s, 6499 (int)((const XML_Char *)end - (const XML_Char *)s)); 6500 } 6501 6502 static int 6503 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, 6504 XML_Bool isId, const XML_Char *value, XML_Parser parser) { 6505 DEFAULT_ATTRIBUTE *att; 6506 if (value || isId) { 6507 /* The handling of default attributes gets messed up if we have 6508 a default which duplicates a non-default. */ 6509 int i; 6510 for (i = 0; i < type->nDefaultAtts; i++) 6511 if (attId == type->defaultAtts[i].id) 6512 return 1; 6513 if (isId && ! type->idAtt && ! attId->xmlns) 6514 type->idAtt = attId; 6515 } 6516 if (type->nDefaultAtts == type->allocDefaultAtts) { 6517 if (type->allocDefaultAtts == 0) { 6518 type->allocDefaultAtts = 8; 6519 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( 6520 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 6521 if (! type->defaultAtts) { 6522 type->allocDefaultAtts = 0; 6523 return 0; 6524 } 6525 } else { 6526 DEFAULT_ATTRIBUTE *temp; 6527 6528 /* Detect and prevent integer overflow */ 6529 if (type->allocDefaultAtts > INT_MAX / 2) { 6530 return 0; 6531 } 6532 6533 int count = type->allocDefaultAtts * 2; 6534 6535 /* Detect and prevent integer overflow. 
6536 * The preprocessor guard addresses the "always false" warning 6537 * from -Wtype-limits on platforms where 6538 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ 6539 #if UINT_MAX >= SIZE_MAX 6540 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { 6541 return 0; 6542 } 6543 #endif 6544 6545 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, 6546 (count * sizeof(DEFAULT_ATTRIBUTE))); 6547 if (temp == NULL) 6548 return 0; 6549 type->allocDefaultAtts = count; 6550 type->defaultAtts = temp; 6551 } 6552 } 6553 att = type->defaultAtts + type->nDefaultAtts; 6554 att->id = attId; 6555 att->value = value; 6556 att->isCdata = isCdata; 6557 if (! isCdata) 6558 attId->maybeTokenized = XML_TRUE; 6559 type->nDefaultAtts += 1; 6560 return 1; 6561 } 6562 6563 static int 6564 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { 6565 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6566 const XML_Char *name; 6567 for (name = elementType->name; *name; name++) { 6568 if (*name == XML_T(ASCII_COLON)) { 6569 PREFIX *prefix; 6570 const XML_Char *s; 6571 for (s = elementType->name; s != name; s++) { 6572 if (! poolAppendChar(&dtd->pool, *s)) 6573 return 0; 6574 } 6575 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6576 return 0; 6577 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), 6578 sizeof(PREFIX)); 6579 if (! prefix) 6580 return 0; 6581 if (prefix->name == poolStart(&dtd->pool)) 6582 poolFinish(&dtd->pool); 6583 else 6584 poolDiscard(&dtd->pool); 6585 elementType->prefix = prefix; 6586 break; 6587 } 6588 } 6589 return 1; 6590 } 6591 6592 static ATTRIBUTE_ID * 6593 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, 6594 const char *end) { 6595 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6596 ATTRIBUTE_ID *id; 6597 const XML_Char *name; 6598 if (! 
poolAppendChar(&dtd->pool, XML_T('\0'))) 6599 return NULL; 6600 name = poolStoreString(&dtd->pool, enc, start, end); 6601 if (! name) 6602 return NULL; 6603 /* skip quotation mark - its storage will be reused (like in name[-1]) */ 6604 ++name; 6605 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, 6606 sizeof(ATTRIBUTE_ID)); 6607 if (! id) 6608 return NULL; 6609 if (id->name != name) 6610 poolDiscard(&dtd->pool); 6611 else { 6612 poolFinish(&dtd->pool); 6613 if (! parser->m_ns) 6614 ; 6615 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) 6616 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) 6617 && name[4] == XML_T(ASCII_s) 6618 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { 6619 if (name[5] == XML_T('\0')) 6620 id->prefix = &dtd->defaultPrefix; 6621 else 6622 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, 6623 sizeof(PREFIX)); 6624 id->xmlns = XML_TRUE; 6625 } else { 6626 int i; 6627 for (i = 0; name[i]; i++) { 6628 /* attributes without prefix are *not* in the default namespace */ 6629 if (name[i] == XML_T(ASCII_COLON)) { 6630 int j; 6631 for (j = 0; j < i; j++) { 6632 if (! poolAppendChar(&dtd->pool, name[j])) 6633 return NULL; 6634 } 6635 if (! poolAppendChar(&dtd->pool, XML_T('\0'))) 6636 return NULL; 6637 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, 6638 poolStart(&dtd->pool), sizeof(PREFIX)); 6639 if (! id->prefix) 6640 return NULL; 6641 if (id->prefix->name == poolStart(&dtd->pool)) 6642 poolFinish(&dtd->pool); 6643 else 6644 poolDiscard(&dtd->pool); 6645 break; 6646 } 6647 } 6648 } 6649 } 6650 return id; 6651 } 6652 6653 #define CONTEXT_SEP XML_T(ASCII_FF) 6654 6655 static const XML_Char * 6656 getContext(XML_Parser parser) { 6657 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6658 HASH_TABLE_ITER iter; 6659 XML_Bool needSep = XML_FALSE; 6660 6661 if (dtd->defaultPrefix.binding) { 6662 int i; 6663 int len; 6664 if (! 
poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6665 return NULL; 6666 len = dtd->defaultPrefix.binding->uriLen; 6667 if (parser->m_namespaceSeparator) 6668 len--; 6669 for (i = 0; i < len; i++) { 6670 if (! poolAppendChar(&parser->m_tempPool, 6671 dtd->defaultPrefix.binding->uri[i])) { 6672 /* Because of memory caching, I don't believe this line can be 6673 * executed. 6674 * 6675 * This is part of a loop copying the default prefix binding 6676 * URI into the parser's temporary string pool. Previously, 6677 * that URI was copied into the same string pool, with a 6678 * terminating NUL character, as part of setContext(). When 6679 * the pool was cleared, that leaves a block definitely big 6680 * enough to hold the URI on the free block list of the pool. 6681 * The URI copy in getContext() therefore cannot run out of 6682 * memory. 6683 * 6684 * If the pool is used between the setContext() and 6685 * getContext() calls, the worst it can do is leave a bigger 6686 * block on the front of the free list. Given that this is 6687 * all somewhat inobvious and program logic can be changed, we 6688 * don't delete the line but we do exclude it from the test 6689 * coverage statistics. 6690 */ 6691 return NULL; /* LCOV_EXCL_LINE */ 6692 } 6693 } 6694 needSep = XML_TRUE; 6695 } 6696 6697 hashTableIterInit(&iter, &(dtd->prefixes)); 6698 for (;;) { 6699 int i; 6700 int len; 6701 const XML_Char *s; 6702 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); 6703 if (! prefix) 6704 break; 6705 if (! prefix->binding) { 6706 /* This test appears to be (justifiable) paranoia. There does 6707 * not seem to be a way of injecting a prefix without a binding 6708 * that doesn't get errored long before this function is called. 6709 * The test should remain for safety's sake, so we instead 6710 * exclude the following line from the coverage statistics. 6711 */ 6712 continue; /* LCOV_EXCL_LINE */ 6713 } 6714 if (needSep && ! 
poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6715 return NULL; 6716 for (s = prefix->name; *s; s++) 6717 if (! poolAppendChar(&parser->m_tempPool, *s)) 6718 return NULL; 6719 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) 6720 return NULL; 6721 len = prefix->binding->uriLen; 6722 if (parser->m_namespaceSeparator) 6723 len--; 6724 for (i = 0; i < len; i++) 6725 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) 6726 return NULL; 6727 needSep = XML_TRUE; 6728 } 6729 6730 hashTableIterInit(&iter, &(dtd->generalEntities)); 6731 for (;;) { 6732 const XML_Char *s; 6733 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 6734 if (! e) 6735 break; 6736 if (! e->open) 6737 continue; 6738 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) 6739 return NULL; 6740 for (s = e->name; *s; s++) 6741 if (! poolAppendChar(&parser->m_tempPool, *s)) 6742 return 0; 6743 needSep = XML_TRUE; 6744 } 6745 6746 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6747 return NULL; 6748 return parser->m_tempPool.start; 6749 } 6750 6751 static XML_Bool 6752 setContext(XML_Parser parser, const XML_Char *context) { 6753 if (context == NULL) { 6754 return XML_FALSE; 6755 } 6756 6757 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 6758 const XML_Char *s = context; 6759 6760 while (*context != XML_T('\0')) { 6761 if (*s == CONTEXT_SEP || *s == XML_T('\0')) { 6762 ENTITY *e; 6763 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6764 return XML_FALSE; 6765 e = (ENTITY *)lookup(parser, &dtd->generalEntities, 6766 poolStart(&parser->m_tempPool), 0); 6767 if (e) 6768 e->open = XML_TRUE; 6769 if (*s != XML_T('\0')) 6770 s++; 6771 context = s; 6772 poolDiscard(&parser->m_tempPool); 6773 } else if (*s == XML_T(ASCII_EQUALS)) { 6774 PREFIX *prefix; 6775 if (poolLength(&parser->m_tempPool) == 0) 6776 prefix = &dtd->defaultPrefix; 6777 else { 6778 if (! 
poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6779 return XML_FALSE; 6780 prefix 6781 = (PREFIX *)lookup(parser, &dtd->prefixes, 6782 poolStart(&parser->m_tempPool), sizeof(PREFIX)); 6783 if (! prefix) 6784 return XML_FALSE; 6785 if (prefix->name == poolStart(&parser->m_tempPool)) { 6786 prefix->name = poolCopyString(&dtd->pool, prefix->name); 6787 if (! prefix->name) 6788 return XML_FALSE; 6789 } 6790 poolDiscard(&parser->m_tempPool); 6791 } 6792 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); 6793 context++) 6794 if (! poolAppendChar(&parser->m_tempPool, *context)) 6795 return XML_FALSE; 6796 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) 6797 return XML_FALSE; 6798 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), 6799 &parser->m_inheritedBindings) 6800 != XML_ERROR_NONE) 6801 return XML_FALSE; 6802 poolDiscard(&parser->m_tempPool); 6803 if (*context != XML_T('\0')) 6804 ++context; 6805 s = context; 6806 } else { 6807 if (! poolAppendChar(&parser->m_tempPool, *s)) 6808 return XML_FALSE; 6809 s++; 6810 } 6811 } 6812 return XML_TRUE; 6813 } 6814 6815 static void FASTCALL 6816 normalizePublicId(XML_Char *publicId) { 6817 XML_Char *p = publicId; 6818 XML_Char *s; 6819 for (s = publicId; *s; s++) { 6820 switch (*s) { 6821 case 0x20: 6822 case 0xD: 6823 case 0xA: 6824 if (p != publicId && p[-1] != 0x20) 6825 *p++ = 0x20; 6826 break; 6827 default: 6828 *p++ = *s; 6829 } 6830 } 6831 if (p != publicId && p[-1] == 0x20) 6832 --p; 6833 *p = XML_T('\0'); 6834 } 6835 6836 static DTD * 6837 dtdCreate(const XML_Memory_Handling_Suite *ms) { 6838 DTD *p = ms->malloc_fcn(sizeof(DTD)); 6839 if (p == NULL) 6840 return p; 6841 poolInit(&(p->pool), ms); 6842 poolInit(&(p->entityValuePool), ms); 6843 hashTableInit(&(p->generalEntities), ms); 6844 hashTableInit(&(p->elementTypes), ms); 6845 hashTableInit(&(p->attributeIds), ms); 6846 hashTableInit(&(p->prefixes), ms); 6847 #ifdef XML_DTD 6848 p->paramEntityRead = XML_FALSE; 6849 
hashTableInit(&(p->paramEntities), ms); 6850 #endif /* XML_DTD */ 6851 p->defaultPrefix.name = NULL; 6852 p->defaultPrefix.binding = NULL; 6853 6854 p->in_eldecl = XML_FALSE; 6855 p->scaffIndex = NULL; 6856 p->scaffold = NULL; 6857 p->scaffLevel = 0; 6858 p->scaffSize = 0; 6859 p->scaffCount = 0; 6860 p->contentStringLen = 0; 6861 6862 p->keepProcessing = XML_TRUE; 6863 p->hasParamEntityRefs = XML_FALSE; 6864 p->standalone = XML_FALSE; 6865 return p; 6866 } 6867 6868 static void 6869 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { 6870 HASH_TABLE_ITER iter; 6871 hashTableIterInit(&iter, &(p->elementTypes)); 6872 for (;;) { 6873 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6874 if (! e) 6875 break; 6876 if (e->allocDefaultAtts != 0) 6877 ms->free_fcn(e->defaultAtts); 6878 } 6879 hashTableClear(&(p->generalEntities)); 6880 #ifdef XML_DTD 6881 p->paramEntityRead = XML_FALSE; 6882 hashTableClear(&(p->paramEntities)); 6883 #endif /* XML_DTD */ 6884 hashTableClear(&(p->elementTypes)); 6885 hashTableClear(&(p->attributeIds)); 6886 hashTableClear(&(p->prefixes)); 6887 poolClear(&(p->pool)); 6888 poolClear(&(p->entityValuePool)); 6889 p->defaultPrefix.name = NULL; 6890 p->defaultPrefix.binding = NULL; 6891 6892 p->in_eldecl = XML_FALSE; 6893 6894 ms->free_fcn(p->scaffIndex); 6895 p->scaffIndex = NULL; 6896 ms->free_fcn(p->scaffold); 6897 p->scaffold = NULL; 6898 6899 p->scaffLevel = 0; 6900 p->scaffSize = 0; 6901 p->scaffCount = 0; 6902 p->contentStringLen = 0; 6903 6904 p->keepProcessing = XML_TRUE; 6905 p->hasParamEntityRefs = XML_FALSE; 6906 p->standalone = XML_FALSE; 6907 } 6908 6909 static void 6910 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { 6911 HASH_TABLE_ITER iter; 6912 hashTableIterInit(&iter, &(p->elementTypes)); 6913 for (;;) { 6914 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 6915 if (! 
e) 6916 break; 6917 if (e->allocDefaultAtts != 0) 6918 ms->free_fcn(e->defaultAtts); 6919 } 6920 hashTableDestroy(&(p->generalEntities)); 6921 #ifdef XML_DTD 6922 hashTableDestroy(&(p->paramEntities)); 6923 #endif /* XML_DTD */ 6924 hashTableDestroy(&(p->elementTypes)); 6925 hashTableDestroy(&(p->attributeIds)); 6926 hashTableDestroy(&(p->prefixes)); 6927 poolDestroy(&(p->pool)); 6928 poolDestroy(&(p->entityValuePool)); 6929 if (isDocEntity) { 6930 ms->free_fcn(p->scaffIndex); 6931 ms->free_fcn(p->scaffold); 6932 } 6933 ms->free_fcn(p); 6934 } 6935 6936 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. 6937 The new DTD has already been initialized. 6938 */ 6939 static int 6940 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, 6941 const XML_Memory_Handling_Suite *ms) { 6942 HASH_TABLE_ITER iter; 6943 6944 /* Copy the prefix table. */ 6945 6946 hashTableIterInit(&iter, &(oldDtd->prefixes)); 6947 for (;;) { 6948 const XML_Char *name; 6949 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); 6950 if (! oldP) 6951 break; 6952 name = poolCopyString(&(newDtd->pool), oldP->name); 6953 if (! name) 6954 return 0; 6955 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) 6956 return 0; 6957 } 6958 6959 hashTableIterInit(&iter, &(oldDtd->attributeIds)); 6960 6961 /* Copy the attribute id table. */ 6962 6963 for (;;) { 6964 ATTRIBUTE_ID *newA; 6965 const XML_Char *name; 6966 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); 6967 6968 if (! oldA) 6969 break; 6970 /* Remember to allocate the scratch byte before the name. */ 6971 if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) 6972 return 0; 6973 name = poolCopyString(&(newDtd->pool), oldA->name); 6974 if (! name) 6975 return 0; 6976 ++name; 6977 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, 6978 sizeof(ATTRIBUTE_ID)); 6979 if (! 
newA) 6980 return 0; 6981 newA->maybeTokenized = oldA->maybeTokenized; 6982 if (oldA->prefix) { 6983 newA->xmlns = oldA->xmlns; 6984 if (oldA->prefix == &oldDtd->defaultPrefix) 6985 newA->prefix = &newDtd->defaultPrefix; 6986 else 6987 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 6988 oldA->prefix->name, 0); 6989 } 6990 } 6991 6992 /* Copy the element type table. */ 6993 6994 hashTableIterInit(&iter, &(oldDtd->elementTypes)); 6995 6996 for (;;) { 6997 int i; 6998 ELEMENT_TYPE *newE; 6999 const XML_Char *name; 7000 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); 7001 if (! oldE) 7002 break; 7003 name = poolCopyString(&(newDtd->pool), oldE->name); 7004 if (! name) 7005 return 0; 7006 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, 7007 sizeof(ELEMENT_TYPE)); 7008 if (! newE) 7009 return 0; 7010 if (oldE->nDefaultAtts) { 7011 newE->defaultAtts 7012 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 7013 if (! newE->defaultAtts) { 7014 return 0; 7015 } 7016 } 7017 if (oldE->idAtt) 7018 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), 7019 oldE->idAtt->name, 0); 7020 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 7021 if (oldE->prefix) 7022 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), 7023 oldE->prefix->name, 0); 7024 for (i = 0; i < newE->nDefaultAtts; i++) { 7025 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( 7026 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 7027 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 7028 if (oldE->defaultAtts[i].value) { 7029 newE->defaultAtts[i].value 7030 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 7031 if (! newE->defaultAtts[i].value) 7032 return 0; 7033 } else 7034 newE->defaultAtts[i].value = NULL; 7035 } 7036 } 7037 7038 /* Copy the entity tables. */ 7039 if (! 
copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), 7040 &(oldDtd->generalEntities))) 7041 return 0; 7042 7043 #ifdef XML_DTD 7044 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), 7045 &(oldDtd->paramEntities))) 7046 return 0; 7047 newDtd->paramEntityRead = oldDtd->paramEntityRead; 7048 #endif /* XML_DTD */ 7049 7050 newDtd->keepProcessing = oldDtd->keepProcessing; 7051 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; 7052 newDtd->standalone = oldDtd->standalone; 7053 7054 /* Don't want deep copying for scaffolding */ 7055 newDtd->in_eldecl = oldDtd->in_eldecl; 7056 newDtd->scaffold = oldDtd->scaffold; 7057 newDtd->contentStringLen = oldDtd->contentStringLen; 7058 newDtd->scaffSize = oldDtd->scaffSize; 7059 newDtd->scaffLevel = oldDtd->scaffLevel; 7060 newDtd->scaffIndex = oldDtd->scaffIndex; 7061 7062 return 1; 7063 } /* End dtdCopy */ 7064 7065 static int 7066 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, 7067 STRING_POOL *newPool, const HASH_TABLE *oldTable) { 7068 HASH_TABLE_ITER iter; 7069 const XML_Char *cachedOldBase = NULL; 7070 const XML_Char *cachedNewBase = NULL; 7071 7072 hashTableIterInit(&iter, oldTable); 7073 7074 for (;;) { 7075 ENTITY *newE; 7076 const XML_Char *name; 7077 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); 7078 if (! oldE) 7079 break; 7080 name = poolCopyString(newPool, oldE->name); 7081 if (! name) 7082 return 0; 7083 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); 7084 if (! newE) 7085 return 0; 7086 if (oldE->systemId) { 7087 const XML_Char *tem = poolCopyString(newPool, oldE->systemId); 7088 if (! tem) 7089 return 0; 7090 newE->systemId = tem; 7091 if (oldE->base) { 7092 if (oldE->base == cachedOldBase) 7093 newE->base = cachedNewBase; 7094 else { 7095 cachedOldBase = oldE->base; 7096 tem = poolCopyString(newPool, cachedOldBase); 7097 if (! 
tem) 7098 return 0; 7099 cachedNewBase = newE->base = tem; 7100 } 7101 } 7102 if (oldE->publicId) { 7103 tem = poolCopyString(newPool, oldE->publicId); 7104 if (! tem) 7105 return 0; 7106 newE->publicId = tem; 7107 } 7108 } else { 7109 const XML_Char *tem 7110 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); 7111 if (! tem) 7112 return 0; 7113 newE->textPtr = tem; 7114 newE->textLen = oldE->textLen; 7115 } 7116 if (oldE->notation) { 7117 const XML_Char *tem = poolCopyString(newPool, oldE->notation); 7118 if (! tem) 7119 return 0; 7120 newE->notation = tem; 7121 } 7122 newE->is_param = oldE->is_param; 7123 newE->is_internal = oldE->is_internal; 7124 } 7125 return 1; 7126 } 7127 7128 #define INIT_POWER 6 7129 7130 static XML_Bool FASTCALL 7131 keyeq(KEY s1, KEY s2) { 7132 for (; *s1 == *s2; s1++, s2++) 7133 if (*s1 == 0) 7134 return XML_TRUE; 7135 return XML_FALSE; 7136 } 7137 7138 static size_t 7139 keylen(KEY s) { 7140 size_t len = 0; 7141 for (; *s; s++, len++) 7142 ; 7143 return len; 7144 } 7145 7146 static void 7147 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { 7148 key->k[0] = 0; 7149 key->k[1] = get_hash_secret_salt(parser); 7150 } 7151 7152 static unsigned long FASTCALL 7153 hash(XML_Parser parser, KEY s) { 7154 struct siphash state; 7155 struct sipkey key; 7156 (void)sip24_valid; 7157 copy_salt_to_sipkey(parser, &key); 7158 sip24_init(&state, &key); 7159 sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); 7160 return (unsigned long)sip24_final(&state); 7161 } 7162 7163 static NAMED * 7164 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { 7165 size_t i; 7166 if (table->size == 0) { 7167 size_t tsize; 7168 if (! createSize) 7169 return NULL; 7170 table->power = INIT_POWER; 7171 /* table->size is a power of 2 */ 7172 table->size = (size_t)1 << INIT_POWER; 7173 tsize = table->size * sizeof(NAMED *); 7174 table->v = table->mem->malloc_fcn(tsize); 7175 if (! 
table->v) { 7176 table->size = 0; 7177 return NULL; 7178 } 7179 memset(table->v, 0, tsize); 7180 i = hash(parser, name) & ((unsigned long)table->size - 1); 7181 } else { 7182 unsigned long h = hash(parser, name); 7183 unsigned long mask = (unsigned long)table->size - 1; 7184 unsigned char step = 0; 7185 i = h & mask; 7186 while (table->v[i]) { 7187 if (keyeq(name, table->v[i]->name)) 7188 return table->v[i]; 7189 if (! step) 7190 step = PROBE_STEP(h, mask, table->power); 7191 i < step ? (i += table->size - step) : (i -= step); 7192 } 7193 if (! createSize) 7194 return NULL; 7195 7196 /* check for overflow (table is half full) */ 7197 if (table->used >> (table->power - 1)) { 7198 unsigned char newPower = table->power + 1; 7199 7200 /* Detect and prevent invalid shift */ 7201 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { 7202 return NULL; 7203 } 7204 7205 size_t newSize = (size_t)1 << newPower; 7206 unsigned long newMask = (unsigned long)newSize - 1; 7207 7208 /* Detect and prevent integer overflow */ 7209 if (newSize > (size_t)(-1) / sizeof(NAMED *)) { 7210 return NULL; 7211 } 7212 7213 size_t tsize = newSize * sizeof(NAMED *); 7214 NAMED **newV = table->mem->malloc_fcn(tsize); 7215 if (! newV) 7216 return NULL; 7217 memset(newV, 0, tsize); 7218 for (i = 0; i < table->size; i++) 7219 if (table->v[i]) { 7220 unsigned long newHash = hash(parser, table->v[i]->name); 7221 size_t j = newHash & newMask; 7222 step = 0; 7223 while (newV[j]) { 7224 if (! step) 7225 step = PROBE_STEP(newHash, newMask, newPower); 7226 j < step ? (j += newSize - step) : (j -= step); 7227 } 7228 newV[j] = table->v[i]; 7229 } 7230 table->mem->free_fcn(table->v); 7231 table->v = newV; 7232 table->power = newPower; 7233 table->size = newSize; 7234 i = h & newMask; 7235 step = 0; 7236 while (table->v[i]) { 7237 if (! step) 7238 step = PROBE_STEP(h, newMask, newPower); 7239 i < step ? 
(i += newSize - step) : (i -= step); 7240 } 7241 } 7242 } 7243 table->v[i] = table->mem->malloc_fcn(createSize); 7244 if (! table->v[i]) 7245 return NULL; 7246 memset(table->v[i], 0, createSize); 7247 table->v[i]->name = name; 7248 (table->used)++; 7249 return table->v[i]; 7250 } 7251 7252 static void FASTCALL 7253 hashTableClear(HASH_TABLE *table) { 7254 size_t i; 7255 for (i = 0; i < table->size; i++) { 7256 table->mem->free_fcn(table->v[i]); 7257 table->v[i] = NULL; 7258 } 7259 table->used = 0; 7260 } 7261 7262 static void FASTCALL 7263 hashTableDestroy(HASH_TABLE *table) { 7264 size_t i; 7265 for (i = 0; i < table->size; i++) 7266 table->mem->free_fcn(table->v[i]); 7267 table->mem->free_fcn(table->v); 7268 } 7269 7270 static void FASTCALL 7271 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { 7272 p->power = 0; 7273 p->size = 0; 7274 p->used = 0; 7275 p->v = NULL; 7276 p->mem = ms; 7277 } 7278 7279 static void FASTCALL 7280 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { 7281 iter->p = table->v; 7282 iter->end = iter->p ? iter->p + table->size : NULL; 7283 } 7284 7285 static NAMED *FASTCALL 7286 hashTableIterNext(HASH_TABLE_ITER *iter) { 7287 while (iter->p != iter->end) { 7288 NAMED *tem = *(iter->p)++; 7289 if (tem) 7290 return tem; 7291 } 7292 return NULL; 7293 } 7294 7295 static void FASTCALL 7296 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { 7297 pool->blocks = NULL; 7298 pool->freeBlocks = NULL; 7299 pool->start = NULL; 7300 pool->ptr = NULL; 7301 pool->end = NULL; 7302 pool->mem = ms; 7303 } 7304 7305 static void FASTCALL 7306 poolClear(STRING_POOL *pool) { 7307 if (! 
pool->freeBlocks) 7308 pool->freeBlocks = pool->blocks; 7309 else { 7310 BLOCK *p = pool->blocks; 7311 while (p) { 7312 BLOCK *tem = p->next; 7313 p->next = pool->freeBlocks; 7314 pool->freeBlocks = p; 7315 p = tem; 7316 } 7317 } 7318 pool->blocks = NULL; 7319 pool->start = NULL; 7320 pool->ptr = NULL; 7321 pool->end = NULL; 7322 } 7323 7324 static void FASTCALL 7325 poolDestroy(STRING_POOL *pool) { 7326 BLOCK *p = pool->blocks; 7327 while (p) { 7328 BLOCK *tem = p->next; 7329 pool->mem->free_fcn(p); 7330 p = tem; 7331 } 7332 p = pool->freeBlocks; 7333 while (p) { 7334 BLOCK *tem = p->next; 7335 pool->mem->free_fcn(p); 7336 p = tem; 7337 } 7338 } 7339 7340 static XML_Char * 7341 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7342 const char *end) { 7343 if (! pool->ptr && ! poolGrow(pool)) 7344 return NULL; 7345 for (;;) { 7346 const enum XML_Convert_Result convert_res = XmlConvert( 7347 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); 7348 if ((convert_res == XML_CONVERT_COMPLETED) 7349 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) 7350 break; 7351 if (! poolGrow(pool)) 7352 return NULL; 7353 } 7354 return pool->start; 7355 } 7356 7357 static const XML_Char *FASTCALL 7358 poolCopyString(STRING_POOL *pool, const XML_Char *s) { 7359 do { 7360 if (! poolAppendChar(pool, *s)) 7361 return NULL; 7362 } while (*s++); 7363 s = pool->start; 7364 poolFinish(pool); 7365 return s; 7366 } 7367 7368 static const XML_Char * 7369 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { 7370 if (! pool->ptr && ! poolGrow(pool)) { 7371 /* The following line is unreachable given the current usage of 7372 * poolCopyStringN(). Currently it is called from exactly one 7373 * place to copy the text of a simple general entity. By that 7374 * point, the name of the entity is already stored in the pool, so 7375 * pool->ptr cannot be NULL. 
7376 * 7377 * If poolCopyStringN() is used elsewhere as it well might be, 7378 * this line may well become executable again. Regardless, this 7379 * sort of check shouldn't be removed lightly, so we just exclude 7380 * it from the coverage statistics. 7381 */ 7382 return NULL; /* LCOV_EXCL_LINE */ 7383 } 7384 for (; n > 0; --n, s++) { 7385 if (! poolAppendChar(pool, *s)) 7386 return NULL; 7387 } 7388 s = pool->start; 7389 poolFinish(pool); 7390 return s; 7391 } 7392 7393 static const XML_Char *FASTCALL 7394 poolAppendString(STRING_POOL *pool, const XML_Char *s) { 7395 while (*s) { 7396 if (! poolAppendChar(pool, *s)) 7397 return NULL; 7398 s++; 7399 } 7400 return pool->start; 7401 } 7402 7403 static XML_Char * 7404 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, 7405 const char *end) { 7406 if (! poolAppend(pool, enc, ptr, end)) 7407 return NULL; 7408 if (pool->ptr == pool->end && ! poolGrow(pool)) 7409 return NULL; 7410 *(pool->ptr)++ = 0; 7411 return pool->start; 7412 } 7413 7414 static size_t 7415 poolBytesToAllocateFor(int blockSize) { 7416 /* Unprotected math would be: 7417 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); 7418 ** 7419 ** Detect overflow, avoiding _signed_ overflow undefined behavior 7420 ** For a + b * c we check b * c in isolation first, so that addition of a 7421 ** on top has no chance of making us accept a small non-negative number 7422 */ 7423 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ 7424 7425 if (blockSize <= 0) 7426 return 0; 7427 7428 if (blockSize > (int)(INT_MAX / stretch)) 7429 return 0; 7430 7431 { 7432 const int stretchedBlockSize = blockSize * (int)stretch; 7433 const int bytesToAllocate 7434 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); 7435 if (bytesToAllocate < 0) 7436 return 0; 7437 7438 return (size_t)bytesToAllocate; 7439 } 7440 } 7441 7442 static XML_Bool FASTCALL 7443 poolGrow(STRING_POOL *pool) { 7444 if (pool->freeBlocks) { 7445 if (pool->start == 
0) { 7446 pool->blocks = pool->freeBlocks; 7447 pool->freeBlocks = pool->freeBlocks->next; 7448 pool->blocks->next = NULL; 7449 pool->start = pool->blocks->s; 7450 pool->end = pool->start + pool->blocks->size; 7451 pool->ptr = pool->start; 7452 return XML_TRUE; 7453 } 7454 if (pool->end - pool->start < pool->freeBlocks->size) { 7455 BLOCK *tem = pool->freeBlocks->next; 7456 pool->freeBlocks->next = pool->blocks; 7457 pool->blocks = pool->freeBlocks; 7458 pool->freeBlocks = tem; 7459 memcpy(pool->blocks->s, pool->start, 7460 (pool->end - pool->start) * sizeof(XML_Char)); 7461 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 7462 pool->start = pool->blocks->s; 7463 pool->end = pool->start + pool->blocks->size; 7464 return XML_TRUE; 7465 } 7466 } 7467 if (pool->blocks && pool->start == pool->blocks->s) { 7468 BLOCK *temp; 7469 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); 7470 size_t bytesToAllocate; 7471 7472 /* NOTE: Needs to be calculated prior to calling `realloc` 7473 to avoid dangling pointers: */ 7474 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; 7475 7476 if (blockSize < 0) { 7477 /* This condition traps a situation where either more than 7478 * INT_MAX/2 bytes have already been allocated. This isn't 7479 * readily testable, since it is unlikely that an average 7480 * machine will have that much memory, so we exclude it from the 7481 * coverage statistics. 
7482 */ 7483 return XML_FALSE; /* LCOV_EXCL_LINE */ 7484 } 7485 7486 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7487 if (bytesToAllocate == 0) 7488 return XML_FALSE; 7489 7490 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, 7491 (unsigned)bytesToAllocate); 7492 if (temp == NULL) 7493 return XML_FALSE; 7494 pool->blocks = temp; 7495 pool->blocks->size = blockSize; 7496 pool->ptr = pool->blocks->s + offsetInsideBlock; 7497 pool->start = pool->blocks->s; 7498 pool->end = pool->start + blockSize; 7499 } else { 7500 BLOCK *tem; 7501 int blockSize = (int)(pool->end - pool->start); 7502 size_t bytesToAllocate; 7503 7504 if (blockSize < 0) { 7505 /* This condition traps a situation where either more than 7506 * INT_MAX bytes have already been allocated (which is prevented 7507 * by various pieces of program logic, not least this one, never 7508 * mind the unlikelihood of actually having that much memory) or 7509 * the pool control fields have been corrupted (which could 7510 * conceivably happen in an extremely buggy user handler 7511 * function). Either way it isn't readily testable, so we 7512 * exclude it from the coverage statistics. 7513 */ 7514 return XML_FALSE; /* LCOV_EXCL_LINE */ 7515 } 7516 7517 if (blockSize < INIT_BLOCK_SIZE) 7518 blockSize = INIT_BLOCK_SIZE; 7519 else { 7520 /* Detect overflow, avoiding _signed_ overflow undefined behavior */ 7521 if ((int)((unsigned)blockSize * 2U) < 0) { 7522 return XML_FALSE; 7523 } 7524 blockSize *= 2; 7525 } 7526 7527 bytesToAllocate = poolBytesToAllocateFor(blockSize); 7528 if (bytesToAllocate == 0) 7529 return XML_FALSE; 7530 7531 tem = pool->mem->malloc_fcn(bytesToAllocate); 7532 if (! 
tem) 7533 return XML_FALSE; 7534 tem->size = blockSize; 7535 tem->next = pool->blocks; 7536 pool->blocks = tem; 7537 if (pool->ptr != pool->start) 7538 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 7539 pool->ptr = tem->s + (pool->ptr - pool->start); 7540 pool->start = tem->s; 7541 pool->end = tem->s + blockSize; 7542 } 7543 return XML_TRUE; 7544 } 7545 7546 static int FASTCALL 7547 nextScaffoldPart(XML_Parser parser) { 7548 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7549 CONTENT_SCAFFOLD *me; 7550 int next; 7551 7552 if (! dtd->scaffIndex) { 7553 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); 7554 if (! dtd->scaffIndex) 7555 return -1; 7556 dtd->scaffIndex[0] = 0; 7557 } 7558 7559 if (dtd->scaffCount >= dtd->scaffSize) { 7560 CONTENT_SCAFFOLD *temp; 7561 if (dtd->scaffold) { 7562 /* Detect and prevent integer overflow */ 7563 if (dtd->scaffSize > UINT_MAX / 2u) { 7564 return -1; 7565 } 7566 /* Detect and prevent integer overflow. 7567 * The preprocessor guard addresses the "always false" warning 7568 * from -Wtype-limits on platforms where 7569 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 7570 #if UINT_MAX >= SIZE_MAX 7571 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { 7572 return -1; 7573 } 7574 #endif 7575 7576 temp = (CONTENT_SCAFFOLD *)REALLOC( 7577 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); 7578 if (temp == NULL) 7579 return -1; 7580 dtd->scaffSize *= 2; 7581 } else { 7582 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS 7583 * sizeof(CONTENT_SCAFFOLD)); 7584 if (temp == NULL) 7585 return -1; 7586 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; 7587 } 7588 dtd->scaffold = temp; 7589 } 7590 next = dtd->scaffCount++; 7591 me = &dtd->scaffold[next]; 7592 if (dtd->scaffLevel) { 7593 CONTENT_SCAFFOLD *parent 7594 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; 7595 if (parent->lastchild) { 7596 dtd->scaffold[parent->lastchild].nextsib = next; 7597 } 7598 if (! parent->childcnt) 7599 parent->firstchild = next; 7600 parent->lastchild = next; 7601 parent->childcnt++; 7602 } 7603 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; 7604 return next; 7605 } 7606 7607 static XML_Content * 7608 build_model(XML_Parser parser) { 7609 /* Function build_model transforms the existing parser->m_dtd->scaffold 7610 * array of CONTENT_SCAFFOLD tree nodes into a new array of 7611 * XML_Content tree nodes followed by a gapless list of zero-terminated 7612 * strings. */ 7613 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7614 XML_Content *ret; 7615 XML_Char *str; /* the current string writing location */ 7616 7617 /* Detect and prevent integer overflow. 7618 * The preprocessor guard addresses the "always false" warning 7619 * from -Wtype-limits on platforms where 7620 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ 7621 #if UINT_MAX >= SIZE_MAX 7622 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { 7623 return NULL; 7624 } 7625 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { 7626 return NULL; 7627 } 7628 #endif 7629 if (dtd->scaffCount * sizeof(XML_Content) 7630 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { 7631 return NULL; 7632 } 7633 7634 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) 7635 + (dtd->contentStringLen * sizeof(XML_Char))); 7636 7637 ret = (XML_Content *)MALLOC(parser, allocsize); 7638 if (! ret) 7639 return NULL; 7640 7641 /* What follows is an iterative implementation (of what was previously done 7642 * recursively in a dedicated function called "build_node". The old recursive 7643 * build_node could be forced into stack exhaustion from input as small as a 7644 * few megabyte, and so that was a security issue. Hence, a function call 7645 * stack is avoided now by resolving recursion.) 7646 * 7647 * The iterative approach works as follows: 7648 * 7649 * - We have two writing pointers, both walking up the result array; one does 7650 * the work, the other creates "jobs" for its colleague to do, and leads 7651 * the way: 7652 * 7653 * - The faster one, pointer jobDest, always leads and writes "what job 7654 * to do" by the other, once they reach that place in the 7655 * array: leader "jobDest" stores the source node array index (relative 7656 * to array dtd->scaffold) in field "numchildren". 7657 * 7658 * - The slower one, pointer dest, looks at the value stored in the 7659 * "numchildren" field (which actually holds a source node array index 7660 * at that time) and puts the real data from dtd->scaffold in. 7661 * 7662 * - Before the loop starts, jobDest writes source array index 0 7663 * (where the root node is located) so that dest will have something to do 7664 * when it starts operation. 7665 * 7666 * - Whenever nodes with children are encountered, jobDest appends 7667 * them as new jobs, in order. 
As a result, tree node siblings are 7668 * adjacent in the resulting array, for example: 7669 * 7670 * [0] root, has two children 7671 * [1] first child of 0, has three children 7672 * [3] first child of 1, does not have children 7673 * [4] second child of 1, does not have children 7674 * [5] third child of 1, does not have children 7675 * [2] second child of 0, does not have children 7676 * 7677 * Or (the same data) presented in flat array view: 7678 * 7679 * [0] root, has two children 7680 * 7681 * [1] first child of 0, has three children 7682 * [2] second child of 0, does not have children 7683 * 7684 * [3] first child of 1, does not have children 7685 * [4] second child of 1, does not have children 7686 * [5] third child of 1, does not have children 7687 * 7688 * - The algorithm repeats until all target array indices have been processed. 7689 */ 7690 XML_Content *dest = ret; /* tree node writing location, moves upwards */ 7691 XML_Content *const destLimit = &ret[dtd->scaffCount]; 7692 XML_Content *jobDest = ret; /* next free writing location in target array */ 7693 str = (XML_Char *)&ret[dtd->scaffCount]; 7694 7695 /* Add the starting job, the root node (index 0) of the source tree */ 7696 (jobDest++)->numchildren = 0; 7697 7698 for (; dest < destLimit; dest++) { 7699 /* Retrieve source tree array index from job storage */ 7700 const int src_node = (int)dest->numchildren; 7701 7702 /* Convert item */ 7703 dest->type = dtd->scaffold[src_node].type; 7704 dest->quant = dtd->scaffold[src_node].quant; 7705 if (dest->type == XML_CTYPE_NAME) { 7706 const XML_Char *src; 7707 dest->name = str; 7708 src = dtd->scaffold[src_node].name; 7709 for (;;) { 7710 *str++ = *src; 7711 if (! 
*src) 7712 break; 7713 src++; 7714 } 7715 dest->numchildren = 0; 7716 dest->children = NULL; 7717 } else { 7718 unsigned int i; 7719 int cn; 7720 dest->name = NULL; 7721 dest->numchildren = dtd->scaffold[src_node].childcnt; 7722 dest->children = jobDest; 7723 7724 /* Append scaffold indices of children to array */ 7725 for (i = 0, cn = dtd->scaffold[src_node].firstchild; 7726 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) 7727 (jobDest++)->numchildren = (unsigned int)cn; 7728 } 7729 } 7730 7731 return ret; 7732 } 7733 7734 static ELEMENT_TYPE * 7735 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, 7736 const char *end) { 7737 DTD *const dtd = parser->m_dtd; /* save one level of indirection */ 7738 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); 7739 ELEMENT_TYPE *ret; 7740 7741 if (! name) 7742 return NULL; 7743 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, 7744 sizeof(ELEMENT_TYPE)); 7745 if (! ret) 7746 return NULL; 7747 if (ret->name != name) 7748 poolDiscard(&dtd->pool); 7749 else { 7750 poolFinish(&dtd->pool); 7751 if (! 
setElementTypePrefix(parser, ret)) 7752 return NULL; 7753 } 7754 return ret; 7755 } 7756 7757 static XML_Char * 7758 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { 7759 size_t charsRequired = 0; 7760 XML_Char *result; 7761 7762 /* First determine how long the string is */ 7763 while (s[charsRequired] != 0) { 7764 charsRequired++; 7765 } 7766 /* Include the terminator */ 7767 charsRequired++; 7768 7769 /* Now allocate space for the copy */ 7770 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); 7771 if (result == NULL) 7772 return NULL; 7773 /* Copy the original into place */ 7774 memcpy(result, s, charsRequired * sizeof(XML_Char)); 7775 return result; 7776 } 7777 7778 #if XML_GE == 1 7779 7780 static float 7781 accountingGetCurrentAmplification(XML_Parser rootParser) { 7782 const XmlBigCount countBytesOutput 7783 = rootParser->m_accounting.countBytesDirect 7784 + rootParser->m_accounting.countBytesIndirect; 7785 const float amplificationFactor 7786 = rootParser->m_accounting.countBytesDirect 7787 ? (countBytesOutput 7788 / (float)(rootParser->m_accounting.countBytesDirect)) 7789 : 1.0f; 7790 assert(! rootParser->m_parentParser); 7791 return amplificationFactor; 7792 } 7793 7794 static void 7795 accountingReportStats(XML_Parser originParser, const char *epilog) { 7796 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7797 assert(! 
rootParser->m_parentParser); 7798 7799 if (rootParser->m_accounting.debugLevel == 0u) { 7800 return; 7801 } 7802 7803 const float amplificationFactor 7804 = accountingGetCurrentAmplification(rootParser); 7805 fprintf(stderr, 7806 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( 7807 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", 7808 (void *)rootParser, rootParser->m_accounting.countBytesDirect, 7809 rootParser->m_accounting.countBytesIndirect, 7810 (double)amplificationFactor, epilog); 7811 } 7812 7813 static void 7814 accountingOnAbort(XML_Parser originParser) { 7815 accountingReportStats(originParser, " ABORTING\n"); 7816 } 7817 7818 static void 7819 accountingReportDiff(XML_Parser rootParser, 7820 unsigned int levelsAwayFromRootParser, const char *before, 7821 const char *after, ptrdiff_t bytesMore, int source_line, 7822 enum XML_Account account) { 7823 assert(! rootParser->m_parentParser); 7824 7825 fprintf(stderr, 7826 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", 7827 bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", 7828 levelsAwayFromRootParser, source_line, 10, ""); 7829 7830 const char ellipis[] = "[..]"; 7831 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; 7832 const unsigned int contextLength = 10; 7833 7834 /* Note: Performance is of no concern here */ 7835 const char *walker = before; 7836 if ((rootParser->m_accounting.debugLevel >= 3u) 7837 || (after - before) 7838 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { 7839 for (; walker < after; walker++) { 7840 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7841 } 7842 } else { 7843 for (; walker < before + contextLength; walker++) { 7844 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7845 } 7846 fprintf(stderr, ellipis); 7847 walker = after - contextLength; 7848 for (; walker < after; walker++) { 7849 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); 7850 } 7851 } 7852 fprintf(stderr, "\"\n"); 7853 } 7854 7855 static XML_Bool 7856 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, 7857 const char *after, int source_line, 7858 enum XML_Account account) { 7859 /* Note: We need to check the token type *first* to be sure that 7860 * we can even access variable <after>, safely. 7861 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ 7862 switch (tok) { 7863 case XML_TOK_INVALID: 7864 case XML_TOK_PARTIAL: 7865 case XML_TOK_PARTIAL_CHAR: 7866 case XML_TOK_NONE: 7867 return XML_TRUE; 7868 } 7869 7870 if (account == XML_ACCOUNT_NONE) 7871 return XML_TRUE; /* because these bytes have been accounted for, already */ 7872 7873 unsigned int levelsAwayFromRootParser; 7874 const XML_Parser rootParser 7875 = getRootParserOf(originParser, &levelsAwayFromRootParser); 7876 assert(! 
rootParser->m_parentParser); 7877 7878 const int isDirect 7879 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); 7880 const ptrdiff_t bytesMore = after - before; 7881 7882 XmlBigCount *const additionTarget 7883 = isDirect ? &rootParser->m_accounting.countBytesDirect 7884 : &rootParser->m_accounting.countBytesIndirect; 7885 7886 /* Detect and avoid integer overflow */ 7887 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) 7888 return XML_FALSE; 7889 *additionTarget += bytesMore; 7890 7891 const XmlBigCount countBytesOutput 7892 = rootParser->m_accounting.countBytesDirect 7893 + rootParser->m_accounting.countBytesIndirect; 7894 const float amplificationFactor 7895 = accountingGetCurrentAmplification(rootParser); 7896 const XML_Bool tolerated 7897 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) 7898 || (amplificationFactor 7899 <= rootParser->m_accounting.maximumAmplificationFactor); 7900 7901 if (rootParser->m_accounting.debugLevel >= 2u) { 7902 accountingReportStats(rootParser, ""); 7903 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, 7904 bytesMore, source_line, account); 7905 } 7906 7907 return tolerated; 7908 } 7909 7910 unsigned long long 7911 testingAccountingGetCountBytesDirect(XML_Parser parser) { 7912 if (! parser) 7913 return 0; 7914 return parser->m_accounting.countBytesDirect; 7915 } 7916 7917 unsigned long long 7918 testingAccountingGetCountBytesIndirect(XML_Parser parser) { 7919 if (! parser) 7920 return 0; 7921 return parser->m_accounting.countBytesIndirect; 7922 } 7923 7924 static void 7925 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, 7926 const char *action, int sourceLine) { 7927 assert(! 
rootParser->m_parentParser); 7928 if (rootParser->m_entity_stats.debugLevel == 0u) 7929 return; 7930 7931 # if defined(XML_UNICODE) 7932 const char *const entityName = "[..]"; 7933 # else 7934 const char *const entityName = entity->name; 7935 # endif 7936 7937 fprintf( 7938 stderr, 7939 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", 7940 (void *)rootParser, rootParser->m_entity_stats.countEverOpened, 7941 rootParser->m_entity_stats.currentDepth, 7942 rootParser->m_entity_stats.maximumDepthSeen, 7943 (rootParser->m_entity_stats.currentDepth - 1) * 2, "", 7944 entity->is_param ? "%" : "&", entityName, action, entity->textLen, 7945 sourceLine); 7946 } 7947 7948 static void 7949 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { 7950 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7951 assert(! rootParser->m_parentParser); 7952 7953 rootParser->m_entity_stats.countEverOpened++; 7954 rootParser->m_entity_stats.currentDepth++; 7955 if (rootParser->m_entity_stats.currentDepth 7956 > rootParser->m_entity_stats.maximumDepthSeen) { 7957 rootParser->m_entity_stats.maximumDepthSeen++; 7958 } 7959 7960 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); 7961 } 7962 7963 static void 7964 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { 7965 const XML_Parser rootParser = getRootParserOf(originParser, NULL); 7966 assert(! rootParser->m_parentParser); 7967 7968 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); 7969 rootParser->m_entity_stats.currentDepth--; 7970 } 7971 7972 static XML_Parser 7973 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { 7974 XML_Parser rootParser = parser; 7975 unsigned int stepsTakenUpwards = 0; 7976 while (rootParser->m_parentParser) { 7977 rootParser = rootParser->m_parentParser; 7978 stepsTakenUpwards++; 7979 } 7980 assert(! 
rootParser->m_parentParser); 7981 if (outLevelDiff != NULL) { 7982 *outLevelDiff = stepsTakenUpwards; 7983 } 7984 return rootParser; 7985 } 7986 7987 const char * 7988 unsignedCharToPrintable(unsigned char c) { 7989 switch (c) { 7990 case 0: 7991 return "\\0"; 7992 case 1: 7993 return "\\x1"; 7994 case 2: 7995 return "\\x2"; 7996 case 3: 7997 return "\\x3"; 7998 case 4: 7999 return "\\x4"; 8000 case 5: 8001 return "\\x5"; 8002 case 6: 8003 return "\\x6"; 8004 case 7: 8005 return "\\x7"; 8006 case 8: 8007 return "\\x8"; 8008 case 9: 8009 return "\\t"; 8010 case 10: 8011 return "\\n"; 8012 case 11: 8013 return "\\xB"; 8014 case 12: 8015 return "\\xC"; 8016 case 13: 8017 return "\\r"; 8018 case 14: 8019 return "\\xE"; 8020 case 15: 8021 return "\\xF"; 8022 case 16: 8023 return "\\x10"; 8024 case 17: 8025 return "\\x11"; 8026 case 18: 8027 return "\\x12"; 8028 case 19: 8029 return "\\x13"; 8030 case 20: 8031 return "\\x14"; 8032 case 21: 8033 return "\\x15"; 8034 case 22: 8035 return "\\x16"; 8036 case 23: 8037 return "\\x17"; 8038 case 24: 8039 return "\\x18"; 8040 case 25: 8041 return "\\x19"; 8042 case 26: 8043 return "\\x1A"; 8044 case 27: 8045 return "\\x1B"; 8046 case 28: 8047 return "\\x1C"; 8048 case 29: 8049 return "\\x1D"; 8050 case 30: 8051 return "\\x1E"; 8052 case 31: 8053 return "\\x1F"; 8054 case 32: 8055 return " "; 8056 case 33: 8057 return "!"; 8058 case 34: 8059 return "\\\""; 8060 case 35: 8061 return "#"; 8062 case 36: 8063 return "$"; 8064 case 37: 8065 return "%"; 8066 case 38: 8067 return "&"; 8068 case 39: 8069 return "'"; 8070 case 40: 8071 return "("; 8072 case 41: 8073 return ")"; 8074 case 42: 8075 return "*"; 8076 case 43: 8077 return "+"; 8078 case 44: 8079 return ","; 8080 case 45: 8081 return "-"; 8082 case 46: 8083 return "."; 8084 case 47: 8085 return "/"; 8086 case 48: 8087 return "0"; 8088 case 49: 8089 return "1"; 8090 case 50: 8091 return "2"; 8092 case 51: 8093 return "3"; 8094 case 52: 8095 return "4"; 8096 case 53: 8097 return 
"5"; 8098 case 54: 8099 return "6"; 8100 case 55: 8101 return "7"; 8102 case 56: 8103 return "8"; 8104 case 57: 8105 return "9"; 8106 case 58: 8107 return ":"; 8108 case 59: 8109 return ";"; 8110 case 60: 8111 return "<"; 8112 case 61: 8113 return "="; 8114 case 62: 8115 return ">"; 8116 case 63: 8117 return "?"; 8118 case 64: 8119 return "@"; 8120 case 65: 8121 return "A"; 8122 case 66: 8123 return "B"; 8124 case 67: 8125 return "C"; 8126 case 68: 8127 return "D"; 8128 case 69: 8129 return "E"; 8130 case 70: 8131 return "F"; 8132 case 71: 8133 return "G"; 8134 case 72: 8135 return "H"; 8136 case 73: 8137 return "I"; 8138 case 74: 8139 return "J"; 8140 case 75: 8141 return "K"; 8142 case 76: 8143 return "L"; 8144 case 77: 8145 return "M"; 8146 case 78: 8147 return "N"; 8148 case 79: 8149 return "O"; 8150 case 80: 8151 return "P"; 8152 case 81: 8153 return "Q"; 8154 case 82: 8155 return "R"; 8156 case 83: 8157 return "S"; 8158 case 84: 8159 return "T"; 8160 case 85: 8161 return "U"; 8162 case 86: 8163 return "V"; 8164 case 87: 8165 return "W"; 8166 case 88: 8167 return "X"; 8168 case 89: 8169 return "Y"; 8170 case 90: 8171 return "Z"; 8172 case 91: 8173 return "["; 8174 case 92: 8175 return "\\\\"; 8176 case 93: 8177 return "]"; 8178 case 94: 8179 return "^"; 8180 case 95: 8181 return "_"; 8182 case 96: 8183 return "`"; 8184 case 97: 8185 return "a"; 8186 case 98: 8187 return "b"; 8188 case 99: 8189 return "c"; 8190 case 100: 8191 return "d"; 8192 case 101: 8193 return "e"; 8194 case 102: 8195 return "f"; 8196 case 103: 8197 return "g"; 8198 case 104: 8199 return "h"; 8200 case 105: 8201 return "i"; 8202 case 106: 8203 return "j"; 8204 case 107: 8205 return "k"; 8206 case 108: 8207 return "l"; 8208 case 109: 8209 return "m"; 8210 case 110: 8211 return "n"; 8212 case 111: 8213 return "o"; 8214 case 112: 8215 return "p"; 8216 case 113: 8217 return "q"; 8218 case 114: 8219 return "r"; 8220 case 115: 8221 return "s"; 8222 case 116: 8223 return "t"; 8224 case 117: 8225 
return "u"; 8226 case 118: 8227 return "v"; 8228 case 119: 8229 return "w"; 8230 case 120: 8231 return "x"; 8232 case 121: 8233 return "y"; 8234 case 122: 8235 return "z"; 8236 case 123: 8237 return "{"; 8238 case 124: 8239 return "|"; 8240 case 125: 8241 return "}"; 8242 case 126: 8243 return "~"; 8244 case 127: 8245 return "\\x7F"; 8246 case 128: 8247 return "\\x80"; 8248 case 129: 8249 return "\\x81"; 8250 case 130: 8251 return "\\x82"; 8252 case 131: 8253 return "\\x83"; 8254 case 132: 8255 return "\\x84"; 8256 case 133: 8257 return "\\x85"; 8258 case 134: 8259 return "\\x86"; 8260 case 135: 8261 return "\\x87"; 8262 case 136: 8263 return "\\x88"; 8264 case 137: 8265 return "\\x89"; 8266 case 138: 8267 return "\\x8A"; 8268 case 139: 8269 return "\\x8B"; 8270 case 140: 8271 return "\\x8C"; 8272 case 141: 8273 return "\\x8D"; 8274 case 142: 8275 return "\\x8E"; 8276 case 143: 8277 return "\\x8F"; 8278 case 144: 8279 return "\\x90"; 8280 case 145: 8281 return "\\x91"; 8282 case 146: 8283 return "\\x92"; 8284 case 147: 8285 return "\\x93"; 8286 case 148: 8287 return "\\x94"; 8288 case 149: 8289 return "\\x95"; 8290 case 150: 8291 return "\\x96"; 8292 case 151: 8293 return "\\x97"; 8294 case 152: 8295 return "\\x98"; 8296 case 153: 8297 return "\\x99"; 8298 case 154: 8299 return "\\x9A"; 8300 case 155: 8301 return "\\x9B"; 8302 case 156: 8303 return "\\x9C"; 8304 case 157: 8305 return "\\x9D"; 8306 case 158: 8307 return "\\x9E"; 8308 case 159: 8309 return "\\x9F"; 8310 case 160: 8311 return "\\xA0"; 8312 case 161: 8313 return "\\xA1"; 8314 case 162: 8315 return "\\xA2"; 8316 case 163: 8317 return "\\xA3"; 8318 case 164: 8319 return "\\xA4"; 8320 case 165: 8321 return "\\xA5"; 8322 case 166: 8323 return "\\xA6"; 8324 case 167: 8325 return "\\xA7"; 8326 case 168: 8327 return "\\xA8"; 8328 case 169: 8329 return "\\xA9"; 8330 case 170: 8331 return "\\xAA"; 8332 case 171: 8333 return "\\xAB"; 8334 case 172: 8335 return "\\xAC"; 8336 case 173: 8337 return "\\xAD"; 8338 
case 174: 8339 return "\\xAE"; 8340 case 175: 8341 return "\\xAF"; 8342 case 176: 8343 return "\\xB0"; 8344 case 177: 8345 return "\\xB1"; 8346 case 178: 8347 return "\\xB2"; 8348 case 179: 8349 return "\\xB3"; 8350 case 180: 8351 return "\\xB4"; 8352 case 181: 8353 return "\\xB5"; 8354 case 182: 8355 return "\\xB6"; 8356 case 183: 8357 return "\\xB7"; 8358 case 184: 8359 return "\\xB8"; 8360 case 185: 8361 return "\\xB9"; 8362 case 186: 8363 return "\\xBA"; 8364 case 187: 8365 return "\\xBB"; 8366 case 188: 8367 return "\\xBC"; 8368 case 189: 8369 return "\\xBD"; 8370 case 190: 8371 return "\\xBE"; 8372 case 191: 8373 return "\\xBF"; 8374 case 192: 8375 return "\\xC0"; 8376 case 193: 8377 return "\\xC1"; 8378 case 194: 8379 return "\\xC2"; 8380 case 195: 8381 return "\\xC3"; 8382 case 196: 8383 return "\\xC4"; 8384 case 197: 8385 return "\\xC5"; 8386 case 198: 8387 return "\\xC6"; 8388 case 199: 8389 return "\\xC7"; 8390 case 200: 8391 return "\\xC8"; 8392 case 201: 8393 return "\\xC9"; 8394 case 202: 8395 return "\\xCA"; 8396 case 203: 8397 return "\\xCB"; 8398 case 204: 8399 return "\\xCC"; 8400 case 205: 8401 return "\\xCD"; 8402 case 206: 8403 return "\\xCE"; 8404 case 207: 8405 return "\\xCF"; 8406 case 208: 8407 return "\\xD0"; 8408 case 209: 8409 return "\\xD1"; 8410 case 210: 8411 return "\\xD2"; 8412 case 211: 8413 return "\\xD3"; 8414 case 212: 8415 return "\\xD4"; 8416 case 213: 8417 return "\\xD5"; 8418 case 214: 8419 return "\\xD6"; 8420 case 215: 8421 return "\\xD7"; 8422 case 216: 8423 return "\\xD8"; 8424 case 217: 8425 return "\\xD9"; 8426 case 218: 8427 return "\\xDA"; 8428 case 219: 8429 return "\\xDB"; 8430 case 220: 8431 return "\\xDC"; 8432 case 221: 8433 return "\\xDD"; 8434 case 222: 8435 return "\\xDE"; 8436 case 223: 8437 return "\\xDF"; 8438 case 224: 8439 return "\\xE0"; 8440 case 225: 8441 return "\\xE1"; 8442 case 226: 8443 return "\\xE2"; 8444 case 227: 8445 return "\\xE3"; 8446 case 228: 8447 return "\\xE4"; 8448 case 229: 8449 
return "\\xE5"; 8450 case 230: 8451 return "\\xE6"; 8452 case 231: 8453 return "\\xE7"; 8454 case 232: 8455 return "\\xE8"; 8456 case 233: 8457 return "\\xE9"; 8458 case 234: 8459 return "\\xEA"; 8460 case 235: 8461 return "\\xEB"; 8462 case 236: 8463 return "\\xEC"; 8464 case 237: 8465 return "\\xED"; 8466 case 238: 8467 return "\\xEE"; 8468 case 239: 8469 return "\\xEF"; 8470 case 240: 8471 return "\\xF0"; 8472 case 241: 8473 return "\\xF1"; 8474 case 242: 8475 return "\\xF2"; 8476 case 243: 8477 return "\\xF3"; 8478 case 244: 8479 return "\\xF4"; 8480 case 245: 8481 return "\\xF5"; 8482 case 246: 8483 return "\\xF6"; 8484 case 247: 8485 return "\\xF7"; 8486 case 248: 8487 return "\\xF8"; 8488 case 249: 8489 return "\\xF9"; 8490 case 250: 8491 return "\\xFA"; 8492 case 251: 8493 return "\\xFB"; 8494 case 252: 8495 return "\\xFC"; 8496 case 253: 8497 return "\\xFD"; 8498 case 254: 8499 return "\\xFE"; 8500 case 255: 8501 return "\\xFF"; 8502 default: 8503 assert(0); /* never gets here */ 8504 return "dead code"; 8505 } 8506 assert(0); /* never gets here */ 8507 } 8508 8509 #endif /* XML_GE == 1 */ 8510 8511 static unsigned long 8512 getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { 8513 const char *const valueOrNull = getenv(variableName); 8514 if (valueOrNull == NULL) { 8515 return defaultDebugLevel; 8516 } 8517 const char *const value = valueOrNull; 8518 8519 errno = 0; 8520 char *afterValue = NULL; 8521 unsigned long debugLevel = strtoul(value, &afterValue, 10); 8522 if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { 8523 errno = 0; 8524 return defaultDebugLevel; 8525 } 8526 8527 return debugLevel; 8528 } 8529