1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 2 See the file COPYING for copying permission. 3 */ 4 5 #include <stddef.h> 6 7 #ifdef WIN32 8 #include "winconfig.h" 9 #elif defined(MACOS_CLASSIC) 10 #include "macconfig.h" 11 #elif defined(__amigaos__) 12 #include "amigaconfig.h" 13 #elif defined(__WATCOMC__) 14 #include "watcomconfig.h" 15 #else 16 #ifdef HAVE_EXPAT_CONFIG_H 17 #include <expat_config.h> 18 #endif 19 #endif /* ndef WIN32 */ 20 21 #include "expat_external.h" 22 #include "internal.h" 23 #include "xmlrole.h" 24 #include "ascii.h" 25 26 /* Doesn't check: 27 28 that ,| are not mixed in a model group 29 content of literals 30 31 */ 32 33 static const char KW_ANY[] = { 34 ASCII_A, ASCII_N, ASCII_Y, '\0' }; 35 static const char KW_ATTLIST[] = { 36 ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; 37 static const char KW_CDATA[] = { 38 ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; 39 static const char KW_DOCTYPE[] = { 40 ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; 41 static const char KW_ELEMENT[] = { 42 ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; 43 static const char KW_EMPTY[] = { 44 ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; 45 static const char KW_ENTITIES[] = { 46 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, 47 '\0' }; 48 static const char KW_ENTITY[] = { 49 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; 50 static const char KW_FIXED[] = { 51 ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; 52 static const char KW_ID[] = { 53 ASCII_I, ASCII_D, '\0' }; 54 static const char KW_IDREF[] = { 55 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; 56 static const char KW_IDREFS[] = { 57 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; 58 #ifdef XML_DTD 59 static const char KW_IGNORE[] = { 60 ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; 61 #endif 62 static const char KW_IMPLIED[] = { 63 ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; 64 #ifdef XML_DTD 65 static const char KW_INCLUDE[] = { 66 ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; 67 #endif 68 static const char KW_NDATA[] = { 69 ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; 70 static const char KW_NMTOKEN[] = { 71 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; 72 static const char KW_NMTOKENS[] = { 73 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, 74 '\0' }; 75 static const char KW_NOTATION[] = 76 { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, 77 '\0' }; 78 static const char KW_PCDATA[] = { 79 ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; 80 static const char KW_PUBLIC[] = { 81 ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; 82 static const char KW_REQUIRED[] = { 83 ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, 84 '\0' }; 85 static const char KW_SYSTEM[] = { 86 ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; 87 88 #ifndef MIN_BYTES_PER_CHAR 89 #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) 90 #endif 91 92 #ifdef XML_DTD 93 #define setTopLevel(state) \ 94 ((state)->handler = ((state)->documentEntity \ 95 ? internalSubset \ 96 : externalSubset1)) 97 #else /* not XML_DTD */ 98 #define setTopLevel(state) ((state)->handler = internalSubset) 99 #endif /* not XML_DTD */ 100 101 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, 102 int tok, 103 const char *ptr, 104 const char *end, 105 const ENCODING *enc); 106 107 static PROLOG_HANDLER 108 prolog0, prolog1, prolog2, 109 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, 110 internalSubset, 111 entity0, entity1, entity2, entity3, entity4, entity5, entity6, 112 entity7, entity8, entity9, entity10, 113 notation0, notation1, notation2, notation3, notation4, 114 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, 115 attlist7, attlist8, attlist9, 116 element0, element1, element2, element3, element4, element5, element6, 117 element7, 118 #ifdef XML_DTD 119 externalSubset0, externalSubset1, 120 condSect0, condSect1, condSect2, 121 #endif /* XML_DTD */ 122 declClose, 123 error; 124 125 static int FASTCALL common(PROLOG_STATE *state, int tok); 126 127 static int PTRCALL 128 prolog0(PROLOG_STATE *state, 129 int tok, 130 const char *ptr, 131 const char *end, 132 const ENCODING *enc) 133 { 134 switch (tok) { 135 case XML_TOK_PROLOG_S: 136 state->handler = prolog1; 137 return XML_ROLE_NONE; 138 case XML_TOK_XML_DECL: 139 state->handler = prolog1; 140 return XML_ROLE_XML_DECL; 141 case XML_TOK_PI: 142 state->handler = prolog1; 143 return XML_ROLE_PI; 144 case XML_TOK_COMMENT: 145 state->handler = prolog1; 146 return XML_ROLE_COMMENT; 147 case XML_TOK_BOM: 148 return XML_ROLE_NONE; 149 case XML_TOK_DECL_OPEN: 150 if (!XmlNameMatchesAscii(enc, 151 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 152 end, 153 KW_DOCTYPE)) 154 break; 155 state->handler = doctype0; 156 return XML_ROLE_DOCTYPE_NONE; 157 case XML_TOK_INSTANCE_START: 158 state->handler = error; 159 return XML_ROLE_INSTANCE_START; 160 } 161 return common(state, tok); 162 } 163 164 static int PTRCALL 165 prolog1(PROLOG_STATE *state, 166 int tok, 167 const char *ptr, 168 const char *end, 169 const ENCODING *enc) 170 { 171 switch (tok) { 172 case XML_TOK_PROLOG_S: 173 return XML_ROLE_NONE; 174 case XML_TOK_PI: 175 return XML_ROLE_PI; 176 case XML_TOK_COMMENT: 177 return XML_ROLE_COMMENT; 178 case XML_TOK_BOM: 179 return XML_ROLE_NONE; 180 case XML_TOK_DECL_OPEN: 181 if (!XmlNameMatchesAscii(enc, 182 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 183 end, 184 KW_DOCTYPE)) 185 break; 186 state->handler = doctype0; 187 return XML_ROLE_DOCTYPE_NONE; 188 case XML_TOK_INSTANCE_START: 189 state->handler = error; 190 return XML_ROLE_INSTANCE_START; 191 } 192 return common(state, tok); 193 } 194 195 static int PTRCALL 196 prolog2(PROLOG_STATE *state, 197 int tok, 198 const char *UNUSED_P(ptr), 199 const char *UNUSED_P(end), 200 const ENCODING *UNUSED_P(enc)) 201 { 202 switch (tok) { 203 case XML_TOK_PROLOG_S: 204 return XML_ROLE_NONE; 205 case XML_TOK_PI: 206 return XML_ROLE_PI; 207 case XML_TOK_COMMENT: 208 return XML_ROLE_COMMENT; 209 case XML_TOK_INSTANCE_START: 210 state->handler = error; 211 return XML_ROLE_INSTANCE_START; 212 } 213 return common(state, tok); 214 } 215 216 static int PTRCALL 217 doctype0(PROLOG_STATE *state, 218 int tok, 219 const char *UNUSED_P(ptr), 220 const char *UNUSED_P(end), 221 const ENCODING *UNUSED_P(enc)) 222 { 223 switch (tok) { 224 case XML_TOK_PROLOG_S: 225 return XML_ROLE_DOCTYPE_NONE; 226 case XML_TOK_NAME: 227 case XML_TOK_PREFIXED_NAME: 228 state->handler = doctype1; 229 return XML_ROLE_DOCTYPE_NAME; 230 } 231 return common(state, tok); 232 } 233 234 static int PTRCALL 235 doctype1(PROLOG_STATE *state, 236 int tok, 237 const char *ptr, 238 const char *end, 239 const ENCODING *enc) 240 { 241 switch (tok) { 242 case XML_TOK_PROLOG_S: 243 return XML_ROLE_DOCTYPE_NONE; 244 case XML_TOK_OPEN_BRACKET: 245 state->handler = internalSubset; 246 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; 247 case XML_TOK_DECL_CLOSE: 248 state->handler = prolog2; 249 return XML_ROLE_DOCTYPE_CLOSE; 250 case XML_TOK_NAME: 251 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 252 state->handler = doctype3; 253 return XML_ROLE_DOCTYPE_NONE; 254 } 255 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 256 state->handler = doctype2; 257 return XML_ROLE_DOCTYPE_NONE; 258 } 259 break; 260 } 261 return common(state, tok); 262 } 263 264 static int PTRCALL 265 doctype2(PROLOG_STATE *state, 266 int tok, 267 const char *UNUSED_P(ptr), 268 const char *UNUSED_P(end), 269 const ENCODING *UNUSED_P(enc)) 270 { 271 switch (tok) { 272 case XML_TOK_PROLOG_S: 273 return XML_ROLE_DOCTYPE_NONE; 274 case XML_TOK_LITERAL: 275 state->handler = doctype3; 276 return XML_ROLE_DOCTYPE_PUBLIC_ID; 277 } 278 return common(state, tok); 279 } 280 281 static int PTRCALL 282 doctype3(PROLOG_STATE *state, 283 int tok, 284 const char *UNUSED_P(ptr), 285 const char *UNUSED_P(end), 286 const ENCODING *UNUSED_P(enc)) 287 { 288 switch (tok) { 289 case XML_TOK_PROLOG_S: 290 return XML_ROLE_DOCTYPE_NONE; 291 case XML_TOK_LITERAL: 292 state->handler = doctype4; 293 return XML_ROLE_DOCTYPE_SYSTEM_ID; 294 } 295 return common(state, tok); 296 } 297 298 static int PTRCALL 299 doctype4(PROLOG_STATE *state, 300 int tok, 301 const char *UNUSED_P(ptr), 302 const char *UNUSED_P(end), 303 const ENCODING *UNUSED_P(enc)) 304 { 305 switch (tok) { 306 case XML_TOK_PROLOG_S: 307 return XML_ROLE_DOCTYPE_NONE; 308 case XML_TOK_OPEN_BRACKET: 309 state->handler = internalSubset; 310 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; 311 case XML_TOK_DECL_CLOSE: 312 state->handler = prolog2; 313 return XML_ROLE_DOCTYPE_CLOSE; 314 } 315 return common(state, tok); 316 } 317 318 static int PTRCALL 319 doctype5(PROLOG_STATE *state, 320 int tok, 321 const char *UNUSED_P(ptr), 322 const char *UNUSED_P(end), 323 const ENCODING *UNUSED_P(enc)) 324 { 325 switch (tok) { 326 case XML_TOK_PROLOG_S: 327 return XML_ROLE_DOCTYPE_NONE; 328 case XML_TOK_DECL_CLOSE: 329 state->handler = prolog2; 330 return XML_ROLE_DOCTYPE_CLOSE; 331 } 332 return common(state, tok); 333 } 334 335 static int PTRCALL 336 internalSubset(PROLOG_STATE *state, 337 int tok, 338 const char *ptr, 339 const char *end, 340 const ENCODING *enc) 341 { 342 switch (tok) { 343 case XML_TOK_PROLOG_S: 344 return XML_ROLE_NONE; 345 case XML_TOK_DECL_OPEN: 346 if (XmlNameMatchesAscii(enc, 347 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 348 end, 349 KW_ENTITY)) { 350 state->handler = entity0; 351 return XML_ROLE_ENTITY_NONE; 352 } 353 if (XmlNameMatchesAscii(enc, 354 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 355 end, 356 KW_ATTLIST)) { 357 state->handler = attlist0; 358 return XML_ROLE_ATTLIST_NONE; 359 } 360 if (XmlNameMatchesAscii(enc, 361 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 362 end, 363 KW_ELEMENT)) { 364 state->handler = element0; 365 return XML_ROLE_ELEMENT_NONE; 366 } 367 if (XmlNameMatchesAscii(enc, 368 ptr + 2 * MIN_BYTES_PER_CHAR(enc), 369 end, 370 KW_NOTATION)) { 371 state->handler = notation0; 372 return XML_ROLE_NOTATION_NONE; 373 } 374 break; 375 case XML_TOK_PI: 376 return XML_ROLE_PI; 377 case XML_TOK_COMMENT: 378 return XML_ROLE_COMMENT; 379 case XML_TOK_PARAM_ENTITY_REF: 380 return XML_ROLE_PARAM_ENTITY_REF; 381 case XML_TOK_CLOSE_BRACKET: 382 state->handler = doctype5; 383 return XML_ROLE_DOCTYPE_NONE; 384 case XML_TOK_NONE: 385 return XML_ROLE_NONE; 386 } 387 return common(state, tok); 388 } 389 390 #ifdef XML_DTD 391 392 static int PTRCALL 393 externalSubset0(PROLOG_STATE *state, 394 int tok, 395 const char *ptr, 396 const char *end, 397 const ENCODING *enc) 398 { 399 state->handler = externalSubset1; 400 if (tok == XML_TOK_XML_DECL) 401 return XML_ROLE_TEXT_DECL; 402 return externalSubset1(state, tok, ptr, end, enc); 403 } 404 405 static int PTRCALL 406 externalSubset1(PROLOG_STATE *state, 407 int tok, 408 const char *ptr, 409 const char *end, 410 const ENCODING *enc) 411 { 412 switch (tok) { 413 case XML_TOK_COND_SECT_OPEN: 414 state->handler = condSect0; 415 return XML_ROLE_NONE; 416 case XML_TOK_COND_SECT_CLOSE: 417 if (state->includeLevel == 0) 418 break; 419 state->includeLevel -= 1; 420 return XML_ROLE_NONE; 421 case XML_TOK_PROLOG_S: 422 return XML_ROLE_NONE; 423 case XML_TOK_CLOSE_BRACKET: 424 break; 425 case XML_TOK_NONE: 426 if (state->includeLevel) 427 break; 428 return XML_ROLE_NONE; 429 default: 430 return internalSubset(state, tok, ptr, end, enc); 431 } 432 return common(state, tok); 433 } 434 435 #endif /* XML_DTD */ 436 437 static int PTRCALL 438 entity0(PROLOG_STATE *state, 439 int tok, 440 const char *UNUSED_P(ptr), 441 const char *UNUSED_P(end), 442 const ENCODING *UNUSED_P(enc)) 443 { 444 switch (tok) { 445 case XML_TOK_PROLOG_S: 446 return XML_ROLE_ENTITY_NONE; 447 case XML_TOK_PERCENT: 448 state->handler = entity1; 449 return XML_ROLE_ENTITY_NONE; 450 case XML_TOK_NAME: 451 state->handler = entity2; 452 return XML_ROLE_GENERAL_ENTITY_NAME; 453 } 454 return common(state, tok); 455 } 456 457 static int PTRCALL 458 entity1(PROLOG_STATE *state, 459 int tok, 460 const char *UNUSED_P(ptr), 461 const char *UNUSED_P(end), 462 const ENCODING *UNUSED_P(enc)) 463 { 464 switch (tok) { 465 case XML_TOK_PROLOG_S: 466 return XML_ROLE_ENTITY_NONE; 467 case XML_TOK_NAME: 468 state->handler = entity7; 469 return XML_ROLE_PARAM_ENTITY_NAME; 470 } 471 return common(state, tok); 472 } 473 474 static int PTRCALL 475 entity2(PROLOG_STATE *state, 476 int tok, 477 const char *ptr, 478 const char *end, 479 const ENCODING *enc) 480 { 481 switch (tok) { 482 case XML_TOK_PROLOG_S: 483 return XML_ROLE_ENTITY_NONE; 484 case XML_TOK_NAME: 485 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 486 state->handler = entity4; 487 return XML_ROLE_ENTITY_NONE; 488 } 489 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 490 state->handler = entity3; 491 return XML_ROLE_ENTITY_NONE; 492 } 493 break; 494 case XML_TOK_LITERAL: 495 state->handler = declClose; 496 state->role_none = XML_ROLE_ENTITY_NONE; 497 return XML_ROLE_ENTITY_VALUE; 498 } 499 return common(state, tok); 500 } 501 502 static int PTRCALL 503 entity3(PROLOG_STATE *state, 504 int tok, 505 const char *UNUSED_P(ptr), 506 const char *UNUSED_P(end), 507 const ENCODING *UNUSED_P(enc)) 508 { 509 switch (tok) { 510 case XML_TOK_PROLOG_S: 511 return XML_ROLE_ENTITY_NONE; 512 case XML_TOK_LITERAL: 513 state->handler = entity4; 514 return XML_ROLE_ENTITY_PUBLIC_ID; 515 } 516 return common(state, tok); 517 } 518 519 static int PTRCALL 520 entity4(PROLOG_STATE *state, 521 int tok, 522 const char *UNUSED_P(ptr), 523 const char *UNUSED_P(end), 524 const ENCODING *UNUSED_P(enc)) 525 { 526 switch (tok) { 527 case XML_TOK_PROLOG_S: 528 return XML_ROLE_ENTITY_NONE; 529 case XML_TOK_LITERAL: 530 state->handler = entity5; 531 return XML_ROLE_ENTITY_SYSTEM_ID; 532 } 533 return common(state, tok); 534 } 535 536 static int PTRCALL 537 entity5(PROLOG_STATE *state, 538 int tok, 539 const char *ptr, 540 const char *end, 541 const ENCODING *enc) 542 { 543 switch (tok) { 544 case XML_TOK_PROLOG_S: 545 return XML_ROLE_ENTITY_NONE; 546 case XML_TOK_DECL_CLOSE: 547 setTopLevel(state); 548 return XML_ROLE_ENTITY_COMPLETE; 549 case XML_TOK_NAME: 550 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { 551 state->handler = entity6; 552 return XML_ROLE_ENTITY_NONE; 553 } 554 break; 555 } 556 return common(state, tok); 557 } 558 559 static int PTRCALL 560 entity6(PROLOG_STATE *state, 561 int tok, 562 const char *UNUSED_P(ptr), 563 const char *UNUSED_P(end), 564 const ENCODING *UNUSED_P(enc)) 565 { 566 switch (tok) { 567 case XML_TOK_PROLOG_S: 568 return XML_ROLE_ENTITY_NONE; 569 case XML_TOK_NAME: 570 state->handler = declClose; 571 state->role_none = XML_ROLE_ENTITY_NONE; 572 return XML_ROLE_ENTITY_NOTATION_NAME; 573 } 574 return common(state, tok); 575 } 576 577 static int PTRCALL 578 entity7(PROLOG_STATE *state, 579 int tok, 580 const char *ptr, 581 const char *end, 582 const ENCODING *enc) 583 { 584 switch (tok) { 585 case XML_TOK_PROLOG_S: 586 return XML_ROLE_ENTITY_NONE; 587 case XML_TOK_NAME: 588 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 589 state->handler = entity9; 590 return XML_ROLE_ENTITY_NONE; 591 } 592 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 593 state->handler = entity8; 594 return XML_ROLE_ENTITY_NONE; 595 } 596 break; 597 case XML_TOK_LITERAL: 598 state->handler = declClose; 599 state->role_none = XML_ROLE_ENTITY_NONE; 600 return XML_ROLE_ENTITY_VALUE; 601 } 602 return common(state, tok); 603 } 604 605 static int PTRCALL 606 entity8(PROLOG_STATE *state, 607 int tok, 608 const char *UNUSED_P(ptr), 609 const char *UNUSED_P(end), 610 const ENCODING *UNUSED_P(enc)) 611 { 612 switch (tok) { 613 case XML_TOK_PROLOG_S: 614 return XML_ROLE_ENTITY_NONE; 615 case XML_TOK_LITERAL: 616 state->handler = entity9; 617 return XML_ROLE_ENTITY_PUBLIC_ID; 618 } 619 return common(state, tok); 620 } 621 622 static int PTRCALL 623 entity9(PROLOG_STATE *state, 624 int tok, 625 const char *UNUSED_P(ptr), 626 const char *UNUSED_P(end), 627 const ENCODING *UNUSED_P(enc)) 628 { 629 switch (tok) { 630 case XML_TOK_PROLOG_S: 631 return XML_ROLE_ENTITY_NONE; 632 case XML_TOK_LITERAL: 633 state->handler = entity10; 634 return XML_ROLE_ENTITY_SYSTEM_ID; 635 } 636 return common(state, tok); 637 } 638 639 static int PTRCALL 640 entity10(PROLOG_STATE *state, 641 int tok, 642 const char *UNUSED_P(ptr), 643 const char *UNUSED_P(end), 644 const ENCODING *UNUSED_P(enc)) 645 { 646 switch (tok) { 647 case XML_TOK_PROLOG_S: 648 return XML_ROLE_ENTITY_NONE; 649 case XML_TOK_DECL_CLOSE: 650 setTopLevel(state); 651 return XML_ROLE_ENTITY_COMPLETE; 652 } 653 return common(state, tok); 654 } 655 656 static int PTRCALL 657 notation0(PROLOG_STATE *state, 658 int tok, 659 const char *UNUSED_P(ptr), 660 const char *UNUSED_P(end), 661 const ENCODING *UNUSED_P(enc)) 662 { 663 switch (tok) { 664 case XML_TOK_PROLOG_S: 665 return XML_ROLE_NOTATION_NONE; 666 case XML_TOK_NAME: 667 state->handler = notation1; 668 return XML_ROLE_NOTATION_NAME; 669 } 670 return common(state, tok); 671 } 672 673 static int PTRCALL 674 notation1(PROLOG_STATE *state, 675 int tok, 676 const char *ptr, 677 const char *end, 678 const ENCODING *enc) 679 { 680 switch (tok) { 681 case XML_TOK_PROLOG_S: 682 return XML_ROLE_NOTATION_NONE; 683 case XML_TOK_NAME: 684 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { 685 state->handler = notation3; 686 return XML_ROLE_NOTATION_NONE; 687 } 688 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { 689 state->handler = notation2; 690 return XML_ROLE_NOTATION_NONE; 691 } 692 break; 693 } 694 return common(state, tok); 695 } 696 697 static int PTRCALL 698 notation2(PROLOG_STATE *state, 699 int tok, 700 const char *UNUSED_P(ptr), 701 const char *UNUSED_P(end), 702 const ENCODING *UNUSED_P(enc)) 703 { 704 switch (tok) { 705 case XML_TOK_PROLOG_S: 706 return XML_ROLE_NOTATION_NONE; 707 case XML_TOK_LITERAL: 708 state->handler = notation4; 709 return XML_ROLE_NOTATION_PUBLIC_ID; 710 } 711 return common(state, tok); 712 } 713 714 static int PTRCALL 715 notation3(PROLOG_STATE *state, 716 int tok, 717 const char *UNUSED_P(ptr), 718 const char *UNUSED_P(end), 719 const ENCODING *UNUSED_P(enc)) 720 { 721 switch (tok) { 722 case XML_TOK_PROLOG_S: 723 return XML_ROLE_NOTATION_NONE; 724 case XML_TOK_LITERAL: 725 state->handler = declClose; 726 state->role_none = XML_ROLE_NOTATION_NONE; 727 return XML_ROLE_NOTATION_SYSTEM_ID; 728 } 729 return common(state, tok); 730 } 731 732 static int PTRCALL 733 notation4(PROLOG_STATE *state, 734 int tok, 735 const char *UNUSED_P(ptr), 736 const char *UNUSED_P(end), 737 const ENCODING *UNUSED_P(enc)) 738 { 739 switch (tok) { 740 case XML_TOK_PROLOG_S: 741 return XML_ROLE_NOTATION_NONE; 742 case XML_TOK_LITERAL: 743 state->handler = declClose; 744 state->role_none = XML_ROLE_NOTATION_NONE; 745 return XML_ROLE_NOTATION_SYSTEM_ID; 746 case XML_TOK_DECL_CLOSE: 747 setTopLevel(state); 748 return XML_ROLE_NOTATION_NO_SYSTEM_ID; 749 } 750 return common(state, tok); 751 } 752 753 static int PTRCALL 754 attlist0(PROLOG_STATE *state, 755 int tok, 756 const char *UNUSED_P(ptr), 757 const char *UNUSED_P(end), 758 const ENCODING *UNUSED_P(enc)) 759 { 760 switch (tok) { 761 case XML_TOK_PROLOG_S: 762 return XML_ROLE_ATTLIST_NONE; 763 case XML_TOK_NAME: 764 case XML_TOK_PREFIXED_NAME: 765 state->handler = attlist1; 766 return XML_ROLE_ATTLIST_ELEMENT_NAME; 767 } 768 return common(state, tok); 769 } 770 771 static int PTRCALL 772 attlist1(PROLOG_STATE *state, 773 int tok, 774 const char *UNUSED_P(ptr), 775 const char *UNUSED_P(end), 776 const ENCODING *UNUSED_P(enc)) 777 { 778 switch (tok) { 779 case XML_TOK_PROLOG_S: 780 return XML_ROLE_ATTLIST_NONE; 781 case XML_TOK_DECL_CLOSE: 782 setTopLevel(state); 783 return XML_ROLE_ATTLIST_NONE; 784 case XML_TOK_NAME: 785 case XML_TOK_PREFIXED_NAME: 786 state->handler = attlist2; 787 return XML_ROLE_ATTRIBUTE_NAME; 788 } 789 return common(state, tok); 790 } 791 792 static int PTRCALL 793 attlist2(PROLOG_STATE *state, 794 int tok, 795 const char *ptr, 796 const char *end, 797 const ENCODING *enc) 798 { 799 switch (tok) { 800 case XML_TOK_PROLOG_S: 801 return XML_ROLE_ATTLIST_NONE; 802 case XML_TOK_NAME: 803 { 804 static const char * const types[] = { 805 KW_CDATA, 806 KW_ID, 807 KW_IDREF, 808 KW_IDREFS, 809 KW_ENTITY, 810 KW_ENTITIES, 811 KW_NMTOKEN, 812 KW_NMTOKENS, 813 }; 814 int i; 815 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) 816 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { 817 state->handler = attlist8; 818 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; 819 } 820 } 821 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { 822 state->handler = attlist5; 823 return XML_ROLE_ATTLIST_NONE; 824 } 825 break; 826 case XML_TOK_OPEN_PAREN: 827 state->handler = attlist3; 828 return XML_ROLE_ATTLIST_NONE; 829 } 830 return common(state, tok); 831 } 832 833 static int PTRCALL 834 attlist3(PROLOG_STATE *state, 835 int tok, 836 const char *UNUSED_P(ptr), 837 const char *UNUSED_P(end), 838 const ENCODING *UNUSED_P(enc)) 839 { 840 switch (tok) { 841 case XML_TOK_PROLOG_S: 842 return XML_ROLE_ATTLIST_NONE; 843 case XML_TOK_NMTOKEN: 844 case XML_TOK_NAME: 845 case XML_TOK_PREFIXED_NAME: 846 state->handler = attlist4; 847 return XML_ROLE_ATTRIBUTE_ENUM_VALUE; 848 } 849 return common(state, tok); 850 } 851 852 static int PTRCALL 853 attlist4(PROLOG_STATE *state, 854 int tok, 855 const char *UNUSED_P(ptr), 856 const char *UNUSED_P(end), 857 const ENCODING *UNUSED_P(enc)) 858 { 859 switch (tok) { 860 case XML_TOK_PROLOG_S: 861 return XML_ROLE_ATTLIST_NONE; 862 case XML_TOK_CLOSE_PAREN: 863 state->handler = attlist8; 864 return XML_ROLE_ATTLIST_NONE; 865 case XML_TOK_OR: 866 state->handler = attlist3; 867 return XML_ROLE_ATTLIST_NONE; 868 } 869 return common(state, tok); 870 } 871 872 static int PTRCALL 873 attlist5(PROLOG_STATE *state, 874 int tok, 875 const char *UNUSED_P(ptr), 876 const char *UNUSED_P(end), 877 const ENCODING *UNUSED_P(enc)) 878 { 879 switch (tok) { 880 case XML_TOK_PROLOG_S: 881 return XML_ROLE_ATTLIST_NONE; 882 case XML_TOK_OPEN_PAREN: 883 state->handler = attlist6; 884 return XML_ROLE_ATTLIST_NONE; 885 } 886 return common(state, tok); 887 } 888 889 static int PTRCALL 890 attlist6(PROLOG_STATE *state, 891 int tok, 892 const char *UNUSED_P(ptr), 893 const char *UNUSED_P(end), 894 const ENCODING *UNUSED_P(enc)) 895 { 896 switch (tok) { 897 case XML_TOK_PROLOG_S: 898 return XML_ROLE_ATTLIST_NONE; 899 case XML_TOK_NAME: 900 state->handler = attlist7; 901 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; 902 } 903 return common(state, tok); 904 } 905 906 static int PTRCALL 907 attlist7(PROLOG_STATE *state, 908 int tok, 909 const char *UNUSED_P(ptr), 910 const char *UNUSED_P(end), 911 const ENCODING *UNUSED_P(enc)) 912 { 913 switch (tok) { 914 case XML_TOK_PROLOG_S: 915 return XML_ROLE_ATTLIST_NONE; 916 case XML_TOK_CLOSE_PAREN: 917 state->handler = attlist8; 918 return XML_ROLE_ATTLIST_NONE; 919 case XML_TOK_OR: 920 state->handler = attlist6; 921 return XML_ROLE_ATTLIST_NONE; 922 } 923 return common(state, tok); 924 } 925 926 /* default value */ 927 static int PTRCALL 928 attlist8(PROLOG_STATE *state, 929 int tok, 930 const char *ptr, 931 const char *end, 932 const ENCODING *enc) 933 { 934 switch (tok) { 935 case XML_TOK_PROLOG_S: 936 return XML_ROLE_ATTLIST_NONE; 937 case XML_TOK_POUND_NAME: 938 if (XmlNameMatchesAscii(enc, 939 ptr + MIN_BYTES_PER_CHAR(enc), 940 end, 941 KW_IMPLIED)) { 942 state->handler = attlist1; 943 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; 944 } 945 if (XmlNameMatchesAscii(enc, 946 ptr + MIN_BYTES_PER_CHAR(enc), 947 end, 948 KW_REQUIRED)) { 949 state->handler = attlist1; 950 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; 951 } 952 if (XmlNameMatchesAscii(enc, 953 ptr + MIN_BYTES_PER_CHAR(enc), 954 end, 955 KW_FIXED)) { 956 state->handler = attlist9; 957 return XML_ROLE_ATTLIST_NONE; 958 } 959 break; 960 case XML_TOK_LITERAL: 961 state->handler = attlist1; 962 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; 963 } 964 return common(state, tok); 965 } 966 967 static int PTRCALL 968 attlist9(PROLOG_STATE *state, 969 int tok, 970 const char *UNUSED_P(ptr), 971 const char *UNUSED_P(end), 972 const ENCODING *UNUSED_P(enc)) 973 { 974 switch (tok) { 975 case XML_TOK_PROLOG_S: 976 return XML_ROLE_ATTLIST_NONE; 977 case XML_TOK_LITERAL: 978 state->handler = attlist1; 979 return XML_ROLE_FIXED_ATTRIBUTE_VALUE; 980 } 981 return common(state, tok); 982 } 983 984 static int PTRCALL 985 element0(PROLOG_STATE *state, 986 int tok, 987 const char *UNUSED_P(ptr), 988 const char *UNUSED_P(end), 989 const ENCODING *UNUSED_P(enc)) 990 { 991 switch (tok) { 992 case XML_TOK_PROLOG_S: 993 return XML_ROLE_ELEMENT_NONE; 994 case XML_TOK_NAME: 995 case XML_TOK_PREFIXED_NAME: 996 state->handler = element1; 997 return XML_ROLE_ELEMENT_NAME; 998 } 999 return common(state, tok); 1000 } 1001 1002 static int PTRCALL 1003 element1(PROLOG_STATE *state, 1004 int tok, 1005 const char *ptr, 1006 const char *end, 1007 const ENCODING *enc) 1008 { 1009 switch (tok) { 1010 case XML_TOK_PROLOG_S: 1011 return XML_ROLE_ELEMENT_NONE; 1012 case XML_TOK_NAME: 1013 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { 1014 state->handler = declClose; 1015 state->role_none = XML_ROLE_ELEMENT_NONE; 1016 return XML_ROLE_CONTENT_EMPTY; 1017 } 1018 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { 1019 state->handler = declClose; 1020 state->role_none = XML_ROLE_ELEMENT_NONE; 1021 return XML_ROLE_CONTENT_ANY; 1022 } 1023 break; 1024 case XML_TOK_OPEN_PAREN: 1025 state->handler = element2; 1026 state->level = 1; 1027 return XML_ROLE_GROUP_OPEN; 1028 } 1029 return common(state, tok); 1030 } 1031 1032 static int PTRCALL 1033 element2(PROLOG_STATE *state, 1034 int tok, 1035 const char *ptr, 1036 const char *end, 1037 const ENCODING *enc) 1038 { 1039 switch (tok) { 1040 case XML_TOK_PROLOG_S: 1041 return XML_ROLE_ELEMENT_NONE; 1042 case XML_TOK_POUND_NAME: 1043 if (XmlNameMatchesAscii(enc, 1044 ptr + MIN_BYTES_PER_CHAR(enc), 1045 end, 1046 KW_PCDATA)) { 1047 state->handler = element3; 1048 return XML_ROLE_CONTENT_PCDATA; 1049 } 1050 break; 1051 case XML_TOK_OPEN_PAREN: 1052 state->level = 2; 1053 state->handler = element6; 1054 return XML_ROLE_GROUP_OPEN; 1055 case XML_TOK_NAME: 1056 case XML_TOK_PREFIXED_NAME: 1057 state->handler = element7; 1058 return XML_ROLE_CONTENT_ELEMENT; 1059 case XML_TOK_NAME_QUESTION: 1060 state->handler = element7; 1061 return XML_ROLE_CONTENT_ELEMENT_OPT; 1062 case XML_TOK_NAME_ASTERISK: 1063 state->handler = element7; 1064 return XML_ROLE_CONTENT_ELEMENT_REP; 1065 case XML_TOK_NAME_PLUS: 1066 state->handler = element7; 1067 return XML_ROLE_CONTENT_ELEMENT_PLUS; 1068 } 1069 return common(state, tok); 1070 } 1071 1072 static int PTRCALL 1073 element3(PROLOG_STATE *state, 1074 int tok, 1075 const char *UNUSED_P(ptr), 1076 const char *UNUSED_P(end), 1077 const ENCODING *UNUSED_P(enc)) 1078 { 1079 switch (tok) { 1080 case XML_TOK_PROLOG_S: 1081 return XML_ROLE_ELEMENT_NONE; 1082 case XML_TOK_CLOSE_PAREN: 1083 state->handler = declClose; 1084 state->role_none = XML_ROLE_ELEMENT_NONE; 1085 return XML_ROLE_GROUP_CLOSE; 1086 case XML_TOK_CLOSE_PAREN_ASTERISK: 1087 state->handler = declClose; 1088 state->role_none = XML_ROLE_ELEMENT_NONE; 1089 return XML_ROLE_GROUP_CLOSE_REP; 1090 case XML_TOK_OR: 1091 state->handler = element4; 1092 return XML_ROLE_ELEMENT_NONE; 1093 } 1094 return common(state, tok); 1095 } 1096 1097 static int PTRCALL 1098 element4(PROLOG_STATE *state, 1099 int tok, 1100 const char *UNUSED_P(ptr), 1101 const char *UNUSED_P(end), 1102 const ENCODING *UNUSED_P(enc)) 1103 { 1104 switch (tok) { 1105 case XML_TOK_PROLOG_S: 1106 return XML_ROLE_ELEMENT_NONE; 1107 case XML_TOK_NAME: 1108 case XML_TOK_PREFIXED_NAME: 1109 state->handler = element5; 1110 return XML_ROLE_CONTENT_ELEMENT; 1111 } 1112 return common(state, tok); 1113 } 1114 1115 static int PTRCALL 1116 element5(PROLOG_STATE *state, 1117 int tok, 1118 const char *UNUSED_P(ptr), 1119 const char *UNUSED_P(end), 1120 const ENCODING *UNUSED_P(enc)) 1121 { 1122 switch (tok) { 1123 case XML_TOK_PROLOG_S: 1124 return XML_ROLE_ELEMENT_NONE; 1125 case XML_TOK_CLOSE_PAREN_ASTERISK: 1126 state->handler = declClose; 1127 state->role_none = XML_ROLE_ELEMENT_NONE; 1128 return XML_ROLE_GROUP_CLOSE_REP; 1129 case XML_TOK_OR: 1130 state->handler = element4; 1131 return XML_ROLE_ELEMENT_NONE; 1132 } 1133 return common(state, tok); 1134 } 1135 1136 static int PTRCALL 1137 element6(PROLOG_STATE *state, 1138 int tok, 1139 const char *UNUSED_P(ptr), 1140 const char *UNUSED_P(end), 1141 const ENCODING *UNUSED_P(enc)) 1142 { 1143 switch (tok) { 1144 case XML_TOK_PROLOG_S: 1145 return XML_ROLE_ELEMENT_NONE; 1146 case XML_TOK_OPEN_PAREN: 1147 state->level += 1; 1148 return XML_ROLE_GROUP_OPEN; 1149 case XML_TOK_NAME: 1150 case XML_TOK_PREFIXED_NAME: 1151 state->handler = element7; 1152 return XML_ROLE_CONTENT_ELEMENT; 1153 case XML_TOK_NAME_QUESTION: 1154 state->handler = element7; 1155 return XML_ROLE_CONTENT_ELEMENT_OPT; 1156 case XML_TOK_NAME_ASTERISK: 1157 state->handler = element7; 1158 return XML_ROLE_CONTENT_ELEMENT_REP; 1159 case XML_TOK_NAME_PLUS: 1160 state->handler = element7; 1161 return XML_ROLE_CONTENT_ELEMENT_PLUS; 1162 } 1163 return common(state, tok); 1164 } 1165 1166 static int PTRCALL 1167 element7(PROLOG_STATE *state, 1168 int tok, 1169 const char *UNUSED_P(ptr), 1170 const char *UNUSED_P(end), 1171 const ENCODING *UNUSED_P(enc)) 1172 { 1173 switch (tok) { 1174 case XML_TOK_PROLOG_S: 1175 return XML_ROLE_ELEMENT_NONE; 1176 case XML_TOK_CLOSE_PAREN: 1177 state->level -= 1; 1178 if (state->level == 0) { 1179 state->handler = declClose; 1180 state->role_none = XML_ROLE_ELEMENT_NONE; 1181 } 1182 return XML_ROLE_GROUP_CLOSE; 1183 case XML_TOK_CLOSE_PAREN_ASTERISK: 1184 state->level -= 1; 1185 if (state->level == 0) { 1186 state->handler = declClose; 1187 state->role_none = XML_ROLE_ELEMENT_NONE; 1188 } 1189 return XML_ROLE_GROUP_CLOSE_REP; 1190 case XML_TOK_CLOSE_PAREN_QUESTION: 1191 state->level -= 1; 1192 if (state->level == 0) { 1193 state->handler = declClose; 1194 state->role_none = XML_ROLE_ELEMENT_NONE; 1195 } 1196 return XML_ROLE_GROUP_CLOSE_OPT; 1197 case XML_TOK_CLOSE_PAREN_PLUS: 1198 state->level -= 1; 1199 if (state->level == 0) { 1200 state->handler = declClose; 1201 state->role_none = XML_ROLE_ELEMENT_NONE; 1202 } 1203 return XML_ROLE_GROUP_CLOSE_PLUS; 1204 case XML_TOK_COMMA: 1205 state->handler = element6; 1206 return XML_ROLE_GROUP_SEQUENCE; 1207 case XML_TOK_OR: 1208 state->handler = element6; 1209 return XML_ROLE_GROUP_CHOICE; 1210 } 1211 return common(state, tok); 1212 } 1213 1214 #ifdef XML_DTD 1215 1216 static int PTRCALL 1217 condSect0(PROLOG_STATE *state, 1218 int tok, 1219 const char *ptr, 1220 const char *end, 1221 const ENCODING *enc) 1222 { 1223 switch (tok) { 1224 case XML_TOK_PROLOG_S: 1225 return XML_ROLE_NONE; 1226 case XML_TOK_NAME: 1227 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { 1228 state->handler = condSect1; 1229 return XML_ROLE_NONE; 1230 } 1231 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { 1232 state->handler = condSect2; 1233 return XML_ROLE_NONE; 1234 } 1235 break; 1236 } 1237 return common(state, tok); 1238 } 1239 1240 static int PTRCALL 1241 condSect1(PROLOG_STATE *state, 1242 int tok, 1243 const char *UNUSED_P(ptr), 1244 const char *UNUSED_P(end), 1245 const ENCODING *UNUSED_P(enc)) 1246 { 1247 switch (tok) { 1248 case XML_TOK_PROLOG_S: 1249 return XML_ROLE_NONE; 1250 case XML_TOK_OPEN_BRACKET: 1251 state->handler = externalSubset1; 1252 state->includeLevel += 1; 1253 return XML_ROLE_NONE; 1254 } 1255 return common(state, tok); 1256 } 1257 1258 static int PTRCALL 1259 condSect2(PROLOG_STATE *state, 1260 int tok, 1261 const char *UNUSED_P(ptr), 1262 const char *UNUSED_P(end), 1263 const ENCODING *UNUSED_P(enc)) 1264 { 1265 switch (tok) { 1266 case XML_TOK_PROLOG_S: 1267 return XML_ROLE_NONE; 1268 case XML_TOK_OPEN_BRACKET: 1269 state->handler = externalSubset1; 1270 return XML_ROLE_IGNORE_SECT; 1271 } 1272 return common(state, tok); 1273 } 1274 1275 #endif /* XML_DTD */ 1276 1277 static int PTRCALL 1278 declClose(PROLOG_STATE *state, 1279 int tok, 1280 const char *UNUSED_P(ptr), 1281 const char *UNUSED_P(end), 1282 const ENCODING *UNUSED_P(enc)) 1283 { 1284 switch (tok) { 1285 case XML_TOK_PROLOG_S: 1286 return state->role_none; 1287 case XML_TOK_DECL_CLOSE: 1288 setTopLevel(state); 1289 return state->role_none; 1290 } 1291 return common(state, tok); 1292 } 1293 1294 static int PTRCALL 1295 error(PROLOG_STATE *UNUSED_P(state), 1296 int UNUSED_P(tok), 1297 const char *UNUSED_P(ptr), 1298 const char *UNUSED_P(end), 1299 const ENCODING *UNUSED_P(enc)) 1300 { 1301 return XML_ROLE_NONE; 1302 } 1303 1304 static int FASTCALL 1305 common(PROLOG_STATE *state, int tok) 1306 { 1307 #ifdef XML_DTD 1308 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) 1309 return XML_ROLE_INNER_PARAM_ENTITY_REF; 1310 #endif 1311 state->handler = error; 1312 return XML_ROLE_ERROR; 1313 } 1314 1315 void 1316 XmlPrologStateInit(PROLOG_STATE *state) 1317 { 1318 state->handler = prolog0; 1319 #ifdef XML_DTD 1320 state->documentEntity = 1; 1321 state->includeLevel = 0; 1322 state->inEntityValue = 0; 1323 #endif /* XML_DTD */ 1324 } 1325 1326 #ifdef XML_DTD 1327 1328 void 1329 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) 1330 { 1331 state->handler = externalSubset0; 1332 state->documentEntity = 0; 1333 state->includeLevel = 0; 1334 } 1335 1336 #endif /* XML_DTD */ 1337