1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
12 Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
17 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
18 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
19 Licensed under the MIT license:
20
21 Permission is hereby granted, free of charge, to any person obtaining
22 a copy of this software and associated documentation files (the
23 "Software"), to deal in the Software without restriction, including
24 without limitation the rights to use, copy, modify, merge, publish,
25 distribute, sublicense, and/or sell copies of the Software, and to permit
26 persons to whom the Software is furnished to do so, subject to the
27 following conditions:
28
29 The above copyright notice and this permission notice shall be included
30 in all copies or substantial portions of the Software.
31
32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
38 USE OR OTHER DEALINGS IN THE SOFTWARE.
39 */
40
41 #include "expat_config.h"
42
43 #include <stddef.h>
44
45 #ifdef _WIN32
46 # include "winconfig.h"
47 #endif
48
49 #include "expat_external.h"
50 #include "internal.h"
51 #include "xmlrole.h"
52 #include "ascii.h"
53
/* The role state machine in this file does not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
60
61 static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
62 static const char KW_ATTLIST[]
63 = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
64 static const char KW_CDATA[]
65 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
66 static const char KW_DOCTYPE[]
67 = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
68 static const char KW_ELEMENT[]
69 = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
70 static const char KW_EMPTY[]
71 = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
72 static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
73 ASCII_I, ASCII_E, ASCII_S, '\0'};
74 static const char KW_ENTITY[]
75 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
76 static const char KW_FIXED[]
77 = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
78 static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
79 static const char KW_IDREF[]
80 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
81 static const char KW_IDREFS[]
82 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
83 #ifdef XML_DTD
84 static const char KW_IGNORE[]
85 = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
86 #endif
87 static const char KW_IMPLIED[]
88 = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
89 #ifdef XML_DTD
90 static const char KW_INCLUDE[]
91 = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
92 #endif
93 static const char KW_NDATA[]
94 = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
95 static const char KW_NMTOKEN[]
96 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
97 static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
98 ASCII_E, ASCII_N, ASCII_S, '\0'};
99 static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
100 ASCII_I, ASCII_O, ASCII_N, '\0'};
101 static const char KW_PCDATA[]
102 = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
103 static const char KW_PUBLIC[]
104 = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
105 static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
106 ASCII_R, ASCII_E, ASCII_D, '\0'};
107 static const char KW_SYSTEM[]
108 = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
109
/* Unless the build already supplies MIN_BYTES_PER_CHAR, read the value from
 * the encoding's minBytesPerChar field.
 */
#if ! defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* setTopLevel(state) re-installs the top-level declaration handler; the
 * handlers in this file use it when a declaration is closed.  With XML_DTD
 * the handler is internalSubset when state->documentEntity is set and
 * externalSubset1 otherwise; without XML_DTD it is always internalSubset.
 * The macro is an expression whose value is the handler just assigned.
 */
#if defined(XML_DTD)
#  define setTopLevel(state)                                                  \
    ((state)->handler                                                         \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#  define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
121
122 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
123 const char *ptr, const char *end,
124 const ENCODING *enc);
125
126 static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
127 doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
128 entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
129 notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
130 attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
131 attlist9, element0, element1, element2, element3, element4, element5,
132 element6, element7,
133 #ifdef XML_DTD
134 externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
135 #endif /* XML_DTD */
136 declClose, error;
137
138 static int FASTCALL common(PROLOG_STATE *state, int tok);
139
140 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)141 prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
142 const ENCODING *enc) {
143 switch (tok) {
144 case XML_TOK_PROLOG_S:
145 state->handler = prolog1;
146 return XML_ROLE_NONE;
147 case XML_TOK_XML_DECL:
148 state->handler = prolog1;
149 return XML_ROLE_XML_DECL;
150 case XML_TOK_PI:
151 state->handler = prolog1;
152 return XML_ROLE_PI;
153 case XML_TOK_COMMENT:
154 state->handler = prolog1;
155 return XML_ROLE_COMMENT;
156 case XML_TOK_BOM:
157 return XML_ROLE_NONE;
158 case XML_TOK_DECL_OPEN:
159 if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
160 KW_DOCTYPE))
161 break;
162 state->handler = doctype0;
163 return XML_ROLE_DOCTYPE_NONE;
164 case XML_TOK_INSTANCE_START:
165 state->handler = error;
166 return XML_ROLE_INSTANCE_START;
167 }
168 return common(state, tok);
169 }
170
171 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)172 prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
173 const ENCODING *enc) {
174 switch (tok) {
175 case XML_TOK_PROLOG_S:
176 return XML_ROLE_NONE;
177 case XML_TOK_PI:
178 return XML_ROLE_PI;
179 case XML_TOK_COMMENT:
180 return XML_ROLE_COMMENT;
181 case XML_TOK_BOM:
182 /* This case can never arise. To reach this role function, the
183 * parse must have passed through prolog0 and therefore have had
184 * some form of input, even if only a space. At that point, a
185 * byte order mark is no longer a valid character (though
186 * technically it should be interpreted as a non-breaking space),
187 * so will be rejected by the tokenizing stages.
188 */
189 return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
190 case XML_TOK_DECL_OPEN:
191 if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
192 KW_DOCTYPE))
193 break;
194 state->handler = doctype0;
195 return XML_ROLE_DOCTYPE_NONE;
196 case XML_TOK_INSTANCE_START:
197 state->handler = error;
198 return XML_ROLE_INSTANCE_START;
199 }
200 return common(state, tok);
201 }
202
203 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)204 prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
205 const ENCODING *enc) {
206 UNUSED_P(ptr);
207 UNUSED_P(end);
208 UNUSED_P(enc);
209 switch (tok) {
210 case XML_TOK_PROLOG_S:
211 return XML_ROLE_NONE;
212 case XML_TOK_PI:
213 return XML_ROLE_PI;
214 case XML_TOK_COMMENT:
215 return XML_ROLE_COMMENT;
216 case XML_TOK_INSTANCE_START:
217 state->handler = error;
218 return XML_ROLE_INSTANCE_START;
219 }
220 return common(state, tok);
221 }
222
223 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)224 doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
225 const ENCODING *enc) {
226 UNUSED_P(ptr);
227 UNUSED_P(end);
228 UNUSED_P(enc);
229 switch (tok) {
230 case XML_TOK_PROLOG_S:
231 return XML_ROLE_DOCTYPE_NONE;
232 case XML_TOK_NAME:
233 case XML_TOK_PREFIXED_NAME:
234 state->handler = doctype1;
235 return XML_ROLE_DOCTYPE_NAME;
236 }
237 return common(state, tok);
238 }
239
240 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)241 doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
242 const ENCODING *enc) {
243 switch (tok) {
244 case XML_TOK_PROLOG_S:
245 return XML_ROLE_DOCTYPE_NONE;
246 case XML_TOK_OPEN_BRACKET:
247 state->handler = internalSubset;
248 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
249 case XML_TOK_DECL_CLOSE:
250 state->handler = prolog2;
251 return XML_ROLE_DOCTYPE_CLOSE;
252 case XML_TOK_NAME:
253 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
254 state->handler = doctype3;
255 return XML_ROLE_DOCTYPE_NONE;
256 }
257 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
258 state->handler = doctype2;
259 return XML_ROLE_DOCTYPE_NONE;
260 }
261 break;
262 }
263 return common(state, tok);
264 }
265
266 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)267 doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
268 const ENCODING *enc) {
269 UNUSED_P(ptr);
270 UNUSED_P(end);
271 UNUSED_P(enc);
272 switch (tok) {
273 case XML_TOK_PROLOG_S:
274 return XML_ROLE_DOCTYPE_NONE;
275 case XML_TOK_LITERAL:
276 state->handler = doctype3;
277 return XML_ROLE_DOCTYPE_PUBLIC_ID;
278 }
279 return common(state, tok);
280 }
281
282 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)283 doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
284 const ENCODING *enc) {
285 UNUSED_P(ptr);
286 UNUSED_P(end);
287 UNUSED_P(enc);
288 switch (tok) {
289 case XML_TOK_PROLOG_S:
290 return XML_ROLE_DOCTYPE_NONE;
291 case XML_TOK_LITERAL:
292 state->handler = doctype4;
293 return XML_ROLE_DOCTYPE_SYSTEM_ID;
294 }
295 return common(state, tok);
296 }
297
298 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)299 doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
300 const ENCODING *enc) {
301 UNUSED_P(ptr);
302 UNUSED_P(end);
303 UNUSED_P(enc);
304 switch (tok) {
305 case XML_TOK_PROLOG_S:
306 return XML_ROLE_DOCTYPE_NONE;
307 case XML_TOK_OPEN_BRACKET:
308 state->handler = internalSubset;
309 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
310 case XML_TOK_DECL_CLOSE:
311 state->handler = prolog2;
312 return XML_ROLE_DOCTYPE_CLOSE;
313 }
314 return common(state, tok);
315 }
316
317 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)318 doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
319 const ENCODING *enc) {
320 UNUSED_P(ptr);
321 UNUSED_P(end);
322 UNUSED_P(enc);
323 switch (tok) {
324 case XML_TOK_PROLOG_S:
325 return XML_ROLE_DOCTYPE_NONE;
326 case XML_TOK_DECL_CLOSE:
327 state->handler = prolog2;
328 return XML_ROLE_DOCTYPE_CLOSE;
329 }
330 return common(state, tok);
331 }
332
333 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)334 internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
335 const ENCODING *enc) {
336 switch (tok) {
337 case XML_TOK_PROLOG_S:
338 return XML_ROLE_NONE;
339 case XML_TOK_DECL_OPEN:
340 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
341 KW_ENTITY)) {
342 state->handler = entity0;
343 return XML_ROLE_ENTITY_NONE;
344 }
345 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
346 KW_ATTLIST)) {
347 state->handler = attlist0;
348 return XML_ROLE_ATTLIST_NONE;
349 }
350 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
351 KW_ELEMENT)) {
352 state->handler = element0;
353 return XML_ROLE_ELEMENT_NONE;
354 }
355 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
356 KW_NOTATION)) {
357 state->handler = notation0;
358 return XML_ROLE_NOTATION_NONE;
359 }
360 break;
361 case XML_TOK_PI:
362 return XML_ROLE_PI;
363 case XML_TOK_COMMENT:
364 return XML_ROLE_COMMENT;
365 case XML_TOK_PARAM_ENTITY_REF:
366 return XML_ROLE_PARAM_ENTITY_REF;
367 case XML_TOK_CLOSE_BRACKET:
368 state->handler = doctype5;
369 return XML_ROLE_DOCTYPE_NONE;
370 case XML_TOK_NONE:
371 return XML_ROLE_NONE;
372 }
373 return common(state, tok);
374 }
375
376 #ifdef XML_DTD
377
378 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)379 externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
380 const ENCODING *enc) {
381 state->handler = externalSubset1;
382 if (tok == XML_TOK_XML_DECL)
383 return XML_ROLE_TEXT_DECL;
384 return externalSubset1(state, tok, ptr, end, enc);
385 }
386
387 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)388 externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
389 const ENCODING *enc) {
390 switch (tok) {
391 case XML_TOK_COND_SECT_OPEN:
392 state->handler = condSect0;
393 return XML_ROLE_NONE;
394 case XML_TOK_COND_SECT_CLOSE:
395 if (state->includeLevel == 0)
396 break;
397 state->includeLevel -= 1;
398 return XML_ROLE_NONE;
399 case XML_TOK_PROLOG_S:
400 return XML_ROLE_NONE;
401 case XML_TOK_CLOSE_BRACKET:
402 break;
403 case XML_TOK_NONE:
404 if (state->includeLevel)
405 break;
406 return XML_ROLE_NONE;
407 default:
408 return internalSubset(state, tok, ptr, end, enc);
409 }
410 return common(state, tok);
411 }
412
413 #endif /* XML_DTD */
414
415 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)416 entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
417 const ENCODING *enc) {
418 UNUSED_P(ptr);
419 UNUSED_P(end);
420 UNUSED_P(enc);
421 switch (tok) {
422 case XML_TOK_PROLOG_S:
423 return XML_ROLE_ENTITY_NONE;
424 case XML_TOK_PERCENT:
425 state->handler = entity1;
426 return XML_ROLE_ENTITY_NONE;
427 case XML_TOK_NAME:
428 state->handler = entity2;
429 return XML_ROLE_GENERAL_ENTITY_NAME;
430 }
431 return common(state, tok);
432 }
433
434 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)435 entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
436 const ENCODING *enc) {
437 UNUSED_P(ptr);
438 UNUSED_P(end);
439 UNUSED_P(enc);
440 switch (tok) {
441 case XML_TOK_PROLOG_S:
442 return XML_ROLE_ENTITY_NONE;
443 case XML_TOK_NAME:
444 state->handler = entity7;
445 return XML_ROLE_PARAM_ENTITY_NAME;
446 }
447 return common(state, tok);
448 }
449
450 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)451 entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
452 const ENCODING *enc) {
453 switch (tok) {
454 case XML_TOK_PROLOG_S:
455 return XML_ROLE_ENTITY_NONE;
456 case XML_TOK_NAME:
457 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
458 state->handler = entity4;
459 return XML_ROLE_ENTITY_NONE;
460 }
461 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
462 state->handler = entity3;
463 return XML_ROLE_ENTITY_NONE;
464 }
465 break;
466 case XML_TOK_LITERAL:
467 state->handler = declClose;
468 state->role_none = XML_ROLE_ENTITY_NONE;
469 return XML_ROLE_ENTITY_VALUE;
470 }
471 return common(state, tok);
472 }
473
474 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)475 entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
476 const ENCODING *enc) {
477 UNUSED_P(ptr);
478 UNUSED_P(end);
479 UNUSED_P(enc);
480 switch (tok) {
481 case XML_TOK_PROLOG_S:
482 return XML_ROLE_ENTITY_NONE;
483 case XML_TOK_LITERAL:
484 state->handler = entity4;
485 return XML_ROLE_ENTITY_PUBLIC_ID;
486 }
487 return common(state, tok);
488 }
489
490 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)491 entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
492 const ENCODING *enc) {
493 UNUSED_P(ptr);
494 UNUSED_P(end);
495 UNUSED_P(enc);
496 switch (tok) {
497 case XML_TOK_PROLOG_S:
498 return XML_ROLE_ENTITY_NONE;
499 case XML_TOK_LITERAL:
500 state->handler = entity5;
501 return XML_ROLE_ENTITY_SYSTEM_ID;
502 }
503 return common(state, tok);
504 }
505
506 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)507 entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
508 const ENCODING *enc) {
509 switch (tok) {
510 case XML_TOK_PROLOG_S:
511 return XML_ROLE_ENTITY_NONE;
512 case XML_TOK_DECL_CLOSE:
513 setTopLevel(state);
514 return XML_ROLE_ENTITY_COMPLETE;
515 case XML_TOK_NAME:
516 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
517 state->handler = entity6;
518 return XML_ROLE_ENTITY_NONE;
519 }
520 break;
521 }
522 return common(state, tok);
523 }
524
525 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)526 entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
527 const ENCODING *enc) {
528 UNUSED_P(ptr);
529 UNUSED_P(end);
530 UNUSED_P(enc);
531 switch (tok) {
532 case XML_TOK_PROLOG_S:
533 return XML_ROLE_ENTITY_NONE;
534 case XML_TOK_NAME:
535 state->handler = declClose;
536 state->role_none = XML_ROLE_ENTITY_NONE;
537 return XML_ROLE_ENTITY_NOTATION_NAME;
538 }
539 return common(state, tok);
540 }
541
542 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)543 entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
544 const ENCODING *enc) {
545 switch (tok) {
546 case XML_TOK_PROLOG_S:
547 return XML_ROLE_ENTITY_NONE;
548 case XML_TOK_NAME:
549 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
550 state->handler = entity9;
551 return XML_ROLE_ENTITY_NONE;
552 }
553 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
554 state->handler = entity8;
555 return XML_ROLE_ENTITY_NONE;
556 }
557 break;
558 case XML_TOK_LITERAL:
559 state->handler = declClose;
560 state->role_none = XML_ROLE_ENTITY_NONE;
561 return XML_ROLE_ENTITY_VALUE;
562 }
563 return common(state, tok);
564 }
565
566 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)567 entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
568 const ENCODING *enc) {
569 UNUSED_P(ptr);
570 UNUSED_P(end);
571 UNUSED_P(enc);
572 switch (tok) {
573 case XML_TOK_PROLOG_S:
574 return XML_ROLE_ENTITY_NONE;
575 case XML_TOK_LITERAL:
576 state->handler = entity9;
577 return XML_ROLE_ENTITY_PUBLIC_ID;
578 }
579 return common(state, tok);
580 }
581
582 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)583 entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
584 const ENCODING *enc) {
585 UNUSED_P(ptr);
586 UNUSED_P(end);
587 UNUSED_P(enc);
588 switch (tok) {
589 case XML_TOK_PROLOG_S:
590 return XML_ROLE_ENTITY_NONE;
591 case XML_TOK_LITERAL:
592 state->handler = entity10;
593 return XML_ROLE_ENTITY_SYSTEM_ID;
594 }
595 return common(state, tok);
596 }
597
598 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)599 entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
600 const ENCODING *enc) {
601 UNUSED_P(ptr);
602 UNUSED_P(end);
603 UNUSED_P(enc);
604 switch (tok) {
605 case XML_TOK_PROLOG_S:
606 return XML_ROLE_ENTITY_NONE;
607 case XML_TOK_DECL_CLOSE:
608 setTopLevel(state);
609 return XML_ROLE_ENTITY_COMPLETE;
610 }
611 return common(state, tok);
612 }
613
614 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)615 notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
616 const ENCODING *enc) {
617 UNUSED_P(ptr);
618 UNUSED_P(end);
619 UNUSED_P(enc);
620 switch (tok) {
621 case XML_TOK_PROLOG_S:
622 return XML_ROLE_NOTATION_NONE;
623 case XML_TOK_NAME:
624 state->handler = notation1;
625 return XML_ROLE_NOTATION_NAME;
626 }
627 return common(state, tok);
628 }
629
630 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)631 notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
632 const ENCODING *enc) {
633 switch (tok) {
634 case XML_TOK_PROLOG_S:
635 return XML_ROLE_NOTATION_NONE;
636 case XML_TOK_NAME:
637 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
638 state->handler = notation3;
639 return XML_ROLE_NOTATION_NONE;
640 }
641 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
642 state->handler = notation2;
643 return XML_ROLE_NOTATION_NONE;
644 }
645 break;
646 }
647 return common(state, tok);
648 }
649
650 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)651 notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
652 const ENCODING *enc) {
653 UNUSED_P(ptr);
654 UNUSED_P(end);
655 UNUSED_P(enc);
656 switch (tok) {
657 case XML_TOK_PROLOG_S:
658 return XML_ROLE_NOTATION_NONE;
659 case XML_TOK_LITERAL:
660 state->handler = notation4;
661 return XML_ROLE_NOTATION_PUBLIC_ID;
662 }
663 return common(state, tok);
664 }
665
666 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)667 notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
668 const ENCODING *enc) {
669 UNUSED_P(ptr);
670 UNUSED_P(end);
671 UNUSED_P(enc);
672 switch (tok) {
673 case XML_TOK_PROLOG_S:
674 return XML_ROLE_NOTATION_NONE;
675 case XML_TOK_LITERAL:
676 state->handler = declClose;
677 state->role_none = XML_ROLE_NOTATION_NONE;
678 return XML_ROLE_NOTATION_SYSTEM_ID;
679 }
680 return common(state, tok);
681 }
682
683 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)684 notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
685 const ENCODING *enc) {
686 UNUSED_P(ptr);
687 UNUSED_P(end);
688 UNUSED_P(enc);
689 switch (tok) {
690 case XML_TOK_PROLOG_S:
691 return XML_ROLE_NOTATION_NONE;
692 case XML_TOK_LITERAL:
693 state->handler = declClose;
694 state->role_none = XML_ROLE_NOTATION_NONE;
695 return XML_ROLE_NOTATION_SYSTEM_ID;
696 case XML_TOK_DECL_CLOSE:
697 setTopLevel(state);
698 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
699 }
700 return common(state, tok);
701 }
702
703 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)704 attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
705 const ENCODING *enc) {
706 UNUSED_P(ptr);
707 UNUSED_P(end);
708 UNUSED_P(enc);
709 switch (tok) {
710 case XML_TOK_PROLOG_S:
711 return XML_ROLE_ATTLIST_NONE;
712 case XML_TOK_NAME:
713 case XML_TOK_PREFIXED_NAME:
714 state->handler = attlist1;
715 return XML_ROLE_ATTLIST_ELEMENT_NAME;
716 }
717 return common(state, tok);
718 }
719
720 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)721 attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
722 const ENCODING *enc) {
723 UNUSED_P(ptr);
724 UNUSED_P(end);
725 UNUSED_P(enc);
726 switch (tok) {
727 case XML_TOK_PROLOG_S:
728 return XML_ROLE_ATTLIST_NONE;
729 case XML_TOK_DECL_CLOSE:
730 setTopLevel(state);
731 return XML_ROLE_ATTLIST_NONE;
732 case XML_TOK_NAME:
733 case XML_TOK_PREFIXED_NAME:
734 state->handler = attlist2;
735 return XML_ROLE_ATTRIBUTE_NAME;
736 }
737 return common(state, tok);
738 }
739
740 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)741 attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
742 const ENCODING *enc) {
743 switch (tok) {
744 case XML_TOK_PROLOG_S:
745 return XML_ROLE_ATTLIST_NONE;
746 case XML_TOK_NAME: {
747 static const char *const types[] = {
748 KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS,
749 KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
750 };
751 int i;
752 for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
753 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
754 state->handler = attlist8;
755 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
756 }
757 }
758 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
759 state->handler = attlist5;
760 return XML_ROLE_ATTLIST_NONE;
761 }
762 break;
763 case XML_TOK_OPEN_PAREN:
764 state->handler = attlist3;
765 return XML_ROLE_ATTLIST_NONE;
766 }
767 return common(state, tok);
768 }
769
770 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)771 attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
772 const ENCODING *enc) {
773 UNUSED_P(ptr);
774 UNUSED_P(end);
775 UNUSED_P(enc);
776 switch (tok) {
777 case XML_TOK_PROLOG_S:
778 return XML_ROLE_ATTLIST_NONE;
779 case XML_TOK_NMTOKEN:
780 case XML_TOK_NAME:
781 case XML_TOK_PREFIXED_NAME:
782 state->handler = attlist4;
783 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
784 }
785 return common(state, tok);
786 }
787
788 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)789 attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
790 const ENCODING *enc) {
791 UNUSED_P(ptr);
792 UNUSED_P(end);
793 UNUSED_P(enc);
794 switch (tok) {
795 case XML_TOK_PROLOG_S:
796 return XML_ROLE_ATTLIST_NONE;
797 case XML_TOK_CLOSE_PAREN:
798 state->handler = attlist8;
799 return XML_ROLE_ATTLIST_NONE;
800 case XML_TOK_OR:
801 state->handler = attlist3;
802 return XML_ROLE_ATTLIST_NONE;
803 }
804 return common(state, tok);
805 }
806
807 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)808 attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
809 const ENCODING *enc) {
810 UNUSED_P(ptr);
811 UNUSED_P(end);
812 UNUSED_P(enc);
813 switch (tok) {
814 case XML_TOK_PROLOG_S:
815 return XML_ROLE_ATTLIST_NONE;
816 case XML_TOK_OPEN_PAREN:
817 state->handler = attlist6;
818 return XML_ROLE_ATTLIST_NONE;
819 }
820 return common(state, tok);
821 }
822
823 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)824 attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
825 const ENCODING *enc) {
826 UNUSED_P(ptr);
827 UNUSED_P(end);
828 UNUSED_P(enc);
829 switch (tok) {
830 case XML_TOK_PROLOG_S:
831 return XML_ROLE_ATTLIST_NONE;
832 case XML_TOK_NAME:
833 state->handler = attlist7;
834 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
835 }
836 return common(state, tok);
837 }
838
839 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)840 attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
841 const ENCODING *enc) {
842 UNUSED_P(ptr);
843 UNUSED_P(end);
844 UNUSED_P(enc);
845 switch (tok) {
846 case XML_TOK_PROLOG_S:
847 return XML_ROLE_ATTLIST_NONE;
848 case XML_TOK_CLOSE_PAREN:
849 state->handler = attlist8;
850 return XML_ROLE_ATTLIST_NONE;
851 case XML_TOK_OR:
852 state->handler = attlist6;
853 return XML_ROLE_ATTLIST_NONE;
854 }
855 return common(state, tok);
856 }
857
858 /* default value */
859 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)860 attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
861 const ENCODING *enc) {
862 switch (tok) {
863 case XML_TOK_PROLOG_S:
864 return XML_ROLE_ATTLIST_NONE;
865 case XML_TOK_POUND_NAME:
866 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
867 KW_IMPLIED)) {
868 state->handler = attlist1;
869 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
870 }
871 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
872 KW_REQUIRED)) {
873 state->handler = attlist1;
874 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
875 }
876 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
877 KW_FIXED)) {
878 state->handler = attlist9;
879 return XML_ROLE_ATTLIST_NONE;
880 }
881 break;
882 case XML_TOK_LITERAL:
883 state->handler = attlist1;
884 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
885 }
886 return common(state, tok);
887 }
888
889 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)890 attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
891 const ENCODING *enc) {
892 UNUSED_P(ptr);
893 UNUSED_P(end);
894 UNUSED_P(enc);
895 switch (tok) {
896 case XML_TOK_PROLOG_S:
897 return XML_ROLE_ATTLIST_NONE;
898 case XML_TOK_LITERAL:
899 state->handler = attlist1;
900 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
901 }
902 return common(state, tok);
903 }
904
905 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)906 element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
907 const ENCODING *enc) {
908 UNUSED_P(ptr);
909 UNUSED_P(end);
910 UNUSED_P(enc);
911 switch (tok) {
912 case XML_TOK_PROLOG_S:
913 return XML_ROLE_ELEMENT_NONE;
914 case XML_TOK_NAME:
915 case XML_TOK_PREFIXED_NAME:
916 state->handler = element1;
917 return XML_ROLE_ELEMENT_NAME;
918 }
919 return common(state, tok);
920 }
921
922 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)923 element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
924 const ENCODING *enc) {
925 switch (tok) {
926 case XML_TOK_PROLOG_S:
927 return XML_ROLE_ELEMENT_NONE;
928 case XML_TOK_NAME:
929 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
930 state->handler = declClose;
931 state->role_none = XML_ROLE_ELEMENT_NONE;
932 return XML_ROLE_CONTENT_EMPTY;
933 }
934 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
935 state->handler = declClose;
936 state->role_none = XML_ROLE_ELEMENT_NONE;
937 return XML_ROLE_CONTENT_ANY;
938 }
939 break;
940 case XML_TOK_OPEN_PAREN:
941 state->handler = element2;
942 state->level = 1;
943 return XML_ROLE_GROUP_OPEN;
944 }
945 return common(state, tok);
946 }
947
948 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)949 element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
950 const ENCODING *enc) {
951 switch (tok) {
952 case XML_TOK_PROLOG_S:
953 return XML_ROLE_ELEMENT_NONE;
954 case XML_TOK_POUND_NAME:
955 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
956 KW_PCDATA)) {
957 state->handler = element3;
958 return XML_ROLE_CONTENT_PCDATA;
959 }
960 break;
961 case XML_TOK_OPEN_PAREN:
962 state->level = 2;
963 state->handler = element6;
964 return XML_ROLE_GROUP_OPEN;
965 case XML_TOK_NAME:
966 case XML_TOK_PREFIXED_NAME:
967 state->handler = element7;
968 return XML_ROLE_CONTENT_ELEMENT;
969 case XML_TOK_NAME_QUESTION:
970 state->handler = element7;
971 return XML_ROLE_CONTENT_ELEMENT_OPT;
972 case XML_TOK_NAME_ASTERISK:
973 state->handler = element7;
974 return XML_ROLE_CONTENT_ELEMENT_REP;
975 case XML_TOK_NAME_PLUS:
976 state->handler = element7;
977 return XML_ROLE_CONTENT_ELEMENT_PLUS;
978 }
979 return common(state, tok);
980 }
981
982 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)983 element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
984 const ENCODING *enc) {
985 UNUSED_P(ptr);
986 UNUSED_P(end);
987 UNUSED_P(enc);
988 switch (tok) {
989 case XML_TOK_PROLOG_S:
990 return XML_ROLE_ELEMENT_NONE;
991 case XML_TOK_CLOSE_PAREN:
992 state->handler = declClose;
993 state->role_none = XML_ROLE_ELEMENT_NONE;
994 return XML_ROLE_GROUP_CLOSE;
995 case XML_TOK_CLOSE_PAREN_ASTERISK:
996 state->handler = declClose;
997 state->role_none = XML_ROLE_ELEMENT_NONE;
998 return XML_ROLE_GROUP_CLOSE_REP;
999 case XML_TOK_OR:
1000 state->handler = element4;
1001 return XML_ROLE_ELEMENT_NONE;
1002 }
1003 return common(state, tok);
1004 }
1005
1006 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1007 element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1008 const ENCODING *enc) {
1009 UNUSED_P(ptr);
1010 UNUSED_P(end);
1011 UNUSED_P(enc);
1012 switch (tok) {
1013 case XML_TOK_PROLOG_S:
1014 return XML_ROLE_ELEMENT_NONE;
1015 case XML_TOK_NAME:
1016 case XML_TOK_PREFIXED_NAME:
1017 state->handler = element5;
1018 return XML_ROLE_CONTENT_ELEMENT;
1019 }
1020 return common(state, tok);
1021 }
1022
1023 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1024 element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1025 const ENCODING *enc) {
1026 UNUSED_P(ptr);
1027 UNUSED_P(end);
1028 UNUSED_P(enc);
1029 switch (tok) {
1030 case XML_TOK_PROLOG_S:
1031 return XML_ROLE_ELEMENT_NONE;
1032 case XML_TOK_CLOSE_PAREN_ASTERISK:
1033 state->handler = declClose;
1034 state->role_none = XML_ROLE_ELEMENT_NONE;
1035 return XML_ROLE_GROUP_CLOSE_REP;
1036 case XML_TOK_OR:
1037 state->handler = element4;
1038 return XML_ROLE_ELEMENT_NONE;
1039 }
1040 return common(state, tok);
1041 }
1042
1043 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1044 element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1045 const ENCODING *enc) {
1046 UNUSED_P(ptr);
1047 UNUSED_P(end);
1048 UNUSED_P(enc);
1049 switch (tok) {
1050 case XML_TOK_PROLOG_S:
1051 return XML_ROLE_ELEMENT_NONE;
1052 case XML_TOK_OPEN_PAREN:
1053 state->level += 1;
1054 return XML_ROLE_GROUP_OPEN;
1055 case XML_TOK_NAME:
1056 case XML_TOK_PREFIXED_NAME:
1057 state->handler = element7;
1058 return XML_ROLE_CONTENT_ELEMENT;
1059 case XML_TOK_NAME_QUESTION:
1060 state->handler = element7;
1061 return XML_ROLE_CONTENT_ELEMENT_OPT;
1062 case XML_TOK_NAME_ASTERISK:
1063 state->handler = element7;
1064 return XML_ROLE_CONTENT_ELEMENT_REP;
1065 case XML_TOK_NAME_PLUS:
1066 state->handler = element7;
1067 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1068 }
1069 return common(state, tok);
1070 }
1071
1072 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1073 element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1074 const ENCODING *enc) {
1075 UNUSED_P(ptr);
1076 UNUSED_P(end);
1077 UNUSED_P(enc);
1078 switch (tok) {
1079 case XML_TOK_PROLOG_S:
1080 return XML_ROLE_ELEMENT_NONE;
1081 case XML_TOK_CLOSE_PAREN:
1082 state->level -= 1;
1083 if (state->level == 0) {
1084 state->handler = declClose;
1085 state->role_none = XML_ROLE_ELEMENT_NONE;
1086 }
1087 return XML_ROLE_GROUP_CLOSE;
1088 case XML_TOK_CLOSE_PAREN_ASTERISK:
1089 state->level -= 1;
1090 if (state->level == 0) {
1091 state->handler = declClose;
1092 state->role_none = XML_ROLE_ELEMENT_NONE;
1093 }
1094 return XML_ROLE_GROUP_CLOSE_REP;
1095 case XML_TOK_CLOSE_PAREN_QUESTION:
1096 state->level -= 1;
1097 if (state->level == 0) {
1098 state->handler = declClose;
1099 state->role_none = XML_ROLE_ELEMENT_NONE;
1100 }
1101 return XML_ROLE_GROUP_CLOSE_OPT;
1102 case XML_TOK_CLOSE_PAREN_PLUS:
1103 state->level -= 1;
1104 if (state->level == 0) {
1105 state->handler = declClose;
1106 state->role_none = XML_ROLE_ELEMENT_NONE;
1107 }
1108 return XML_ROLE_GROUP_CLOSE_PLUS;
1109 case XML_TOK_COMMA:
1110 state->handler = element6;
1111 return XML_ROLE_GROUP_SEQUENCE;
1112 case XML_TOK_OR:
1113 state->handler = element6;
1114 return XML_ROLE_GROUP_CHOICE;
1115 }
1116 return common(state, tok);
1117 }
1118
1119 #ifdef XML_DTD
1120
1121 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1122 condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1123 const ENCODING *enc) {
1124 switch (tok) {
1125 case XML_TOK_PROLOG_S:
1126 return XML_ROLE_NONE;
1127 case XML_TOK_NAME:
1128 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1129 state->handler = condSect1;
1130 return XML_ROLE_NONE;
1131 }
1132 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1133 state->handler = condSect2;
1134 return XML_ROLE_NONE;
1135 }
1136 break;
1137 }
1138 return common(state, tok);
1139 }
1140
1141 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1142 condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1143 const ENCODING *enc) {
1144 UNUSED_P(ptr);
1145 UNUSED_P(end);
1146 UNUSED_P(enc);
1147 switch (tok) {
1148 case XML_TOK_PROLOG_S:
1149 return XML_ROLE_NONE;
1150 case XML_TOK_OPEN_BRACKET:
1151 state->handler = externalSubset1;
1152 state->includeLevel += 1;
1153 return XML_ROLE_NONE;
1154 }
1155 return common(state, tok);
1156 }
1157
1158 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1159 condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1160 const ENCODING *enc) {
1161 UNUSED_P(ptr);
1162 UNUSED_P(end);
1163 UNUSED_P(enc);
1164 switch (tok) {
1165 case XML_TOK_PROLOG_S:
1166 return XML_ROLE_NONE;
1167 case XML_TOK_OPEN_BRACKET:
1168 state->handler = externalSubset1;
1169 return XML_ROLE_IGNORE_SECT;
1170 }
1171 return common(state, tok);
1172 }
1173
1174 #endif /* XML_DTD */
1175
1176 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1177 declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1178 const ENCODING *enc) {
1179 UNUSED_P(ptr);
1180 UNUSED_P(end);
1181 UNUSED_P(enc);
1182 switch (tok) {
1183 case XML_TOK_PROLOG_S:
1184 return state->role_none;
1185 case XML_TOK_DECL_CLOSE:
1186 setTopLevel(state);
1187 return state->role_none;
1188 }
1189 return common(state, tok);
1190 }
1191
1192 /* This function will only be invoked if the internal logic of the
1193 * parser has broken down. It is used in two cases:
1194 *
1195 * 1: When the XML prolog has been finished. At this point the
1196 * processor (the parser level above these role handlers) should
1197 * switch from prologProcessor to contentProcessor and reinitialise
1198 * the handler function.
1199 *
1200 * 2: When an error has been detected (via common() below). At this
1201 * point again the processor should be switched to errorProcessor,
1202 * which will never call a handler.
1203 *
1204 * The result of this is that error() can only be called if the
1205 * processor switch failed to happen, which is an internal error and
1206 * therefore we shouldn't be able to provoke it simply by using the
1207 * library. It is a necessary backstop, however, so we merely exclude
1208 * it from the coverage statistics.
1209 *
1210 * LCOV_EXCL_START
1211 */
1212 static int PTRCALL
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1213 error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1214 const ENCODING *enc) {
1215 UNUSED_P(state);
1216 UNUSED_P(tok);
1217 UNUSED_P(ptr);
1218 UNUSED_P(end);
1219 UNUSED_P(enc);
1220 return XML_ROLE_NONE;
1221 }
1222 /* LCOV_EXCL_STOP */
1223
1224 static int FASTCALL
common(PROLOG_STATE * state,int tok)1225 common(PROLOG_STATE *state, int tok) {
1226 #ifdef XML_DTD
1227 if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1228 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1229 #else
1230 UNUSED_P(tok);
1231 #endif
1232 state->handler = error;
1233 return XML_ROLE_ERROR;
1234 }
1235
1236 void
XmlPrologStateInit(PROLOG_STATE * state)1237 XmlPrologStateInit(PROLOG_STATE *state) {
1238 state->handler = prolog0;
1239 #ifdef XML_DTD
1240 state->documentEntity = 1;
1241 state->includeLevel = 0;
1242 state->inEntityValue = 0;
1243 #endif /* XML_DTD */
1244 }
1245
1246 #ifdef XML_DTD
1247
1248 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1249 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1250 state->handler = externalSubset0;
1251 state->documentEntity = 0;
1252 state->includeLevel = 0;
1253 }
1254
1255 #endif /* XML_DTD */
1256