xref: /freebsd/contrib/expat/lib/xmlrole.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2    See the file COPYING for copying permission.
3 */
4 
5 #include <stddef.h>
6 
7 #ifdef COMPILED_FROM_DSP
8 #include "winconfig.h"
9 #elif defined(MACOS_CLASSIC)
10 #include "macconfig.h"
11 #elif defined(__amigaos__)
12 #include "amigaconfig.h"
13 #elif defined(__WATCOMC__)
14 #include "watcomconfig.h"
15 #else
16 #ifdef HAVE_EXPAT_CONFIG_H
17 #include <expat_config.h>
18 #endif
19 #endif /* ndef COMPILED_FROM_DSP */
20 
21 #include "expat_external.h"
22 #include "internal.h"
23 #include "xmlrole.h"
24 #include "ascii.h"
25 
26 /* Doesn't check:
27 
28  that ,| are not mixed in a model group
29  content of literals
30 
31 */
32 
33 static const char KW_ANY[] = {
34     ASCII_A, ASCII_N, ASCII_Y, '\0' };
35 static const char KW_ATTLIST[] = {
36     ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
37 static const char KW_CDATA[] = {
38     ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
39 static const char KW_DOCTYPE[] = {
40     ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
41 static const char KW_ELEMENT[] = {
42     ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
43 static const char KW_EMPTY[] = {
44     ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
45 static const char KW_ENTITIES[] = {
46     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
47     '\0' };
48 static const char KW_ENTITY[] = {
49     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
50 static const char KW_FIXED[] = {
51     ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
52 static const char KW_ID[] = {
53     ASCII_I, ASCII_D, '\0' };
54 static const char KW_IDREF[] = {
55     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
56 static const char KW_IDREFS[] = {
57     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
58 #ifdef XML_DTD
59 static const char KW_IGNORE[] = {
60     ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
61 #endif
62 static const char KW_IMPLIED[] = {
63     ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
64 #ifdef XML_DTD
65 static const char KW_INCLUDE[] = {
66     ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
67 #endif
68 static const char KW_NDATA[] = {
69     ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
70 static const char KW_NMTOKEN[] = {
71     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
72 static const char KW_NMTOKENS[] = {
73     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
74     '\0' };
75 static const char KW_NOTATION[] =
76     { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
77       '\0' };
78 static const char KW_PCDATA[] = {
79     ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
80 static const char KW_PUBLIC[] = {
81     ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
82 static const char KW_REQUIRED[] = {
83     ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
84     '\0' };
85 static const char KW_SYSTEM[] = {
86     ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
87 
/* Size in bytes of the smallest character of an encoding.  A definition
   supplied before this point takes precedence; otherwise the value is
   read from the encoding object at run time. */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
91 
/* setTopLevel(state): install the handler to resume with once a
   declaration has been completed.  With XML_DTD the choice depends on
   state->documentEntity (internalSubset when it is non-zero,
   externalSubset1 otherwise); without XML_DTD it is always
   internalSubset.  The macro is an assignment expression. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = (!(state)->documentEntity \
                       ? externalSubset1 \
                       : internalSubset))
#endif /* XML_DTD */
100 
101 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
102                                    int tok,
103                                    const char *ptr,
104                                    const char *end,
105                                    const ENCODING *enc);
106 
107 static PROLOG_HANDLER
108   prolog0, prolog1, prolog2,
109   doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
110   internalSubset,
111   entity0, entity1, entity2, entity3, entity4, entity5, entity6,
112   entity7, entity8, entity9, entity10,
113   notation0, notation1, notation2, notation3, notation4,
114   attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
115   attlist7, attlist8, attlist9,
116   element0, element1, element2, element3, element4, element5, element6,
117   element7,
118 #ifdef XML_DTD
119   externalSubset0, externalSubset1,
120   condSect0, condSect1, condSect2,
121 #endif /* XML_DTD */
122   declClose,
123   error;
124 
125 static int FASTCALL common(PROLOG_STATE *state, int tok);
126 
127 static int PTRCALL
128 prolog0(PROLOG_STATE *state,
129         int tok,
130         const char *ptr,
131         const char *end,
132         const ENCODING *enc)
133 {
134   switch (tok) {
135   case XML_TOK_PROLOG_S:
136     state->handler = prolog1;
137     return XML_ROLE_NONE;
138   case XML_TOK_XML_DECL:
139     state->handler = prolog1;
140     return XML_ROLE_XML_DECL;
141   case XML_TOK_PI:
142     state->handler = prolog1;
143     return XML_ROLE_PI;
144   case XML_TOK_COMMENT:
145     state->handler = prolog1;
146     return XML_ROLE_COMMENT;
147   case XML_TOK_BOM:
148     return XML_ROLE_NONE;
149   case XML_TOK_DECL_OPEN:
150     if (!XmlNameMatchesAscii(enc,
151                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
152                              end,
153                              KW_DOCTYPE))
154       break;
155     state->handler = doctype0;
156     return XML_ROLE_DOCTYPE_NONE;
157   case XML_TOK_INSTANCE_START:
158     state->handler = error;
159     return XML_ROLE_INSTANCE_START;
160   }
161   return common(state, tok);
162 }
163 
164 static int PTRCALL
165 prolog1(PROLOG_STATE *state,
166         int tok,
167         const char *ptr,
168         const char *end,
169         const ENCODING *enc)
170 {
171   switch (tok) {
172   case XML_TOK_PROLOG_S:
173     return XML_ROLE_NONE;
174   case XML_TOK_PI:
175     return XML_ROLE_PI;
176   case XML_TOK_COMMENT:
177     return XML_ROLE_COMMENT;
178   case XML_TOK_BOM:
179     return XML_ROLE_NONE;
180   case XML_TOK_DECL_OPEN:
181     if (!XmlNameMatchesAscii(enc,
182                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
183                              end,
184                              KW_DOCTYPE))
185       break;
186     state->handler = doctype0;
187     return XML_ROLE_DOCTYPE_NONE;
188   case XML_TOK_INSTANCE_START:
189     state->handler = error;
190     return XML_ROLE_INSTANCE_START;
191   }
192   return common(state, tok);
193 }
194 
195 static int PTRCALL
196 prolog2(PROLOG_STATE *state,
197         int tok,
198         const char *ptr,
199         const char *end,
200         const ENCODING *enc)
201 {
202   switch (tok) {
203   case XML_TOK_PROLOG_S:
204     return XML_ROLE_NONE;
205   case XML_TOK_PI:
206     return XML_ROLE_PI;
207   case XML_TOK_COMMENT:
208     return XML_ROLE_COMMENT;
209   case XML_TOK_INSTANCE_START:
210     state->handler = error;
211     return XML_ROLE_INSTANCE_START;
212   }
213   return common(state, tok);
214 }
215 
216 static int PTRCALL
217 doctype0(PROLOG_STATE *state,
218          int tok,
219          const char *ptr,
220          const char *end,
221          const ENCODING *enc)
222 {
223   switch (tok) {
224   case XML_TOK_PROLOG_S:
225     return XML_ROLE_DOCTYPE_NONE;
226   case XML_TOK_NAME:
227   case XML_TOK_PREFIXED_NAME:
228     state->handler = doctype1;
229     return XML_ROLE_DOCTYPE_NAME;
230   }
231   return common(state, tok);
232 }
233 
234 static int PTRCALL
235 doctype1(PROLOG_STATE *state,
236          int tok,
237          const char *ptr,
238          const char *end,
239          const ENCODING *enc)
240 {
241   switch (tok) {
242   case XML_TOK_PROLOG_S:
243     return XML_ROLE_DOCTYPE_NONE;
244   case XML_TOK_OPEN_BRACKET:
245     state->handler = internalSubset;
246     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
247   case XML_TOK_DECL_CLOSE:
248     state->handler = prolog2;
249     return XML_ROLE_DOCTYPE_CLOSE;
250   case XML_TOK_NAME:
251     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
252       state->handler = doctype3;
253       return XML_ROLE_DOCTYPE_NONE;
254     }
255     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
256       state->handler = doctype2;
257       return XML_ROLE_DOCTYPE_NONE;
258     }
259     break;
260   }
261   return common(state, tok);
262 }
263 
264 static int PTRCALL
265 doctype2(PROLOG_STATE *state,
266          int tok,
267          const char *ptr,
268          const char *end,
269          const ENCODING *enc)
270 {
271   switch (tok) {
272   case XML_TOK_PROLOG_S:
273     return XML_ROLE_DOCTYPE_NONE;
274   case XML_TOK_LITERAL:
275     state->handler = doctype3;
276     return XML_ROLE_DOCTYPE_PUBLIC_ID;
277   }
278   return common(state, tok);
279 }
280 
281 static int PTRCALL
282 doctype3(PROLOG_STATE *state,
283          int tok,
284          const char *ptr,
285          const char *end,
286          const ENCODING *enc)
287 {
288   switch (tok) {
289   case XML_TOK_PROLOG_S:
290     return XML_ROLE_DOCTYPE_NONE;
291   case XML_TOK_LITERAL:
292     state->handler = doctype4;
293     return XML_ROLE_DOCTYPE_SYSTEM_ID;
294   }
295   return common(state, tok);
296 }
297 
298 static int PTRCALL
299 doctype4(PROLOG_STATE *state,
300          int tok,
301          const char *ptr,
302          const char *end,
303          const ENCODING *enc)
304 {
305   switch (tok) {
306   case XML_TOK_PROLOG_S:
307     return XML_ROLE_DOCTYPE_NONE;
308   case XML_TOK_OPEN_BRACKET:
309     state->handler = internalSubset;
310     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
311   case XML_TOK_DECL_CLOSE:
312     state->handler = prolog2;
313     return XML_ROLE_DOCTYPE_CLOSE;
314   }
315   return common(state, tok);
316 }
317 
318 static int PTRCALL
319 doctype5(PROLOG_STATE *state,
320          int tok,
321          const char *ptr,
322          const char *end,
323          const ENCODING *enc)
324 {
325   switch (tok) {
326   case XML_TOK_PROLOG_S:
327     return XML_ROLE_DOCTYPE_NONE;
328   case XML_TOK_DECL_CLOSE:
329     state->handler = prolog2;
330     return XML_ROLE_DOCTYPE_CLOSE;
331   }
332   return common(state, tok);
333 }
334 
335 static int PTRCALL
336 internalSubset(PROLOG_STATE *state,
337                int tok,
338                const char *ptr,
339                const char *end,
340                const ENCODING *enc)
341 {
342   switch (tok) {
343   case XML_TOK_PROLOG_S:
344     return XML_ROLE_NONE;
345   case XML_TOK_DECL_OPEN:
346     if (XmlNameMatchesAscii(enc,
347                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
348                             end,
349                             KW_ENTITY)) {
350       state->handler = entity0;
351       return XML_ROLE_ENTITY_NONE;
352     }
353     if (XmlNameMatchesAscii(enc,
354                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
355                             end,
356                             KW_ATTLIST)) {
357       state->handler = attlist0;
358       return XML_ROLE_ATTLIST_NONE;
359     }
360     if (XmlNameMatchesAscii(enc,
361                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
362                             end,
363                             KW_ELEMENT)) {
364       state->handler = element0;
365       return XML_ROLE_ELEMENT_NONE;
366     }
367     if (XmlNameMatchesAscii(enc,
368                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
369                             end,
370                             KW_NOTATION)) {
371       state->handler = notation0;
372       return XML_ROLE_NOTATION_NONE;
373     }
374     break;
375   case XML_TOK_PI:
376     return XML_ROLE_PI;
377   case XML_TOK_COMMENT:
378     return XML_ROLE_COMMENT;
379   case XML_TOK_PARAM_ENTITY_REF:
380     return XML_ROLE_PARAM_ENTITY_REF;
381   case XML_TOK_CLOSE_BRACKET:
382     state->handler = doctype5;
383     return XML_ROLE_DOCTYPE_NONE;
384   case XML_TOK_NONE:
385     return XML_ROLE_NONE;
386   }
387   return common(state, tok);
388 }
389 
390 #ifdef XML_DTD
391 
392 static int PTRCALL
393 externalSubset0(PROLOG_STATE *state,
394                 int tok,
395                 const char *ptr,
396                 const char *end,
397                 const ENCODING *enc)
398 {
399   state->handler = externalSubset1;
400   if (tok == XML_TOK_XML_DECL)
401     return XML_ROLE_TEXT_DECL;
402   return externalSubset1(state, tok, ptr, end, enc);
403 }
404 
405 static int PTRCALL
406 externalSubset1(PROLOG_STATE *state,
407                 int tok,
408                 const char *ptr,
409                 const char *end,
410                 const ENCODING *enc)
411 {
412   switch (tok) {
413   case XML_TOK_COND_SECT_OPEN:
414     state->handler = condSect0;
415     return XML_ROLE_NONE;
416   case XML_TOK_COND_SECT_CLOSE:
417     if (state->includeLevel == 0)
418       break;
419     state->includeLevel -= 1;
420     return XML_ROLE_NONE;
421   case XML_TOK_PROLOG_S:
422     return XML_ROLE_NONE;
423   case XML_TOK_CLOSE_BRACKET:
424     break;
425   case XML_TOK_NONE:
426     if (state->includeLevel)
427       break;
428     return XML_ROLE_NONE;
429   default:
430     return internalSubset(state, tok, ptr, end, enc);
431   }
432   return common(state, tok);
433 }
434 
435 #endif /* XML_DTD */
436 
437 static int PTRCALL
438 entity0(PROLOG_STATE *state,
439         int tok,
440         const char *ptr,
441         const char *end,
442         const ENCODING *enc)
443 {
444   switch (tok) {
445   case XML_TOK_PROLOG_S:
446     return XML_ROLE_ENTITY_NONE;
447   case XML_TOK_PERCENT:
448     state->handler = entity1;
449     return XML_ROLE_ENTITY_NONE;
450   case XML_TOK_NAME:
451     state->handler = entity2;
452     return XML_ROLE_GENERAL_ENTITY_NAME;
453   }
454   return common(state, tok);
455 }
456 
457 static int PTRCALL
458 entity1(PROLOG_STATE *state,
459         int tok,
460         const char *ptr,
461         const char *end,
462         const ENCODING *enc)
463 {
464   switch (tok) {
465   case XML_TOK_PROLOG_S:
466     return XML_ROLE_ENTITY_NONE;
467   case XML_TOK_NAME:
468     state->handler = entity7;
469     return XML_ROLE_PARAM_ENTITY_NAME;
470   }
471   return common(state, tok);
472 }
473 
474 static int PTRCALL
475 entity2(PROLOG_STATE *state,
476         int tok,
477         const char *ptr,
478         const char *end,
479         const ENCODING *enc)
480 {
481   switch (tok) {
482   case XML_TOK_PROLOG_S:
483     return XML_ROLE_ENTITY_NONE;
484   case XML_TOK_NAME:
485     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
486       state->handler = entity4;
487       return XML_ROLE_ENTITY_NONE;
488     }
489     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
490       state->handler = entity3;
491       return XML_ROLE_ENTITY_NONE;
492     }
493     break;
494   case XML_TOK_LITERAL:
495     state->handler = declClose;
496     state->role_none = XML_ROLE_ENTITY_NONE;
497     return XML_ROLE_ENTITY_VALUE;
498   }
499   return common(state, tok);
500 }
501 
502 static int PTRCALL
503 entity3(PROLOG_STATE *state,
504         int tok,
505         const char *ptr,
506         const char *end,
507         const ENCODING *enc)
508 {
509   switch (tok) {
510   case XML_TOK_PROLOG_S:
511     return XML_ROLE_ENTITY_NONE;
512   case XML_TOK_LITERAL:
513     state->handler = entity4;
514     return XML_ROLE_ENTITY_PUBLIC_ID;
515   }
516   return common(state, tok);
517 }
518 
519 static int PTRCALL
520 entity4(PROLOG_STATE *state,
521         int tok,
522         const char *ptr,
523         const char *end,
524         const ENCODING *enc)
525 {
526   switch (tok) {
527   case XML_TOK_PROLOG_S:
528     return XML_ROLE_ENTITY_NONE;
529   case XML_TOK_LITERAL:
530     state->handler = entity5;
531     return XML_ROLE_ENTITY_SYSTEM_ID;
532   }
533   return common(state, tok);
534 }
535 
536 static int PTRCALL
537 entity5(PROLOG_STATE *state,
538         int tok,
539         const char *ptr,
540         const char *end,
541         const ENCODING *enc)
542 {
543   switch (tok) {
544   case XML_TOK_PROLOG_S:
545     return XML_ROLE_ENTITY_NONE;
546   case XML_TOK_DECL_CLOSE:
547     setTopLevel(state);
548     return XML_ROLE_ENTITY_COMPLETE;
549   case XML_TOK_NAME:
550     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
551       state->handler = entity6;
552       return XML_ROLE_ENTITY_NONE;
553     }
554     break;
555   }
556   return common(state, tok);
557 }
558 
559 static int PTRCALL
560 entity6(PROLOG_STATE *state,
561         int tok,
562         const char *ptr,
563         const char *end,
564         const ENCODING *enc)
565 {
566   switch (tok) {
567   case XML_TOK_PROLOG_S:
568     return XML_ROLE_ENTITY_NONE;
569   case XML_TOK_NAME:
570     state->handler = declClose;
571     state->role_none = XML_ROLE_ENTITY_NONE;
572     return XML_ROLE_ENTITY_NOTATION_NAME;
573   }
574   return common(state, tok);
575 }
576 
577 static int PTRCALL
578 entity7(PROLOG_STATE *state,
579         int tok,
580         const char *ptr,
581         const char *end,
582         const ENCODING *enc)
583 {
584   switch (tok) {
585   case XML_TOK_PROLOG_S:
586     return XML_ROLE_ENTITY_NONE;
587   case XML_TOK_NAME:
588     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
589       state->handler = entity9;
590       return XML_ROLE_ENTITY_NONE;
591     }
592     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
593       state->handler = entity8;
594       return XML_ROLE_ENTITY_NONE;
595     }
596     break;
597   case XML_TOK_LITERAL:
598     state->handler = declClose;
599     state->role_none = XML_ROLE_ENTITY_NONE;
600     return XML_ROLE_ENTITY_VALUE;
601   }
602   return common(state, tok);
603 }
604 
605 static int PTRCALL
606 entity8(PROLOG_STATE *state,
607         int tok,
608         const char *ptr,
609         const char *end,
610         const ENCODING *enc)
611 {
612   switch (tok) {
613   case XML_TOK_PROLOG_S:
614     return XML_ROLE_ENTITY_NONE;
615   case XML_TOK_LITERAL:
616     state->handler = entity9;
617     return XML_ROLE_ENTITY_PUBLIC_ID;
618   }
619   return common(state, tok);
620 }
621 
622 static int PTRCALL
623 entity9(PROLOG_STATE *state,
624         int tok,
625         const char *ptr,
626         const char *end,
627         const ENCODING *enc)
628 {
629   switch (tok) {
630   case XML_TOK_PROLOG_S:
631     return XML_ROLE_ENTITY_NONE;
632   case XML_TOK_LITERAL:
633     state->handler = entity10;
634     return XML_ROLE_ENTITY_SYSTEM_ID;
635   }
636   return common(state, tok);
637 }
638 
639 static int PTRCALL
640 entity10(PROLOG_STATE *state,
641          int tok,
642          const char *ptr,
643          const char *end,
644          const ENCODING *enc)
645 {
646   switch (tok) {
647   case XML_TOK_PROLOG_S:
648     return XML_ROLE_ENTITY_NONE;
649   case XML_TOK_DECL_CLOSE:
650     setTopLevel(state);
651     return XML_ROLE_ENTITY_COMPLETE;
652   }
653   return common(state, tok);
654 }
655 
656 static int PTRCALL
657 notation0(PROLOG_STATE *state,
658           int tok,
659           const char *ptr,
660           const char *end,
661           const ENCODING *enc)
662 {
663   switch (tok) {
664   case XML_TOK_PROLOG_S:
665     return XML_ROLE_NOTATION_NONE;
666   case XML_TOK_NAME:
667     state->handler = notation1;
668     return XML_ROLE_NOTATION_NAME;
669   }
670   return common(state, tok);
671 }
672 
673 static int PTRCALL
674 notation1(PROLOG_STATE *state,
675           int tok,
676           const char *ptr,
677           const char *end,
678           const ENCODING *enc)
679 {
680   switch (tok) {
681   case XML_TOK_PROLOG_S:
682     return XML_ROLE_NOTATION_NONE;
683   case XML_TOK_NAME:
684     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
685       state->handler = notation3;
686       return XML_ROLE_NOTATION_NONE;
687     }
688     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
689       state->handler = notation2;
690       return XML_ROLE_NOTATION_NONE;
691     }
692     break;
693   }
694   return common(state, tok);
695 }
696 
697 static int PTRCALL
698 notation2(PROLOG_STATE *state,
699           int tok,
700           const char *ptr,
701           const char *end,
702           const ENCODING *enc)
703 {
704   switch (tok) {
705   case XML_TOK_PROLOG_S:
706     return XML_ROLE_NOTATION_NONE;
707   case XML_TOK_LITERAL:
708     state->handler = notation4;
709     return XML_ROLE_NOTATION_PUBLIC_ID;
710   }
711   return common(state, tok);
712 }
713 
714 static int PTRCALL
715 notation3(PROLOG_STATE *state,
716           int tok,
717           const char *ptr,
718           const char *end,
719           const ENCODING *enc)
720 {
721   switch (tok) {
722   case XML_TOK_PROLOG_S:
723     return XML_ROLE_NOTATION_NONE;
724   case XML_TOK_LITERAL:
725     state->handler = declClose;
726     state->role_none = XML_ROLE_NOTATION_NONE;
727     return XML_ROLE_NOTATION_SYSTEM_ID;
728   }
729   return common(state, tok);
730 }
731 
732 static int PTRCALL
733 notation4(PROLOG_STATE *state,
734           int tok,
735           const char *ptr,
736           const char *end,
737           const ENCODING *enc)
738 {
739   switch (tok) {
740   case XML_TOK_PROLOG_S:
741     return XML_ROLE_NOTATION_NONE;
742   case XML_TOK_LITERAL:
743     state->handler = declClose;
744     state->role_none = XML_ROLE_NOTATION_NONE;
745     return XML_ROLE_NOTATION_SYSTEM_ID;
746   case XML_TOK_DECL_CLOSE:
747     setTopLevel(state);
748     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
749   }
750   return common(state, tok);
751 }
752 
753 static int PTRCALL
754 attlist0(PROLOG_STATE *state,
755          int tok,
756          const char *ptr,
757          const char *end,
758          const ENCODING *enc)
759 {
760   switch (tok) {
761   case XML_TOK_PROLOG_S:
762     return XML_ROLE_ATTLIST_NONE;
763   case XML_TOK_NAME:
764   case XML_TOK_PREFIXED_NAME:
765     state->handler = attlist1;
766     return XML_ROLE_ATTLIST_ELEMENT_NAME;
767   }
768   return common(state, tok);
769 }
770 
771 static int PTRCALL
772 attlist1(PROLOG_STATE *state,
773          int tok,
774          const char *ptr,
775          const char *end,
776          const ENCODING *enc)
777 {
778   switch (tok) {
779   case XML_TOK_PROLOG_S:
780     return XML_ROLE_ATTLIST_NONE;
781   case XML_TOK_DECL_CLOSE:
782     setTopLevel(state);
783     return XML_ROLE_ATTLIST_NONE;
784   case XML_TOK_NAME:
785   case XML_TOK_PREFIXED_NAME:
786     state->handler = attlist2;
787     return XML_ROLE_ATTRIBUTE_NAME;
788   }
789   return common(state, tok);
790 }
791 
792 static int PTRCALL
793 attlist2(PROLOG_STATE *state,
794          int tok,
795          const char *ptr,
796          const char *end,
797          const ENCODING *enc)
798 {
799   switch (tok) {
800   case XML_TOK_PROLOG_S:
801     return XML_ROLE_ATTLIST_NONE;
802   case XML_TOK_NAME:
803     {
804       static const char * const types[] = {
805         KW_CDATA,
806         KW_ID,
807         KW_IDREF,
808         KW_IDREFS,
809         KW_ENTITY,
810         KW_ENTITIES,
811         KW_NMTOKEN,
812         KW_NMTOKENS,
813       };
814       int i;
815       for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
816         if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
817           state->handler = attlist8;
818           return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
819         }
820     }
821     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
822       state->handler = attlist5;
823       return XML_ROLE_ATTLIST_NONE;
824     }
825     break;
826   case XML_TOK_OPEN_PAREN:
827     state->handler = attlist3;
828     return XML_ROLE_ATTLIST_NONE;
829   }
830   return common(state, tok);
831 }
832 
833 static int PTRCALL
834 attlist3(PROLOG_STATE *state,
835          int tok,
836          const char *ptr,
837          const char *end,
838          const ENCODING *enc)
839 {
840   switch (tok) {
841   case XML_TOK_PROLOG_S:
842     return XML_ROLE_ATTLIST_NONE;
843   case XML_TOK_NMTOKEN:
844   case XML_TOK_NAME:
845   case XML_TOK_PREFIXED_NAME:
846     state->handler = attlist4;
847     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
848   }
849   return common(state, tok);
850 }
851 
852 static int PTRCALL
853 attlist4(PROLOG_STATE *state,
854          int tok,
855          const char *ptr,
856          const char *end,
857          const ENCODING *enc)
858 {
859   switch (tok) {
860   case XML_TOK_PROLOG_S:
861     return XML_ROLE_ATTLIST_NONE;
862   case XML_TOK_CLOSE_PAREN:
863     state->handler = attlist8;
864     return XML_ROLE_ATTLIST_NONE;
865   case XML_TOK_OR:
866     state->handler = attlist3;
867     return XML_ROLE_ATTLIST_NONE;
868   }
869   return common(state, tok);
870 }
871 
872 static int PTRCALL
873 attlist5(PROLOG_STATE *state,
874          int tok,
875          const char *ptr,
876          const char *end,
877          const ENCODING *enc)
878 {
879   switch (tok) {
880   case XML_TOK_PROLOG_S:
881     return XML_ROLE_ATTLIST_NONE;
882   case XML_TOK_OPEN_PAREN:
883     state->handler = attlist6;
884     return XML_ROLE_ATTLIST_NONE;
885   }
886   return common(state, tok);
887 }
888 
889 static int PTRCALL
890 attlist6(PROLOG_STATE *state,
891          int tok,
892          const char *ptr,
893          const char *end,
894          const ENCODING *enc)
895 {
896   switch (tok) {
897   case XML_TOK_PROLOG_S:
898     return XML_ROLE_ATTLIST_NONE;
899   case XML_TOK_NAME:
900     state->handler = attlist7;
901     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
902   }
903   return common(state, tok);
904 }
905 
906 static int PTRCALL
907 attlist7(PROLOG_STATE *state,
908          int tok,
909          const char *ptr,
910          const char *end,
911          const ENCODING *enc)
912 {
913   switch (tok) {
914   case XML_TOK_PROLOG_S:
915     return XML_ROLE_ATTLIST_NONE;
916   case XML_TOK_CLOSE_PAREN:
917     state->handler = attlist8;
918     return XML_ROLE_ATTLIST_NONE;
919   case XML_TOK_OR:
920     state->handler = attlist6;
921     return XML_ROLE_ATTLIST_NONE;
922   }
923   return common(state, tok);
924 }
925 
926 /* default value */
927 static int PTRCALL
928 attlist8(PROLOG_STATE *state,
929          int tok,
930          const char *ptr,
931          const char *end,
932          const ENCODING *enc)
933 {
934   switch (tok) {
935   case XML_TOK_PROLOG_S:
936     return XML_ROLE_ATTLIST_NONE;
937   case XML_TOK_POUND_NAME:
938     if (XmlNameMatchesAscii(enc,
939                             ptr + MIN_BYTES_PER_CHAR(enc),
940                             end,
941                             KW_IMPLIED)) {
942       state->handler = attlist1;
943       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
944     }
945     if (XmlNameMatchesAscii(enc,
946                             ptr + MIN_BYTES_PER_CHAR(enc),
947                             end,
948                             KW_REQUIRED)) {
949       state->handler = attlist1;
950       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
951     }
952     if (XmlNameMatchesAscii(enc,
953                             ptr + MIN_BYTES_PER_CHAR(enc),
954                             end,
955                             KW_FIXED)) {
956       state->handler = attlist9;
957       return XML_ROLE_ATTLIST_NONE;
958     }
959     break;
960   case XML_TOK_LITERAL:
961     state->handler = attlist1;
962     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
963   }
964   return common(state, tok);
965 }
966 
967 static int PTRCALL
968 attlist9(PROLOG_STATE *state,
969          int tok,
970          const char *ptr,
971          const char *end,
972          const ENCODING *enc)
973 {
974   switch (tok) {
975   case XML_TOK_PROLOG_S:
976     return XML_ROLE_ATTLIST_NONE;
977   case XML_TOK_LITERAL:
978     state->handler = attlist1;
979     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
980   }
981   return common(state, tok);
982 }
983 
984 static int PTRCALL
985 element0(PROLOG_STATE *state,
986          int tok,
987          const char *ptr,
988          const char *end,
989          const ENCODING *enc)
990 {
991   switch (tok) {
992   case XML_TOK_PROLOG_S:
993     return XML_ROLE_ELEMENT_NONE;
994   case XML_TOK_NAME:
995   case XML_TOK_PREFIXED_NAME:
996     state->handler = element1;
997     return XML_ROLE_ELEMENT_NAME;
998   }
999   return common(state, tok);
1000 }
1001 
1002 static int PTRCALL
1003 element1(PROLOG_STATE *state,
1004          int tok,
1005          const char *ptr,
1006          const char *end,
1007          const ENCODING *enc)
1008 {
1009   switch (tok) {
1010   case XML_TOK_PROLOG_S:
1011     return XML_ROLE_ELEMENT_NONE;
1012   case XML_TOK_NAME:
1013     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1014       state->handler = declClose;
1015       state->role_none = XML_ROLE_ELEMENT_NONE;
1016       return XML_ROLE_CONTENT_EMPTY;
1017     }
1018     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1019       state->handler = declClose;
1020       state->role_none = XML_ROLE_ELEMENT_NONE;
1021       return XML_ROLE_CONTENT_ANY;
1022     }
1023     break;
1024   case XML_TOK_OPEN_PAREN:
1025     state->handler = element2;
1026     state->level = 1;
1027     return XML_ROLE_GROUP_OPEN;
1028   }
1029   return common(state, tok);
1030 }
1031 
1032 static int PTRCALL
1033 element2(PROLOG_STATE *state,
1034          int tok,
1035          const char *ptr,
1036          const char *end,
1037          const ENCODING *enc)
1038 {
1039   switch (tok) {
1040   case XML_TOK_PROLOG_S:
1041     return XML_ROLE_ELEMENT_NONE;
1042   case XML_TOK_POUND_NAME:
1043     if (XmlNameMatchesAscii(enc,
1044                             ptr + MIN_BYTES_PER_CHAR(enc),
1045                             end,
1046                             KW_PCDATA)) {
1047       state->handler = element3;
1048       return XML_ROLE_CONTENT_PCDATA;
1049     }
1050     break;
1051   case XML_TOK_OPEN_PAREN:
1052     state->level = 2;
1053     state->handler = element6;
1054     return XML_ROLE_GROUP_OPEN;
1055   case XML_TOK_NAME:
1056   case XML_TOK_PREFIXED_NAME:
1057     state->handler = element7;
1058     return XML_ROLE_CONTENT_ELEMENT;
1059   case XML_TOK_NAME_QUESTION:
1060     state->handler = element7;
1061     return XML_ROLE_CONTENT_ELEMENT_OPT;
1062   case XML_TOK_NAME_ASTERISK:
1063     state->handler = element7;
1064     return XML_ROLE_CONTENT_ELEMENT_REP;
1065   case XML_TOK_NAME_PLUS:
1066     state->handler = element7;
1067     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1068   }
1069   return common(state, tok);
1070 }
1071 
1072 static int PTRCALL
1073 element3(PROLOG_STATE *state,
1074          int tok,
1075          const char *ptr,
1076          const char *end,
1077          const ENCODING *enc)
1078 {
1079   switch (tok) {
1080   case XML_TOK_PROLOG_S:
1081     return XML_ROLE_ELEMENT_NONE;
1082   case XML_TOK_CLOSE_PAREN:
1083     state->handler = declClose;
1084     state->role_none = XML_ROLE_ELEMENT_NONE;
1085     return XML_ROLE_GROUP_CLOSE;
1086   case XML_TOK_CLOSE_PAREN_ASTERISK:
1087     state->handler = declClose;
1088     state->role_none = XML_ROLE_ELEMENT_NONE;
1089     return XML_ROLE_GROUP_CLOSE_REP;
1090   case XML_TOK_OR:
1091     state->handler = element4;
1092     return XML_ROLE_ELEMENT_NONE;
1093   }
1094   return common(state, tok);
1095 }
1096 
1097 static int PTRCALL
1098 element4(PROLOG_STATE *state,
1099          int tok,
1100          const char *ptr,
1101          const char *end,
1102          const ENCODING *enc)
1103 {
1104   switch (tok) {
1105   case XML_TOK_PROLOG_S:
1106     return XML_ROLE_ELEMENT_NONE;
1107   case XML_TOK_NAME:
1108   case XML_TOK_PREFIXED_NAME:
1109     state->handler = element5;
1110     return XML_ROLE_CONTENT_ELEMENT;
1111   }
1112   return common(state, tok);
1113 }
1114 
1115 static int PTRCALL
1116 element5(PROLOG_STATE *state,
1117          int tok,
1118          const char *ptr,
1119          const char *end,
1120          const ENCODING *enc)
1121 {
1122   switch (tok) {
1123   case XML_TOK_PROLOG_S:
1124     return XML_ROLE_ELEMENT_NONE;
1125   case XML_TOK_CLOSE_PAREN_ASTERISK:
1126     state->handler = declClose;
1127     state->role_none = XML_ROLE_ELEMENT_NONE;
1128     return XML_ROLE_GROUP_CLOSE_REP;
1129   case XML_TOK_OR:
1130     state->handler = element4;
1131     return XML_ROLE_ELEMENT_NONE;
1132   }
1133   return common(state, tok);
1134 }
1135 
1136 static int PTRCALL
1137 element6(PROLOG_STATE *state,
1138          int tok,
1139          const char *ptr,
1140          const char *end,
1141          const ENCODING *enc)
1142 {
1143   switch (tok) {
1144   case XML_TOK_PROLOG_S:
1145     return XML_ROLE_ELEMENT_NONE;
1146   case XML_TOK_OPEN_PAREN:
1147     state->level += 1;
1148     return XML_ROLE_GROUP_OPEN;
1149   case XML_TOK_NAME:
1150   case XML_TOK_PREFIXED_NAME:
1151     state->handler = element7;
1152     return XML_ROLE_CONTENT_ELEMENT;
1153   case XML_TOK_NAME_QUESTION:
1154     state->handler = element7;
1155     return XML_ROLE_CONTENT_ELEMENT_OPT;
1156   case XML_TOK_NAME_ASTERISK:
1157     state->handler = element7;
1158     return XML_ROLE_CONTENT_ELEMENT_REP;
1159   case XML_TOK_NAME_PLUS:
1160     state->handler = element7;
1161     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1162   }
1163   return common(state, tok);
1164 }
1165 
1166 static int PTRCALL
1167 element7(PROLOG_STATE *state,
1168          int tok,
1169          const char *ptr,
1170          const char *end,
1171          const ENCODING *enc)
1172 {
1173   switch (tok) {
1174   case XML_TOK_PROLOG_S:
1175     return XML_ROLE_ELEMENT_NONE;
1176   case XML_TOK_CLOSE_PAREN:
1177     state->level -= 1;
1178     if (state->level == 0) {
1179       state->handler = declClose;
1180       state->role_none = XML_ROLE_ELEMENT_NONE;
1181     }
1182     return XML_ROLE_GROUP_CLOSE;
1183   case XML_TOK_CLOSE_PAREN_ASTERISK:
1184     state->level -= 1;
1185     if (state->level == 0) {
1186       state->handler = declClose;
1187       state->role_none = XML_ROLE_ELEMENT_NONE;
1188     }
1189     return XML_ROLE_GROUP_CLOSE_REP;
1190   case XML_TOK_CLOSE_PAREN_QUESTION:
1191     state->level -= 1;
1192     if (state->level == 0) {
1193       state->handler = declClose;
1194       state->role_none = XML_ROLE_ELEMENT_NONE;
1195     }
1196     return XML_ROLE_GROUP_CLOSE_OPT;
1197   case XML_TOK_CLOSE_PAREN_PLUS:
1198     state->level -= 1;
1199     if (state->level == 0) {
1200       state->handler = declClose;
1201       state->role_none = XML_ROLE_ELEMENT_NONE;
1202     }
1203     return XML_ROLE_GROUP_CLOSE_PLUS;
1204   case XML_TOK_COMMA:
1205     state->handler = element6;
1206     return XML_ROLE_GROUP_SEQUENCE;
1207   case XML_TOK_OR:
1208     state->handler = element6;
1209     return XML_ROLE_GROUP_CHOICE;
1210   }
1211   return common(state, tok);
1212 }
1213 
1214 #ifdef XML_DTD
1215 
1216 static int PTRCALL
1217 condSect0(PROLOG_STATE *state,
1218           int tok,
1219           const char *ptr,
1220           const char *end,
1221           const ENCODING *enc)
1222 {
1223   switch (tok) {
1224   case XML_TOK_PROLOG_S:
1225     return XML_ROLE_NONE;
1226   case XML_TOK_NAME:
1227     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1228       state->handler = condSect1;
1229       return XML_ROLE_NONE;
1230     }
1231     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1232       state->handler = condSect2;
1233       return XML_ROLE_NONE;
1234     }
1235     break;
1236   }
1237   return common(state, tok);
1238 }
1239 
1240 static int PTRCALL
1241 condSect1(PROLOG_STATE *state,
1242           int tok,
1243           const char *ptr,
1244           const char *end,
1245           const ENCODING *enc)
1246 {
1247   switch (tok) {
1248   case XML_TOK_PROLOG_S:
1249     return XML_ROLE_NONE;
1250   case XML_TOK_OPEN_BRACKET:
1251     state->handler = externalSubset1;
1252     state->includeLevel += 1;
1253     return XML_ROLE_NONE;
1254   }
1255   return common(state, tok);
1256 }
1257 
1258 static int PTRCALL
1259 condSect2(PROLOG_STATE *state,
1260           int tok,
1261           const char *ptr,
1262           const char *end,
1263           const ENCODING *enc)
1264 {
1265   switch (tok) {
1266   case XML_TOK_PROLOG_S:
1267     return XML_ROLE_NONE;
1268   case XML_TOK_OPEN_BRACKET:
1269     state->handler = externalSubset1;
1270     return XML_ROLE_IGNORE_SECT;
1271   }
1272   return common(state, tok);
1273 }
1274 
1275 #endif /* XML_DTD */
1276 
1277 static int PTRCALL
1278 declClose(PROLOG_STATE *state,
1279           int tok,
1280           const char *ptr,
1281           const char *end,
1282           const ENCODING *enc)
1283 {
1284   switch (tok) {
1285   case XML_TOK_PROLOG_S:
1286     return state->role_none;
1287   case XML_TOK_DECL_CLOSE:
1288     setTopLevel(state);
1289     return state->role_none;
1290   }
1291   return common(state, tok);
1292 }
1293 
1294 static int PTRCALL
1295 error(PROLOG_STATE *state,
1296       int tok,
1297       const char *ptr,
1298       const char *end,
1299       const ENCODING *enc)
1300 {
1301   return XML_ROLE_NONE;
1302 }
1303 
1304 static int FASTCALL
1305 common(PROLOG_STATE *state, int tok)
1306 {
1307 #ifdef XML_DTD
1308   if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1309     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1310 #endif
1311   state->handler = error;
1312   return XML_ROLE_ERROR;
1313 }
1314 
1315 void
1316 XmlPrologStateInit(PROLOG_STATE *state)
1317 {
1318   state->handler = prolog0;
1319 #ifdef XML_DTD
1320   state->documentEntity = 1;
1321   state->includeLevel = 0;
1322   state->inEntityValue = 0;
1323 #endif /* XML_DTD */
1324 }
1325 
1326 #ifdef XML_DTD
1327 
1328 void
1329 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1330 {
1331   state->handler = externalSubset0;
1332   state->documentEntity = 0;
1333   state->includeLevel = 0;
1334 }
1335 
1336 #endif /* XML_DTD */
1337