xref: /freebsd/contrib/expat/lib/xmlrole.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2    See the file COPYING for copying permission.
3 */
4 
5 #ifdef COMPILED_FROM_DSP
6 #include "winconfig.h"
7 #elif defined(MACOS_CLASSIC)
8 #include "macconfig.h"
9 #else
10 #include <expat_config.h>
11 #endif /* ndef COMPILED_FROM_DSP */
12 
13 #include "internal.h"
14 #include "xmlrole.h"
15 #include "ascii.h"
16 
/* This module does not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
23 
24 static const char KW_ANY[] = {
25     ASCII_A, ASCII_N, ASCII_Y, '\0' };
26 static const char KW_ATTLIST[] = {
27     ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
28 static const char KW_CDATA[] = {
29     ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
30 static const char KW_DOCTYPE[] = {
31     ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
32 static const char KW_ELEMENT[] = {
33     ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
34 static const char KW_EMPTY[] = {
35     ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
36 static const char KW_ENTITIES[] = {
37     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
38     '\0' };
39 static const char KW_ENTITY[] = {
40     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
41 static const char KW_FIXED[] = {
42     ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
43 static const char KW_ID[] = {
44     ASCII_I, ASCII_D, '\0' };
45 static const char KW_IDREF[] = {
46     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
47 static const char KW_IDREFS[] = {
48     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
49 static const char KW_IGNORE[] = {
50     ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
51 static const char KW_IMPLIED[] = {
52     ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
53 static const char KW_INCLUDE[] = {
54     ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
55 static const char KW_NDATA[] = {
56     ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
57 static const char KW_NMTOKEN[] = {
58     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
59 static const char KW_NMTOKENS[] = {
60     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
61     '\0' };
62 static const char KW_NOTATION[] =
63     { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
64       '\0' };
65 static const char KW_PCDATA[] = {
66     ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
67 static const char KW_PUBLIC[] = {
68     ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
69 static const char KW_REQUIRED[] = {
70     ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
71     '\0' };
72 static const char KW_SYSTEM[] = {
73     ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
74 
/* Unless already defined, read the minimum number of bytes per character
   from the encoding object. */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Install the top-level handler once a declaration is finished.  With
   XML_DTD the choice depends on state->documentEntity: internalSubset when
   it is non-zero, externalSubset1 otherwise.  Without XML_DTD it is always
   internalSubset.  The macro evaluates to the assigned handler. */
#ifdef XML_DTD
#  define setTopLevel(state) \
     ((state)->handler \
        = (state)->documentEntity ? internalSubset : externalSubset1)
#else /* not XML_DTD */
#  define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
87 
88 typedef int FASTCALL PROLOG_HANDLER(PROLOG_STATE *state,
89                                     int tok,
90                                     const char *ptr,
91                                     const char *end,
92                                     const ENCODING *enc);
93 
94 static PROLOG_HANDLER
95   prolog0, prolog1, prolog2,
96   doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
97   internalSubset,
98   entity0, entity1, entity2, entity3, entity4, entity5, entity6,
99   entity7, entity8, entity9, entity10,
100   notation0, notation1, notation2, notation3, notation4,
101   attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
102   attlist7, attlist8, attlist9,
103   element0, element1, element2, element3, element4, element5, element6,
104   element7,
105 #ifdef XML_DTD
106   externalSubset0, externalSubset1,
107   condSect0, condSect1, condSect2,
108 #endif /* XML_DTD */
109   declClose,
110   error;
111 
112 static int FASTCALL common(PROLOG_STATE *state, int tok);
113 
114 static int FASTCALL
115 prolog0(PROLOG_STATE *state,
116         int tok,
117         const char *ptr,
118         const char *end,
119         const ENCODING *enc)
120 {
121   switch (tok) {
122   case XML_TOK_PROLOG_S:
123     state->handler = prolog1;
124     return XML_ROLE_NONE;
125   case XML_TOK_XML_DECL:
126     state->handler = prolog1;
127     return XML_ROLE_XML_DECL;
128   case XML_TOK_PI:
129     state->handler = prolog1;
130     return XML_ROLE_PI;
131   case XML_TOK_COMMENT:
132     state->handler = prolog1;
133     return XML_ROLE_COMMENT;
134   case XML_TOK_BOM:
135     return XML_ROLE_NONE;
136   case XML_TOK_DECL_OPEN:
137     if (!XmlNameMatchesAscii(enc,
138                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
139                              end,
140                              KW_DOCTYPE))
141       break;
142     state->handler = doctype0;
143     return XML_ROLE_DOCTYPE_NONE;
144   case XML_TOK_INSTANCE_START:
145     state->handler = error;
146     return XML_ROLE_INSTANCE_START;
147   }
148   return common(state, tok);
149 }
150 
151 static int FASTCALL
152 prolog1(PROLOG_STATE *state,
153         int tok,
154         const char *ptr,
155         const char *end,
156         const ENCODING *enc)
157 {
158   switch (tok) {
159   case XML_TOK_PROLOG_S:
160     return XML_ROLE_NONE;
161   case XML_TOK_PI:
162     return XML_ROLE_PI;
163   case XML_TOK_COMMENT:
164     return XML_ROLE_COMMENT;
165   case XML_TOK_BOM:
166     return XML_ROLE_NONE;
167   case XML_TOK_DECL_OPEN:
168     if (!XmlNameMatchesAscii(enc,
169                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
170                              end,
171                              KW_DOCTYPE))
172       break;
173     state->handler = doctype0;
174     return XML_ROLE_DOCTYPE_NONE;
175   case XML_TOK_INSTANCE_START:
176     state->handler = error;
177     return XML_ROLE_INSTANCE_START;
178   }
179   return common(state, tok);
180 }
181 
182 static int FASTCALL
183 prolog2(PROLOG_STATE *state,
184         int tok,
185         const char *ptr,
186         const char *end,
187         const ENCODING *enc)
188 {
189   switch (tok) {
190   case XML_TOK_PROLOG_S:
191     return XML_ROLE_NONE;
192   case XML_TOK_PI:
193     return XML_ROLE_PI;
194   case XML_TOK_COMMENT:
195     return XML_ROLE_COMMENT;
196   case XML_TOK_INSTANCE_START:
197     state->handler = error;
198     return XML_ROLE_INSTANCE_START;
199   }
200   return common(state, tok);
201 }
202 
203 static int FASTCALL
204 doctype0(PROLOG_STATE *state,
205          int tok,
206          const char *ptr,
207          const char *end,
208          const ENCODING *enc)
209 {
210   switch (tok) {
211   case XML_TOK_PROLOG_S:
212     return XML_ROLE_DOCTYPE_NONE;
213   case XML_TOK_NAME:
214   case XML_TOK_PREFIXED_NAME:
215     state->handler = doctype1;
216     return XML_ROLE_DOCTYPE_NAME;
217   }
218   return common(state, tok);
219 }
220 
221 static int FASTCALL
222 doctype1(PROLOG_STATE *state,
223          int tok,
224          const char *ptr,
225          const char *end,
226          const ENCODING *enc)
227 {
228   switch (tok) {
229   case XML_TOK_PROLOG_S:
230     return XML_ROLE_DOCTYPE_NONE;
231   case XML_TOK_OPEN_BRACKET:
232     state->handler = internalSubset;
233     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
234   case XML_TOK_DECL_CLOSE:
235     state->handler = prolog2;
236     return XML_ROLE_DOCTYPE_CLOSE;
237   case XML_TOK_NAME:
238     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
239       state->handler = doctype3;
240       return XML_ROLE_DOCTYPE_NONE;
241     }
242     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
243       state->handler = doctype2;
244       return XML_ROLE_DOCTYPE_NONE;
245     }
246     break;
247   }
248   return common(state, tok);
249 }
250 
251 static int FASTCALL
252 doctype2(PROLOG_STATE *state,
253          int tok,
254          const char *ptr,
255          const char *end,
256          const ENCODING *enc)
257 {
258   switch (tok) {
259   case XML_TOK_PROLOG_S:
260     return XML_ROLE_DOCTYPE_NONE;
261   case XML_TOK_LITERAL:
262     state->handler = doctype3;
263     return XML_ROLE_DOCTYPE_PUBLIC_ID;
264   }
265   return common(state, tok);
266 }
267 
268 static int FASTCALL
269 doctype3(PROLOG_STATE *state,
270          int tok,
271          const char *ptr,
272          const char *end,
273          const ENCODING *enc)
274 {
275   switch (tok) {
276   case XML_TOK_PROLOG_S:
277     return XML_ROLE_DOCTYPE_NONE;
278   case XML_TOK_LITERAL:
279     state->handler = doctype4;
280     return XML_ROLE_DOCTYPE_SYSTEM_ID;
281   }
282   return common(state, tok);
283 }
284 
285 static int FASTCALL
286 doctype4(PROLOG_STATE *state,
287          int tok,
288          const char *ptr,
289          const char *end,
290          const ENCODING *enc)
291 {
292   switch (tok) {
293   case XML_TOK_PROLOG_S:
294     return XML_ROLE_DOCTYPE_NONE;
295   case XML_TOK_OPEN_BRACKET:
296     state->handler = internalSubset;
297     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
298   case XML_TOK_DECL_CLOSE:
299     state->handler = prolog2;
300     return XML_ROLE_DOCTYPE_CLOSE;
301   }
302   return common(state, tok);
303 }
304 
305 static int FASTCALL
306 doctype5(PROLOG_STATE *state,
307          int tok,
308          const char *ptr,
309          const char *end,
310          const ENCODING *enc)
311 {
312   switch (tok) {
313   case XML_TOK_PROLOG_S:
314     return XML_ROLE_DOCTYPE_NONE;
315   case XML_TOK_DECL_CLOSE:
316     state->handler = prolog2;
317     return XML_ROLE_DOCTYPE_CLOSE;
318   }
319   return common(state, tok);
320 }
321 
322 static int FASTCALL
323 internalSubset(PROLOG_STATE *state,
324                int tok,
325                const char *ptr,
326                const char *end,
327                const ENCODING *enc)
328 {
329   switch (tok) {
330   case XML_TOK_PROLOG_S:
331     return XML_ROLE_NONE;
332   case XML_TOK_DECL_OPEN:
333     if (XmlNameMatchesAscii(enc,
334                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
335                             end,
336                             KW_ENTITY)) {
337       state->handler = entity0;
338       return XML_ROLE_ENTITY_NONE;
339     }
340     if (XmlNameMatchesAscii(enc,
341                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
342                             end,
343                             KW_ATTLIST)) {
344       state->handler = attlist0;
345       return XML_ROLE_ATTLIST_NONE;
346     }
347     if (XmlNameMatchesAscii(enc,
348                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
349                             end,
350                             KW_ELEMENT)) {
351       state->handler = element0;
352       return XML_ROLE_ELEMENT_NONE;
353     }
354     if (XmlNameMatchesAscii(enc,
355                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
356                             end,
357                             KW_NOTATION)) {
358       state->handler = notation0;
359       return XML_ROLE_NOTATION_NONE;
360     }
361     break;
362   case XML_TOK_PI:
363     return XML_ROLE_PI;
364   case XML_TOK_COMMENT:
365     return XML_ROLE_COMMENT;
366   case XML_TOK_PARAM_ENTITY_REF:
367     return XML_ROLE_PARAM_ENTITY_REF;
368   case XML_TOK_CLOSE_BRACKET:
369     state->handler = doctype5;
370     return XML_ROLE_DOCTYPE_NONE;
371   }
372   return common(state, tok);
373 }
374 
375 #ifdef XML_DTD
376 
377 static int FASTCALL
378 externalSubset0(PROLOG_STATE *state,
379                 int tok,
380                 const char *ptr,
381                 const char *end,
382                 const ENCODING *enc)
383 {
384   state->handler = externalSubset1;
385   if (tok == XML_TOK_XML_DECL)
386     return XML_ROLE_TEXT_DECL;
387   return externalSubset1(state, tok, ptr, end, enc);
388 }
389 
390 static int FASTCALL
391 externalSubset1(PROLOG_STATE *state,
392                 int tok,
393                 const char *ptr,
394                 const char *end,
395                 const ENCODING *enc)
396 {
397   switch (tok) {
398   case XML_TOK_COND_SECT_OPEN:
399     state->handler = condSect0;
400     return XML_ROLE_NONE;
401   case XML_TOK_COND_SECT_CLOSE:
402     if (state->includeLevel == 0)
403       break;
404     state->includeLevel -= 1;
405     return XML_ROLE_NONE;
406   case XML_TOK_PROLOG_S:
407     return XML_ROLE_NONE;
408   case XML_TOK_CLOSE_BRACKET:
409     break;
410   case XML_TOK_NONE:
411     if (state->includeLevel)
412       break;
413     return XML_ROLE_NONE;
414   default:
415     return internalSubset(state, tok, ptr, end, enc);
416   }
417   return common(state, tok);
418 }
419 
420 #endif /* XML_DTD */
421 
422 static int FASTCALL
423 entity0(PROLOG_STATE *state,
424         int tok,
425         const char *ptr,
426         const char *end,
427         const ENCODING *enc)
428 {
429   switch (tok) {
430   case XML_TOK_PROLOG_S:
431     return XML_ROLE_ENTITY_NONE;
432   case XML_TOK_PERCENT:
433     state->handler = entity1;
434     return XML_ROLE_ENTITY_NONE;
435   case XML_TOK_NAME:
436     state->handler = entity2;
437     return XML_ROLE_GENERAL_ENTITY_NAME;
438   }
439   return common(state, tok);
440 }
441 
442 static int FASTCALL
443 entity1(PROLOG_STATE *state,
444         int tok,
445         const char *ptr,
446         const char *end,
447         const ENCODING *enc)
448 {
449   switch (tok) {
450   case XML_TOK_PROLOG_S:
451     return XML_ROLE_ENTITY_NONE;
452   case XML_TOK_NAME:
453     state->handler = entity7;
454     return XML_ROLE_PARAM_ENTITY_NAME;
455   }
456   return common(state, tok);
457 }
458 
459 static int FASTCALL
460 entity2(PROLOG_STATE *state,
461         int tok,
462         const char *ptr,
463         const char *end,
464         const ENCODING *enc)
465 {
466   switch (tok) {
467   case XML_TOK_PROLOG_S:
468     return XML_ROLE_ENTITY_NONE;
469   case XML_TOK_NAME:
470     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
471       state->handler = entity4;
472       return XML_ROLE_ENTITY_NONE;
473     }
474     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
475       state->handler = entity3;
476       return XML_ROLE_ENTITY_NONE;
477     }
478     break;
479   case XML_TOK_LITERAL:
480     state->handler = declClose;
481     state->role_none = XML_ROLE_ENTITY_NONE;
482     return XML_ROLE_ENTITY_VALUE;
483   }
484   return common(state, tok);
485 }
486 
487 static int FASTCALL
488 entity3(PROLOG_STATE *state,
489         int tok,
490         const char *ptr,
491         const char *end,
492         const ENCODING *enc)
493 {
494   switch (tok) {
495   case XML_TOK_PROLOG_S:
496     return XML_ROLE_ENTITY_NONE;
497   case XML_TOK_LITERAL:
498     state->handler = entity4;
499     return XML_ROLE_ENTITY_PUBLIC_ID;
500   }
501   return common(state, tok);
502 }
503 
504 static int FASTCALL
505 entity4(PROLOG_STATE *state,
506         int tok,
507         const char *ptr,
508         const char *end,
509         const ENCODING *enc)
510 {
511   switch (tok) {
512   case XML_TOK_PROLOG_S:
513     return XML_ROLE_ENTITY_NONE;
514   case XML_TOK_LITERAL:
515     state->handler = entity5;
516     return XML_ROLE_ENTITY_SYSTEM_ID;
517   }
518   return common(state, tok);
519 }
520 
521 static int FASTCALL
522 entity5(PROLOG_STATE *state,
523         int tok,
524         const char *ptr,
525         const char *end,
526         const ENCODING *enc)
527 {
528   switch (tok) {
529   case XML_TOK_PROLOG_S:
530     return XML_ROLE_ENTITY_NONE;
531   case XML_TOK_DECL_CLOSE:
532     setTopLevel(state);
533     return XML_ROLE_ENTITY_COMPLETE;
534   case XML_TOK_NAME:
535     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
536       state->handler = entity6;
537       return XML_ROLE_ENTITY_NONE;
538     }
539     break;
540   }
541   return common(state, tok);
542 }
543 
544 static int FASTCALL
545 entity6(PROLOG_STATE *state,
546         int tok,
547         const char *ptr,
548         const char *end,
549         const ENCODING *enc)
550 {
551   switch (tok) {
552   case XML_TOK_PROLOG_S:
553     return XML_ROLE_ENTITY_NONE;
554   case XML_TOK_NAME:
555     state->handler = declClose;
556     state->role_none = XML_ROLE_ENTITY_NONE;
557     return XML_ROLE_ENTITY_NOTATION_NAME;
558   }
559   return common(state, tok);
560 }
561 
562 static int FASTCALL
563 entity7(PROLOG_STATE *state,
564         int tok,
565         const char *ptr,
566         const char *end,
567         const ENCODING *enc)
568 {
569   switch (tok) {
570   case XML_TOK_PROLOG_S:
571     return XML_ROLE_ENTITY_NONE;
572   case XML_TOK_NAME:
573     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
574       state->handler = entity9;
575       return XML_ROLE_ENTITY_NONE;
576     }
577     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
578       state->handler = entity8;
579       return XML_ROLE_ENTITY_NONE;
580     }
581     break;
582   case XML_TOK_LITERAL:
583     state->handler = declClose;
584     state->role_none = XML_ROLE_ENTITY_NONE;
585     return XML_ROLE_ENTITY_VALUE;
586   }
587   return common(state, tok);
588 }
589 
590 static int FASTCALL
591 entity8(PROLOG_STATE *state,
592         int tok,
593         const char *ptr,
594         const char *end,
595         const ENCODING *enc)
596 {
597   switch (tok) {
598   case XML_TOK_PROLOG_S:
599     return XML_ROLE_ENTITY_NONE;
600   case XML_TOK_LITERAL:
601     state->handler = entity9;
602     return XML_ROLE_ENTITY_PUBLIC_ID;
603   }
604   return common(state, tok);
605 }
606 
607 static int FASTCALL
608 entity9(PROLOG_STATE *state,
609         int tok,
610         const char *ptr,
611         const char *end,
612         const ENCODING *enc)
613 {
614   switch (tok) {
615   case XML_TOK_PROLOG_S:
616     return XML_ROLE_ENTITY_NONE;
617   case XML_TOK_LITERAL:
618     state->handler = entity10;
619     return XML_ROLE_ENTITY_SYSTEM_ID;
620   }
621   return common(state, tok);
622 }
623 
624 static int FASTCALL
625 entity10(PROLOG_STATE *state,
626          int tok,
627          const char *ptr,
628          const char *end,
629          const ENCODING *enc)
630 {
631   switch (tok) {
632   case XML_TOK_PROLOG_S:
633     return XML_ROLE_ENTITY_NONE;
634   case XML_TOK_DECL_CLOSE:
635     setTopLevel(state);
636     return XML_ROLE_ENTITY_COMPLETE;
637   }
638   return common(state, tok);
639 }
640 
641 static int FASTCALL
642 notation0(PROLOG_STATE *state,
643           int tok,
644           const char *ptr,
645           const char *end,
646           const ENCODING *enc)
647 {
648   switch (tok) {
649   case XML_TOK_PROLOG_S:
650     return XML_ROLE_NOTATION_NONE;
651   case XML_TOK_NAME:
652     state->handler = notation1;
653     return XML_ROLE_NOTATION_NAME;
654   }
655   return common(state, tok);
656 }
657 
658 static int FASTCALL
659 notation1(PROLOG_STATE *state,
660           int tok,
661           const char *ptr,
662           const char *end,
663           const ENCODING *enc)
664 {
665   switch (tok) {
666   case XML_TOK_PROLOG_S:
667     return XML_ROLE_NOTATION_NONE;
668   case XML_TOK_NAME:
669     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
670       state->handler = notation3;
671       return XML_ROLE_NOTATION_NONE;
672     }
673     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
674       state->handler = notation2;
675       return XML_ROLE_NOTATION_NONE;
676     }
677     break;
678   }
679   return common(state, tok);
680 }
681 
682 static int FASTCALL
683 notation2(PROLOG_STATE *state,
684           int tok,
685           const char *ptr,
686           const char *end,
687           const ENCODING *enc)
688 {
689   switch (tok) {
690   case XML_TOK_PROLOG_S:
691     return XML_ROLE_NOTATION_NONE;
692   case XML_TOK_LITERAL:
693     state->handler = notation4;
694     return XML_ROLE_NOTATION_PUBLIC_ID;
695   }
696   return common(state, tok);
697 }
698 
699 static int FASTCALL
700 notation3(PROLOG_STATE *state,
701           int tok,
702           const char *ptr,
703           const char *end,
704           const ENCODING *enc)
705 {
706   switch (tok) {
707   case XML_TOK_PROLOG_S:
708     return XML_ROLE_NOTATION_NONE;
709   case XML_TOK_LITERAL:
710     state->handler = declClose;
711     state->role_none = XML_ROLE_NOTATION_NONE;
712     return XML_ROLE_NOTATION_SYSTEM_ID;
713   }
714   return common(state, tok);
715 }
716 
717 static int FASTCALL
718 notation4(PROLOG_STATE *state,
719           int tok,
720           const char *ptr,
721           const char *end,
722           const ENCODING *enc)
723 {
724   switch (tok) {
725   case XML_TOK_PROLOG_S:
726     return XML_ROLE_NOTATION_NONE;
727   case XML_TOK_LITERAL:
728     state->handler = declClose;
729     state->role_none = XML_ROLE_NOTATION_NONE;
730     return XML_ROLE_NOTATION_SYSTEM_ID;
731   case XML_TOK_DECL_CLOSE:
732     setTopLevel(state);
733     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
734   }
735   return common(state, tok);
736 }
737 
738 static int FASTCALL
739 attlist0(PROLOG_STATE *state,
740          int tok,
741          const char *ptr,
742          const char *end,
743          const ENCODING *enc)
744 {
745   switch (tok) {
746   case XML_TOK_PROLOG_S:
747     return XML_ROLE_ATTLIST_NONE;
748   case XML_TOK_NAME:
749   case XML_TOK_PREFIXED_NAME:
750     state->handler = attlist1;
751     return XML_ROLE_ATTLIST_ELEMENT_NAME;
752   }
753   return common(state, tok);
754 }
755 
756 static int FASTCALL
757 attlist1(PROLOG_STATE *state,
758          int tok,
759          const char *ptr,
760          const char *end,
761          const ENCODING *enc)
762 {
763   switch (tok) {
764   case XML_TOK_PROLOG_S:
765     return XML_ROLE_ATTLIST_NONE;
766   case XML_TOK_DECL_CLOSE:
767     setTopLevel(state);
768     return XML_ROLE_ATTLIST_NONE;
769   case XML_TOK_NAME:
770   case XML_TOK_PREFIXED_NAME:
771     state->handler = attlist2;
772     return XML_ROLE_ATTRIBUTE_NAME;
773   }
774   return common(state, tok);
775 }
776 
777 static int FASTCALL
778 attlist2(PROLOG_STATE *state,
779          int tok,
780          const char *ptr,
781          const char *end,
782          const ENCODING *enc)
783 {
784   switch (tok) {
785   case XML_TOK_PROLOG_S:
786     return XML_ROLE_ATTLIST_NONE;
787   case XML_TOK_NAME:
788     {
789       static const char *types[] = {
790         KW_CDATA,
791         KW_ID,
792         KW_IDREF,
793         KW_IDREFS,
794         KW_ENTITY,
795         KW_ENTITIES,
796         KW_NMTOKEN,
797         KW_NMTOKENS,
798       };
799       int i;
800       for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
801         if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
802           state->handler = attlist8;
803           return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
804         }
805     }
806     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
807       state->handler = attlist5;
808       return XML_ROLE_ATTLIST_NONE;
809     }
810     break;
811   case XML_TOK_OPEN_PAREN:
812     state->handler = attlist3;
813     return XML_ROLE_ATTLIST_NONE;
814   }
815   return common(state, tok);
816 }
817 
818 static int FASTCALL
819 attlist3(PROLOG_STATE *state,
820          int tok,
821          const char *ptr,
822          const char *end,
823          const ENCODING *enc)
824 {
825   switch (tok) {
826   case XML_TOK_PROLOG_S:
827     return XML_ROLE_ATTLIST_NONE;
828   case XML_TOK_NMTOKEN:
829   case XML_TOK_NAME:
830   case XML_TOK_PREFIXED_NAME:
831     state->handler = attlist4;
832     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
833   }
834   return common(state, tok);
835 }
836 
837 static int FASTCALL
838 attlist4(PROLOG_STATE *state,
839          int tok,
840          const char *ptr,
841          const char *end,
842          const ENCODING *enc)
843 {
844   switch (tok) {
845   case XML_TOK_PROLOG_S:
846     return XML_ROLE_ATTLIST_NONE;
847   case XML_TOK_CLOSE_PAREN:
848     state->handler = attlist8;
849     return XML_ROLE_ATTLIST_NONE;
850   case XML_TOK_OR:
851     state->handler = attlist3;
852     return XML_ROLE_ATTLIST_NONE;
853   }
854   return common(state, tok);
855 }
856 
857 static int FASTCALL
858 attlist5(PROLOG_STATE *state,
859          int tok,
860          const char *ptr,
861          const char *end,
862          const ENCODING *enc)
863 {
864   switch (tok) {
865   case XML_TOK_PROLOG_S:
866     return XML_ROLE_ATTLIST_NONE;
867   case XML_TOK_OPEN_PAREN:
868     state->handler = attlist6;
869     return XML_ROLE_ATTLIST_NONE;
870   }
871   return common(state, tok);
872 }
873 
874 static int FASTCALL
875 attlist6(PROLOG_STATE *state,
876          int tok,
877          const char *ptr,
878          const char *end,
879          const ENCODING *enc)
880 {
881   switch (tok) {
882   case XML_TOK_PROLOG_S:
883     return XML_ROLE_ATTLIST_NONE;
884   case XML_TOK_NAME:
885     state->handler = attlist7;
886     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
887   }
888   return common(state, tok);
889 }
890 
891 static int FASTCALL
892 attlist7(PROLOG_STATE *state,
893          int tok,
894          const char *ptr,
895          const char *end,
896          const ENCODING *enc)
897 {
898   switch (tok) {
899   case XML_TOK_PROLOG_S:
900     return XML_ROLE_ATTLIST_NONE;
901   case XML_TOK_CLOSE_PAREN:
902     state->handler = attlist8;
903     return XML_ROLE_ATTLIST_NONE;
904   case XML_TOK_OR:
905     state->handler = attlist6;
906     return XML_ROLE_ATTLIST_NONE;
907   }
908   return common(state, tok);
909 }
910 
911 /* default value */
912 static int FASTCALL
913 attlist8(PROLOG_STATE *state,
914          int tok,
915          const char *ptr,
916          const char *end,
917          const ENCODING *enc)
918 {
919   switch (tok) {
920   case XML_TOK_PROLOG_S:
921     return XML_ROLE_ATTLIST_NONE;
922   case XML_TOK_POUND_NAME:
923     if (XmlNameMatchesAscii(enc,
924                             ptr + MIN_BYTES_PER_CHAR(enc),
925                             end,
926                             KW_IMPLIED)) {
927       state->handler = attlist1;
928       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
929     }
930     if (XmlNameMatchesAscii(enc,
931                             ptr + MIN_BYTES_PER_CHAR(enc),
932                             end,
933                             KW_REQUIRED)) {
934       state->handler = attlist1;
935       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
936     }
937     if (XmlNameMatchesAscii(enc,
938                             ptr + MIN_BYTES_PER_CHAR(enc),
939                             end,
940                             KW_FIXED)) {
941       state->handler = attlist9;
942       return XML_ROLE_ATTLIST_NONE;
943     }
944     break;
945   case XML_TOK_LITERAL:
946     state->handler = attlist1;
947     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
948   }
949   return common(state, tok);
950 }
951 
952 static int FASTCALL
953 attlist9(PROLOG_STATE *state,
954          int tok,
955          const char *ptr,
956          const char *end,
957          const ENCODING *enc)
958 {
959   switch (tok) {
960   case XML_TOK_PROLOG_S:
961     return XML_ROLE_ATTLIST_NONE;
962   case XML_TOK_LITERAL:
963     state->handler = attlist1;
964     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
965   }
966   return common(state, tok);
967 }
968 
969 static int FASTCALL
970 element0(PROLOG_STATE *state,
971          int tok,
972          const char *ptr,
973          const char *end,
974          const ENCODING *enc)
975 {
976   switch (tok) {
977   case XML_TOK_PROLOG_S:
978     return XML_ROLE_ELEMENT_NONE;
979   case XML_TOK_NAME:
980   case XML_TOK_PREFIXED_NAME:
981     state->handler = element1;
982     return XML_ROLE_ELEMENT_NAME;
983   }
984   return common(state, tok);
985 }
986 
987 static int FASTCALL
988 element1(PROLOG_STATE *state,
989          int tok,
990          const char *ptr,
991          const char *end,
992          const ENCODING *enc)
993 {
994   switch (tok) {
995   case XML_TOK_PROLOG_S:
996     return XML_ROLE_ELEMENT_NONE;
997   case XML_TOK_NAME:
998     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
999       state->handler = declClose;
1000       state->role_none = XML_ROLE_ELEMENT_NONE;
1001       return XML_ROLE_CONTENT_EMPTY;
1002     }
1003     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1004       state->handler = declClose;
1005       state->role_none = XML_ROLE_ELEMENT_NONE;
1006       return XML_ROLE_CONTENT_ANY;
1007     }
1008     break;
1009   case XML_TOK_OPEN_PAREN:
1010     state->handler = element2;
1011     state->level = 1;
1012     return XML_ROLE_GROUP_OPEN;
1013   }
1014   return common(state, tok);
1015 }
1016 
1017 static int FASTCALL
1018 element2(PROLOG_STATE *state,
1019          int tok,
1020          const char *ptr,
1021          const char *end,
1022          const ENCODING *enc)
1023 {
1024   switch (tok) {
1025   case XML_TOK_PROLOG_S:
1026     return XML_ROLE_ELEMENT_NONE;
1027   case XML_TOK_POUND_NAME:
1028     if (XmlNameMatchesAscii(enc,
1029                             ptr + MIN_BYTES_PER_CHAR(enc),
1030                             end,
1031                             KW_PCDATA)) {
1032       state->handler = element3;
1033       return XML_ROLE_CONTENT_PCDATA;
1034     }
1035     break;
1036   case XML_TOK_OPEN_PAREN:
1037     state->level = 2;
1038     state->handler = element6;
1039     return XML_ROLE_GROUP_OPEN;
1040   case XML_TOK_NAME:
1041   case XML_TOK_PREFIXED_NAME:
1042     state->handler = element7;
1043     return XML_ROLE_CONTENT_ELEMENT;
1044   case XML_TOK_NAME_QUESTION:
1045     state->handler = element7;
1046     return XML_ROLE_CONTENT_ELEMENT_OPT;
1047   case XML_TOK_NAME_ASTERISK:
1048     state->handler = element7;
1049     return XML_ROLE_CONTENT_ELEMENT_REP;
1050   case XML_TOK_NAME_PLUS:
1051     state->handler = element7;
1052     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1053   }
1054   return common(state, tok);
1055 }
1056 
1057 static int FASTCALL
1058 element3(PROLOG_STATE *state,
1059          int tok,
1060          const char *ptr,
1061          const char *end,
1062          const ENCODING *enc)
1063 {
1064   switch (tok) {
1065   case XML_TOK_PROLOG_S:
1066     return XML_ROLE_ELEMENT_NONE;
1067   case XML_TOK_CLOSE_PAREN:
1068     state->handler = declClose;
1069     state->role_none = XML_ROLE_ELEMENT_NONE;
1070     return XML_ROLE_GROUP_CLOSE;
1071   case XML_TOK_CLOSE_PAREN_ASTERISK:
1072     state->handler = declClose;
1073     state->role_none = XML_ROLE_ELEMENT_NONE;
1074     return XML_ROLE_GROUP_CLOSE_REP;
1075   case XML_TOK_OR:
1076     state->handler = element4;
1077     return XML_ROLE_ELEMENT_NONE;
1078   }
1079   return common(state, tok);
1080 }
1081 
1082 static int FASTCALL
1083 element4(PROLOG_STATE *state,
1084          int tok,
1085          const char *ptr,
1086          const char *end,
1087          const ENCODING *enc)
1088 {
1089   switch (tok) {
1090   case XML_TOK_PROLOG_S:
1091     return XML_ROLE_ELEMENT_NONE;
1092   case XML_TOK_NAME:
1093   case XML_TOK_PREFIXED_NAME:
1094     state->handler = element5;
1095     return XML_ROLE_CONTENT_ELEMENT;
1096   }
1097   return common(state, tok);
1098 }
1099 
1100 static int FASTCALL
1101 element5(PROLOG_STATE *state,
1102          int tok,
1103          const char *ptr,
1104          const char *end,
1105          const ENCODING *enc)
1106 {
1107   switch (tok) {
1108   case XML_TOK_PROLOG_S:
1109     return XML_ROLE_ELEMENT_NONE;
1110   case XML_TOK_CLOSE_PAREN_ASTERISK:
1111     state->handler = declClose;
1112     state->role_none = XML_ROLE_ELEMENT_NONE;
1113     return XML_ROLE_GROUP_CLOSE_REP;
1114   case XML_TOK_OR:
1115     state->handler = element4;
1116     return XML_ROLE_ELEMENT_NONE;
1117   }
1118   return common(state, tok);
1119 }
1120 
1121 static int FASTCALL
1122 element6(PROLOG_STATE *state,
1123          int tok,
1124          const char *ptr,
1125          const char *end,
1126          const ENCODING *enc)
1127 {
1128   switch (tok) {
1129   case XML_TOK_PROLOG_S:
1130     return XML_ROLE_ELEMENT_NONE;
1131   case XML_TOK_OPEN_PAREN:
1132     state->level += 1;
1133     return XML_ROLE_GROUP_OPEN;
1134   case XML_TOK_NAME:
1135   case XML_TOK_PREFIXED_NAME:
1136     state->handler = element7;
1137     return XML_ROLE_CONTENT_ELEMENT;
1138   case XML_TOK_NAME_QUESTION:
1139     state->handler = element7;
1140     return XML_ROLE_CONTENT_ELEMENT_OPT;
1141   case XML_TOK_NAME_ASTERISK:
1142     state->handler = element7;
1143     return XML_ROLE_CONTENT_ELEMENT_REP;
1144   case XML_TOK_NAME_PLUS:
1145     state->handler = element7;
1146     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1147   }
1148   return common(state, tok);
1149 }
1150 
1151 static int FASTCALL
1152 element7(PROLOG_STATE *state,
1153          int tok,
1154          const char *ptr,
1155          const char *end,
1156          const ENCODING *enc)
1157 {
1158   switch (tok) {
1159   case XML_TOK_PROLOG_S:
1160     return XML_ROLE_ELEMENT_NONE;
1161   case XML_TOK_CLOSE_PAREN:
1162     state->level -= 1;
1163     if (state->level == 0) {
1164       state->handler = declClose;
1165       state->role_none = XML_ROLE_ELEMENT_NONE;
1166     }
1167     return XML_ROLE_GROUP_CLOSE;
1168   case XML_TOK_CLOSE_PAREN_ASTERISK:
1169     state->level -= 1;
1170     if (state->level == 0) {
1171       state->handler = declClose;
1172       state->role_none = XML_ROLE_ELEMENT_NONE;
1173     }
1174     return XML_ROLE_GROUP_CLOSE_REP;
1175   case XML_TOK_CLOSE_PAREN_QUESTION:
1176     state->level -= 1;
1177     if (state->level == 0) {
1178       state->handler = declClose;
1179       state->role_none = XML_ROLE_ELEMENT_NONE;
1180     }
1181     return XML_ROLE_GROUP_CLOSE_OPT;
1182   case XML_TOK_CLOSE_PAREN_PLUS:
1183     state->level -= 1;
1184     if (state->level == 0) {
1185       state->handler = declClose;
1186       state->role_none = XML_ROLE_ELEMENT_NONE;
1187     }
1188     return XML_ROLE_GROUP_CLOSE_PLUS;
1189   case XML_TOK_COMMA:
1190     state->handler = element6;
1191     return XML_ROLE_GROUP_SEQUENCE;
1192   case XML_TOK_OR:
1193     state->handler = element6;
1194     return XML_ROLE_GROUP_CHOICE;
1195   }
1196   return common(state, tok);
1197 }
1198 
1199 #ifdef XML_DTD
1200 
1201 static int FASTCALL
1202 condSect0(PROLOG_STATE *state,
1203           int tok,
1204           const char *ptr,
1205           const char *end,
1206           const ENCODING *enc)
1207 {
1208   switch (tok) {
1209   case XML_TOK_PROLOG_S:
1210     return XML_ROLE_NONE;
1211   case XML_TOK_NAME:
1212     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1213       state->handler = condSect1;
1214       return XML_ROLE_NONE;
1215     }
1216     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1217       state->handler = condSect2;
1218       return XML_ROLE_NONE;
1219     }
1220     break;
1221   }
1222   return common(state, tok);
1223 }
1224 
1225 static int FASTCALL
1226 condSect1(PROLOG_STATE *state,
1227           int tok,
1228           const char *ptr,
1229           const char *end,
1230           const ENCODING *enc)
1231 {
1232   switch (tok) {
1233   case XML_TOK_PROLOG_S:
1234     return XML_ROLE_NONE;
1235   case XML_TOK_OPEN_BRACKET:
1236     state->handler = externalSubset1;
1237     state->includeLevel += 1;
1238     return XML_ROLE_NONE;
1239   }
1240   return common(state, tok);
1241 }
1242 
1243 static int FASTCALL
1244 condSect2(PROLOG_STATE *state,
1245           int tok,
1246           const char *ptr,
1247           const char *end,
1248           const ENCODING *enc)
1249 {
1250   switch (tok) {
1251   case XML_TOK_PROLOG_S:
1252     return XML_ROLE_NONE;
1253   case XML_TOK_OPEN_BRACKET:
1254     state->handler = externalSubset1;
1255     return XML_ROLE_IGNORE_SECT;
1256   }
1257   return common(state, tok);
1258 }
1259 
1260 #endif /* XML_DTD */
1261 
1262 static int FASTCALL
1263 declClose(PROLOG_STATE *state,
1264           int tok,
1265           const char *ptr,
1266           const char *end,
1267           const ENCODING *enc)
1268 {
1269   switch (tok) {
1270   case XML_TOK_PROLOG_S:
1271     return state->role_none;
1272   case XML_TOK_DECL_CLOSE:
1273     setTopLevel(state);
1274     return state->role_none;
1275   }
1276   return common(state, tok);
1277 }
1278 
1279 static int FASTCALL
1280 error(PROLOG_STATE *state,
1281       int tok,
1282       const char *ptr,
1283       const char *end,
1284       const ENCODING *enc)
1285 {
1286   return XML_ROLE_NONE;
1287 }
1288 
1289 static int FASTCALL
1290 common(PROLOG_STATE *state, int tok)
1291 {
1292 #ifdef XML_DTD
1293   if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1294     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1295 #endif
1296   state->handler = error;
1297   return XML_ROLE_ERROR;
1298 }
1299 
1300 void
1301 XmlPrologStateInit(PROLOG_STATE *state)
1302 {
1303   state->handler = prolog0;
1304 #ifdef XML_DTD
1305   state->documentEntity = 1;
1306   state->includeLevel = 0;
1307   state->inEntityValue = 0;
1308 #endif /* XML_DTD */
1309 }
1310 
1311 #ifdef XML_DTD
1312 
1313 void
1314 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1315 {
1316   state->handler = externalSubset0;
1317   state->documentEntity = 0;
1318   state->includeLevel = 0;
1319 }
1320 
1321 #endif /* XML_DTD */
1322