xref: /freebsd/contrib/expat/lib/xmltok_impl.c (revision 4543ef516683042d46f3bd3bb8a4f3f746e00499)
/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
   Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
415bb6a25fSPoul-Henning Kamp 
42220ed979SColeman Kane #ifdef XML_TOK_IMPL_C
43220ed979SColeman Kane 
/* Fallback used when the including file has not supplied its own
   IS_INVALID_CHAR: every multi-byte sequence is then treated as valid, so
   the IS_INVALID_CHAR tests in the macros below are constant-false. */
#  ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
#    define IS_INVALID_CHAR(enc, ptr, n) (0)
#  endif
475bb6a25fSPoul-Henning Kamp 
/* Expands to the `case BT_LEAD<n>:` arm of a switch over a byte type, for
   the lead byte of an n-byte character.

   Besides its arguments it uses `enc` and `end` from the enclosing function.

   - fewer than n bytes left before `end`: returns XML_TOK_PARTIAL_CHAR;
   - IS_INVALID_CHAR reports the sequence invalid: stores ptr in
     *nextTokPtr and returns XML_TOK_INVALID;
   - otherwise advances ptr by n and breaks out of the switch.

   Arguments are parenthesised so that non-trivial expressions expand
   safely (same convention as CHECK_NMSTRT_CASE). */
#  define INVALID_LEAD_CASE(n, ptr, nextTokPtr)                                \
  case BT_LEAD##n:                                                             \
    if (end - (ptr) < (n))                                                     \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    (ptr) += (n);                                                              \
    break;
585bb6a25fSPoul-Henning Kamp 
/* Expands to the switch arms that reject characters which may not appear
   in the current context: the 2-, 3- and 4-byte lead-byte arms from
   INVALID_LEAD_CASE, followed by BT_NONXML, BT_MALFORM and BT_TRAIL, all
   three of which store ptr in *nextTokPtr and return XML_TOK_INVALID. */
#  define INVALID_CASES(ptr, nextTokPtr)                                       \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr)                                      \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr)                                      \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr)                                      \
  case BT_NONXML:                                                              \
  case BT_MALFORM:                                                             \
  case BT_TRAIL:                                                               \
    *(nextTokPtr) = (ptr);                                                     \
    return XML_TOK_INVALID;
685bb6a25fSPoul-Henning Kamp 
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the interior
   of a name, for the lead byte of an n-byte character.

   - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR;
   - the sequence is invalid (IS_INVALID_CHAR) or is not a name character
     (IS_NAME_CHAR): stores ptr in *nextTokPtr, returns XML_TOK_INVALID;
   - otherwise advances ptr by n and breaks out of the switch.

   Arguments are parenthesised so that non-trivial expressions expand
   safely (same convention as CHECK_NMSTRT_CASE). */
#  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)                        \
  case BT_LEAD##n:                                                             \
    if ((end) - (ptr) < (n))                                                   \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    (ptr) += (n);                                                              \
    break;
795bb6a25fSPoul-Henning Kamp 
/* Expands to all switch arms that accept one name character and step ptr
   past it:

   - BT_NONASCII: rejected with XML_TOK_INVALID (ptr stored in *nextTokPtr)
     unless IS_NAME_CHAR_MINBPC accepts it; otherwise handled like the
     single-unit arms below;
   - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: advance ptr by
     MINBPC(enc) and break out of the switch;
   - BT_LEAD2..BT_LEAD4: see CHECK_NAME_CASE. */
#  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)                          \
  case BT_NONASCII:                                                            \
    if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {                                     \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    /* fall through */                                                         \
  case BT_NMSTRT:                                                              \
  case BT_HEX:                                                                 \
  case BT_DIGIT:                                                               \
  case BT_NAME:                                                                \
  case BT_MINUS:                                                               \
    (ptr) += MINBPC(enc);                                                      \
    break;                                                                     \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr)                              \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr)                              \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
975bb6a25fSPoul-Henning Kamp 
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name, for the lead byte of an n-byte character.

   - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR;
   - the sequence is invalid (IS_INVALID_CHAR) or is not a name-start
     character (IS_NMSTRT_CHAR): stores ptr in *nextTokPtr and returns
     XML_TOK_INVALID;
   - otherwise advances ptr by n and breaks out of the switch. */
#  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)                      \
  case BT_LEAD##n:                                                             \
    if ((end) - (ptr) < (n))                                                   \
      return XML_TOK_PARTIAL_CHAR;                                             \
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    (ptr) += (n);                                                              \
    break;
1085bb6a25fSPoul-Henning Kamp 
/* Expands to all switch arms that accept one name-start character and
   step ptr past it:

   - BT_NONASCII: rejected with XML_TOK_INVALID (ptr stored in *nextTokPtr)
     unless IS_NMSTRT_CHAR_MINBPC accepts it; otherwise handled like the
     single-unit arms below;
   - BT_NMSTRT, BT_HEX: advance ptr by MINBPC(enc) and break out of the
     switch;
   - BT_LEAD2..BT_LEAD4: see CHECK_NMSTRT_CASE. */
#  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)                        \
  case BT_NONASCII:                                                            \
    if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {                                   \
      *(nextTokPtr) = (ptr);                                                   \
      return XML_TOK_INVALID;                                                  \
    }                                                                          \
    /* fall through */                                                         \
  case BT_NMSTRT:                                                              \
  case BT_HEX:                                                                 \
    (ptr) += MINBPC(enc);                                                      \
    break;                                                                     \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr)                            \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr)                            \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
1235bb6a25fSPoul-Henning Kamp 
/* PREFIX decorates the name of every function defined in this file. When
   the including file has not defined it, names are used undecorated. */
#  ifndef PREFIX
#    define PREFIX(ident) ident
#  endif
1275bb6a25fSPoul-Henning Kamp 
/* HAS_CHARS is true when at least `count` minimum-size characters, i.e.
   count * MINBPC(enc) bytes, lie between ptr and end. */
#  define HAS_CHARS(enc, ptr, end, count)                                      \
    ((end) - (ptr) >= ((count) * MINBPC(enc)))

/* HAS_CHAR is HAS_CHARS for a single character. */
#  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
132be8aff81SXin LI 
/* REQUIRE_CHARS makes the enclosing function return XML_TOK_PARTIAL
   unless HAS_CHARS(enc, ptr, end, count) holds.

   It expands to a brace-enclosed block, not a do { } while (0) statement:
   written with a trailing ';' it yields the block plus an empty statement,
   so do not use it as the unbraced body of an `if` that has an `else`. */
#  define REQUIRE_CHARS(enc, ptr, end, count)                                  \
    {                                                                          \
      if (! HAS_CHARS(enc, ptr, end, count)) {                                 \
        return XML_TOK_PARTIAL;                                                \
      }                                                                        \
    }

/* REQUIRE_CHAR is REQUIRE_CHARS for a single character. */
#  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
141be8aff81SXin LI 
1425bb6a25fSPoul-Henning Kamp /* ptr points to character following "<!-" */
1435bb6a25fSPoul-Henning Kamp 
144220ed979SColeman Kane static int PTRCALL
PREFIX(scanComment)1456b2c1e49SXin LI PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
1466b2c1e49SXin LI                     const char **nextTokPtr) {
147be8aff81SXin LI   if (HAS_CHAR(enc, ptr, end)) {
1485bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
1495bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
1505bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
1515bb6a25fSPoul-Henning Kamp     }
1525bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
153be8aff81SXin LI     while (HAS_CHAR(enc, ptr, end)) {
1545bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
1555bb6a25fSPoul-Henning Kamp         INVALID_CASES(ptr, nextTokPtr)
1565bb6a25fSPoul-Henning Kamp       case BT_MINUS:
157be8aff81SXin LI         ptr += MINBPC(enc);
158be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
1595bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160be8aff81SXin LI           ptr += MINBPC(enc);
161be8aff81SXin LI           REQUIRE_CHAR(enc, ptr, end);
1625bb6a25fSPoul-Henning Kamp           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1635bb6a25fSPoul-Henning Kamp             *nextTokPtr = ptr;
1645bb6a25fSPoul-Henning Kamp             return XML_TOK_INVALID;
1655bb6a25fSPoul-Henning Kamp           }
1665bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr + MINBPC(enc);
1675bb6a25fSPoul-Henning Kamp           return XML_TOK_COMMENT;
1685bb6a25fSPoul-Henning Kamp         }
1695bb6a25fSPoul-Henning Kamp         break;
1705bb6a25fSPoul-Henning Kamp       default:
1715bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
1725bb6a25fSPoul-Henning Kamp         break;
1735bb6a25fSPoul-Henning Kamp       }
1745bb6a25fSPoul-Henning Kamp     }
1755bb6a25fSPoul-Henning Kamp   }
1765bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
1775bb6a25fSPoul-Henning Kamp }
1785bb6a25fSPoul-Henning Kamp 
1795bb6a25fSPoul-Henning Kamp /* ptr points to character following "<!" */
1805bb6a25fSPoul-Henning Kamp 
181220ed979SColeman Kane static int PTRCALL
PREFIX(scanDecl)1826b2c1e49SXin LI PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
1836b2c1e49SXin LI                  const char **nextTokPtr) {
184be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
1855bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
1865bb6a25fSPoul-Henning Kamp   case BT_MINUS:
1875bb6a25fSPoul-Henning Kamp     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1885bb6a25fSPoul-Henning Kamp   case BT_LSQB:
1895bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
1905bb6a25fSPoul-Henning Kamp     return XML_TOK_COND_SECT_OPEN;
1915bb6a25fSPoul-Henning Kamp   case BT_NMSTRT:
1925bb6a25fSPoul-Henning Kamp   case BT_HEX:
1935bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
1945bb6a25fSPoul-Henning Kamp     break;
1955bb6a25fSPoul-Henning Kamp   default:
1965bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
1975bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
1985bb6a25fSPoul-Henning Kamp   }
199be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
2005bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
2015bb6a25fSPoul-Henning Kamp     case BT_PERCNT:
202be8aff81SXin LI       REQUIRE_CHARS(enc, ptr, end, 2);
2035bb6a25fSPoul-Henning Kamp       /* don't allow <!ENTITY% foo "whatever"> */
2045bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
2056b2c1e49SXin LI       case BT_S:
2066b2c1e49SXin LI       case BT_CR:
2076b2c1e49SXin LI       case BT_LF:
2086b2c1e49SXin LI       case BT_PERCNT:
2095bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
2105bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
2115bb6a25fSPoul-Henning Kamp       }
2125bb6a25fSPoul-Henning Kamp       /* fall through */
2136b2c1e49SXin LI     case BT_S:
2146b2c1e49SXin LI     case BT_CR:
2156b2c1e49SXin LI     case BT_LF:
2165bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
2175bb6a25fSPoul-Henning Kamp       return XML_TOK_DECL_OPEN;
2185bb6a25fSPoul-Henning Kamp     case BT_NMSTRT:
2195bb6a25fSPoul-Henning Kamp     case BT_HEX:
2205bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
2215bb6a25fSPoul-Henning Kamp       break;
2225bb6a25fSPoul-Henning Kamp     default:
2235bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
2245bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
2255bb6a25fSPoul-Henning Kamp     }
2265bb6a25fSPoul-Henning Kamp   }
2275bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
2285bb6a25fSPoul-Henning Kamp }
2295bb6a25fSPoul-Henning Kamp 
230220ed979SColeman Kane static int PTRCALL
PREFIX(checkPiTarget)2316b2c1e49SXin LI PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
2326b2c1e49SXin LI                       int *tokPtr) {
2335bb6a25fSPoul-Henning Kamp   int upper = 0;
2346b2c1e49SXin LI   UNUSED_P(enc);
2355bb6a25fSPoul-Henning Kamp   *tokPtr = XML_TOK_PI;
2365bb6a25fSPoul-Henning Kamp   if (end - ptr != MINBPC(enc) * 3)
2375bb6a25fSPoul-Henning Kamp     return 1;
2385bb6a25fSPoul-Henning Kamp   switch (BYTE_TO_ASCII(enc, ptr)) {
2395bb6a25fSPoul-Henning Kamp   case ASCII_x:
2405bb6a25fSPoul-Henning Kamp     break;
2415bb6a25fSPoul-Henning Kamp   case ASCII_X:
2425bb6a25fSPoul-Henning Kamp     upper = 1;
2435bb6a25fSPoul-Henning Kamp     break;
2445bb6a25fSPoul-Henning Kamp   default:
2455bb6a25fSPoul-Henning Kamp     return 1;
2465bb6a25fSPoul-Henning Kamp   }
2475bb6a25fSPoul-Henning Kamp   ptr += MINBPC(enc);
2485bb6a25fSPoul-Henning Kamp   switch (BYTE_TO_ASCII(enc, ptr)) {
2495bb6a25fSPoul-Henning Kamp   case ASCII_m:
2505bb6a25fSPoul-Henning Kamp     break;
2515bb6a25fSPoul-Henning Kamp   case ASCII_M:
2525bb6a25fSPoul-Henning Kamp     upper = 1;
2535bb6a25fSPoul-Henning Kamp     break;
2545bb6a25fSPoul-Henning Kamp   default:
2555bb6a25fSPoul-Henning Kamp     return 1;
2565bb6a25fSPoul-Henning Kamp   }
2575bb6a25fSPoul-Henning Kamp   ptr += MINBPC(enc);
2585bb6a25fSPoul-Henning Kamp   switch (BYTE_TO_ASCII(enc, ptr)) {
2595bb6a25fSPoul-Henning Kamp   case ASCII_l:
2605bb6a25fSPoul-Henning Kamp     break;
2615bb6a25fSPoul-Henning Kamp   case ASCII_L:
2625bb6a25fSPoul-Henning Kamp     upper = 1;
2635bb6a25fSPoul-Henning Kamp     break;
2645bb6a25fSPoul-Henning Kamp   default:
2655bb6a25fSPoul-Henning Kamp     return 1;
2665bb6a25fSPoul-Henning Kamp   }
2675bb6a25fSPoul-Henning Kamp   if (upper)
2685bb6a25fSPoul-Henning Kamp     return 0;
2695bb6a25fSPoul-Henning Kamp   *tokPtr = XML_TOK_XML_DECL;
2705bb6a25fSPoul-Henning Kamp   return 1;
2715bb6a25fSPoul-Henning Kamp }
2725bb6a25fSPoul-Henning Kamp 
2735bb6a25fSPoul-Henning Kamp /* ptr points to character following "<?" */
2745bb6a25fSPoul-Henning Kamp 
275220ed979SColeman Kane static int PTRCALL
PREFIX(scanPi)2766b2c1e49SXin LI PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
2776b2c1e49SXin LI                const char **nextTokPtr) {
2785bb6a25fSPoul-Henning Kamp   int tok;
2795bb6a25fSPoul-Henning Kamp   const char *target = ptr;
280be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
2815bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
2825bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
2835bb6a25fSPoul-Henning Kamp   default:
2845bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
2855bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
2865bb6a25fSPoul-Henning Kamp   }
287be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
2885bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
2895bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
2906b2c1e49SXin LI     case BT_S:
2916b2c1e49SXin LI     case BT_CR:
2926b2c1e49SXin LI     case BT_LF:
2935bb6a25fSPoul-Henning Kamp       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
2945bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
2955bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
2965bb6a25fSPoul-Henning Kamp       }
2975bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
298be8aff81SXin LI       while (HAS_CHAR(enc, ptr, end)) {
2995bb6a25fSPoul-Henning Kamp         switch (BYTE_TYPE(enc, ptr)) {
3005bb6a25fSPoul-Henning Kamp           INVALID_CASES(ptr, nextTokPtr)
3015bb6a25fSPoul-Henning Kamp         case BT_QUEST:
3025bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
303be8aff81SXin LI           REQUIRE_CHAR(enc, ptr, end);
3045bb6a25fSPoul-Henning Kamp           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3055bb6a25fSPoul-Henning Kamp             *nextTokPtr = ptr + MINBPC(enc);
3065bb6a25fSPoul-Henning Kamp             return tok;
3075bb6a25fSPoul-Henning Kamp           }
3085bb6a25fSPoul-Henning Kamp           break;
3095bb6a25fSPoul-Henning Kamp         default:
3105bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
3115bb6a25fSPoul-Henning Kamp           break;
3125bb6a25fSPoul-Henning Kamp         }
3135bb6a25fSPoul-Henning Kamp       }
3145bb6a25fSPoul-Henning Kamp       return XML_TOK_PARTIAL;
3155bb6a25fSPoul-Henning Kamp     case BT_QUEST:
3165bb6a25fSPoul-Henning Kamp       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
3175bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
3185bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
3195bb6a25fSPoul-Henning Kamp       }
3205bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
321be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
3225bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3235bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
3245bb6a25fSPoul-Henning Kamp         return tok;
3255bb6a25fSPoul-Henning Kamp       }
3265bb6a25fSPoul-Henning Kamp       /* fall through */
3275bb6a25fSPoul-Henning Kamp     default:
3285bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
3295bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
3305bb6a25fSPoul-Henning Kamp     }
3315bb6a25fSPoul-Henning Kamp   }
3325bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
3335bb6a25fSPoul-Henning Kamp }
3345bb6a25fSPoul-Henning Kamp 
335220ed979SColeman Kane static int PTRCALL
PREFIX(scanCdataSection)3366b2c1e49SXin LI PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
3376b2c1e49SXin LI                          const char **nextTokPtr) {
3386b2c1e49SXin LI   static const char CDATA_LSQB[]
3396b2c1e49SXin LI       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
3405bb6a25fSPoul-Henning Kamp   int i;
3416b2c1e49SXin LI   UNUSED_P(enc);
3425bb6a25fSPoul-Henning Kamp   /* CDATA[ */
343be8aff81SXin LI   REQUIRE_CHARS(enc, ptr, end, 6);
3445bb6a25fSPoul-Henning Kamp   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
3455bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
3465bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
3475bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
3485bb6a25fSPoul-Henning Kamp     }
3495bb6a25fSPoul-Henning Kamp   }
3505bb6a25fSPoul-Henning Kamp   *nextTokPtr = ptr;
3515bb6a25fSPoul-Henning Kamp   return XML_TOK_CDATA_SECT_OPEN;
3525bb6a25fSPoul-Henning Kamp }
3535bb6a25fSPoul-Henning Kamp 
354220ed979SColeman Kane static int PTRCALL
PREFIX(cdataSectionTok)3556b2c1e49SXin LI PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
3566b2c1e49SXin LI                         const char **nextTokPtr) {
357be8aff81SXin LI   if (ptr >= end)
3585bb6a25fSPoul-Henning Kamp     return XML_TOK_NONE;
3595bb6a25fSPoul-Henning Kamp   if (MINBPC(enc) > 1) {
3605bb6a25fSPoul-Henning Kamp     size_t n = end - ptr;
3615bb6a25fSPoul-Henning Kamp     if (n & (MINBPC(enc) - 1)) {
3625bb6a25fSPoul-Henning Kamp       n &= ~(MINBPC(enc) - 1);
3635bb6a25fSPoul-Henning Kamp       if (n == 0)
3645bb6a25fSPoul-Henning Kamp         return XML_TOK_PARTIAL;
3655bb6a25fSPoul-Henning Kamp       end = ptr + n;
3665bb6a25fSPoul-Henning Kamp     }
3675bb6a25fSPoul-Henning Kamp   }
3685bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
3695bb6a25fSPoul-Henning Kamp   case BT_RSQB:
3705bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
371be8aff81SXin LI     REQUIRE_CHAR(enc, ptr, end);
3725bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
3735bb6a25fSPoul-Henning Kamp       break;
3745bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
375be8aff81SXin LI     REQUIRE_CHAR(enc, ptr, end);
3765bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3775bb6a25fSPoul-Henning Kamp       ptr -= MINBPC(enc);
3785bb6a25fSPoul-Henning Kamp       break;
3795bb6a25fSPoul-Henning Kamp     }
3805bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
3815bb6a25fSPoul-Henning Kamp     return XML_TOK_CDATA_SECT_CLOSE;
3825bb6a25fSPoul-Henning Kamp   case BT_CR:
3835bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
384be8aff81SXin LI     REQUIRE_CHAR(enc, ptr, end);
3855bb6a25fSPoul-Henning Kamp     if (BYTE_TYPE(enc, ptr) == BT_LF)
3865bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
3875bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
3885bb6a25fSPoul-Henning Kamp     return XML_TOK_DATA_NEWLINE;
3895bb6a25fSPoul-Henning Kamp   case BT_LF:
3905bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
3915bb6a25fSPoul-Henning Kamp     return XML_TOK_DATA_NEWLINE;
3925bb6a25fSPoul-Henning Kamp     INVALID_CASES(ptr, nextTokPtr)
3935bb6a25fSPoul-Henning Kamp   default:
3945bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
3955bb6a25fSPoul-Henning Kamp     break;
3965bb6a25fSPoul-Henning Kamp   }
397be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
3985bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
3995bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
4005bb6a25fSPoul-Henning Kamp   case BT_LEAD##n:                                                             \
4015bb6a25fSPoul-Henning Kamp     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
4025bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;                                                       \
4035bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;                                               \
4045bb6a25fSPoul-Henning Kamp     }                                                                          \
4055bb6a25fSPoul-Henning Kamp     ptr += n;                                                                  \
4065bb6a25fSPoul-Henning Kamp     break;
4076b2c1e49SXin LI       LEAD_CASE(2)
4086b2c1e49SXin LI       LEAD_CASE(3)
4096b2c1e49SXin LI       LEAD_CASE(4)
4105bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
4115bb6a25fSPoul-Henning Kamp     case BT_NONXML:
4125bb6a25fSPoul-Henning Kamp     case BT_MALFORM:
4135bb6a25fSPoul-Henning Kamp     case BT_TRAIL:
4145bb6a25fSPoul-Henning Kamp     case BT_CR:
4155bb6a25fSPoul-Henning Kamp     case BT_LF:
4165bb6a25fSPoul-Henning Kamp     case BT_RSQB:
4175bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
4185bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
4195bb6a25fSPoul-Henning Kamp     default:
4205bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
4215bb6a25fSPoul-Henning Kamp       break;
4225bb6a25fSPoul-Henning Kamp     }
4235bb6a25fSPoul-Henning Kamp   }
4245bb6a25fSPoul-Henning Kamp   *nextTokPtr = ptr;
4255bb6a25fSPoul-Henning Kamp   return XML_TOK_DATA_CHARS;
4265bb6a25fSPoul-Henning Kamp }
4275bb6a25fSPoul-Henning Kamp 
4285bb6a25fSPoul-Henning Kamp /* ptr points to character following "</" */
4295bb6a25fSPoul-Henning Kamp 
430220ed979SColeman Kane static int PTRCALL
PREFIX(scanEndTag)4316b2c1e49SXin LI PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
4326b2c1e49SXin LI                    const char **nextTokPtr) {
433be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
4345bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
4355bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
4365bb6a25fSPoul-Henning Kamp   default:
4375bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
4385bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
4395bb6a25fSPoul-Henning Kamp   }
440be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
4415bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
4425bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
4436b2c1e49SXin LI     case BT_S:
4446b2c1e49SXin LI     case BT_CR:
4456b2c1e49SXin LI     case BT_LF:
446be8aff81SXin LI       for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
4475bb6a25fSPoul-Henning Kamp         switch (BYTE_TYPE(enc, ptr)) {
4486b2c1e49SXin LI         case BT_S:
4496b2c1e49SXin LI         case BT_CR:
4506b2c1e49SXin LI         case BT_LF:
4515bb6a25fSPoul-Henning Kamp           break;
4525bb6a25fSPoul-Henning Kamp         case BT_GT:
4535bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr + MINBPC(enc);
4545bb6a25fSPoul-Henning Kamp           return XML_TOK_END_TAG;
4555bb6a25fSPoul-Henning Kamp         default:
4565bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
4575bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
4585bb6a25fSPoul-Henning Kamp         }
4595bb6a25fSPoul-Henning Kamp       }
4605bb6a25fSPoul-Henning Kamp       return XML_TOK_PARTIAL;
4615bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
4625bb6a25fSPoul-Henning Kamp     case BT_COLON:
4635bb6a25fSPoul-Henning Kamp       /* no need to check qname syntax here,
4645bb6a25fSPoul-Henning Kamp          since end-tag must match exactly */
4655bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
4665bb6a25fSPoul-Henning Kamp       break;
4675bb6a25fSPoul-Henning Kamp #  endif
4685bb6a25fSPoul-Henning Kamp     case BT_GT:
4695bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
4705bb6a25fSPoul-Henning Kamp       return XML_TOK_END_TAG;
4715bb6a25fSPoul-Henning Kamp     default:
4725bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
4735bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
4745bb6a25fSPoul-Henning Kamp     }
4755bb6a25fSPoul-Henning Kamp   }
4765bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
4775bb6a25fSPoul-Henning Kamp }
4785bb6a25fSPoul-Henning Kamp 
4795bb6a25fSPoul-Henning Kamp /* ptr points to character following "&#X" */
4805bb6a25fSPoul-Henning Kamp 
481220ed979SColeman Kane static int PTRCALL
PREFIX(scanHexCharRef)4826b2c1e49SXin LI PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
4836b2c1e49SXin LI                        const char **nextTokPtr) {
484be8aff81SXin LI   if (HAS_CHAR(enc, ptr, end)) {
4855bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
4865bb6a25fSPoul-Henning Kamp     case BT_DIGIT:
4875bb6a25fSPoul-Henning Kamp     case BT_HEX:
4885bb6a25fSPoul-Henning Kamp       break;
4895bb6a25fSPoul-Henning Kamp     default:
4905bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
4915bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
4925bb6a25fSPoul-Henning Kamp     }
493be8aff81SXin LI     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
4945bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
4955bb6a25fSPoul-Henning Kamp       case BT_DIGIT:
4965bb6a25fSPoul-Henning Kamp       case BT_HEX:
4975bb6a25fSPoul-Henning Kamp         break;
4985bb6a25fSPoul-Henning Kamp       case BT_SEMI:
4995bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
5005bb6a25fSPoul-Henning Kamp         return XML_TOK_CHAR_REF;
5015bb6a25fSPoul-Henning Kamp       default:
5025bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
5035bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
5045bb6a25fSPoul-Henning Kamp       }
5055bb6a25fSPoul-Henning Kamp     }
5065bb6a25fSPoul-Henning Kamp   }
5075bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
5085bb6a25fSPoul-Henning Kamp }
5095bb6a25fSPoul-Henning Kamp 
5105bb6a25fSPoul-Henning Kamp /* ptr points to character following "&#" */
5115bb6a25fSPoul-Henning Kamp 
512220ed979SColeman Kane static int PTRCALL
PREFIX(scanCharRef)5136b2c1e49SXin LI PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
5146b2c1e49SXin LI                     const char **nextTokPtr) {
515be8aff81SXin LI   if (HAS_CHAR(enc, ptr, end)) {
5165bb6a25fSPoul-Henning Kamp     if (CHAR_MATCHES(enc, ptr, ASCII_x))
5175bb6a25fSPoul-Henning Kamp       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
5185bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
5195bb6a25fSPoul-Henning Kamp     case BT_DIGIT:
5205bb6a25fSPoul-Henning Kamp       break;
5215bb6a25fSPoul-Henning Kamp     default:
5225bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
5235bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
5245bb6a25fSPoul-Henning Kamp     }
525be8aff81SXin LI     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
5265bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
5275bb6a25fSPoul-Henning Kamp       case BT_DIGIT:
5285bb6a25fSPoul-Henning Kamp         break;
5295bb6a25fSPoul-Henning Kamp       case BT_SEMI:
5305bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
5315bb6a25fSPoul-Henning Kamp         return XML_TOK_CHAR_REF;
5325bb6a25fSPoul-Henning Kamp       default:
5335bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
5345bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
5355bb6a25fSPoul-Henning Kamp       }
5365bb6a25fSPoul-Henning Kamp     }
5375bb6a25fSPoul-Henning Kamp   }
5385bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
5395bb6a25fSPoul-Henning Kamp }
5405bb6a25fSPoul-Henning Kamp 
5415bb6a25fSPoul-Henning Kamp /* ptr points to character following "&" */
5425bb6a25fSPoul-Henning Kamp 
543220ed979SColeman Kane static int PTRCALL
PREFIX(scanRef)5445bb6a25fSPoul-Henning Kamp PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
5456b2c1e49SXin LI                 const char **nextTokPtr) {
546be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
5475bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
5485bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
5495bb6a25fSPoul-Henning Kamp   case BT_NUM:
5505bb6a25fSPoul-Henning Kamp     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
5515bb6a25fSPoul-Henning Kamp   default:
5525bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
5535bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
5545bb6a25fSPoul-Henning Kamp   }
555be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
5565bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
5575bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
5585bb6a25fSPoul-Henning Kamp     case BT_SEMI:
5595bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
5605bb6a25fSPoul-Henning Kamp       return XML_TOK_ENTITY_REF;
5615bb6a25fSPoul-Henning Kamp     default:
5625bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
5635bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
5645bb6a25fSPoul-Henning Kamp     }
5655bb6a25fSPoul-Henning Kamp   }
5665bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
5675bb6a25fSPoul-Henning Kamp }
5685bb6a25fSPoul-Henning Kamp 
5695bb6a25fSPoul-Henning Kamp /* ptr points to character following first character of attribute name */
5705bb6a25fSPoul-Henning Kamp 
571220ed979SColeman Kane static int PTRCALL
PREFIX(scanAtts)5725bb6a25fSPoul-Henning Kamp PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
5736b2c1e49SXin LI                  const char **nextTokPtr) {
5745bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
5755bb6a25fSPoul-Henning Kamp   int hadColon = 0;
5765bb6a25fSPoul-Henning Kamp #  endif
577be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
5785bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
5795bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
5805bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
5815bb6a25fSPoul-Henning Kamp     case BT_COLON:
5825bb6a25fSPoul-Henning Kamp       if (hadColon) {
5835bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
5845bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
5855bb6a25fSPoul-Henning Kamp       }
5865bb6a25fSPoul-Henning Kamp       hadColon = 1;
5875bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
588be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
5895bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
5905bb6a25fSPoul-Henning Kamp         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
5915bb6a25fSPoul-Henning Kamp       default:
5925bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
5935bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
5945bb6a25fSPoul-Henning Kamp       }
5955bb6a25fSPoul-Henning Kamp       break;
5965bb6a25fSPoul-Henning Kamp #  endif
5976b2c1e49SXin LI     case BT_S:
5986b2c1e49SXin LI     case BT_CR:
5996b2c1e49SXin LI     case BT_LF:
6005bb6a25fSPoul-Henning Kamp       for (;;) {
6015bb6a25fSPoul-Henning Kamp         int t;
6025bb6a25fSPoul-Henning Kamp 
6035bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
604be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
6055bb6a25fSPoul-Henning Kamp         t = BYTE_TYPE(enc, ptr);
6065bb6a25fSPoul-Henning Kamp         if (t == BT_EQUALS)
6075bb6a25fSPoul-Henning Kamp           break;
6085bb6a25fSPoul-Henning Kamp         switch (t) {
6095bb6a25fSPoul-Henning Kamp         case BT_S:
6105bb6a25fSPoul-Henning Kamp         case BT_LF:
6115bb6a25fSPoul-Henning Kamp         case BT_CR:
6125bb6a25fSPoul-Henning Kamp           break;
6135bb6a25fSPoul-Henning Kamp         default:
6145bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
6155bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
6165bb6a25fSPoul-Henning Kamp         }
6175bb6a25fSPoul-Henning Kamp       }
6185bb6a25fSPoul-Henning Kamp       /* fall through */
6196b2c1e49SXin LI     case BT_EQUALS: {
6205bb6a25fSPoul-Henning Kamp       int open;
6215bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
6225bb6a25fSPoul-Henning Kamp       hadColon = 0;
6235bb6a25fSPoul-Henning Kamp #  endif
6245bb6a25fSPoul-Henning Kamp       for (;;) {
6255bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
626be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
6275bb6a25fSPoul-Henning Kamp         open = BYTE_TYPE(enc, ptr);
6285bb6a25fSPoul-Henning Kamp         if (open == BT_QUOT || open == BT_APOS)
6295bb6a25fSPoul-Henning Kamp           break;
6305bb6a25fSPoul-Henning Kamp         switch (open) {
6315bb6a25fSPoul-Henning Kamp         case BT_S:
6325bb6a25fSPoul-Henning Kamp         case BT_LF:
6335bb6a25fSPoul-Henning Kamp         case BT_CR:
6345bb6a25fSPoul-Henning Kamp           break;
6355bb6a25fSPoul-Henning Kamp         default:
6365bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
6375bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
6385bb6a25fSPoul-Henning Kamp         }
6395bb6a25fSPoul-Henning Kamp       }
6405bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
6415bb6a25fSPoul-Henning Kamp       /* in attribute value */
6425bb6a25fSPoul-Henning Kamp       for (;;) {
6435bb6a25fSPoul-Henning Kamp         int t;
644be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
6455bb6a25fSPoul-Henning Kamp         t = BYTE_TYPE(enc, ptr);
6465bb6a25fSPoul-Henning Kamp         if (t == open)
6475bb6a25fSPoul-Henning Kamp           break;
6485bb6a25fSPoul-Henning Kamp         switch (t) {
6495bb6a25fSPoul-Henning Kamp           INVALID_CASES(ptr, nextTokPtr)
6506b2c1e49SXin LI         case BT_AMP: {
6515bb6a25fSPoul-Henning Kamp           int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
6525bb6a25fSPoul-Henning Kamp           if (tok <= 0) {
6535bb6a25fSPoul-Henning Kamp             if (tok == XML_TOK_INVALID)
6545bb6a25fSPoul-Henning Kamp               *nextTokPtr = ptr;
6555bb6a25fSPoul-Henning Kamp             return tok;
6565bb6a25fSPoul-Henning Kamp           }
6575bb6a25fSPoul-Henning Kamp           break;
6585bb6a25fSPoul-Henning Kamp         }
6595bb6a25fSPoul-Henning Kamp         case BT_LT:
6605bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
6615bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
6625bb6a25fSPoul-Henning Kamp         default:
6635bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
6645bb6a25fSPoul-Henning Kamp           break;
6655bb6a25fSPoul-Henning Kamp         }
6665bb6a25fSPoul-Henning Kamp       }
6675bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
668be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
6695bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
6705bb6a25fSPoul-Henning Kamp       case BT_S:
6715bb6a25fSPoul-Henning Kamp       case BT_CR:
6725bb6a25fSPoul-Henning Kamp       case BT_LF:
6735bb6a25fSPoul-Henning Kamp         break;
6745bb6a25fSPoul-Henning Kamp       case BT_SOL:
6755bb6a25fSPoul-Henning Kamp         goto sol;
6765bb6a25fSPoul-Henning Kamp       case BT_GT:
6775bb6a25fSPoul-Henning Kamp         goto gt;
6785bb6a25fSPoul-Henning Kamp       default:
6795bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
6805bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
6815bb6a25fSPoul-Henning Kamp       }
6825bb6a25fSPoul-Henning Kamp       /* ptr points to closing quote */
6835bb6a25fSPoul-Henning Kamp       for (;;) {
6845bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
685be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
6865bb6a25fSPoul-Henning Kamp         switch (BYTE_TYPE(enc, ptr)) {
6875bb6a25fSPoul-Henning Kamp           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
6886b2c1e49SXin LI         case BT_S:
6896b2c1e49SXin LI         case BT_CR:
6906b2c1e49SXin LI         case BT_LF:
6915bb6a25fSPoul-Henning Kamp           continue;
6925bb6a25fSPoul-Henning Kamp         case BT_GT:
6935bb6a25fSPoul-Henning Kamp         gt:
6945bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr + MINBPC(enc);
6955bb6a25fSPoul-Henning Kamp           return XML_TOK_START_TAG_WITH_ATTS;
6965bb6a25fSPoul-Henning Kamp         case BT_SOL:
6975bb6a25fSPoul-Henning Kamp         sol:
6985bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
699be8aff81SXin LI           REQUIRE_CHAR(enc, ptr, end);
7005bb6a25fSPoul-Henning Kamp           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
7015bb6a25fSPoul-Henning Kamp             *nextTokPtr = ptr;
7025bb6a25fSPoul-Henning Kamp             return XML_TOK_INVALID;
7035bb6a25fSPoul-Henning Kamp           }
7045bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr + MINBPC(enc);
7055bb6a25fSPoul-Henning Kamp           return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
7065bb6a25fSPoul-Henning Kamp         default:
7075bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
7085bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
7095bb6a25fSPoul-Henning Kamp         }
7105bb6a25fSPoul-Henning Kamp         break;
7115bb6a25fSPoul-Henning Kamp       }
7125bb6a25fSPoul-Henning Kamp       break;
7135bb6a25fSPoul-Henning Kamp     }
7145bb6a25fSPoul-Henning Kamp     default:
7155bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
7165bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
7175bb6a25fSPoul-Henning Kamp     }
7185bb6a25fSPoul-Henning Kamp   }
7195bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
7205bb6a25fSPoul-Henning Kamp }
7215bb6a25fSPoul-Henning Kamp 
7225bb6a25fSPoul-Henning Kamp /* ptr points to character following "<" */
7235bb6a25fSPoul-Henning Kamp 
724220ed979SColeman Kane static int PTRCALL
PREFIX(scanLt)7255bb6a25fSPoul-Henning Kamp PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
7266b2c1e49SXin LI                const char **nextTokPtr) {
7275bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
7285bb6a25fSPoul-Henning Kamp   int hadColon;
7295bb6a25fSPoul-Henning Kamp #  endif
730be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
7315bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
7325bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7335bb6a25fSPoul-Henning Kamp   case BT_EXCL:
734be8aff81SXin LI     ptr += MINBPC(enc);
735be8aff81SXin LI     REQUIRE_CHAR(enc, ptr, end);
7365bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
7375bb6a25fSPoul-Henning Kamp     case BT_MINUS:
7385bb6a25fSPoul-Henning Kamp       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7395bb6a25fSPoul-Henning Kamp     case BT_LSQB:
7406b2c1e49SXin LI       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7415bb6a25fSPoul-Henning Kamp     }
7425bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
7435bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
7445bb6a25fSPoul-Henning Kamp   case BT_QUEST:
7455bb6a25fSPoul-Henning Kamp     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7465bb6a25fSPoul-Henning Kamp   case BT_SOL:
7475bb6a25fSPoul-Henning Kamp     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7485bb6a25fSPoul-Henning Kamp   default:
7495bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
7505bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
7515bb6a25fSPoul-Henning Kamp   }
7525bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
7535bb6a25fSPoul-Henning Kamp   hadColon = 0;
7545bb6a25fSPoul-Henning Kamp #  endif
7555bb6a25fSPoul-Henning Kamp   /* we have a start-tag */
756be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
7575bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
7585bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
7595bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
7605bb6a25fSPoul-Henning Kamp     case BT_COLON:
7615bb6a25fSPoul-Henning Kamp       if (hadColon) {
7625bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
7635bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
7645bb6a25fSPoul-Henning Kamp       }
7655bb6a25fSPoul-Henning Kamp       hadColon = 1;
7665bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
767be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
7685bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
7695bb6a25fSPoul-Henning Kamp         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7705bb6a25fSPoul-Henning Kamp       default:
7715bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
7725bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
7735bb6a25fSPoul-Henning Kamp       }
7745bb6a25fSPoul-Henning Kamp       break;
7755bb6a25fSPoul-Henning Kamp #  endif
7766b2c1e49SXin LI     case BT_S:
7776b2c1e49SXin LI     case BT_CR:
7786b2c1e49SXin LI     case BT_LF: {
7795bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
780be8aff81SXin LI       while (HAS_CHAR(enc, ptr, end)) {
7815bb6a25fSPoul-Henning Kamp         switch (BYTE_TYPE(enc, ptr)) {
7825bb6a25fSPoul-Henning Kamp           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7835bb6a25fSPoul-Henning Kamp         case BT_GT:
7845bb6a25fSPoul-Henning Kamp           goto gt;
7855bb6a25fSPoul-Henning Kamp         case BT_SOL:
7865bb6a25fSPoul-Henning Kamp           goto sol;
7876b2c1e49SXin LI         case BT_S:
7886b2c1e49SXin LI         case BT_CR:
7896b2c1e49SXin LI         case BT_LF:
7905bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
7915bb6a25fSPoul-Henning Kamp           continue;
7925bb6a25fSPoul-Henning Kamp         default:
7935bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr;
7945bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
7955bb6a25fSPoul-Henning Kamp         }
7965bb6a25fSPoul-Henning Kamp         return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
7975bb6a25fSPoul-Henning Kamp       }
7985bb6a25fSPoul-Henning Kamp       return XML_TOK_PARTIAL;
7995bb6a25fSPoul-Henning Kamp     }
8005bb6a25fSPoul-Henning Kamp     case BT_GT:
8015bb6a25fSPoul-Henning Kamp     gt:
8025bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
8035bb6a25fSPoul-Henning Kamp       return XML_TOK_START_TAG_NO_ATTS;
8045bb6a25fSPoul-Henning Kamp     case BT_SOL:
8055bb6a25fSPoul-Henning Kamp     sol:
8065bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
807be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
8085bb6a25fSPoul-Henning Kamp       if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
8095bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
8105bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
8115bb6a25fSPoul-Henning Kamp       }
8125bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
8135bb6a25fSPoul-Henning Kamp       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
8145bb6a25fSPoul-Henning Kamp     default:
8155bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
8165bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
8175bb6a25fSPoul-Henning Kamp     }
8185bb6a25fSPoul-Henning Kamp   }
8195bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
8205bb6a25fSPoul-Henning Kamp }
8215bb6a25fSPoul-Henning Kamp 
822220ed979SColeman Kane static int PTRCALL
PREFIX(contentTok)8235bb6a25fSPoul-Henning Kamp PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
8246b2c1e49SXin LI                    const char **nextTokPtr) {
825be8aff81SXin LI   if (ptr >= end)
8265bb6a25fSPoul-Henning Kamp     return XML_TOK_NONE;
8275bb6a25fSPoul-Henning Kamp   if (MINBPC(enc) > 1) {
8285bb6a25fSPoul-Henning Kamp     size_t n = end - ptr;
8295bb6a25fSPoul-Henning Kamp     if (n & (MINBPC(enc) - 1)) {
8305bb6a25fSPoul-Henning Kamp       n &= ~(MINBPC(enc) - 1);
8315bb6a25fSPoul-Henning Kamp       if (n == 0)
8325bb6a25fSPoul-Henning Kamp         return XML_TOK_PARTIAL;
8335bb6a25fSPoul-Henning Kamp       end = ptr + n;
8345bb6a25fSPoul-Henning Kamp     }
8355bb6a25fSPoul-Henning Kamp   }
8365bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
8375bb6a25fSPoul-Henning Kamp   case BT_LT:
8385bb6a25fSPoul-Henning Kamp     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
8395bb6a25fSPoul-Henning Kamp   case BT_AMP:
8405bb6a25fSPoul-Henning Kamp     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
8415bb6a25fSPoul-Henning Kamp   case BT_CR:
8425bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
843be8aff81SXin LI     if (! HAS_CHAR(enc, ptr, end))
8445bb6a25fSPoul-Henning Kamp       return XML_TOK_TRAILING_CR;
8455bb6a25fSPoul-Henning Kamp     if (BYTE_TYPE(enc, ptr) == BT_LF)
8465bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
8475bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
8485bb6a25fSPoul-Henning Kamp     return XML_TOK_DATA_NEWLINE;
8495bb6a25fSPoul-Henning Kamp   case BT_LF:
8505bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
8515bb6a25fSPoul-Henning Kamp     return XML_TOK_DATA_NEWLINE;
8525bb6a25fSPoul-Henning Kamp   case BT_RSQB:
8535bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
854be8aff81SXin LI     if (! HAS_CHAR(enc, ptr, end))
8555bb6a25fSPoul-Henning Kamp       return XML_TOK_TRAILING_RSQB;
8565bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
8575bb6a25fSPoul-Henning Kamp       break;
8585bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
859be8aff81SXin LI     if (! HAS_CHAR(enc, ptr, end))
8605bb6a25fSPoul-Henning Kamp       return XML_TOK_TRAILING_RSQB;
8615bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
8625bb6a25fSPoul-Henning Kamp       ptr -= MINBPC(enc);
8635bb6a25fSPoul-Henning Kamp       break;
8645bb6a25fSPoul-Henning Kamp     }
8655bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
8665bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
8675bb6a25fSPoul-Henning Kamp     INVALID_CASES(ptr, nextTokPtr)
8685bb6a25fSPoul-Henning Kamp   default:
8695bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
8705bb6a25fSPoul-Henning Kamp     break;
8715bb6a25fSPoul-Henning Kamp   }
872be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
8735bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
8745bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
8755bb6a25fSPoul-Henning Kamp   case BT_LEAD##n:                                                             \
8765bb6a25fSPoul-Henning Kamp     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
8775bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;                                                       \
8785bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;                                               \
8795bb6a25fSPoul-Henning Kamp     }                                                                          \
8805bb6a25fSPoul-Henning Kamp     ptr += n;                                                                  \
8815bb6a25fSPoul-Henning Kamp     break;
8826b2c1e49SXin LI       LEAD_CASE(2)
8836b2c1e49SXin LI       LEAD_CASE(3)
8846b2c1e49SXin LI       LEAD_CASE(4)
8855bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
8865bb6a25fSPoul-Henning Kamp     case BT_RSQB:
887be8aff81SXin LI       if (HAS_CHARS(enc, ptr, end, 2)) {
8885bb6a25fSPoul-Henning Kamp         if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
8895bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
8905bb6a25fSPoul-Henning Kamp           break;
8915bb6a25fSPoul-Henning Kamp         }
892be8aff81SXin LI         if (HAS_CHARS(enc, ptr, end, 3)) {
8935bb6a25fSPoul-Henning Kamp           if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
8945bb6a25fSPoul-Henning Kamp             ptr += MINBPC(enc);
8955bb6a25fSPoul-Henning Kamp             break;
8965bb6a25fSPoul-Henning Kamp           }
8975bb6a25fSPoul-Henning Kamp           *nextTokPtr = ptr + 2 * MINBPC(enc);
8985bb6a25fSPoul-Henning Kamp           return XML_TOK_INVALID;
8995bb6a25fSPoul-Henning Kamp         }
9005bb6a25fSPoul-Henning Kamp       }
9015bb6a25fSPoul-Henning Kamp       /* fall through */
9025bb6a25fSPoul-Henning Kamp     case BT_AMP:
9035bb6a25fSPoul-Henning Kamp     case BT_LT:
9045bb6a25fSPoul-Henning Kamp     case BT_NONXML:
9055bb6a25fSPoul-Henning Kamp     case BT_MALFORM:
9065bb6a25fSPoul-Henning Kamp     case BT_TRAIL:
9075bb6a25fSPoul-Henning Kamp     case BT_CR:
9085bb6a25fSPoul-Henning Kamp     case BT_LF:
9095bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
9105bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
9115bb6a25fSPoul-Henning Kamp     default:
9125bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
9135bb6a25fSPoul-Henning Kamp       break;
9145bb6a25fSPoul-Henning Kamp     }
9155bb6a25fSPoul-Henning Kamp   }
9165bb6a25fSPoul-Henning Kamp   *nextTokPtr = ptr;
9175bb6a25fSPoul-Henning Kamp   return XML_TOK_DATA_CHARS;
9185bb6a25fSPoul-Henning Kamp }
9195bb6a25fSPoul-Henning Kamp 
9205bb6a25fSPoul-Henning Kamp /* ptr points to character following "%" */
9215bb6a25fSPoul-Henning Kamp 
922220ed979SColeman Kane static int PTRCALL
PREFIX(scanPercent)9235bb6a25fSPoul-Henning Kamp PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
9246b2c1e49SXin LI                     const char **nextTokPtr) {
925be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
9265bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
9275bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
9286b2c1e49SXin LI   case BT_S:
9296b2c1e49SXin LI   case BT_LF:
9306b2c1e49SXin LI   case BT_CR:
9316b2c1e49SXin LI   case BT_PERCNT:
9325bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
9335bb6a25fSPoul-Henning Kamp     return XML_TOK_PERCENT;
9345bb6a25fSPoul-Henning Kamp   default:
9355bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
9365bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
9375bb6a25fSPoul-Henning Kamp   }
938be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
9395bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
9405bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
9415bb6a25fSPoul-Henning Kamp     case BT_SEMI:
9425bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
9435bb6a25fSPoul-Henning Kamp       return XML_TOK_PARAM_ENTITY_REF;
9445bb6a25fSPoul-Henning Kamp     default:
9455bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
9465bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
9475bb6a25fSPoul-Henning Kamp     }
9485bb6a25fSPoul-Henning Kamp   }
9495bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
9505bb6a25fSPoul-Henning Kamp }
9515bb6a25fSPoul-Henning Kamp 
952220ed979SColeman Kane static int PTRCALL
PREFIX(scanPoundName)9535bb6a25fSPoul-Henning Kamp PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
9546b2c1e49SXin LI                       const char **nextTokPtr) {
955be8aff81SXin LI   REQUIRE_CHAR(enc, ptr, end);
9565bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
9575bb6a25fSPoul-Henning Kamp     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
9585bb6a25fSPoul-Henning Kamp   default:
9595bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
9605bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
9615bb6a25fSPoul-Henning Kamp   }
962be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
9635bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
9645bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
9656b2c1e49SXin LI     case BT_CR:
9666b2c1e49SXin LI     case BT_LF:
9676b2c1e49SXin LI     case BT_S:
9686b2c1e49SXin LI     case BT_RPAR:
9696b2c1e49SXin LI     case BT_GT:
9706b2c1e49SXin LI     case BT_PERCNT:
9716b2c1e49SXin LI     case BT_VERBAR:
9725bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
9735bb6a25fSPoul-Henning Kamp       return XML_TOK_POUND_NAME;
9745bb6a25fSPoul-Henning Kamp     default:
9755bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
9765bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
9775bb6a25fSPoul-Henning Kamp     }
9785bb6a25fSPoul-Henning Kamp   }
9795bb6a25fSPoul-Henning Kamp   return -XML_TOK_POUND_NAME;
9805bb6a25fSPoul-Henning Kamp }
9815bb6a25fSPoul-Henning Kamp 
982220ed979SColeman Kane static int PTRCALL
PREFIX(scanLit)9836b2c1e49SXin LI PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
9846b2c1e49SXin LI                 const char **nextTokPtr) {
985be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
9865bb6a25fSPoul-Henning Kamp     int t = BYTE_TYPE(enc, ptr);
9875bb6a25fSPoul-Henning Kamp     switch (t) {
9885bb6a25fSPoul-Henning Kamp       INVALID_CASES(ptr, nextTokPtr)
9895bb6a25fSPoul-Henning Kamp     case BT_QUOT:
9905bb6a25fSPoul-Henning Kamp     case BT_APOS:
9915bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
9925bb6a25fSPoul-Henning Kamp       if (t != open)
9935bb6a25fSPoul-Henning Kamp         break;
994be8aff81SXin LI       if (! HAS_CHAR(enc, ptr, end))
9955bb6a25fSPoul-Henning Kamp         return -XML_TOK_LITERAL;
9965bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
9975bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
9986b2c1e49SXin LI       case BT_S:
9996b2c1e49SXin LI       case BT_CR:
10006b2c1e49SXin LI       case BT_LF:
10016b2c1e49SXin LI       case BT_GT:
10026b2c1e49SXin LI       case BT_PERCNT:
10036b2c1e49SXin LI       case BT_LSQB:
10045bb6a25fSPoul-Henning Kamp         return XML_TOK_LITERAL;
10055bb6a25fSPoul-Henning Kamp       default:
10065bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
10075bb6a25fSPoul-Henning Kamp       }
10085bb6a25fSPoul-Henning Kamp     default:
10095bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
10105bb6a25fSPoul-Henning Kamp       break;
10115bb6a25fSPoul-Henning Kamp     }
10125bb6a25fSPoul-Henning Kamp   }
10135bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
10145bb6a25fSPoul-Henning Kamp }
10155bb6a25fSPoul-Henning Kamp 
1016220ed979SColeman Kane static int PTRCALL
PREFIX(prologTok)10175bb6a25fSPoul-Henning Kamp PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
10186b2c1e49SXin LI                   const char **nextTokPtr) {
10195bb6a25fSPoul-Henning Kamp   int tok;
1020be8aff81SXin LI   if (ptr >= end)
10215bb6a25fSPoul-Henning Kamp     return XML_TOK_NONE;
10225bb6a25fSPoul-Henning Kamp   if (MINBPC(enc) > 1) {
10235bb6a25fSPoul-Henning Kamp     size_t n = end - ptr;
10245bb6a25fSPoul-Henning Kamp     if (n & (MINBPC(enc) - 1)) {
10255bb6a25fSPoul-Henning Kamp       n &= ~(MINBPC(enc) - 1);
10265bb6a25fSPoul-Henning Kamp       if (n == 0)
10275bb6a25fSPoul-Henning Kamp         return XML_TOK_PARTIAL;
10285bb6a25fSPoul-Henning Kamp       end = ptr + n;
10295bb6a25fSPoul-Henning Kamp     }
10305bb6a25fSPoul-Henning Kamp   }
10315bb6a25fSPoul-Henning Kamp   switch (BYTE_TYPE(enc, ptr)) {
10325bb6a25fSPoul-Henning Kamp   case BT_QUOT:
10335bb6a25fSPoul-Henning Kamp     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
10345bb6a25fSPoul-Henning Kamp   case BT_APOS:
10355bb6a25fSPoul-Henning Kamp     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
10366b2c1e49SXin LI   case BT_LT: {
10375bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
1038be8aff81SXin LI     REQUIRE_CHAR(enc, ptr, end);
10395bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
10405bb6a25fSPoul-Henning Kamp     case BT_EXCL:
10415bb6a25fSPoul-Henning Kamp       return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10425bb6a25fSPoul-Henning Kamp     case BT_QUEST:
10435bb6a25fSPoul-Henning Kamp       return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10445bb6a25fSPoul-Henning Kamp     case BT_NMSTRT:
10455bb6a25fSPoul-Henning Kamp     case BT_HEX:
10465bb6a25fSPoul-Henning Kamp     case BT_NONASCII:
10475bb6a25fSPoul-Henning Kamp     case BT_LEAD2:
10485bb6a25fSPoul-Henning Kamp     case BT_LEAD3:
10495bb6a25fSPoul-Henning Kamp     case BT_LEAD4:
10505bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr - MINBPC(enc);
10515bb6a25fSPoul-Henning Kamp       return XML_TOK_INSTANCE_START;
10525bb6a25fSPoul-Henning Kamp     }
10535bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
10545bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
10555bb6a25fSPoul-Henning Kamp   }
10565bb6a25fSPoul-Henning Kamp   case BT_CR:
10575bb6a25fSPoul-Henning Kamp     if (ptr + MINBPC(enc) == end) {
10585bb6a25fSPoul-Henning Kamp       *nextTokPtr = end;
10595bb6a25fSPoul-Henning Kamp       /* indicate that this might be part of a CR/LF pair */
10605bb6a25fSPoul-Henning Kamp       return -XML_TOK_PROLOG_S;
10615bb6a25fSPoul-Henning Kamp     }
10625bb6a25fSPoul-Henning Kamp     /* fall through */
10636b2c1e49SXin LI   case BT_S:
10646b2c1e49SXin LI   case BT_LF:
10655bb6a25fSPoul-Henning Kamp     for (;;) {
10665bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
1067be8aff81SXin LI       if (! HAS_CHAR(enc, ptr, end))
10685bb6a25fSPoul-Henning Kamp         break;
10695bb6a25fSPoul-Henning Kamp       switch (BYTE_TYPE(enc, ptr)) {
10706b2c1e49SXin LI       case BT_S:
10716b2c1e49SXin LI       case BT_LF:
10725bb6a25fSPoul-Henning Kamp         break;
10735bb6a25fSPoul-Henning Kamp       case BT_CR:
10745bb6a25fSPoul-Henning Kamp         /* don't split CR/LF pair */
10755bb6a25fSPoul-Henning Kamp         if (ptr + MINBPC(enc) != end)
10765bb6a25fSPoul-Henning Kamp           break;
10775bb6a25fSPoul-Henning Kamp         /* fall through */
10785bb6a25fSPoul-Henning Kamp       default:
10795bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
10805bb6a25fSPoul-Henning Kamp         return XML_TOK_PROLOG_S;
10815bb6a25fSPoul-Henning Kamp       }
10825bb6a25fSPoul-Henning Kamp     }
10835bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
10845bb6a25fSPoul-Henning Kamp     return XML_TOK_PROLOG_S;
10855bb6a25fSPoul-Henning Kamp   case BT_PERCNT:
10865bb6a25fSPoul-Henning Kamp     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10875bb6a25fSPoul-Henning Kamp   case BT_COMMA:
10885bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
10895bb6a25fSPoul-Henning Kamp     return XML_TOK_COMMA;
10905bb6a25fSPoul-Henning Kamp   case BT_LSQB:
10915bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
10925bb6a25fSPoul-Henning Kamp     return XML_TOK_OPEN_BRACKET;
10935bb6a25fSPoul-Henning Kamp   case BT_RSQB:
10945bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
1095be8aff81SXin LI     if (! HAS_CHAR(enc, ptr, end))
10965bb6a25fSPoul-Henning Kamp       return -XML_TOK_CLOSE_BRACKET;
10975bb6a25fSPoul-Henning Kamp     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098be8aff81SXin LI       REQUIRE_CHARS(enc, ptr, end, 2);
10995bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
11005bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + 2 * MINBPC(enc);
11015bb6a25fSPoul-Henning Kamp         return XML_TOK_COND_SECT_CLOSE;
11025bb6a25fSPoul-Henning Kamp       }
11035bb6a25fSPoul-Henning Kamp     }
11045bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
11055bb6a25fSPoul-Henning Kamp     return XML_TOK_CLOSE_BRACKET;
11065bb6a25fSPoul-Henning Kamp   case BT_LPAR:
11075bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
11085bb6a25fSPoul-Henning Kamp     return XML_TOK_OPEN_PAREN;
11095bb6a25fSPoul-Henning Kamp   case BT_RPAR:
11105bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
1111be8aff81SXin LI     if (! HAS_CHAR(enc, ptr, end))
11125bb6a25fSPoul-Henning Kamp       return -XML_TOK_CLOSE_PAREN;
11135bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
11145bb6a25fSPoul-Henning Kamp     case BT_AST:
11155bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
11165bb6a25fSPoul-Henning Kamp       return XML_TOK_CLOSE_PAREN_ASTERISK;
11175bb6a25fSPoul-Henning Kamp     case BT_QUEST:
11185bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
11195bb6a25fSPoul-Henning Kamp       return XML_TOK_CLOSE_PAREN_QUESTION;
11205bb6a25fSPoul-Henning Kamp     case BT_PLUS:
11215bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
11225bb6a25fSPoul-Henning Kamp       return XML_TOK_CLOSE_PAREN_PLUS;
11236b2c1e49SXin LI     case BT_CR:
11246b2c1e49SXin LI     case BT_LF:
11256b2c1e49SXin LI     case BT_S:
11266b2c1e49SXin LI     case BT_GT:
11276b2c1e49SXin LI     case BT_COMMA:
11286b2c1e49SXin LI     case BT_VERBAR:
11295bb6a25fSPoul-Henning Kamp     case BT_RPAR:
11305bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
11315bb6a25fSPoul-Henning Kamp       return XML_TOK_CLOSE_PAREN;
11325bb6a25fSPoul-Henning Kamp     }
11335bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
11345bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
11355bb6a25fSPoul-Henning Kamp   case BT_VERBAR:
11365bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
11375bb6a25fSPoul-Henning Kamp     return XML_TOK_OR;
11385bb6a25fSPoul-Henning Kamp   case BT_GT:
11395bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr + MINBPC(enc);
11405bb6a25fSPoul-Henning Kamp     return XML_TOK_DECL_CLOSE;
11415bb6a25fSPoul-Henning Kamp   case BT_NUM:
11425bb6a25fSPoul-Henning Kamp     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
11435bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
11445bb6a25fSPoul-Henning Kamp   case BT_LEAD##n:                                                             \
11455bb6a25fSPoul-Henning Kamp     if (end - ptr < n)                                                         \
11465bb6a25fSPoul-Henning Kamp       return XML_TOK_PARTIAL_CHAR;                                             \
1147ac69e5d4SEric van Gyzen     if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148ac69e5d4SEric van Gyzen       *nextTokPtr = ptr;                                                       \
1149ac69e5d4SEric van Gyzen       return XML_TOK_INVALID;                                                  \
1150ac69e5d4SEric van Gyzen     }                                                                          \
11515bb6a25fSPoul-Henning Kamp     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
11525bb6a25fSPoul-Henning Kamp       ptr += n;                                                                \
11535bb6a25fSPoul-Henning Kamp       tok = XML_TOK_NAME;                                                      \
11545bb6a25fSPoul-Henning Kamp       break;                                                                   \
11555bb6a25fSPoul-Henning Kamp     }                                                                          \
11565bb6a25fSPoul-Henning Kamp     if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
11575bb6a25fSPoul-Henning Kamp       ptr += n;                                                                \
11585bb6a25fSPoul-Henning Kamp       tok = XML_TOK_NMTOKEN;                                                   \
11595bb6a25fSPoul-Henning Kamp       break;                                                                   \
11605bb6a25fSPoul-Henning Kamp     }                                                                          \
11615bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;                                                         \
11625bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
11636b2c1e49SXin LI     LEAD_CASE(2)
11646b2c1e49SXin LI     LEAD_CASE(3)
11656b2c1e49SXin LI     LEAD_CASE(4)
11665bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
11675bb6a25fSPoul-Henning Kamp   case BT_NMSTRT:
11685bb6a25fSPoul-Henning Kamp   case BT_HEX:
11695bb6a25fSPoul-Henning Kamp     tok = XML_TOK_NAME;
11705bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
11715bb6a25fSPoul-Henning Kamp     break;
11725bb6a25fSPoul-Henning Kamp   case BT_DIGIT:
11735bb6a25fSPoul-Henning Kamp   case BT_NAME:
11745bb6a25fSPoul-Henning Kamp   case BT_MINUS:
11755bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
11765bb6a25fSPoul-Henning Kamp   case BT_COLON:
11775bb6a25fSPoul-Henning Kamp #  endif
11785bb6a25fSPoul-Henning Kamp     tok = XML_TOK_NMTOKEN;
11795bb6a25fSPoul-Henning Kamp     ptr += MINBPC(enc);
11805bb6a25fSPoul-Henning Kamp     break;
11815bb6a25fSPoul-Henning Kamp   case BT_NONASCII:
11825bb6a25fSPoul-Henning Kamp     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
11835bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
11845bb6a25fSPoul-Henning Kamp       tok = XML_TOK_NAME;
11855bb6a25fSPoul-Henning Kamp       break;
11865bb6a25fSPoul-Henning Kamp     }
11875bb6a25fSPoul-Henning Kamp     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
11885bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
11895bb6a25fSPoul-Henning Kamp       tok = XML_TOK_NMTOKEN;
11905bb6a25fSPoul-Henning Kamp       break;
11915bb6a25fSPoul-Henning Kamp     }
11925bb6a25fSPoul-Henning Kamp     /* fall through */
11935bb6a25fSPoul-Henning Kamp   default:
11945bb6a25fSPoul-Henning Kamp     *nextTokPtr = ptr;
11955bb6a25fSPoul-Henning Kamp     return XML_TOK_INVALID;
11965bb6a25fSPoul-Henning Kamp   }
1197be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
11985bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
11995bb6a25fSPoul-Henning Kamp       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
12006b2c1e49SXin LI     case BT_GT:
12016b2c1e49SXin LI     case BT_RPAR:
12026b2c1e49SXin LI     case BT_COMMA:
12036b2c1e49SXin LI     case BT_VERBAR:
12046b2c1e49SXin LI     case BT_LSQB:
12056b2c1e49SXin LI     case BT_PERCNT:
12066b2c1e49SXin LI     case BT_S:
12076b2c1e49SXin LI     case BT_CR:
12086b2c1e49SXin LI     case BT_LF:
12095bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
12105bb6a25fSPoul-Henning Kamp       return tok;
12115bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
12125bb6a25fSPoul-Henning Kamp     case BT_COLON:
12135bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
12145bb6a25fSPoul-Henning Kamp       switch (tok) {
12155bb6a25fSPoul-Henning Kamp       case XML_TOK_NAME:
1216be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
12175bb6a25fSPoul-Henning Kamp         tok = XML_TOK_PREFIXED_NAME;
12185bb6a25fSPoul-Henning Kamp         switch (BYTE_TYPE(enc, ptr)) {
12195bb6a25fSPoul-Henning Kamp           CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
12205bb6a25fSPoul-Henning Kamp         default:
12215bb6a25fSPoul-Henning Kamp           tok = XML_TOK_NMTOKEN;
12225bb6a25fSPoul-Henning Kamp           break;
12235bb6a25fSPoul-Henning Kamp         }
12245bb6a25fSPoul-Henning Kamp         break;
12255bb6a25fSPoul-Henning Kamp       case XML_TOK_PREFIXED_NAME:
12265bb6a25fSPoul-Henning Kamp         tok = XML_TOK_NMTOKEN;
12275bb6a25fSPoul-Henning Kamp         break;
12285bb6a25fSPoul-Henning Kamp       }
12295bb6a25fSPoul-Henning Kamp       break;
12305bb6a25fSPoul-Henning Kamp #  endif
12315bb6a25fSPoul-Henning Kamp     case BT_PLUS:
12325bb6a25fSPoul-Henning Kamp       if (tok == XML_TOK_NMTOKEN) {
12335bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
12345bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
12355bb6a25fSPoul-Henning Kamp       }
12365bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
12375bb6a25fSPoul-Henning Kamp       return XML_TOK_NAME_PLUS;
12385bb6a25fSPoul-Henning Kamp     case BT_AST:
12395bb6a25fSPoul-Henning Kamp       if (tok == XML_TOK_NMTOKEN) {
12405bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
12415bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
12425bb6a25fSPoul-Henning Kamp       }
12435bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
12445bb6a25fSPoul-Henning Kamp       return XML_TOK_NAME_ASTERISK;
12455bb6a25fSPoul-Henning Kamp     case BT_QUEST:
12465bb6a25fSPoul-Henning Kamp       if (tok == XML_TOK_NMTOKEN) {
12475bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
12485bb6a25fSPoul-Henning Kamp         return XML_TOK_INVALID;
12495bb6a25fSPoul-Henning Kamp       }
12505bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr + MINBPC(enc);
12515bb6a25fSPoul-Henning Kamp       return XML_TOK_NAME_QUESTION;
12525bb6a25fSPoul-Henning Kamp     default:
12535bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
12545bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
12555bb6a25fSPoul-Henning Kamp     }
12565bb6a25fSPoul-Henning Kamp   }
12575bb6a25fSPoul-Henning Kamp   return -tok;
12585bb6a25fSPoul-Henning Kamp }
12595bb6a25fSPoul-Henning Kamp 
1260220ed979SColeman Kane static int PTRCALL
PREFIX(attributeValueTok)12616b2c1e49SXin LI PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
12626b2c1e49SXin LI                           const char **nextTokPtr) {
12635bb6a25fSPoul-Henning Kamp   const char *start;
1264be8aff81SXin LI   if (ptr >= end)
12655bb6a25fSPoul-Henning Kamp     return XML_TOK_NONE;
12660a48773fSEric van Gyzen   else if (! HAS_CHAR(enc, ptr, end)) {
12670a48773fSEric van Gyzen     /* This line cannot be executed.  The incoming data has already
12680a48773fSEric van Gyzen      * been tokenized once, so incomplete characters like this have
12690a48773fSEric van Gyzen      * already been eliminated from the input.  Retaining the paranoia
12700a48773fSEric van Gyzen      * check is still valuable, however.
12710a48773fSEric van Gyzen      */
12720a48773fSEric van Gyzen     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
12730a48773fSEric van Gyzen   }
12745bb6a25fSPoul-Henning Kamp   start = ptr;
1275be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
12765bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
12775bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
12786b2c1e49SXin LI   case BT_LEAD##n:                                                             \
1279ac69e5d4SEric van Gyzen     ptr += n; /* NOTE: The encoding has already been validated. */             \
12806b2c1e49SXin LI     break;
12816b2c1e49SXin LI       LEAD_CASE(2)
12826b2c1e49SXin LI       LEAD_CASE(3)
12836b2c1e49SXin LI       LEAD_CASE(4)
12845bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
12855bb6a25fSPoul-Henning Kamp     case BT_AMP:
12865bb6a25fSPoul-Henning Kamp       if (ptr == start)
12875bb6a25fSPoul-Henning Kamp         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
12885bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
12895bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
12905bb6a25fSPoul-Henning Kamp     case BT_LT:
12915bb6a25fSPoul-Henning Kamp       /* this is for inside entity references */
12925bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
12935bb6a25fSPoul-Henning Kamp       return XML_TOK_INVALID;
12945bb6a25fSPoul-Henning Kamp     case BT_LF:
12955bb6a25fSPoul-Henning Kamp       if (ptr == start) {
12965bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
12975bb6a25fSPoul-Henning Kamp         return XML_TOK_DATA_NEWLINE;
12985bb6a25fSPoul-Henning Kamp       }
12995bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13005bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13015bb6a25fSPoul-Henning Kamp     case BT_CR:
13025bb6a25fSPoul-Henning Kamp       if (ptr == start) {
13035bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
1304be8aff81SXin LI         if (! HAS_CHAR(enc, ptr, end))
13055bb6a25fSPoul-Henning Kamp           return XML_TOK_TRAILING_CR;
13065bb6a25fSPoul-Henning Kamp         if (BYTE_TYPE(enc, ptr) == BT_LF)
13075bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
13085bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
13095bb6a25fSPoul-Henning Kamp         return XML_TOK_DATA_NEWLINE;
13105bb6a25fSPoul-Henning Kamp       }
13115bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13125bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13135bb6a25fSPoul-Henning Kamp     case BT_S:
13145bb6a25fSPoul-Henning Kamp       if (ptr == start) {
13155bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
13165bb6a25fSPoul-Henning Kamp         return XML_TOK_ATTRIBUTE_VALUE_S;
13175bb6a25fSPoul-Henning Kamp       }
13185bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13195bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13205bb6a25fSPoul-Henning Kamp     default:
13215bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
13225bb6a25fSPoul-Henning Kamp       break;
13235bb6a25fSPoul-Henning Kamp     }
13245bb6a25fSPoul-Henning Kamp   }
13255bb6a25fSPoul-Henning Kamp   *nextTokPtr = ptr;
13265bb6a25fSPoul-Henning Kamp   return XML_TOK_DATA_CHARS;
13275bb6a25fSPoul-Henning Kamp }
13285bb6a25fSPoul-Henning Kamp 
1329220ed979SColeman Kane static int PTRCALL
PREFIX(entityValueTok)13306b2c1e49SXin LI PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
13316b2c1e49SXin LI                        const char **nextTokPtr) {
13325bb6a25fSPoul-Henning Kamp   const char *start;
1333be8aff81SXin LI   if (ptr >= end)
13345bb6a25fSPoul-Henning Kamp     return XML_TOK_NONE;
13350a48773fSEric van Gyzen   else if (! HAS_CHAR(enc, ptr, end)) {
13360a48773fSEric van Gyzen     /* This line cannot be executed.  The incoming data has already
13370a48773fSEric van Gyzen      * been tokenized once, so incomplete characters like this have
13380a48773fSEric van Gyzen      * already been eliminated from the input.  Retaining the paranoia
13390a48773fSEric van Gyzen      * check is still valuable, however.
13400a48773fSEric van Gyzen      */
13410a48773fSEric van Gyzen     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
13420a48773fSEric van Gyzen   }
13435bb6a25fSPoul-Henning Kamp   start = ptr;
1344be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
13455bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
13465bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
13476b2c1e49SXin LI   case BT_LEAD##n:                                                             \
1348ac69e5d4SEric van Gyzen     ptr += n; /* NOTE: The encoding has already been validated. */             \
13496b2c1e49SXin LI     break;
13506b2c1e49SXin LI       LEAD_CASE(2)
13516b2c1e49SXin LI       LEAD_CASE(3)
13526b2c1e49SXin LI       LEAD_CASE(4)
13535bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
13545bb6a25fSPoul-Henning Kamp     case BT_AMP:
13555bb6a25fSPoul-Henning Kamp       if (ptr == start)
13565bb6a25fSPoul-Henning Kamp         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
13575bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13585bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13595bb6a25fSPoul-Henning Kamp     case BT_PERCNT:
13605bb6a25fSPoul-Henning Kamp       if (ptr == start) {
13616b2c1e49SXin LI         int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
13625bb6a25fSPoul-Henning Kamp         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
13635bb6a25fSPoul-Henning Kamp       }
13645bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13655bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13665bb6a25fSPoul-Henning Kamp     case BT_LF:
13675bb6a25fSPoul-Henning Kamp       if (ptr == start) {
13685bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr + MINBPC(enc);
13695bb6a25fSPoul-Henning Kamp         return XML_TOK_DATA_NEWLINE;
13705bb6a25fSPoul-Henning Kamp       }
13715bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13725bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13735bb6a25fSPoul-Henning Kamp     case BT_CR:
13745bb6a25fSPoul-Henning Kamp       if (ptr == start) {
13755bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
1376be8aff81SXin LI         if (! HAS_CHAR(enc, ptr, end))
13775bb6a25fSPoul-Henning Kamp           return XML_TOK_TRAILING_CR;
13785bb6a25fSPoul-Henning Kamp         if (BYTE_TYPE(enc, ptr) == BT_LF)
13795bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
13805bb6a25fSPoul-Henning Kamp         *nextTokPtr = ptr;
13815bb6a25fSPoul-Henning Kamp         return XML_TOK_DATA_NEWLINE;
13825bb6a25fSPoul-Henning Kamp       }
13835bb6a25fSPoul-Henning Kamp       *nextTokPtr = ptr;
13845bb6a25fSPoul-Henning Kamp       return XML_TOK_DATA_CHARS;
13855bb6a25fSPoul-Henning Kamp     default:
13865bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
13875bb6a25fSPoul-Henning Kamp       break;
13885bb6a25fSPoul-Henning Kamp     }
13895bb6a25fSPoul-Henning Kamp   }
13905bb6a25fSPoul-Henning Kamp   *nextTokPtr = ptr;
13915bb6a25fSPoul-Henning Kamp   return XML_TOK_DATA_CHARS;
13925bb6a25fSPoul-Henning Kamp }
13935bb6a25fSPoul-Henning Kamp 
13945bb6a25fSPoul-Henning Kamp #  ifdef XML_DTD
13955bb6a25fSPoul-Henning Kamp 
1396220ed979SColeman Kane static int PTRCALL
PREFIX(ignoreSectionTok)13976b2c1e49SXin LI PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
13986b2c1e49SXin LI                          const char **nextTokPtr) {
13995bb6a25fSPoul-Henning Kamp   int level = 0;
14005bb6a25fSPoul-Henning Kamp   if (MINBPC(enc) > 1) {
14015bb6a25fSPoul-Henning Kamp     size_t n = end - ptr;
14025bb6a25fSPoul-Henning Kamp     if (n & (MINBPC(enc) - 1)) {
14035bb6a25fSPoul-Henning Kamp       n &= ~(MINBPC(enc) - 1);
14045bb6a25fSPoul-Henning Kamp       end = ptr + n;
14055bb6a25fSPoul-Henning Kamp     }
14065bb6a25fSPoul-Henning Kamp   }
1407be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
14085bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
14095bb6a25fSPoul-Henning Kamp       INVALID_CASES(ptr, nextTokPtr)
14105bb6a25fSPoul-Henning Kamp     case BT_LT:
1411be8aff81SXin LI       ptr += MINBPC(enc);
1412be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
14135bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1414be8aff81SXin LI         ptr += MINBPC(enc);
1415be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
14165bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
14175bb6a25fSPoul-Henning Kamp           ++level;
14185bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
14195bb6a25fSPoul-Henning Kamp         }
14205bb6a25fSPoul-Henning Kamp       }
14215bb6a25fSPoul-Henning Kamp       break;
14225bb6a25fSPoul-Henning Kamp     case BT_RSQB:
1423be8aff81SXin LI       ptr += MINBPC(enc);
1424be8aff81SXin LI       REQUIRE_CHAR(enc, ptr, end);
14255bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1426be8aff81SXin LI         ptr += MINBPC(enc);
1427be8aff81SXin LI         REQUIRE_CHAR(enc, ptr, end);
14285bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
14295bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
14305bb6a25fSPoul-Henning Kamp           if (level == 0) {
14315bb6a25fSPoul-Henning Kamp             *nextTokPtr = ptr;
14325bb6a25fSPoul-Henning Kamp             return XML_TOK_IGNORE_SECT;
14335bb6a25fSPoul-Henning Kamp           }
14345bb6a25fSPoul-Henning Kamp           --level;
14355bb6a25fSPoul-Henning Kamp         }
14365bb6a25fSPoul-Henning Kamp       }
14375bb6a25fSPoul-Henning Kamp       break;
14385bb6a25fSPoul-Henning Kamp     default:
14395bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
14405bb6a25fSPoul-Henning Kamp       break;
14415bb6a25fSPoul-Henning Kamp     }
14425bb6a25fSPoul-Henning Kamp   }
14435bb6a25fSPoul-Henning Kamp   return XML_TOK_PARTIAL;
14445bb6a25fSPoul-Henning Kamp }
14455bb6a25fSPoul-Henning Kamp 
14465bb6a25fSPoul-Henning Kamp #  endif /* XML_DTD */
14475bb6a25fSPoul-Henning Kamp 
1448220ed979SColeman Kane static int PTRCALL
PREFIX(isPublicId)14495bb6a25fSPoul-Henning Kamp PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
14506b2c1e49SXin LI                    const char **badPtr) {
14515bb6a25fSPoul-Henning Kamp   ptr += MINBPC(enc);
14525bb6a25fSPoul-Henning Kamp   end -= MINBPC(enc);
1453be8aff81SXin LI   for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
14545bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
14555bb6a25fSPoul-Henning Kamp     case BT_DIGIT:
14565bb6a25fSPoul-Henning Kamp     case BT_HEX:
14575bb6a25fSPoul-Henning Kamp     case BT_MINUS:
14585bb6a25fSPoul-Henning Kamp     case BT_APOS:
14595bb6a25fSPoul-Henning Kamp     case BT_LPAR:
14605bb6a25fSPoul-Henning Kamp     case BT_RPAR:
14615bb6a25fSPoul-Henning Kamp     case BT_PLUS:
14625bb6a25fSPoul-Henning Kamp     case BT_COMMA:
14635bb6a25fSPoul-Henning Kamp     case BT_SOL:
14645bb6a25fSPoul-Henning Kamp     case BT_EQUALS:
14655bb6a25fSPoul-Henning Kamp     case BT_QUEST:
14665bb6a25fSPoul-Henning Kamp     case BT_CR:
14675bb6a25fSPoul-Henning Kamp     case BT_LF:
14685bb6a25fSPoul-Henning Kamp     case BT_SEMI:
14695bb6a25fSPoul-Henning Kamp     case BT_EXCL:
14705bb6a25fSPoul-Henning Kamp     case BT_AST:
14715bb6a25fSPoul-Henning Kamp     case BT_PERCNT:
14725bb6a25fSPoul-Henning Kamp     case BT_NUM:
14735bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
14745bb6a25fSPoul-Henning Kamp     case BT_COLON:
14755bb6a25fSPoul-Henning Kamp #  endif
14765bb6a25fSPoul-Henning Kamp       break;
14775bb6a25fSPoul-Henning Kamp     case BT_S:
14785bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
14795bb6a25fSPoul-Henning Kamp         *badPtr = ptr;
14805bb6a25fSPoul-Henning Kamp         return 0;
14815bb6a25fSPoul-Henning Kamp       }
14825bb6a25fSPoul-Henning Kamp       break;
14835bb6a25fSPoul-Henning Kamp     case BT_NAME:
14845bb6a25fSPoul-Henning Kamp     case BT_NMSTRT:
14855bb6a25fSPoul-Henning Kamp       if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
14865bb6a25fSPoul-Henning Kamp         break;
14870a48773fSEric van Gyzen       /* fall through */
14885bb6a25fSPoul-Henning Kamp     default:
14895bb6a25fSPoul-Henning Kamp       switch (BYTE_TO_ASCII(enc, ptr)) {
14905bb6a25fSPoul-Henning Kamp       case 0x24: /* $ */
14915bb6a25fSPoul-Henning Kamp       case 0x40: /* @ */
14925bb6a25fSPoul-Henning Kamp         break;
14935bb6a25fSPoul-Henning Kamp       default:
14945bb6a25fSPoul-Henning Kamp         *badPtr = ptr;
14955bb6a25fSPoul-Henning Kamp         return 0;
14965bb6a25fSPoul-Henning Kamp       }
14975bb6a25fSPoul-Henning Kamp       break;
14985bb6a25fSPoul-Henning Kamp     }
14995bb6a25fSPoul-Henning Kamp   }
15005bb6a25fSPoul-Henning Kamp   return 1;
15015bb6a25fSPoul-Henning Kamp }
15025bb6a25fSPoul-Henning Kamp 
15035bb6a25fSPoul-Henning Kamp /* This must only be called for a well-formed start-tag or empty
15045bb6a25fSPoul-Henning Kamp    element tag.  Returns the number of attributes.  Pointers to the
15055bb6a25fSPoul-Henning Kamp    first attsMax attributes are stored in atts.
15065bb6a25fSPoul-Henning Kamp */
15075bb6a25fSPoul-Henning Kamp 
1508220ed979SColeman Kane static int PTRCALL
PREFIX(getAtts)15096b2c1e49SXin LI PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
15106b2c1e49SXin LI                 ATTRIBUTE *atts) {
15115bb6a25fSPoul-Henning Kamp   enum { other, inName, inValue } state = inName;
15125bb6a25fSPoul-Henning Kamp   int nAtts = 0;
15135bb6a25fSPoul-Henning Kamp   int open = 0; /* defined when state == inValue;
15145bb6a25fSPoul-Henning Kamp                    initialization just to shut up compilers */
15155bb6a25fSPoul-Henning Kamp 
15165bb6a25fSPoul-Henning Kamp   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
15175bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
15185bb6a25fSPoul-Henning Kamp #  define START_NAME                                                           \
15195bb6a25fSPoul-Henning Kamp     if (state == other) {                                                      \
15205bb6a25fSPoul-Henning Kamp       if (nAtts < attsMax) {                                                   \
15215bb6a25fSPoul-Henning Kamp         atts[nAtts].name = ptr;                                                \
15225bb6a25fSPoul-Henning Kamp         atts[nAtts].normalized = 1;                                            \
15235bb6a25fSPoul-Henning Kamp       }                                                                        \
15245bb6a25fSPoul-Henning Kamp       state = inName;                                                          \
15255bb6a25fSPoul-Henning Kamp     }
15265bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
1527ac69e5d4SEric van Gyzen   case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
15286b2c1e49SXin LI     START_NAME ptr += (n - MINBPC(enc));                                       \
15296b2c1e49SXin LI     break;
15306b2c1e49SXin LI       LEAD_CASE(2)
15316b2c1e49SXin LI       LEAD_CASE(3)
15326b2c1e49SXin LI       LEAD_CASE(4)
15335bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
15345bb6a25fSPoul-Henning Kamp     case BT_NONASCII:
15355bb6a25fSPoul-Henning Kamp     case BT_NMSTRT:
15365bb6a25fSPoul-Henning Kamp     case BT_HEX:
15375bb6a25fSPoul-Henning Kamp       START_NAME
15385bb6a25fSPoul-Henning Kamp       break;
15395bb6a25fSPoul-Henning Kamp #  undef START_NAME
15405bb6a25fSPoul-Henning Kamp     case BT_QUOT:
15415bb6a25fSPoul-Henning Kamp       if (state != inValue) {
15425bb6a25fSPoul-Henning Kamp         if (nAtts < attsMax)
15435bb6a25fSPoul-Henning Kamp           atts[nAtts].valuePtr = ptr + MINBPC(enc);
15445bb6a25fSPoul-Henning Kamp         state = inValue;
15455bb6a25fSPoul-Henning Kamp         open = BT_QUOT;
15466b2c1e49SXin LI       } else if (open == BT_QUOT) {
15475bb6a25fSPoul-Henning Kamp         state = other;
15485bb6a25fSPoul-Henning Kamp         if (nAtts < attsMax)
15495bb6a25fSPoul-Henning Kamp           atts[nAtts].valueEnd = ptr;
15505bb6a25fSPoul-Henning Kamp         nAtts++;
15515bb6a25fSPoul-Henning Kamp       }
15525bb6a25fSPoul-Henning Kamp       break;
15535bb6a25fSPoul-Henning Kamp     case BT_APOS:
15545bb6a25fSPoul-Henning Kamp       if (state != inValue) {
15555bb6a25fSPoul-Henning Kamp         if (nAtts < attsMax)
15565bb6a25fSPoul-Henning Kamp           atts[nAtts].valuePtr = ptr + MINBPC(enc);
15575bb6a25fSPoul-Henning Kamp         state = inValue;
15585bb6a25fSPoul-Henning Kamp         open = BT_APOS;
15596b2c1e49SXin LI       } else if (open == BT_APOS) {
15605bb6a25fSPoul-Henning Kamp         state = other;
15615bb6a25fSPoul-Henning Kamp         if (nAtts < attsMax)
15625bb6a25fSPoul-Henning Kamp           atts[nAtts].valueEnd = ptr;
15635bb6a25fSPoul-Henning Kamp         nAtts++;
15645bb6a25fSPoul-Henning Kamp       }
15655bb6a25fSPoul-Henning Kamp       break;
15665bb6a25fSPoul-Henning Kamp     case BT_AMP:
15675bb6a25fSPoul-Henning Kamp       if (nAtts < attsMax)
15685bb6a25fSPoul-Henning Kamp         atts[nAtts].normalized = 0;
15695bb6a25fSPoul-Henning Kamp       break;
15705bb6a25fSPoul-Henning Kamp     case BT_S:
15715bb6a25fSPoul-Henning Kamp       if (state == inName)
15725bb6a25fSPoul-Henning Kamp         state = other;
15736b2c1e49SXin LI       else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
15745bb6a25fSPoul-Henning Kamp                && (ptr == atts[nAtts].valuePtr
15755bb6a25fSPoul-Henning Kamp                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
15765bb6a25fSPoul-Henning Kamp                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
15775bb6a25fSPoul-Henning Kamp                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
15785bb6a25fSPoul-Henning Kamp         atts[nAtts].normalized = 0;
15795bb6a25fSPoul-Henning Kamp       break;
15806b2c1e49SXin LI     case BT_CR:
15816b2c1e49SXin LI     case BT_LF:
15825bb6a25fSPoul-Henning Kamp       /* This case ensures that the first attribute name is counted
15835bb6a25fSPoul-Henning Kamp          Apart from that we could just change state on the quote. */
15845bb6a25fSPoul-Henning Kamp       if (state == inName)
15855bb6a25fSPoul-Henning Kamp         state = other;
15865bb6a25fSPoul-Henning Kamp       else if (state == inValue && nAtts < attsMax)
15875bb6a25fSPoul-Henning Kamp         atts[nAtts].normalized = 0;
15885bb6a25fSPoul-Henning Kamp       break;
15895bb6a25fSPoul-Henning Kamp     case BT_GT:
15905bb6a25fSPoul-Henning Kamp     case BT_SOL:
15915bb6a25fSPoul-Henning Kamp       if (state != inValue)
15925bb6a25fSPoul-Henning Kamp         return nAtts;
15935bb6a25fSPoul-Henning Kamp       break;
15945bb6a25fSPoul-Henning Kamp     default:
15955bb6a25fSPoul-Henning Kamp       break;
15965bb6a25fSPoul-Henning Kamp     }
15975bb6a25fSPoul-Henning Kamp   }
15985bb6a25fSPoul-Henning Kamp   /* not reached */
15995bb6a25fSPoul-Henning Kamp }
16005bb6a25fSPoul-Henning Kamp 
1601220ed979SColeman Kane static int PTRFASTCALL
PREFIX(charRefNumber)16026b2c1e49SXin LI PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
16035bb6a25fSPoul-Henning Kamp   int result = 0;
16045bb6a25fSPoul-Henning Kamp   /* skip &# */
16056b2c1e49SXin LI   UNUSED_P(enc);
16065bb6a25fSPoul-Henning Kamp   ptr += 2 * MINBPC(enc);
16075bb6a25fSPoul-Henning Kamp   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
16086b2c1e49SXin LI     for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
16095bb6a25fSPoul-Henning Kamp          ptr += MINBPC(enc)) {
16105bb6a25fSPoul-Henning Kamp       int c = BYTE_TO_ASCII(enc, ptr);
16115bb6a25fSPoul-Henning Kamp       switch (c) {
16126b2c1e49SXin LI       case ASCII_0:
16136b2c1e49SXin LI       case ASCII_1:
16146b2c1e49SXin LI       case ASCII_2:
16156b2c1e49SXin LI       case ASCII_3:
16166b2c1e49SXin LI       case ASCII_4:
16176b2c1e49SXin LI       case ASCII_5:
16186b2c1e49SXin LI       case ASCII_6:
16196b2c1e49SXin LI       case ASCII_7:
16206b2c1e49SXin LI       case ASCII_8:
16216b2c1e49SXin LI       case ASCII_9:
16225bb6a25fSPoul-Henning Kamp         result <<= 4;
16235bb6a25fSPoul-Henning Kamp         result |= (c - ASCII_0);
16245bb6a25fSPoul-Henning Kamp         break;
16256b2c1e49SXin LI       case ASCII_A:
16266b2c1e49SXin LI       case ASCII_B:
16276b2c1e49SXin LI       case ASCII_C:
16286b2c1e49SXin LI       case ASCII_D:
16296b2c1e49SXin LI       case ASCII_E:
16306b2c1e49SXin LI       case ASCII_F:
16315bb6a25fSPoul-Henning Kamp         result <<= 4;
16325bb6a25fSPoul-Henning Kamp         result += 10 + (c - ASCII_A);
16335bb6a25fSPoul-Henning Kamp         break;
16346b2c1e49SXin LI       case ASCII_a:
16356b2c1e49SXin LI       case ASCII_b:
16366b2c1e49SXin LI       case ASCII_c:
16376b2c1e49SXin LI       case ASCII_d:
16386b2c1e49SXin LI       case ASCII_e:
16396b2c1e49SXin LI       case ASCII_f:
16405bb6a25fSPoul-Henning Kamp         result <<= 4;
16415bb6a25fSPoul-Henning Kamp         result += 10 + (c - ASCII_a);
16425bb6a25fSPoul-Henning Kamp         break;
16435bb6a25fSPoul-Henning Kamp       }
16445bb6a25fSPoul-Henning Kamp       if (result >= 0x110000)
16455bb6a25fSPoul-Henning Kamp         return -1;
16465bb6a25fSPoul-Henning Kamp     }
16476b2c1e49SXin LI   } else {
16485bb6a25fSPoul-Henning Kamp     for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
16495bb6a25fSPoul-Henning Kamp       int c = BYTE_TO_ASCII(enc, ptr);
16505bb6a25fSPoul-Henning Kamp       result *= 10;
16515bb6a25fSPoul-Henning Kamp       result += (c - ASCII_0);
16525bb6a25fSPoul-Henning Kamp       if (result >= 0x110000)
16535bb6a25fSPoul-Henning Kamp         return -1;
16545bb6a25fSPoul-Henning Kamp     }
16555bb6a25fSPoul-Henning Kamp   }
16565bb6a25fSPoul-Henning Kamp   return checkCharRefNumber(result);
16575bb6a25fSPoul-Henning Kamp }
16585bb6a25fSPoul-Henning Kamp 
1659220ed979SColeman Kane static int PTRCALL
PREFIX(predefinedEntityName)16606b2c1e49SXin LI PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
16616b2c1e49SXin LI                              const char *end) {
16626b2c1e49SXin LI   UNUSED_P(enc);
16635bb6a25fSPoul-Henning Kamp   switch ((end - ptr) / MINBPC(enc)) {
16645bb6a25fSPoul-Henning Kamp   case 2:
16655bb6a25fSPoul-Henning Kamp     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
16665bb6a25fSPoul-Henning Kamp       switch (BYTE_TO_ASCII(enc, ptr)) {
16675bb6a25fSPoul-Henning Kamp       case ASCII_l:
16685bb6a25fSPoul-Henning Kamp         return ASCII_LT;
16695bb6a25fSPoul-Henning Kamp       case ASCII_g:
16705bb6a25fSPoul-Henning Kamp         return ASCII_GT;
16715bb6a25fSPoul-Henning Kamp       }
16725bb6a25fSPoul-Henning Kamp     }
16735bb6a25fSPoul-Henning Kamp     break;
16745bb6a25fSPoul-Henning Kamp   case 3:
16755bb6a25fSPoul-Henning Kamp     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
16765bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
16775bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
16785bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
16795bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_p))
16805bb6a25fSPoul-Henning Kamp           return ASCII_AMP;
16815bb6a25fSPoul-Henning Kamp       }
16825bb6a25fSPoul-Henning Kamp     }
16835bb6a25fSPoul-Henning Kamp     break;
16845bb6a25fSPoul-Henning Kamp   case 4:
16855bb6a25fSPoul-Henning Kamp     switch (BYTE_TO_ASCII(enc, ptr)) {
16865bb6a25fSPoul-Henning Kamp     case ASCII_q:
16875bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
16885bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
16895bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
16905bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
16915bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
16925bb6a25fSPoul-Henning Kamp           if (CHAR_MATCHES(enc, ptr, ASCII_t))
16935bb6a25fSPoul-Henning Kamp             return ASCII_QUOT;
16945bb6a25fSPoul-Henning Kamp         }
16955bb6a25fSPoul-Henning Kamp       }
16965bb6a25fSPoul-Henning Kamp       break;
16975bb6a25fSPoul-Henning Kamp     case ASCII_a:
16985bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
16995bb6a25fSPoul-Henning Kamp       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
17005bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
17015bb6a25fSPoul-Henning Kamp         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
17025bb6a25fSPoul-Henning Kamp           ptr += MINBPC(enc);
17035bb6a25fSPoul-Henning Kamp           if (CHAR_MATCHES(enc, ptr, ASCII_s))
17045bb6a25fSPoul-Henning Kamp             return ASCII_APOS;
17055bb6a25fSPoul-Henning Kamp         }
17065bb6a25fSPoul-Henning Kamp       }
17075bb6a25fSPoul-Henning Kamp       break;
17085bb6a25fSPoul-Henning Kamp     }
17095bb6a25fSPoul-Henning Kamp   }
17105bb6a25fSPoul-Henning Kamp   return 0;
17115bb6a25fSPoul-Henning Kamp }
17125bb6a25fSPoul-Henning Kamp 
1713220ed979SColeman Kane static int PTRCALL
PREFIX(nameMatchesAscii)17146b2c1e49SXin LI PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
17156b2c1e49SXin LI                          const char *end1, const char *ptr2) {
17166b2c1e49SXin LI   UNUSED_P(enc);
17175bb6a25fSPoul-Henning Kamp   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
17180a48773fSEric van Gyzen     if (end1 - ptr1 < MINBPC(enc)) {
17190a48773fSEric van Gyzen       /* This line cannot be executed.  The incoming data has already
17200a48773fSEric van Gyzen        * been tokenized once, so incomplete characters like this have
17210a48773fSEric van Gyzen        * already been eliminated from the input.  Retaining the
17220a48773fSEric van Gyzen        * paranoia check is still valuable, however.
17230a48773fSEric van Gyzen        */
17240a48773fSEric van Gyzen       return 0; /* LCOV_EXCL_LINE */
17250a48773fSEric van Gyzen     }
17265bb6a25fSPoul-Henning Kamp     if (! CHAR_MATCHES(enc, ptr1, *ptr2))
17275bb6a25fSPoul-Henning Kamp       return 0;
17285bb6a25fSPoul-Henning Kamp   }
17295bb6a25fSPoul-Henning Kamp   return ptr1 == end1;
17305bb6a25fSPoul-Henning Kamp }
17315bb6a25fSPoul-Henning Kamp 
1732220ed979SColeman Kane static int PTRFASTCALL
PREFIX(nameLength)17336b2c1e49SXin LI PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
17345bb6a25fSPoul-Henning Kamp   const char *start = ptr;
17355bb6a25fSPoul-Henning Kamp   for (;;) {
17365bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
17375bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
17386b2c1e49SXin LI   case BT_LEAD##n:                                                             \
1739ac69e5d4SEric van Gyzen     ptr += n; /* NOTE: The encoding has already been validated. */             \
17406b2c1e49SXin LI     break;
17416b2c1e49SXin LI       LEAD_CASE(2)
17426b2c1e49SXin LI       LEAD_CASE(3)
17436b2c1e49SXin LI       LEAD_CASE(4)
17445bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
17455bb6a25fSPoul-Henning Kamp     case BT_NONASCII:
17465bb6a25fSPoul-Henning Kamp     case BT_NMSTRT:
17475bb6a25fSPoul-Henning Kamp #  ifdef XML_NS
17485bb6a25fSPoul-Henning Kamp     case BT_COLON:
17495bb6a25fSPoul-Henning Kamp #  endif
17505bb6a25fSPoul-Henning Kamp     case BT_HEX:
17515bb6a25fSPoul-Henning Kamp     case BT_DIGIT:
17525bb6a25fSPoul-Henning Kamp     case BT_NAME:
17535bb6a25fSPoul-Henning Kamp     case BT_MINUS:
17545bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
17555bb6a25fSPoul-Henning Kamp       break;
17565bb6a25fSPoul-Henning Kamp     default:
1757220ed979SColeman Kane       return (int)(ptr - start);
17585bb6a25fSPoul-Henning Kamp     }
17595bb6a25fSPoul-Henning Kamp   }
17605bb6a25fSPoul-Henning Kamp }
17615bb6a25fSPoul-Henning Kamp 
1762220ed979SColeman Kane static const char *PTRFASTCALL
PREFIX(skipS)17636b2c1e49SXin LI PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
17645bb6a25fSPoul-Henning Kamp   for (;;) {
17655bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
17665bb6a25fSPoul-Henning Kamp     case BT_LF:
17675bb6a25fSPoul-Henning Kamp     case BT_CR:
17685bb6a25fSPoul-Henning Kamp     case BT_S:
17695bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
17705bb6a25fSPoul-Henning Kamp       break;
17715bb6a25fSPoul-Henning Kamp     default:
17725bb6a25fSPoul-Henning Kamp       return ptr;
17735bb6a25fSPoul-Henning Kamp     }
17745bb6a25fSPoul-Henning Kamp   }
17755bb6a25fSPoul-Henning Kamp }
17765bb6a25fSPoul-Henning Kamp 
1777220ed979SColeman Kane static void PTRCALL
PREFIX(updatePosition)17786b2c1e49SXin LI PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
17796b2c1e49SXin LI                        POSITION *pos) {
1780be8aff81SXin LI   while (HAS_CHAR(enc, ptr, end)) {
17815bb6a25fSPoul-Henning Kamp     switch (BYTE_TYPE(enc, ptr)) {
17825bb6a25fSPoul-Henning Kamp #  define LEAD_CASE(n)                                                         \
17835bb6a25fSPoul-Henning Kamp   case BT_LEAD##n:                                                             \
1784ac69e5d4SEric van Gyzen     ptr += n; /* NOTE: The encoding has already been validated. */             \
1785cc68614dSXin LI     pos->columnNumber++;                                                       \
17865bb6a25fSPoul-Henning Kamp     break;
17876b2c1e49SXin LI       LEAD_CASE(2)
17886b2c1e49SXin LI       LEAD_CASE(3)
17896b2c1e49SXin LI       LEAD_CASE(4)
17905bb6a25fSPoul-Henning Kamp #  undef LEAD_CASE
17915bb6a25fSPoul-Henning Kamp     case BT_LF:
1792cc68614dSXin LI       pos->columnNumber = 0;
17935bb6a25fSPoul-Henning Kamp       pos->lineNumber++;
17945bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
17955bb6a25fSPoul-Henning Kamp       break;
17965bb6a25fSPoul-Henning Kamp     case BT_CR:
17975bb6a25fSPoul-Henning Kamp       pos->lineNumber++;
17985bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
1799be8aff81SXin LI       if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
18005bb6a25fSPoul-Henning Kamp         ptr += MINBPC(enc);
1801cc68614dSXin LI       pos->columnNumber = 0;
18025bb6a25fSPoul-Henning Kamp       break;
18035bb6a25fSPoul-Henning Kamp     default:
18045bb6a25fSPoul-Henning Kamp       ptr += MINBPC(enc);
1805cc68614dSXin LI       pos->columnNumber++;
18065bb6a25fSPoul-Henning Kamp       break;
18075bb6a25fSPoul-Henning Kamp     }
18085bb6a25fSPoul-Henning Kamp   }
18095bb6a25fSPoul-Henning Kamp }
18105bb6a25fSPoul-Henning Kamp 
/* Drop the helper macros used by this file.  The file is included more
 * than once, so presumably this lets the next inclusion define them afresh.
 */

/* Name / name-start checks. */
#  undef CHECK_NAME_CASE
#  undef CHECK_NAME_CASES
#  undef CHECK_NMSTRT_CASE
#  undef CHECK_NMSTRT_CASES

/* Multi-byte and invalid-byte case helpers. */
#  undef DO_LEAD_CASE
#  undef MULTIBYTE_CASES
#  undef INVALID_CASES
1818220ed979SColeman Kane 
#endif /* XML_TOK_IMPL_C */