/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
   Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
415bb6a25fSPoul-Henning Kamp
42220ed979SColeman Kane #ifdef XML_TOK_IMPL_C
43220ed979SColeman Kane
#  if ! defined(IS_INVALID_CHAR)
/* Fallback for includers that did not supply IS_INVALID_CHAR (i.e. for
   UTF-16 and XML_MIN_SIZE not defined): no sequence is ever rejected. */
#    define IS_INVALID_CHAR(enc, ptr, n) (0)
#  endif
475bb6a25fSPoul-Henning Kamp
/* Switch case for an n-byte lead byte (BT_LEAD<n>) at ptr.
   - Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before `end`.
   - Returns XML_TOK_INVALID, with *nextTokPtr set to ptr, when
     IS_INVALID_CHAR rejects the sequence.
   - Otherwise advances ptr by n and breaks out of the enclosing switch.
   Note: `enc` and `end` are not parameters; they are taken from the scope
   in which the macro is expanded.  `n` is passed on unparenthesized to
   IS_INVALID_CHAR and BT_LEAD##n because it may be token-pasted there.
   Arguments used in arithmetic are parenthesized, matching
   CHECK_NMSTRT_CASE. */
#  define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  case BT_LEAD##n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
585bb6a25fSPoul-Henning Kamp
/* Switch cases shared by scanners that accept arbitrary character data:
   validates 2-, 3- and 4-byte lead sequences via INVALID_LEAD_CASE, and
   reports XML_TOK_INVALID (with *nextTokPtr set to ptr) for the byte types
   BT_NONXML, BT_MALFORM and BT_TRAIL. */
#  define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
685bb6a25fSPoul-Henning Kamp
/* Switch case for an n-byte lead byte (BT_LEAD<n>) inside a name.
   - Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain.
   - Returns XML_TOK_INVALID, with *nextTokPtr set to ptr, when the sequence
     is rejected by IS_INVALID_CHAR or is not accepted by IS_NAME_CHAR.
   - Otherwise advances ptr by n and breaks out of the enclosing switch.
   `n` is handed to IS_INVALID_CHAR / IS_NAME_CHAR unparenthesized because it
   may be token-pasted there; arguments used in arithmetic are parenthesized,
   matching CHECK_NMSTRT_CASE. */
#  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD##n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
795bb6a25fSPoul-Henning Kamp
/* Switch cases that consume one name character at ptr.
   Multi-byte lead bytes are delegated to CHECK_NAME_CASE.  A BT_NONASCII
   unit must pass IS_NAME_CHAR_MINBPC, otherwise XML_TOK_INVALID is returned
   with *nextTokPtr set to ptr.  Accepted single-unit characters advance ptr
   by MINBPC(enc) and break out of the enclosing switch. */
#  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NAME: \
  case BT_MINUS: \
  case BT_DIGIT: \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break;
975bb6a25fSPoul-Henning Kamp
/* Switch case for an n-byte lead byte (BT_LEAD<n>) at the start of a name.
   - Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain.
   - Returns XML_TOK_INVALID, with *nextTokPtr set to ptr, when the sequence
     is rejected by IS_INVALID_CHAR or is not accepted by IS_NMSTRT_CHAR.
   - Otherwise advances ptr by n and breaks out of the enclosing switch. */
#  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD##n: \
    if ((n) > (end) - (ptr)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (! IS_NMSTRT_CHAR(enc, ptr, n) || IS_INVALID_CHAR(enc, ptr, n)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    ptr += n; \
    break;
1085bb6a25fSPoul-Henning Kamp
/* Switch cases that consume one name-start character at ptr.
   Multi-byte lead bytes are delegated to CHECK_NMSTRT_CASE.  A BT_NONASCII
   unit must pass IS_NMSTRT_CHAR_MINBPC, otherwise XML_TOK_INVALID is
   returned with *nextTokPtr set to ptr.  Accepted single-unit characters
   (BT_NMSTRT, BT_HEX) advance ptr by MINBPC(enc) and break out of the
   enclosing switch. */
#  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break;
1235bb6a25fSPoul-Henning Kamp
#  if ! defined(PREFIX)
/* Default name decoration: leave identifiers unchanged when the including
   file did not define its own PREFIX. */
#    define PREFIX(ident) ident
#  endif
1275bb6a25fSPoul-Henning Kamp
/* True when at least `count` minimum-size character units (MINBPC(enc) bytes
   each) lie between ptr and end. */
#  define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= ((count) * MINBPC(enc)))

/* Single-unit shorthand for HAS_CHARS. */
#  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
132be8aff81SXin LI
/* Makes the enclosing function return XML_TOK_PARTIAL unless HAS_CHARS
   holds for `count` units.  Expands to a brace-enclosed block, so a trailing
   `;` at the call site becomes an empty statement. */
#  define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) { \
        return XML_TOK_PARTIAL; \
      } \
    }

/* Single-unit shorthand for REQUIRE_CHARS. */
#  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
141be8aff81SXin LI
1425bb6a25fSPoul-Henning Kamp /* ptr points to character following "<!-" */
1435bb6a25fSPoul-Henning Kamp
144220ed979SColeman Kane static int PTRCALL
PREFIX(scanComment)1456b2c1e49SXin LI PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
1466b2c1e49SXin LI const char **nextTokPtr) {
147be8aff81SXin LI if (HAS_CHAR(enc, ptr, end)) {
1485bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
1495bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
1505bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
1515bb6a25fSPoul-Henning Kamp }
1525bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
153be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
1545bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
1555bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
1565bb6a25fSPoul-Henning Kamp case BT_MINUS:
157be8aff81SXin LI ptr += MINBPC(enc);
158be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
1595bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160be8aff81SXin LI ptr += MINBPC(enc);
161be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
1625bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1635bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
1645bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
1655bb6a25fSPoul-Henning Kamp }
1665bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
1675bb6a25fSPoul-Henning Kamp return XML_TOK_COMMENT;
1685bb6a25fSPoul-Henning Kamp }
1695bb6a25fSPoul-Henning Kamp break;
1705bb6a25fSPoul-Henning Kamp default:
1715bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1725bb6a25fSPoul-Henning Kamp break;
1735bb6a25fSPoul-Henning Kamp }
1745bb6a25fSPoul-Henning Kamp }
1755bb6a25fSPoul-Henning Kamp }
1765bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
1775bb6a25fSPoul-Henning Kamp }
1785bb6a25fSPoul-Henning Kamp
1795bb6a25fSPoul-Henning Kamp /* ptr points to character following "<!" */
1805bb6a25fSPoul-Henning Kamp
181220ed979SColeman Kane static int PTRCALL
PREFIX(scanDecl)1826b2c1e49SXin LI PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
1836b2c1e49SXin LI const char **nextTokPtr) {
184be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
1855bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
1865bb6a25fSPoul-Henning Kamp case BT_MINUS:
1875bb6a25fSPoul-Henning Kamp return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1885bb6a25fSPoul-Henning Kamp case BT_LSQB:
1895bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
1905bb6a25fSPoul-Henning Kamp return XML_TOK_COND_SECT_OPEN;
1915bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
1925bb6a25fSPoul-Henning Kamp case BT_HEX:
1935bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1945bb6a25fSPoul-Henning Kamp break;
1955bb6a25fSPoul-Henning Kamp default:
1965bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
1975bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
1985bb6a25fSPoul-Henning Kamp }
199be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
2005bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
2015bb6a25fSPoul-Henning Kamp case BT_PERCNT:
202be8aff81SXin LI REQUIRE_CHARS(enc, ptr, end, 2);
2035bb6a25fSPoul-Henning Kamp /* don't allow <!ENTITY% foo "whatever"> */
2045bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
2056b2c1e49SXin LI case BT_S:
2066b2c1e49SXin LI case BT_CR:
2076b2c1e49SXin LI case BT_LF:
2086b2c1e49SXin LI case BT_PERCNT:
2095bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
2105bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
2115bb6a25fSPoul-Henning Kamp }
2125bb6a25fSPoul-Henning Kamp /* fall through */
2136b2c1e49SXin LI case BT_S:
2146b2c1e49SXin LI case BT_CR:
2156b2c1e49SXin LI case BT_LF:
2165bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
2175bb6a25fSPoul-Henning Kamp return XML_TOK_DECL_OPEN;
2185bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
2195bb6a25fSPoul-Henning Kamp case BT_HEX:
2205bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
2215bb6a25fSPoul-Henning Kamp break;
2225bb6a25fSPoul-Henning Kamp default:
2235bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
2245bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
2255bb6a25fSPoul-Henning Kamp }
2265bb6a25fSPoul-Henning Kamp }
2275bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
2285bb6a25fSPoul-Henning Kamp }
2295bb6a25fSPoul-Henning Kamp
230220ed979SColeman Kane static int PTRCALL
PREFIX(checkPiTarget)2316b2c1e49SXin LI PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
2326b2c1e49SXin LI int *tokPtr) {
2335bb6a25fSPoul-Henning Kamp int upper = 0;
2346b2c1e49SXin LI UNUSED_P(enc);
2355bb6a25fSPoul-Henning Kamp *tokPtr = XML_TOK_PI;
2365bb6a25fSPoul-Henning Kamp if (end - ptr != MINBPC(enc) * 3)
2375bb6a25fSPoul-Henning Kamp return 1;
2385bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
2395bb6a25fSPoul-Henning Kamp case ASCII_x:
2405bb6a25fSPoul-Henning Kamp break;
2415bb6a25fSPoul-Henning Kamp case ASCII_X:
2425bb6a25fSPoul-Henning Kamp upper = 1;
2435bb6a25fSPoul-Henning Kamp break;
2445bb6a25fSPoul-Henning Kamp default:
2455bb6a25fSPoul-Henning Kamp return 1;
2465bb6a25fSPoul-Henning Kamp }
2475bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
2485bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
2495bb6a25fSPoul-Henning Kamp case ASCII_m:
2505bb6a25fSPoul-Henning Kamp break;
2515bb6a25fSPoul-Henning Kamp case ASCII_M:
2525bb6a25fSPoul-Henning Kamp upper = 1;
2535bb6a25fSPoul-Henning Kamp break;
2545bb6a25fSPoul-Henning Kamp default:
2555bb6a25fSPoul-Henning Kamp return 1;
2565bb6a25fSPoul-Henning Kamp }
2575bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
2585bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
2595bb6a25fSPoul-Henning Kamp case ASCII_l:
2605bb6a25fSPoul-Henning Kamp break;
2615bb6a25fSPoul-Henning Kamp case ASCII_L:
2625bb6a25fSPoul-Henning Kamp upper = 1;
2635bb6a25fSPoul-Henning Kamp break;
2645bb6a25fSPoul-Henning Kamp default:
2655bb6a25fSPoul-Henning Kamp return 1;
2665bb6a25fSPoul-Henning Kamp }
2675bb6a25fSPoul-Henning Kamp if (upper)
2685bb6a25fSPoul-Henning Kamp return 0;
2695bb6a25fSPoul-Henning Kamp *tokPtr = XML_TOK_XML_DECL;
2705bb6a25fSPoul-Henning Kamp return 1;
2715bb6a25fSPoul-Henning Kamp }
2725bb6a25fSPoul-Henning Kamp
2735bb6a25fSPoul-Henning Kamp /* ptr points to character following "<?" */
2745bb6a25fSPoul-Henning Kamp
275220ed979SColeman Kane static int PTRCALL
PREFIX(scanPi)2766b2c1e49SXin LI PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
2776b2c1e49SXin LI const char **nextTokPtr) {
2785bb6a25fSPoul-Henning Kamp int tok;
2795bb6a25fSPoul-Henning Kamp const char *target = ptr;
280be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
2815bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
2825bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
2835bb6a25fSPoul-Henning Kamp default:
2845bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
2855bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
2865bb6a25fSPoul-Henning Kamp }
287be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
2885bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
2895bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
2906b2c1e49SXin LI case BT_S:
2916b2c1e49SXin LI case BT_CR:
2926b2c1e49SXin LI case BT_LF:
2935bb6a25fSPoul-Henning Kamp if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
2945bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
2955bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
2965bb6a25fSPoul-Henning Kamp }
2975bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
298be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
2995bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
3005bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
3015bb6a25fSPoul-Henning Kamp case BT_QUEST:
3025bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
303be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
3045bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3055bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
3065bb6a25fSPoul-Henning Kamp return tok;
3075bb6a25fSPoul-Henning Kamp }
3085bb6a25fSPoul-Henning Kamp break;
3095bb6a25fSPoul-Henning Kamp default:
3105bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
3115bb6a25fSPoul-Henning Kamp break;
3125bb6a25fSPoul-Henning Kamp }
3135bb6a25fSPoul-Henning Kamp }
3145bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
3155bb6a25fSPoul-Henning Kamp case BT_QUEST:
3165bb6a25fSPoul-Henning Kamp if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
3175bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
3185bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
3195bb6a25fSPoul-Henning Kamp }
3205bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
321be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
3225bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3235bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
3245bb6a25fSPoul-Henning Kamp return tok;
3255bb6a25fSPoul-Henning Kamp }
3265bb6a25fSPoul-Henning Kamp /* fall through */
3275bb6a25fSPoul-Henning Kamp default:
3285bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
3295bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
3305bb6a25fSPoul-Henning Kamp }
3315bb6a25fSPoul-Henning Kamp }
3325bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
3335bb6a25fSPoul-Henning Kamp }
3345bb6a25fSPoul-Henning Kamp
335220ed979SColeman Kane static int PTRCALL
PREFIX(scanCdataSection)3366b2c1e49SXin LI PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
3376b2c1e49SXin LI const char **nextTokPtr) {
3386b2c1e49SXin LI static const char CDATA_LSQB[]
3396b2c1e49SXin LI = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
3405bb6a25fSPoul-Henning Kamp int i;
3416b2c1e49SXin LI UNUSED_P(enc);
3425bb6a25fSPoul-Henning Kamp /* CDATA[ */
343be8aff81SXin LI REQUIRE_CHARS(enc, ptr, end, 6);
3445bb6a25fSPoul-Henning Kamp for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
3455bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
3465bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
3475bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
3485bb6a25fSPoul-Henning Kamp }
3495bb6a25fSPoul-Henning Kamp }
3505bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
3515bb6a25fSPoul-Henning Kamp return XML_TOK_CDATA_SECT_OPEN;
3525bb6a25fSPoul-Henning Kamp }
3535bb6a25fSPoul-Henning Kamp
354220ed979SColeman Kane static int PTRCALL
PREFIX(cdataSectionTok)3556b2c1e49SXin LI PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
3566b2c1e49SXin LI const char **nextTokPtr) {
357be8aff81SXin LI if (ptr >= end)
3585bb6a25fSPoul-Henning Kamp return XML_TOK_NONE;
3595bb6a25fSPoul-Henning Kamp if (MINBPC(enc) > 1) {
3605bb6a25fSPoul-Henning Kamp size_t n = end - ptr;
3615bb6a25fSPoul-Henning Kamp if (n & (MINBPC(enc) - 1)) {
3625bb6a25fSPoul-Henning Kamp n &= ~(MINBPC(enc) - 1);
3635bb6a25fSPoul-Henning Kamp if (n == 0)
3645bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
3655bb6a25fSPoul-Henning Kamp end = ptr + n;
3665bb6a25fSPoul-Henning Kamp }
3675bb6a25fSPoul-Henning Kamp }
3685bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
3695bb6a25fSPoul-Henning Kamp case BT_RSQB:
3705bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
371be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
3725bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
3735bb6a25fSPoul-Henning Kamp break;
3745bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
375be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
3765bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
3775bb6a25fSPoul-Henning Kamp ptr -= MINBPC(enc);
3785bb6a25fSPoul-Henning Kamp break;
3795bb6a25fSPoul-Henning Kamp }
3805bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
3815bb6a25fSPoul-Henning Kamp return XML_TOK_CDATA_SECT_CLOSE;
3825bb6a25fSPoul-Henning Kamp case BT_CR:
3835bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
384be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
3855bb6a25fSPoul-Henning Kamp if (BYTE_TYPE(enc, ptr) == BT_LF)
3865bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
3875bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
3885bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
3895bb6a25fSPoul-Henning Kamp case BT_LF:
3905bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
3915bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
3925bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
3935bb6a25fSPoul-Henning Kamp default:
3945bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
3955bb6a25fSPoul-Henning Kamp break;
3965bb6a25fSPoul-Henning Kamp }
397be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
3985bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
3995bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
4005bb6a25fSPoul-Henning Kamp case BT_LEAD##n: \
4015bb6a25fSPoul-Henning Kamp if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
4025bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr; \
4035bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS; \
4045bb6a25fSPoul-Henning Kamp } \
4055bb6a25fSPoul-Henning Kamp ptr += n; \
4065bb6a25fSPoul-Henning Kamp break;
4076b2c1e49SXin LI LEAD_CASE(2)
4086b2c1e49SXin LI LEAD_CASE(3)
4096b2c1e49SXin LI LEAD_CASE(4)
4105bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
4115bb6a25fSPoul-Henning Kamp case BT_NONXML:
4125bb6a25fSPoul-Henning Kamp case BT_MALFORM:
4135bb6a25fSPoul-Henning Kamp case BT_TRAIL:
4145bb6a25fSPoul-Henning Kamp case BT_CR:
4155bb6a25fSPoul-Henning Kamp case BT_LF:
4165bb6a25fSPoul-Henning Kamp case BT_RSQB:
4175bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4185bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
4195bb6a25fSPoul-Henning Kamp default:
4205bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
4215bb6a25fSPoul-Henning Kamp break;
4225bb6a25fSPoul-Henning Kamp }
4235bb6a25fSPoul-Henning Kamp }
4245bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4255bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
4265bb6a25fSPoul-Henning Kamp }
4275bb6a25fSPoul-Henning Kamp
4285bb6a25fSPoul-Henning Kamp /* ptr points to character following "</" */
4295bb6a25fSPoul-Henning Kamp
430220ed979SColeman Kane static int PTRCALL
PREFIX(scanEndTag)4316b2c1e49SXin LI PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
4326b2c1e49SXin LI const char **nextTokPtr) {
433be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
4345bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
4355bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
4365bb6a25fSPoul-Henning Kamp default:
4375bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4385bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
4395bb6a25fSPoul-Henning Kamp }
440be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
4415bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
4425bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
4436b2c1e49SXin LI case BT_S:
4446b2c1e49SXin LI case BT_CR:
4456b2c1e49SXin LI case BT_LF:
446be8aff81SXin LI for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
4475bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
4486b2c1e49SXin LI case BT_S:
4496b2c1e49SXin LI case BT_CR:
4506b2c1e49SXin LI case BT_LF:
4515bb6a25fSPoul-Henning Kamp break;
4525bb6a25fSPoul-Henning Kamp case BT_GT:
4535bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
4545bb6a25fSPoul-Henning Kamp return XML_TOK_END_TAG;
4555bb6a25fSPoul-Henning Kamp default:
4565bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4575bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
4585bb6a25fSPoul-Henning Kamp }
4595bb6a25fSPoul-Henning Kamp }
4605bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
4615bb6a25fSPoul-Henning Kamp # ifdef XML_NS
4625bb6a25fSPoul-Henning Kamp case BT_COLON:
4635bb6a25fSPoul-Henning Kamp /* no need to check qname syntax here,
4645bb6a25fSPoul-Henning Kamp since end-tag must match exactly */
4655bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
4665bb6a25fSPoul-Henning Kamp break;
4675bb6a25fSPoul-Henning Kamp # endif
4685bb6a25fSPoul-Henning Kamp case BT_GT:
4695bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
4705bb6a25fSPoul-Henning Kamp return XML_TOK_END_TAG;
4715bb6a25fSPoul-Henning Kamp default:
4725bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4735bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
4745bb6a25fSPoul-Henning Kamp }
4755bb6a25fSPoul-Henning Kamp }
4765bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
4775bb6a25fSPoul-Henning Kamp }
4785bb6a25fSPoul-Henning Kamp
4795bb6a25fSPoul-Henning Kamp /* ptr points to character following "&#X" */
4805bb6a25fSPoul-Henning Kamp
481220ed979SColeman Kane static int PTRCALL
PREFIX(scanHexCharRef)4826b2c1e49SXin LI PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
4836b2c1e49SXin LI const char **nextTokPtr) {
484be8aff81SXin LI if (HAS_CHAR(enc, ptr, end)) {
4855bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
4865bb6a25fSPoul-Henning Kamp case BT_DIGIT:
4875bb6a25fSPoul-Henning Kamp case BT_HEX:
4885bb6a25fSPoul-Henning Kamp break;
4895bb6a25fSPoul-Henning Kamp default:
4905bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
4915bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
4925bb6a25fSPoul-Henning Kamp }
493be8aff81SXin LI for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
4945bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
4955bb6a25fSPoul-Henning Kamp case BT_DIGIT:
4965bb6a25fSPoul-Henning Kamp case BT_HEX:
4975bb6a25fSPoul-Henning Kamp break;
4985bb6a25fSPoul-Henning Kamp case BT_SEMI:
4995bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
5005bb6a25fSPoul-Henning Kamp return XML_TOK_CHAR_REF;
5015bb6a25fSPoul-Henning Kamp default:
5025bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5035bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5045bb6a25fSPoul-Henning Kamp }
5055bb6a25fSPoul-Henning Kamp }
5065bb6a25fSPoul-Henning Kamp }
5075bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
5085bb6a25fSPoul-Henning Kamp }
5095bb6a25fSPoul-Henning Kamp
5105bb6a25fSPoul-Henning Kamp /* ptr points to character following "&#" */
5115bb6a25fSPoul-Henning Kamp
512220ed979SColeman Kane static int PTRCALL
PREFIX(scanCharRef)5136b2c1e49SXin LI PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
5146b2c1e49SXin LI const char **nextTokPtr) {
515be8aff81SXin LI if (HAS_CHAR(enc, ptr, end)) {
5165bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_x))
5175bb6a25fSPoul-Henning Kamp return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
5185bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5195bb6a25fSPoul-Henning Kamp case BT_DIGIT:
5205bb6a25fSPoul-Henning Kamp break;
5215bb6a25fSPoul-Henning Kamp default:
5225bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5235bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5245bb6a25fSPoul-Henning Kamp }
525be8aff81SXin LI for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
5265bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5275bb6a25fSPoul-Henning Kamp case BT_DIGIT:
5285bb6a25fSPoul-Henning Kamp break;
5295bb6a25fSPoul-Henning Kamp case BT_SEMI:
5305bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
5315bb6a25fSPoul-Henning Kamp return XML_TOK_CHAR_REF;
5325bb6a25fSPoul-Henning Kamp default:
5335bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5345bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5355bb6a25fSPoul-Henning Kamp }
5365bb6a25fSPoul-Henning Kamp }
5375bb6a25fSPoul-Henning Kamp }
5385bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
5395bb6a25fSPoul-Henning Kamp }
5405bb6a25fSPoul-Henning Kamp
5415bb6a25fSPoul-Henning Kamp /* ptr points to character following "&" */
5425bb6a25fSPoul-Henning Kamp
543220ed979SColeman Kane static int PTRCALL
PREFIX(scanRef)5445bb6a25fSPoul-Henning Kamp PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
5456b2c1e49SXin LI const char **nextTokPtr) {
546be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
5475bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5485bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
5495bb6a25fSPoul-Henning Kamp case BT_NUM:
5505bb6a25fSPoul-Henning Kamp return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
5515bb6a25fSPoul-Henning Kamp default:
5525bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5535bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5545bb6a25fSPoul-Henning Kamp }
555be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
5565bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5575bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
5585bb6a25fSPoul-Henning Kamp case BT_SEMI:
5595bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
5605bb6a25fSPoul-Henning Kamp return XML_TOK_ENTITY_REF;
5615bb6a25fSPoul-Henning Kamp default:
5625bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5635bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5645bb6a25fSPoul-Henning Kamp }
5655bb6a25fSPoul-Henning Kamp }
5665bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
5675bb6a25fSPoul-Henning Kamp }
5685bb6a25fSPoul-Henning Kamp
5695bb6a25fSPoul-Henning Kamp /* ptr points to character following first character of attribute name */
5705bb6a25fSPoul-Henning Kamp
571220ed979SColeman Kane static int PTRCALL
PREFIX(scanAtts)5725bb6a25fSPoul-Henning Kamp PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
5736b2c1e49SXin LI const char **nextTokPtr) {
5745bb6a25fSPoul-Henning Kamp # ifdef XML_NS
5755bb6a25fSPoul-Henning Kamp int hadColon = 0;
5765bb6a25fSPoul-Henning Kamp # endif
577be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
5785bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5795bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
5805bb6a25fSPoul-Henning Kamp # ifdef XML_NS
5815bb6a25fSPoul-Henning Kamp case BT_COLON:
5825bb6a25fSPoul-Henning Kamp if (hadColon) {
5835bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5845bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5855bb6a25fSPoul-Henning Kamp }
5865bb6a25fSPoul-Henning Kamp hadColon = 1;
5875bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
588be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
5895bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
5905bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
5915bb6a25fSPoul-Henning Kamp default:
5925bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
5935bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
5945bb6a25fSPoul-Henning Kamp }
5955bb6a25fSPoul-Henning Kamp break;
5965bb6a25fSPoul-Henning Kamp # endif
5976b2c1e49SXin LI case BT_S:
5986b2c1e49SXin LI case BT_CR:
5996b2c1e49SXin LI case BT_LF:
6005bb6a25fSPoul-Henning Kamp for (;;) {
6015bb6a25fSPoul-Henning Kamp int t;
6025bb6a25fSPoul-Henning Kamp
6035bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
604be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
6055bb6a25fSPoul-Henning Kamp t = BYTE_TYPE(enc, ptr);
6065bb6a25fSPoul-Henning Kamp if (t == BT_EQUALS)
6075bb6a25fSPoul-Henning Kamp break;
6085bb6a25fSPoul-Henning Kamp switch (t) {
6095bb6a25fSPoul-Henning Kamp case BT_S:
6105bb6a25fSPoul-Henning Kamp case BT_LF:
6115bb6a25fSPoul-Henning Kamp case BT_CR:
6125bb6a25fSPoul-Henning Kamp break;
6135bb6a25fSPoul-Henning Kamp default:
6145bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
6155bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
6165bb6a25fSPoul-Henning Kamp }
6175bb6a25fSPoul-Henning Kamp }
6185bb6a25fSPoul-Henning Kamp /* fall through */
6196b2c1e49SXin LI case BT_EQUALS: {
6205bb6a25fSPoul-Henning Kamp int open;
6215bb6a25fSPoul-Henning Kamp # ifdef XML_NS
6225bb6a25fSPoul-Henning Kamp hadColon = 0;
6235bb6a25fSPoul-Henning Kamp # endif
6245bb6a25fSPoul-Henning Kamp for (;;) {
6255bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
626be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
6275bb6a25fSPoul-Henning Kamp open = BYTE_TYPE(enc, ptr);
6285bb6a25fSPoul-Henning Kamp if (open == BT_QUOT || open == BT_APOS)
6295bb6a25fSPoul-Henning Kamp break;
6305bb6a25fSPoul-Henning Kamp switch (open) {
6315bb6a25fSPoul-Henning Kamp case BT_S:
6325bb6a25fSPoul-Henning Kamp case BT_LF:
6335bb6a25fSPoul-Henning Kamp case BT_CR:
6345bb6a25fSPoul-Henning Kamp break;
6355bb6a25fSPoul-Henning Kamp default:
6365bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
6375bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
6385bb6a25fSPoul-Henning Kamp }
6395bb6a25fSPoul-Henning Kamp }
6405bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
6415bb6a25fSPoul-Henning Kamp /* in attribute value */
6425bb6a25fSPoul-Henning Kamp for (;;) {
6435bb6a25fSPoul-Henning Kamp int t;
644be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
6455bb6a25fSPoul-Henning Kamp t = BYTE_TYPE(enc, ptr);
6465bb6a25fSPoul-Henning Kamp if (t == open)
6475bb6a25fSPoul-Henning Kamp break;
6485bb6a25fSPoul-Henning Kamp switch (t) {
6495bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
6506b2c1e49SXin LI case BT_AMP: {
6515bb6a25fSPoul-Henning Kamp int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
6525bb6a25fSPoul-Henning Kamp if (tok <= 0) {
6535bb6a25fSPoul-Henning Kamp if (tok == XML_TOK_INVALID)
6545bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
6555bb6a25fSPoul-Henning Kamp return tok;
6565bb6a25fSPoul-Henning Kamp }
6575bb6a25fSPoul-Henning Kamp break;
6585bb6a25fSPoul-Henning Kamp }
6595bb6a25fSPoul-Henning Kamp case BT_LT:
6605bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
6615bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
6625bb6a25fSPoul-Henning Kamp default:
6635bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
6645bb6a25fSPoul-Henning Kamp break;
6655bb6a25fSPoul-Henning Kamp }
6665bb6a25fSPoul-Henning Kamp }
6675bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
668be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
6695bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
6705bb6a25fSPoul-Henning Kamp case BT_S:
6715bb6a25fSPoul-Henning Kamp case BT_CR:
6725bb6a25fSPoul-Henning Kamp case BT_LF:
6735bb6a25fSPoul-Henning Kamp break;
6745bb6a25fSPoul-Henning Kamp case BT_SOL:
6755bb6a25fSPoul-Henning Kamp goto sol;
6765bb6a25fSPoul-Henning Kamp case BT_GT:
6775bb6a25fSPoul-Henning Kamp goto gt;
6785bb6a25fSPoul-Henning Kamp default:
6795bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
6805bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
6815bb6a25fSPoul-Henning Kamp }
6825bb6a25fSPoul-Henning Kamp /* ptr points to closing quote */
6835bb6a25fSPoul-Henning Kamp for (;;) {
6845bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
685be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
6865bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
6875bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
6886b2c1e49SXin LI case BT_S:
6896b2c1e49SXin LI case BT_CR:
6906b2c1e49SXin LI case BT_LF:
6915bb6a25fSPoul-Henning Kamp continue;
6925bb6a25fSPoul-Henning Kamp case BT_GT:
6935bb6a25fSPoul-Henning Kamp gt:
6945bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
6955bb6a25fSPoul-Henning Kamp return XML_TOK_START_TAG_WITH_ATTS;
6965bb6a25fSPoul-Henning Kamp case BT_SOL:
6975bb6a25fSPoul-Henning Kamp sol:
6985bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
699be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
7005bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
7015bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7025bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7035bb6a25fSPoul-Henning Kamp }
7045bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
7055bb6a25fSPoul-Henning Kamp return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
7065bb6a25fSPoul-Henning Kamp default:
7075bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7085bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7095bb6a25fSPoul-Henning Kamp }
7105bb6a25fSPoul-Henning Kamp break;
7115bb6a25fSPoul-Henning Kamp }
7125bb6a25fSPoul-Henning Kamp break;
7135bb6a25fSPoul-Henning Kamp }
7145bb6a25fSPoul-Henning Kamp default:
7155bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7165bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7175bb6a25fSPoul-Henning Kamp }
7185bb6a25fSPoul-Henning Kamp }
7195bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
7205bb6a25fSPoul-Henning Kamp }
7215bb6a25fSPoul-Henning Kamp
/* ptr points to character following "<" */
7235bb6a25fSPoul-Henning Kamp
724220ed979SColeman Kane static int PTRCALL
PREFIX(scanLt)7255bb6a25fSPoul-Henning Kamp PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
7266b2c1e49SXin LI const char **nextTokPtr) {
7275bb6a25fSPoul-Henning Kamp # ifdef XML_NS
7285bb6a25fSPoul-Henning Kamp int hadColon;
7295bb6a25fSPoul-Henning Kamp # endif
730be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
7315bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
7325bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7335bb6a25fSPoul-Henning Kamp case BT_EXCL:
734be8aff81SXin LI ptr += MINBPC(enc);
735be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
7365bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
7375bb6a25fSPoul-Henning Kamp case BT_MINUS:
7385bb6a25fSPoul-Henning Kamp return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7395bb6a25fSPoul-Henning Kamp case BT_LSQB:
7406b2c1e49SXin LI return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7415bb6a25fSPoul-Henning Kamp }
7425bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7435bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7445bb6a25fSPoul-Henning Kamp case BT_QUEST:
7455bb6a25fSPoul-Henning Kamp return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7465bb6a25fSPoul-Henning Kamp case BT_SOL:
7475bb6a25fSPoul-Henning Kamp return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
7485bb6a25fSPoul-Henning Kamp default:
7495bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7505bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7515bb6a25fSPoul-Henning Kamp }
7525bb6a25fSPoul-Henning Kamp # ifdef XML_NS
7535bb6a25fSPoul-Henning Kamp hadColon = 0;
7545bb6a25fSPoul-Henning Kamp # endif
7555bb6a25fSPoul-Henning Kamp /* we have a start-tag */
756be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
7575bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
7585bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
7595bb6a25fSPoul-Henning Kamp # ifdef XML_NS
7605bb6a25fSPoul-Henning Kamp case BT_COLON:
7615bb6a25fSPoul-Henning Kamp if (hadColon) {
7625bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7635bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7645bb6a25fSPoul-Henning Kamp }
7655bb6a25fSPoul-Henning Kamp hadColon = 1;
7665bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
767be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
7685bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
7695bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7705bb6a25fSPoul-Henning Kamp default:
7715bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7725bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7735bb6a25fSPoul-Henning Kamp }
7745bb6a25fSPoul-Henning Kamp break;
7755bb6a25fSPoul-Henning Kamp # endif
7766b2c1e49SXin LI case BT_S:
7776b2c1e49SXin LI case BT_CR:
7786b2c1e49SXin LI case BT_LF: {
7795bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
780be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
7815bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
7825bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
7835bb6a25fSPoul-Henning Kamp case BT_GT:
7845bb6a25fSPoul-Henning Kamp goto gt;
7855bb6a25fSPoul-Henning Kamp case BT_SOL:
7865bb6a25fSPoul-Henning Kamp goto sol;
7876b2c1e49SXin LI case BT_S:
7886b2c1e49SXin LI case BT_CR:
7896b2c1e49SXin LI case BT_LF:
7905bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
7915bb6a25fSPoul-Henning Kamp continue;
7925bb6a25fSPoul-Henning Kamp default:
7935bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
7945bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
7955bb6a25fSPoul-Henning Kamp }
7965bb6a25fSPoul-Henning Kamp return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
7975bb6a25fSPoul-Henning Kamp }
7985bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
7995bb6a25fSPoul-Henning Kamp }
8005bb6a25fSPoul-Henning Kamp case BT_GT:
8015bb6a25fSPoul-Henning Kamp gt:
8025bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
8035bb6a25fSPoul-Henning Kamp return XML_TOK_START_TAG_NO_ATTS;
8045bb6a25fSPoul-Henning Kamp case BT_SOL:
8055bb6a25fSPoul-Henning Kamp sol:
8065bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
807be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
8085bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
8095bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
8105bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
8115bb6a25fSPoul-Henning Kamp }
8125bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
8135bb6a25fSPoul-Henning Kamp return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
8145bb6a25fSPoul-Henning Kamp default:
8155bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
8165bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
8175bb6a25fSPoul-Henning Kamp }
8185bb6a25fSPoul-Henning Kamp }
8195bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
8205bb6a25fSPoul-Henning Kamp }
8215bb6a25fSPoul-Henning Kamp
822220ed979SColeman Kane static int PTRCALL
PREFIX(contentTok)8235bb6a25fSPoul-Henning Kamp PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
8246b2c1e49SXin LI const char **nextTokPtr) {
825be8aff81SXin LI if (ptr >= end)
8265bb6a25fSPoul-Henning Kamp return XML_TOK_NONE;
8275bb6a25fSPoul-Henning Kamp if (MINBPC(enc) > 1) {
8285bb6a25fSPoul-Henning Kamp size_t n = end - ptr;
8295bb6a25fSPoul-Henning Kamp if (n & (MINBPC(enc) - 1)) {
8305bb6a25fSPoul-Henning Kamp n &= ~(MINBPC(enc) - 1);
8315bb6a25fSPoul-Henning Kamp if (n == 0)
8325bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
8335bb6a25fSPoul-Henning Kamp end = ptr + n;
8345bb6a25fSPoul-Henning Kamp }
8355bb6a25fSPoul-Henning Kamp }
8365bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
8375bb6a25fSPoul-Henning Kamp case BT_LT:
8385bb6a25fSPoul-Henning Kamp return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
8395bb6a25fSPoul-Henning Kamp case BT_AMP:
8405bb6a25fSPoul-Henning Kamp return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
8415bb6a25fSPoul-Henning Kamp case BT_CR:
8425bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
843be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
8445bb6a25fSPoul-Henning Kamp return XML_TOK_TRAILING_CR;
8455bb6a25fSPoul-Henning Kamp if (BYTE_TYPE(enc, ptr) == BT_LF)
8465bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
8475bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
8485bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
8495bb6a25fSPoul-Henning Kamp case BT_LF:
8505bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
8515bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
8525bb6a25fSPoul-Henning Kamp case BT_RSQB:
8535bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
854be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
8555bb6a25fSPoul-Henning Kamp return XML_TOK_TRAILING_RSQB;
8565bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
8575bb6a25fSPoul-Henning Kamp break;
8585bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
859be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
8605bb6a25fSPoul-Henning Kamp return XML_TOK_TRAILING_RSQB;
8615bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
8625bb6a25fSPoul-Henning Kamp ptr -= MINBPC(enc);
8635bb6a25fSPoul-Henning Kamp break;
8645bb6a25fSPoul-Henning Kamp }
8655bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
8665bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
8675bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
8685bb6a25fSPoul-Henning Kamp default:
8695bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
8705bb6a25fSPoul-Henning Kamp break;
8715bb6a25fSPoul-Henning Kamp }
872be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
8735bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
8745bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
8755bb6a25fSPoul-Henning Kamp case BT_LEAD##n: \
8765bb6a25fSPoul-Henning Kamp if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
8775bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr; \
8785bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS; \
8795bb6a25fSPoul-Henning Kamp } \
8805bb6a25fSPoul-Henning Kamp ptr += n; \
8815bb6a25fSPoul-Henning Kamp break;
8826b2c1e49SXin LI LEAD_CASE(2)
8836b2c1e49SXin LI LEAD_CASE(3)
8846b2c1e49SXin LI LEAD_CASE(4)
8855bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
8865bb6a25fSPoul-Henning Kamp case BT_RSQB:
887be8aff81SXin LI if (HAS_CHARS(enc, ptr, end, 2)) {
8885bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
8895bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
8905bb6a25fSPoul-Henning Kamp break;
8915bb6a25fSPoul-Henning Kamp }
892be8aff81SXin LI if (HAS_CHARS(enc, ptr, end, 3)) {
8935bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
8945bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
8955bb6a25fSPoul-Henning Kamp break;
8965bb6a25fSPoul-Henning Kamp }
8975bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + 2 * MINBPC(enc);
8985bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
8995bb6a25fSPoul-Henning Kamp }
9005bb6a25fSPoul-Henning Kamp }
9015bb6a25fSPoul-Henning Kamp /* fall through */
9025bb6a25fSPoul-Henning Kamp case BT_AMP:
9035bb6a25fSPoul-Henning Kamp case BT_LT:
9045bb6a25fSPoul-Henning Kamp case BT_NONXML:
9055bb6a25fSPoul-Henning Kamp case BT_MALFORM:
9065bb6a25fSPoul-Henning Kamp case BT_TRAIL:
9075bb6a25fSPoul-Henning Kamp case BT_CR:
9085bb6a25fSPoul-Henning Kamp case BT_LF:
9095bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9105bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
9115bb6a25fSPoul-Henning Kamp default:
9125bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
9135bb6a25fSPoul-Henning Kamp break;
9145bb6a25fSPoul-Henning Kamp }
9155bb6a25fSPoul-Henning Kamp }
9165bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9175bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
9185bb6a25fSPoul-Henning Kamp }
9195bb6a25fSPoul-Henning Kamp
/* ptr points to character following "%" */
9215bb6a25fSPoul-Henning Kamp
922220ed979SColeman Kane static int PTRCALL
PREFIX(scanPercent)9235bb6a25fSPoul-Henning Kamp PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
9246b2c1e49SXin LI const char **nextTokPtr) {
925be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
9265bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
9275bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
9286b2c1e49SXin LI case BT_S:
9296b2c1e49SXin LI case BT_LF:
9306b2c1e49SXin LI case BT_CR:
9316b2c1e49SXin LI case BT_PERCNT:
9325bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9335bb6a25fSPoul-Henning Kamp return XML_TOK_PERCENT;
9345bb6a25fSPoul-Henning Kamp default:
9355bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9365bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
9375bb6a25fSPoul-Henning Kamp }
938be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
9395bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
9405bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
9415bb6a25fSPoul-Henning Kamp case BT_SEMI:
9425bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
9435bb6a25fSPoul-Henning Kamp return XML_TOK_PARAM_ENTITY_REF;
9445bb6a25fSPoul-Henning Kamp default:
9455bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9465bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
9475bb6a25fSPoul-Henning Kamp }
9485bb6a25fSPoul-Henning Kamp }
9495bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
9505bb6a25fSPoul-Henning Kamp }
9515bb6a25fSPoul-Henning Kamp
952220ed979SColeman Kane static int PTRCALL
PREFIX(scanPoundName)9535bb6a25fSPoul-Henning Kamp PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
9546b2c1e49SXin LI const char **nextTokPtr) {
955be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
9565bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
9575bb6a25fSPoul-Henning Kamp CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
9585bb6a25fSPoul-Henning Kamp default:
9595bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9605bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
9615bb6a25fSPoul-Henning Kamp }
962be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
9635bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
9645bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
9656b2c1e49SXin LI case BT_CR:
9666b2c1e49SXin LI case BT_LF:
9676b2c1e49SXin LI case BT_S:
9686b2c1e49SXin LI case BT_RPAR:
9696b2c1e49SXin LI case BT_GT:
9706b2c1e49SXin LI case BT_PERCNT:
9716b2c1e49SXin LI case BT_VERBAR:
9725bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9735bb6a25fSPoul-Henning Kamp return XML_TOK_POUND_NAME;
9745bb6a25fSPoul-Henning Kamp default:
9755bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9765bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
9775bb6a25fSPoul-Henning Kamp }
9785bb6a25fSPoul-Henning Kamp }
9795bb6a25fSPoul-Henning Kamp return -XML_TOK_POUND_NAME;
9805bb6a25fSPoul-Henning Kamp }
9815bb6a25fSPoul-Henning Kamp
982220ed979SColeman Kane static int PTRCALL
PREFIX(scanLit)9836b2c1e49SXin LI PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
9846b2c1e49SXin LI const char **nextTokPtr) {
985be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
9865bb6a25fSPoul-Henning Kamp int t = BYTE_TYPE(enc, ptr);
9875bb6a25fSPoul-Henning Kamp switch (t) {
9885bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
9895bb6a25fSPoul-Henning Kamp case BT_QUOT:
9905bb6a25fSPoul-Henning Kamp case BT_APOS:
9915bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
9925bb6a25fSPoul-Henning Kamp if (t != open)
9935bb6a25fSPoul-Henning Kamp break;
994be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
9955bb6a25fSPoul-Henning Kamp return -XML_TOK_LITERAL;
9965bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
9975bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
9986b2c1e49SXin LI case BT_S:
9996b2c1e49SXin LI case BT_CR:
10006b2c1e49SXin LI case BT_LF:
10016b2c1e49SXin LI case BT_GT:
10026b2c1e49SXin LI case BT_PERCNT:
10036b2c1e49SXin LI case BT_LSQB:
10045bb6a25fSPoul-Henning Kamp return XML_TOK_LITERAL;
10055bb6a25fSPoul-Henning Kamp default:
10065bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
10075bb6a25fSPoul-Henning Kamp }
10085bb6a25fSPoul-Henning Kamp default:
10095bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
10105bb6a25fSPoul-Henning Kamp break;
10115bb6a25fSPoul-Henning Kamp }
10125bb6a25fSPoul-Henning Kamp }
10135bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
10145bb6a25fSPoul-Henning Kamp }
10155bb6a25fSPoul-Henning Kamp
1016220ed979SColeman Kane static int PTRCALL
PREFIX(prologTok)10175bb6a25fSPoul-Henning Kamp PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
10186b2c1e49SXin LI const char **nextTokPtr) {
10195bb6a25fSPoul-Henning Kamp int tok;
1020be8aff81SXin LI if (ptr >= end)
10215bb6a25fSPoul-Henning Kamp return XML_TOK_NONE;
10225bb6a25fSPoul-Henning Kamp if (MINBPC(enc) > 1) {
10235bb6a25fSPoul-Henning Kamp size_t n = end - ptr;
10245bb6a25fSPoul-Henning Kamp if (n & (MINBPC(enc) - 1)) {
10255bb6a25fSPoul-Henning Kamp n &= ~(MINBPC(enc) - 1);
10265bb6a25fSPoul-Henning Kamp if (n == 0)
10275bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
10285bb6a25fSPoul-Henning Kamp end = ptr + n;
10295bb6a25fSPoul-Henning Kamp }
10305bb6a25fSPoul-Henning Kamp }
10315bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
10325bb6a25fSPoul-Henning Kamp case BT_QUOT:
10335bb6a25fSPoul-Henning Kamp return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
10345bb6a25fSPoul-Henning Kamp case BT_APOS:
10355bb6a25fSPoul-Henning Kamp return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
10366b2c1e49SXin LI case BT_LT: {
10375bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1038be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
10395bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
10405bb6a25fSPoul-Henning Kamp case BT_EXCL:
10415bb6a25fSPoul-Henning Kamp return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10425bb6a25fSPoul-Henning Kamp case BT_QUEST:
10435bb6a25fSPoul-Henning Kamp return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10445bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
10455bb6a25fSPoul-Henning Kamp case BT_HEX:
10465bb6a25fSPoul-Henning Kamp case BT_NONASCII:
10475bb6a25fSPoul-Henning Kamp case BT_LEAD2:
10485bb6a25fSPoul-Henning Kamp case BT_LEAD3:
10495bb6a25fSPoul-Henning Kamp case BT_LEAD4:
10505bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr - MINBPC(enc);
10515bb6a25fSPoul-Henning Kamp return XML_TOK_INSTANCE_START;
10525bb6a25fSPoul-Henning Kamp }
10535bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
10545bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
10555bb6a25fSPoul-Henning Kamp }
10565bb6a25fSPoul-Henning Kamp case BT_CR:
10575bb6a25fSPoul-Henning Kamp if (ptr + MINBPC(enc) == end) {
10585bb6a25fSPoul-Henning Kamp *nextTokPtr = end;
10595bb6a25fSPoul-Henning Kamp /* indicate that this might be part of a CR/LF pair */
10605bb6a25fSPoul-Henning Kamp return -XML_TOK_PROLOG_S;
10615bb6a25fSPoul-Henning Kamp }
10625bb6a25fSPoul-Henning Kamp /* fall through */
10636b2c1e49SXin LI case BT_S:
10646b2c1e49SXin LI case BT_LF:
10655bb6a25fSPoul-Henning Kamp for (;;) {
10665bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1067be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
10685bb6a25fSPoul-Henning Kamp break;
10695bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
10706b2c1e49SXin LI case BT_S:
10716b2c1e49SXin LI case BT_LF:
10725bb6a25fSPoul-Henning Kamp break;
10735bb6a25fSPoul-Henning Kamp case BT_CR:
10745bb6a25fSPoul-Henning Kamp /* don't split CR/LF pair */
10755bb6a25fSPoul-Henning Kamp if (ptr + MINBPC(enc) != end)
10765bb6a25fSPoul-Henning Kamp break;
10775bb6a25fSPoul-Henning Kamp /* fall through */
10785bb6a25fSPoul-Henning Kamp default:
10795bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
10805bb6a25fSPoul-Henning Kamp return XML_TOK_PROLOG_S;
10815bb6a25fSPoul-Henning Kamp }
10825bb6a25fSPoul-Henning Kamp }
10835bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
10845bb6a25fSPoul-Henning Kamp return XML_TOK_PROLOG_S;
10855bb6a25fSPoul-Henning Kamp case BT_PERCNT:
10865bb6a25fSPoul-Henning Kamp return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
10875bb6a25fSPoul-Henning Kamp case BT_COMMA:
10885bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
10895bb6a25fSPoul-Henning Kamp return XML_TOK_COMMA;
10905bb6a25fSPoul-Henning Kamp case BT_LSQB:
10915bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
10925bb6a25fSPoul-Henning Kamp return XML_TOK_OPEN_BRACKET;
10935bb6a25fSPoul-Henning Kamp case BT_RSQB:
10945bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1095be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
10965bb6a25fSPoul-Henning Kamp return -XML_TOK_CLOSE_BRACKET;
10975bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098be8aff81SXin LI REQUIRE_CHARS(enc, ptr, end, 2);
10995bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
11005bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + 2 * MINBPC(enc);
11015bb6a25fSPoul-Henning Kamp return XML_TOK_COND_SECT_CLOSE;
11025bb6a25fSPoul-Henning Kamp }
11035bb6a25fSPoul-Henning Kamp }
11045bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
11055bb6a25fSPoul-Henning Kamp return XML_TOK_CLOSE_BRACKET;
11065bb6a25fSPoul-Henning Kamp case BT_LPAR:
11075bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11085bb6a25fSPoul-Henning Kamp return XML_TOK_OPEN_PAREN;
11095bb6a25fSPoul-Henning Kamp case BT_RPAR:
11105bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1111be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
11125bb6a25fSPoul-Henning Kamp return -XML_TOK_CLOSE_PAREN;
11135bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
11145bb6a25fSPoul-Henning Kamp case BT_AST:
11155bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11165bb6a25fSPoul-Henning Kamp return XML_TOK_CLOSE_PAREN_ASTERISK;
11175bb6a25fSPoul-Henning Kamp case BT_QUEST:
11185bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11195bb6a25fSPoul-Henning Kamp return XML_TOK_CLOSE_PAREN_QUESTION;
11205bb6a25fSPoul-Henning Kamp case BT_PLUS:
11215bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11225bb6a25fSPoul-Henning Kamp return XML_TOK_CLOSE_PAREN_PLUS;
11236b2c1e49SXin LI case BT_CR:
11246b2c1e49SXin LI case BT_LF:
11256b2c1e49SXin LI case BT_S:
11266b2c1e49SXin LI case BT_GT:
11276b2c1e49SXin LI case BT_COMMA:
11286b2c1e49SXin LI case BT_VERBAR:
11295bb6a25fSPoul-Henning Kamp case BT_RPAR:
11305bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
11315bb6a25fSPoul-Henning Kamp return XML_TOK_CLOSE_PAREN;
11325bb6a25fSPoul-Henning Kamp }
11335bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
11345bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
11355bb6a25fSPoul-Henning Kamp case BT_VERBAR:
11365bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11375bb6a25fSPoul-Henning Kamp return XML_TOK_OR;
11385bb6a25fSPoul-Henning Kamp case BT_GT:
11395bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
11405bb6a25fSPoul-Henning Kamp return XML_TOK_DECL_CLOSE;
11415bb6a25fSPoul-Henning Kamp case BT_NUM:
11425bb6a25fSPoul-Henning Kamp return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
11435bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
11445bb6a25fSPoul-Henning Kamp case BT_LEAD##n: \
11455bb6a25fSPoul-Henning Kamp if (end - ptr < n) \
11465bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL_CHAR; \
1147ac69e5d4SEric van Gyzen if (IS_INVALID_CHAR(enc, ptr, n)) { \
1148ac69e5d4SEric van Gyzen *nextTokPtr = ptr; \
1149ac69e5d4SEric van Gyzen return XML_TOK_INVALID; \
1150ac69e5d4SEric van Gyzen } \
11515bb6a25fSPoul-Henning Kamp if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
11525bb6a25fSPoul-Henning Kamp ptr += n; \
11535bb6a25fSPoul-Henning Kamp tok = XML_TOK_NAME; \
11545bb6a25fSPoul-Henning Kamp break; \
11555bb6a25fSPoul-Henning Kamp } \
11565bb6a25fSPoul-Henning Kamp if (IS_NAME_CHAR(enc, ptr, n)) { \
11575bb6a25fSPoul-Henning Kamp ptr += n; \
11585bb6a25fSPoul-Henning Kamp tok = XML_TOK_NMTOKEN; \
11595bb6a25fSPoul-Henning Kamp break; \
11605bb6a25fSPoul-Henning Kamp } \
11615bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr; \
11625bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
11636b2c1e49SXin LI LEAD_CASE(2)
11646b2c1e49SXin LI LEAD_CASE(3)
11656b2c1e49SXin LI LEAD_CASE(4)
11665bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
11675bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
11685bb6a25fSPoul-Henning Kamp case BT_HEX:
11695bb6a25fSPoul-Henning Kamp tok = XML_TOK_NAME;
11705bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
11715bb6a25fSPoul-Henning Kamp break;
11725bb6a25fSPoul-Henning Kamp case BT_DIGIT:
11735bb6a25fSPoul-Henning Kamp case BT_NAME:
11745bb6a25fSPoul-Henning Kamp case BT_MINUS:
11755bb6a25fSPoul-Henning Kamp # ifdef XML_NS
11765bb6a25fSPoul-Henning Kamp case BT_COLON:
11775bb6a25fSPoul-Henning Kamp # endif
11785bb6a25fSPoul-Henning Kamp tok = XML_TOK_NMTOKEN;
11795bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
11805bb6a25fSPoul-Henning Kamp break;
11815bb6a25fSPoul-Henning Kamp case BT_NONASCII:
11825bb6a25fSPoul-Henning Kamp if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
11835bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
11845bb6a25fSPoul-Henning Kamp tok = XML_TOK_NAME;
11855bb6a25fSPoul-Henning Kamp break;
11865bb6a25fSPoul-Henning Kamp }
11875bb6a25fSPoul-Henning Kamp if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
11885bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
11895bb6a25fSPoul-Henning Kamp tok = XML_TOK_NMTOKEN;
11905bb6a25fSPoul-Henning Kamp break;
11915bb6a25fSPoul-Henning Kamp }
11925bb6a25fSPoul-Henning Kamp /* fall through */
11935bb6a25fSPoul-Henning Kamp default:
11945bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
11955bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
11965bb6a25fSPoul-Henning Kamp }
1197be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
11985bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
11995bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
12006b2c1e49SXin LI case BT_GT:
12016b2c1e49SXin LI case BT_RPAR:
12026b2c1e49SXin LI case BT_COMMA:
12036b2c1e49SXin LI case BT_VERBAR:
12046b2c1e49SXin LI case BT_LSQB:
12056b2c1e49SXin LI case BT_PERCNT:
12066b2c1e49SXin LI case BT_S:
12076b2c1e49SXin LI case BT_CR:
12086b2c1e49SXin LI case BT_LF:
12095bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12105bb6a25fSPoul-Henning Kamp return tok;
12115bb6a25fSPoul-Henning Kamp # ifdef XML_NS
12125bb6a25fSPoul-Henning Kamp case BT_COLON:
12135bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
12145bb6a25fSPoul-Henning Kamp switch (tok) {
12155bb6a25fSPoul-Henning Kamp case XML_TOK_NAME:
1216be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
12175bb6a25fSPoul-Henning Kamp tok = XML_TOK_PREFIXED_NAME;
12185bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
12195bb6a25fSPoul-Henning Kamp CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
12205bb6a25fSPoul-Henning Kamp default:
12215bb6a25fSPoul-Henning Kamp tok = XML_TOK_NMTOKEN;
12225bb6a25fSPoul-Henning Kamp break;
12235bb6a25fSPoul-Henning Kamp }
12245bb6a25fSPoul-Henning Kamp break;
12255bb6a25fSPoul-Henning Kamp case XML_TOK_PREFIXED_NAME:
12265bb6a25fSPoul-Henning Kamp tok = XML_TOK_NMTOKEN;
12275bb6a25fSPoul-Henning Kamp break;
12285bb6a25fSPoul-Henning Kamp }
12295bb6a25fSPoul-Henning Kamp break;
12305bb6a25fSPoul-Henning Kamp # endif
12315bb6a25fSPoul-Henning Kamp case BT_PLUS:
12325bb6a25fSPoul-Henning Kamp if (tok == XML_TOK_NMTOKEN) {
12335bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12345bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
12355bb6a25fSPoul-Henning Kamp }
12365bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
12375bb6a25fSPoul-Henning Kamp return XML_TOK_NAME_PLUS;
12385bb6a25fSPoul-Henning Kamp case BT_AST:
12395bb6a25fSPoul-Henning Kamp if (tok == XML_TOK_NMTOKEN) {
12405bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12415bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
12425bb6a25fSPoul-Henning Kamp }
12435bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
12445bb6a25fSPoul-Henning Kamp return XML_TOK_NAME_ASTERISK;
12455bb6a25fSPoul-Henning Kamp case BT_QUEST:
12465bb6a25fSPoul-Henning Kamp if (tok == XML_TOK_NMTOKEN) {
12475bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12485bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
12495bb6a25fSPoul-Henning Kamp }
12505bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
12515bb6a25fSPoul-Henning Kamp return XML_TOK_NAME_QUESTION;
12525bb6a25fSPoul-Henning Kamp default:
12535bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12545bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
12555bb6a25fSPoul-Henning Kamp }
12565bb6a25fSPoul-Henning Kamp }
12575bb6a25fSPoul-Henning Kamp return -tok;
12585bb6a25fSPoul-Henning Kamp }
12595bb6a25fSPoul-Henning Kamp
1260220ed979SColeman Kane static int PTRCALL
PREFIX(attributeValueTok)12616b2c1e49SXin LI PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
12626b2c1e49SXin LI const char **nextTokPtr) {
12635bb6a25fSPoul-Henning Kamp const char *start;
1264be8aff81SXin LI if (ptr >= end)
12655bb6a25fSPoul-Henning Kamp return XML_TOK_NONE;
12660a48773fSEric van Gyzen else if (! HAS_CHAR(enc, ptr, end)) {
12670a48773fSEric van Gyzen /* This line cannot be executed. The incoming data has already
12680a48773fSEric van Gyzen * been tokenized once, so incomplete characters like this have
12690a48773fSEric van Gyzen * already been eliminated from the input. Retaining the paranoia
12700a48773fSEric van Gyzen * check is still valuable, however.
12710a48773fSEric van Gyzen */
12720a48773fSEric van Gyzen return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
12730a48773fSEric van Gyzen }
12745bb6a25fSPoul-Henning Kamp start = ptr;
1275be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
12765bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
12775bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
12786b2c1e49SXin LI case BT_LEAD##n: \
1279ac69e5d4SEric van Gyzen ptr += n; /* NOTE: The encoding has already been validated. */ \
12806b2c1e49SXin LI break;
12816b2c1e49SXin LI LEAD_CASE(2)
12826b2c1e49SXin LI LEAD_CASE(3)
12836b2c1e49SXin LI LEAD_CASE(4)
12845bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
12855bb6a25fSPoul-Henning Kamp case BT_AMP:
12865bb6a25fSPoul-Henning Kamp if (ptr == start)
12875bb6a25fSPoul-Henning Kamp return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
12885bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12895bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
12905bb6a25fSPoul-Henning Kamp case BT_LT:
12915bb6a25fSPoul-Henning Kamp /* this is for inside entity references */
12925bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
12935bb6a25fSPoul-Henning Kamp return XML_TOK_INVALID;
12945bb6a25fSPoul-Henning Kamp case BT_LF:
12955bb6a25fSPoul-Henning Kamp if (ptr == start) {
12965bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
12975bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
12985bb6a25fSPoul-Henning Kamp }
12995bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13005bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13015bb6a25fSPoul-Henning Kamp case BT_CR:
13025bb6a25fSPoul-Henning Kamp if (ptr == start) {
13035bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1304be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
13055bb6a25fSPoul-Henning Kamp return XML_TOK_TRAILING_CR;
13065bb6a25fSPoul-Henning Kamp if (BYTE_TYPE(enc, ptr) == BT_LF)
13075bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
13085bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13095bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
13105bb6a25fSPoul-Henning Kamp }
13115bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13125bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13135bb6a25fSPoul-Henning Kamp case BT_S:
13145bb6a25fSPoul-Henning Kamp if (ptr == start) {
13155bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
13165bb6a25fSPoul-Henning Kamp return XML_TOK_ATTRIBUTE_VALUE_S;
13175bb6a25fSPoul-Henning Kamp }
13185bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13195bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13205bb6a25fSPoul-Henning Kamp default:
13215bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
13225bb6a25fSPoul-Henning Kamp break;
13235bb6a25fSPoul-Henning Kamp }
13245bb6a25fSPoul-Henning Kamp }
13255bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13265bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13275bb6a25fSPoul-Henning Kamp }
13285bb6a25fSPoul-Henning Kamp
1329220ed979SColeman Kane static int PTRCALL
PREFIX(entityValueTok)13306b2c1e49SXin LI PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
13316b2c1e49SXin LI const char **nextTokPtr) {
13325bb6a25fSPoul-Henning Kamp const char *start;
1333be8aff81SXin LI if (ptr >= end)
13345bb6a25fSPoul-Henning Kamp return XML_TOK_NONE;
13350a48773fSEric van Gyzen else if (! HAS_CHAR(enc, ptr, end)) {
13360a48773fSEric van Gyzen /* This line cannot be executed. The incoming data has already
13370a48773fSEric van Gyzen * been tokenized once, so incomplete characters like this have
13380a48773fSEric van Gyzen * already been eliminated from the input. Retaining the paranoia
13390a48773fSEric van Gyzen * check is still valuable, however.
13400a48773fSEric van Gyzen */
13410a48773fSEric van Gyzen return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
13420a48773fSEric van Gyzen }
13435bb6a25fSPoul-Henning Kamp start = ptr;
1344be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
13455bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
13465bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
13476b2c1e49SXin LI case BT_LEAD##n: \
1348ac69e5d4SEric van Gyzen ptr += n; /* NOTE: The encoding has already been validated. */ \
13496b2c1e49SXin LI break;
13506b2c1e49SXin LI LEAD_CASE(2)
13516b2c1e49SXin LI LEAD_CASE(3)
13526b2c1e49SXin LI LEAD_CASE(4)
13535bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
13545bb6a25fSPoul-Henning Kamp case BT_AMP:
13555bb6a25fSPoul-Henning Kamp if (ptr == start)
13565bb6a25fSPoul-Henning Kamp return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
13575bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13585bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13595bb6a25fSPoul-Henning Kamp case BT_PERCNT:
13605bb6a25fSPoul-Henning Kamp if (ptr == start) {
13616b2c1e49SXin LI int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
13625bb6a25fSPoul-Henning Kamp return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
13635bb6a25fSPoul-Henning Kamp }
13645bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13655bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13665bb6a25fSPoul-Henning Kamp case BT_LF:
13675bb6a25fSPoul-Henning Kamp if (ptr == start) {
13685bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr + MINBPC(enc);
13695bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
13705bb6a25fSPoul-Henning Kamp }
13715bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13725bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13735bb6a25fSPoul-Henning Kamp case BT_CR:
13745bb6a25fSPoul-Henning Kamp if (ptr == start) {
13755bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1376be8aff81SXin LI if (! HAS_CHAR(enc, ptr, end))
13775bb6a25fSPoul-Henning Kamp return XML_TOK_TRAILING_CR;
13785bb6a25fSPoul-Henning Kamp if (BYTE_TYPE(enc, ptr) == BT_LF)
13795bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
13805bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13815bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_NEWLINE;
13825bb6a25fSPoul-Henning Kamp }
13835bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13845bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13855bb6a25fSPoul-Henning Kamp default:
13865bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
13875bb6a25fSPoul-Henning Kamp break;
13885bb6a25fSPoul-Henning Kamp }
13895bb6a25fSPoul-Henning Kamp }
13905bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
13915bb6a25fSPoul-Henning Kamp return XML_TOK_DATA_CHARS;
13925bb6a25fSPoul-Henning Kamp }
13935bb6a25fSPoul-Henning Kamp
13945bb6a25fSPoul-Henning Kamp # ifdef XML_DTD
13955bb6a25fSPoul-Henning Kamp
1396220ed979SColeman Kane static int PTRCALL
PREFIX(ignoreSectionTok)13976b2c1e49SXin LI PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
13986b2c1e49SXin LI const char **nextTokPtr) {
13995bb6a25fSPoul-Henning Kamp int level = 0;
14005bb6a25fSPoul-Henning Kamp if (MINBPC(enc) > 1) {
14015bb6a25fSPoul-Henning Kamp size_t n = end - ptr;
14025bb6a25fSPoul-Henning Kamp if (n & (MINBPC(enc) - 1)) {
14035bb6a25fSPoul-Henning Kamp n &= ~(MINBPC(enc) - 1);
14045bb6a25fSPoul-Henning Kamp end = ptr + n;
14055bb6a25fSPoul-Henning Kamp }
14065bb6a25fSPoul-Henning Kamp }
1407be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
14085bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
14095bb6a25fSPoul-Henning Kamp INVALID_CASES(ptr, nextTokPtr)
14105bb6a25fSPoul-Henning Kamp case BT_LT:
1411be8aff81SXin LI ptr += MINBPC(enc);
1412be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
14135bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1414be8aff81SXin LI ptr += MINBPC(enc);
1415be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
14165bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
14175bb6a25fSPoul-Henning Kamp ++level;
14185bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
14195bb6a25fSPoul-Henning Kamp }
14205bb6a25fSPoul-Henning Kamp }
14215bb6a25fSPoul-Henning Kamp break;
14225bb6a25fSPoul-Henning Kamp case BT_RSQB:
1423be8aff81SXin LI ptr += MINBPC(enc);
1424be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
14255bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1426be8aff81SXin LI ptr += MINBPC(enc);
1427be8aff81SXin LI REQUIRE_CHAR(enc, ptr, end);
14285bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
14295bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
14305bb6a25fSPoul-Henning Kamp if (level == 0) {
14315bb6a25fSPoul-Henning Kamp *nextTokPtr = ptr;
14325bb6a25fSPoul-Henning Kamp return XML_TOK_IGNORE_SECT;
14335bb6a25fSPoul-Henning Kamp }
14345bb6a25fSPoul-Henning Kamp --level;
14355bb6a25fSPoul-Henning Kamp }
14365bb6a25fSPoul-Henning Kamp }
14375bb6a25fSPoul-Henning Kamp break;
14385bb6a25fSPoul-Henning Kamp default:
14395bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
14405bb6a25fSPoul-Henning Kamp break;
14415bb6a25fSPoul-Henning Kamp }
14425bb6a25fSPoul-Henning Kamp }
14435bb6a25fSPoul-Henning Kamp return XML_TOK_PARTIAL;
14445bb6a25fSPoul-Henning Kamp }
14455bb6a25fSPoul-Henning Kamp
14465bb6a25fSPoul-Henning Kamp # endif /* XML_DTD */
14475bb6a25fSPoul-Henning Kamp
1448220ed979SColeman Kane static int PTRCALL
PREFIX(isPublicId)14495bb6a25fSPoul-Henning Kamp PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
14506b2c1e49SXin LI const char **badPtr) {
14515bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
14525bb6a25fSPoul-Henning Kamp end -= MINBPC(enc);
1453be8aff81SXin LI for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
14545bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
14555bb6a25fSPoul-Henning Kamp case BT_DIGIT:
14565bb6a25fSPoul-Henning Kamp case BT_HEX:
14575bb6a25fSPoul-Henning Kamp case BT_MINUS:
14585bb6a25fSPoul-Henning Kamp case BT_APOS:
14595bb6a25fSPoul-Henning Kamp case BT_LPAR:
14605bb6a25fSPoul-Henning Kamp case BT_RPAR:
14615bb6a25fSPoul-Henning Kamp case BT_PLUS:
14625bb6a25fSPoul-Henning Kamp case BT_COMMA:
14635bb6a25fSPoul-Henning Kamp case BT_SOL:
14645bb6a25fSPoul-Henning Kamp case BT_EQUALS:
14655bb6a25fSPoul-Henning Kamp case BT_QUEST:
14665bb6a25fSPoul-Henning Kamp case BT_CR:
14675bb6a25fSPoul-Henning Kamp case BT_LF:
14685bb6a25fSPoul-Henning Kamp case BT_SEMI:
14695bb6a25fSPoul-Henning Kamp case BT_EXCL:
14705bb6a25fSPoul-Henning Kamp case BT_AST:
14715bb6a25fSPoul-Henning Kamp case BT_PERCNT:
14725bb6a25fSPoul-Henning Kamp case BT_NUM:
14735bb6a25fSPoul-Henning Kamp # ifdef XML_NS
14745bb6a25fSPoul-Henning Kamp case BT_COLON:
14755bb6a25fSPoul-Henning Kamp # endif
14765bb6a25fSPoul-Henning Kamp break;
14775bb6a25fSPoul-Henning Kamp case BT_S:
14785bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
14795bb6a25fSPoul-Henning Kamp *badPtr = ptr;
14805bb6a25fSPoul-Henning Kamp return 0;
14815bb6a25fSPoul-Henning Kamp }
14825bb6a25fSPoul-Henning Kamp break;
14835bb6a25fSPoul-Henning Kamp case BT_NAME:
14845bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
14855bb6a25fSPoul-Henning Kamp if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
14865bb6a25fSPoul-Henning Kamp break;
14870a48773fSEric van Gyzen /* fall through */
14885bb6a25fSPoul-Henning Kamp default:
14895bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
14905bb6a25fSPoul-Henning Kamp case 0x24: /* $ */
14915bb6a25fSPoul-Henning Kamp case 0x40: /* @ */
14925bb6a25fSPoul-Henning Kamp break;
14935bb6a25fSPoul-Henning Kamp default:
14945bb6a25fSPoul-Henning Kamp *badPtr = ptr;
14955bb6a25fSPoul-Henning Kamp return 0;
14965bb6a25fSPoul-Henning Kamp }
14975bb6a25fSPoul-Henning Kamp break;
14985bb6a25fSPoul-Henning Kamp }
14995bb6a25fSPoul-Henning Kamp }
15005bb6a25fSPoul-Henning Kamp return 1;
15015bb6a25fSPoul-Henning Kamp }
15025bb6a25fSPoul-Henning Kamp
15035bb6a25fSPoul-Henning Kamp /* This must only be called for a well-formed start-tag or empty
15045bb6a25fSPoul-Henning Kamp element tag. Returns the number of attributes. Pointers to the
15055bb6a25fSPoul-Henning Kamp first attsMax attributes are stored in atts.
15065bb6a25fSPoul-Henning Kamp */
15075bb6a25fSPoul-Henning Kamp
1508220ed979SColeman Kane static int PTRCALL
PREFIX(getAtts)15096b2c1e49SXin LI PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
15106b2c1e49SXin LI ATTRIBUTE *atts) {
15115bb6a25fSPoul-Henning Kamp enum { other, inName, inValue } state = inName;
15125bb6a25fSPoul-Henning Kamp int nAtts = 0;
15135bb6a25fSPoul-Henning Kamp int open = 0; /* defined when state == inValue;
15145bb6a25fSPoul-Henning Kamp initialization just to shut up compilers */
15155bb6a25fSPoul-Henning Kamp
15165bb6a25fSPoul-Henning Kamp for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
15175bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
15185bb6a25fSPoul-Henning Kamp # define START_NAME \
15195bb6a25fSPoul-Henning Kamp if (state == other) { \
15205bb6a25fSPoul-Henning Kamp if (nAtts < attsMax) { \
15215bb6a25fSPoul-Henning Kamp atts[nAtts].name = ptr; \
15225bb6a25fSPoul-Henning Kamp atts[nAtts].normalized = 1; \
15235bb6a25fSPoul-Henning Kamp } \
15245bb6a25fSPoul-Henning Kamp state = inName; \
15255bb6a25fSPoul-Henning Kamp }
15265bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
1527ac69e5d4SEric van Gyzen case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
15286b2c1e49SXin LI START_NAME ptr += (n - MINBPC(enc)); \
15296b2c1e49SXin LI break;
15306b2c1e49SXin LI LEAD_CASE(2)
15316b2c1e49SXin LI LEAD_CASE(3)
15326b2c1e49SXin LI LEAD_CASE(4)
15335bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
15345bb6a25fSPoul-Henning Kamp case BT_NONASCII:
15355bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
15365bb6a25fSPoul-Henning Kamp case BT_HEX:
15375bb6a25fSPoul-Henning Kamp START_NAME
15385bb6a25fSPoul-Henning Kamp break;
15395bb6a25fSPoul-Henning Kamp # undef START_NAME
15405bb6a25fSPoul-Henning Kamp case BT_QUOT:
15415bb6a25fSPoul-Henning Kamp if (state != inValue) {
15425bb6a25fSPoul-Henning Kamp if (nAtts < attsMax)
15435bb6a25fSPoul-Henning Kamp atts[nAtts].valuePtr = ptr + MINBPC(enc);
15445bb6a25fSPoul-Henning Kamp state = inValue;
15455bb6a25fSPoul-Henning Kamp open = BT_QUOT;
15466b2c1e49SXin LI } else if (open == BT_QUOT) {
15475bb6a25fSPoul-Henning Kamp state = other;
15485bb6a25fSPoul-Henning Kamp if (nAtts < attsMax)
15495bb6a25fSPoul-Henning Kamp atts[nAtts].valueEnd = ptr;
15505bb6a25fSPoul-Henning Kamp nAtts++;
15515bb6a25fSPoul-Henning Kamp }
15525bb6a25fSPoul-Henning Kamp break;
15535bb6a25fSPoul-Henning Kamp case BT_APOS:
15545bb6a25fSPoul-Henning Kamp if (state != inValue) {
15555bb6a25fSPoul-Henning Kamp if (nAtts < attsMax)
15565bb6a25fSPoul-Henning Kamp atts[nAtts].valuePtr = ptr + MINBPC(enc);
15575bb6a25fSPoul-Henning Kamp state = inValue;
15585bb6a25fSPoul-Henning Kamp open = BT_APOS;
15596b2c1e49SXin LI } else if (open == BT_APOS) {
15605bb6a25fSPoul-Henning Kamp state = other;
15615bb6a25fSPoul-Henning Kamp if (nAtts < attsMax)
15625bb6a25fSPoul-Henning Kamp atts[nAtts].valueEnd = ptr;
15635bb6a25fSPoul-Henning Kamp nAtts++;
15645bb6a25fSPoul-Henning Kamp }
15655bb6a25fSPoul-Henning Kamp break;
15665bb6a25fSPoul-Henning Kamp case BT_AMP:
15675bb6a25fSPoul-Henning Kamp if (nAtts < attsMax)
15685bb6a25fSPoul-Henning Kamp atts[nAtts].normalized = 0;
15695bb6a25fSPoul-Henning Kamp break;
15705bb6a25fSPoul-Henning Kamp case BT_S:
15715bb6a25fSPoul-Henning Kamp if (state == inName)
15725bb6a25fSPoul-Henning Kamp state = other;
15736b2c1e49SXin LI else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
15745bb6a25fSPoul-Henning Kamp && (ptr == atts[nAtts].valuePtr
15755bb6a25fSPoul-Henning Kamp || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
15765bb6a25fSPoul-Henning Kamp || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
15775bb6a25fSPoul-Henning Kamp || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
15785bb6a25fSPoul-Henning Kamp atts[nAtts].normalized = 0;
15795bb6a25fSPoul-Henning Kamp break;
15806b2c1e49SXin LI case BT_CR:
15816b2c1e49SXin LI case BT_LF:
15825bb6a25fSPoul-Henning Kamp /* This case ensures that the first attribute name is counted
15835bb6a25fSPoul-Henning Kamp Apart from that we could just change state on the quote. */
15845bb6a25fSPoul-Henning Kamp if (state == inName)
15855bb6a25fSPoul-Henning Kamp state = other;
15865bb6a25fSPoul-Henning Kamp else if (state == inValue && nAtts < attsMax)
15875bb6a25fSPoul-Henning Kamp atts[nAtts].normalized = 0;
15885bb6a25fSPoul-Henning Kamp break;
15895bb6a25fSPoul-Henning Kamp case BT_GT:
15905bb6a25fSPoul-Henning Kamp case BT_SOL:
15915bb6a25fSPoul-Henning Kamp if (state != inValue)
15925bb6a25fSPoul-Henning Kamp return nAtts;
15935bb6a25fSPoul-Henning Kamp break;
15945bb6a25fSPoul-Henning Kamp default:
15955bb6a25fSPoul-Henning Kamp break;
15965bb6a25fSPoul-Henning Kamp }
15975bb6a25fSPoul-Henning Kamp }
15985bb6a25fSPoul-Henning Kamp /* not reached */
15995bb6a25fSPoul-Henning Kamp }
16005bb6a25fSPoul-Henning Kamp
1601220ed979SColeman Kane static int PTRFASTCALL
PREFIX(charRefNumber)16026b2c1e49SXin LI PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
16035bb6a25fSPoul-Henning Kamp int result = 0;
16045bb6a25fSPoul-Henning Kamp /* skip &# */
16056b2c1e49SXin LI UNUSED_P(enc);
16065bb6a25fSPoul-Henning Kamp ptr += 2 * MINBPC(enc);
16075bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
16086b2c1e49SXin LI for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
16095bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc)) {
16105bb6a25fSPoul-Henning Kamp int c = BYTE_TO_ASCII(enc, ptr);
16115bb6a25fSPoul-Henning Kamp switch (c) {
16126b2c1e49SXin LI case ASCII_0:
16136b2c1e49SXin LI case ASCII_1:
16146b2c1e49SXin LI case ASCII_2:
16156b2c1e49SXin LI case ASCII_3:
16166b2c1e49SXin LI case ASCII_4:
16176b2c1e49SXin LI case ASCII_5:
16186b2c1e49SXin LI case ASCII_6:
16196b2c1e49SXin LI case ASCII_7:
16206b2c1e49SXin LI case ASCII_8:
16216b2c1e49SXin LI case ASCII_9:
16225bb6a25fSPoul-Henning Kamp result <<= 4;
16235bb6a25fSPoul-Henning Kamp result |= (c - ASCII_0);
16245bb6a25fSPoul-Henning Kamp break;
16256b2c1e49SXin LI case ASCII_A:
16266b2c1e49SXin LI case ASCII_B:
16276b2c1e49SXin LI case ASCII_C:
16286b2c1e49SXin LI case ASCII_D:
16296b2c1e49SXin LI case ASCII_E:
16306b2c1e49SXin LI case ASCII_F:
16315bb6a25fSPoul-Henning Kamp result <<= 4;
16325bb6a25fSPoul-Henning Kamp result += 10 + (c - ASCII_A);
16335bb6a25fSPoul-Henning Kamp break;
16346b2c1e49SXin LI case ASCII_a:
16356b2c1e49SXin LI case ASCII_b:
16366b2c1e49SXin LI case ASCII_c:
16376b2c1e49SXin LI case ASCII_d:
16386b2c1e49SXin LI case ASCII_e:
16396b2c1e49SXin LI case ASCII_f:
16405bb6a25fSPoul-Henning Kamp result <<= 4;
16415bb6a25fSPoul-Henning Kamp result += 10 + (c - ASCII_a);
16425bb6a25fSPoul-Henning Kamp break;
16435bb6a25fSPoul-Henning Kamp }
16445bb6a25fSPoul-Henning Kamp if (result >= 0x110000)
16455bb6a25fSPoul-Henning Kamp return -1;
16465bb6a25fSPoul-Henning Kamp }
16476b2c1e49SXin LI } else {
16485bb6a25fSPoul-Henning Kamp for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
16495bb6a25fSPoul-Henning Kamp int c = BYTE_TO_ASCII(enc, ptr);
16505bb6a25fSPoul-Henning Kamp result *= 10;
16515bb6a25fSPoul-Henning Kamp result += (c - ASCII_0);
16525bb6a25fSPoul-Henning Kamp if (result >= 0x110000)
16535bb6a25fSPoul-Henning Kamp return -1;
16545bb6a25fSPoul-Henning Kamp }
16555bb6a25fSPoul-Henning Kamp }
16565bb6a25fSPoul-Henning Kamp return checkCharRefNumber(result);
16575bb6a25fSPoul-Henning Kamp }
16585bb6a25fSPoul-Henning Kamp
1659220ed979SColeman Kane static int PTRCALL
PREFIX(predefinedEntityName)16606b2c1e49SXin LI PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
16616b2c1e49SXin LI const char *end) {
16626b2c1e49SXin LI UNUSED_P(enc);
16635bb6a25fSPoul-Henning Kamp switch ((end - ptr) / MINBPC(enc)) {
16645bb6a25fSPoul-Henning Kamp case 2:
16655bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
16665bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
16675bb6a25fSPoul-Henning Kamp case ASCII_l:
16685bb6a25fSPoul-Henning Kamp return ASCII_LT;
16695bb6a25fSPoul-Henning Kamp case ASCII_g:
16705bb6a25fSPoul-Henning Kamp return ASCII_GT;
16715bb6a25fSPoul-Henning Kamp }
16725bb6a25fSPoul-Henning Kamp }
16735bb6a25fSPoul-Henning Kamp break;
16745bb6a25fSPoul-Henning Kamp case 3:
16755bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
16765bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16775bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
16785bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16795bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_p))
16805bb6a25fSPoul-Henning Kamp return ASCII_AMP;
16815bb6a25fSPoul-Henning Kamp }
16825bb6a25fSPoul-Henning Kamp }
16835bb6a25fSPoul-Henning Kamp break;
16845bb6a25fSPoul-Henning Kamp case 4:
16855bb6a25fSPoul-Henning Kamp switch (BYTE_TO_ASCII(enc, ptr)) {
16865bb6a25fSPoul-Henning Kamp case ASCII_q:
16875bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16885bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
16895bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16905bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
16915bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16925bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_t))
16935bb6a25fSPoul-Henning Kamp return ASCII_QUOT;
16945bb6a25fSPoul-Henning Kamp }
16955bb6a25fSPoul-Henning Kamp }
16965bb6a25fSPoul-Henning Kamp break;
16975bb6a25fSPoul-Henning Kamp case ASCII_a:
16985bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
16995bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
17005bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
17015bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
17025bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
17035bb6a25fSPoul-Henning Kamp if (CHAR_MATCHES(enc, ptr, ASCII_s))
17045bb6a25fSPoul-Henning Kamp return ASCII_APOS;
17055bb6a25fSPoul-Henning Kamp }
17065bb6a25fSPoul-Henning Kamp }
17075bb6a25fSPoul-Henning Kamp break;
17085bb6a25fSPoul-Henning Kamp }
17095bb6a25fSPoul-Henning Kamp }
17105bb6a25fSPoul-Henning Kamp return 0;
17115bb6a25fSPoul-Henning Kamp }
17125bb6a25fSPoul-Henning Kamp
1713220ed979SColeman Kane static int PTRCALL
PREFIX(nameMatchesAscii)17146b2c1e49SXin LI PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
17156b2c1e49SXin LI const char *end1, const char *ptr2) {
17166b2c1e49SXin LI UNUSED_P(enc);
17175bb6a25fSPoul-Henning Kamp for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
17180a48773fSEric van Gyzen if (end1 - ptr1 < MINBPC(enc)) {
17190a48773fSEric van Gyzen /* This line cannot be executed. The incoming data has already
17200a48773fSEric van Gyzen * been tokenized once, so incomplete characters like this have
17210a48773fSEric van Gyzen * already been eliminated from the input. Retaining the
17220a48773fSEric van Gyzen * paranoia check is still valuable, however.
17230a48773fSEric van Gyzen */
17240a48773fSEric van Gyzen return 0; /* LCOV_EXCL_LINE */
17250a48773fSEric van Gyzen }
17265bb6a25fSPoul-Henning Kamp if (! CHAR_MATCHES(enc, ptr1, *ptr2))
17275bb6a25fSPoul-Henning Kamp return 0;
17285bb6a25fSPoul-Henning Kamp }
17295bb6a25fSPoul-Henning Kamp return ptr1 == end1;
17305bb6a25fSPoul-Henning Kamp }
17315bb6a25fSPoul-Henning Kamp
1732220ed979SColeman Kane static int PTRFASTCALL
PREFIX(nameLength)17336b2c1e49SXin LI PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
17345bb6a25fSPoul-Henning Kamp const char *start = ptr;
17355bb6a25fSPoul-Henning Kamp for (;;) {
17365bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
17375bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
17386b2c1e49SXin LI case BT_LEAD##n: \
1739ac69e5d4SEric van Gyzen ptr += n; /* NOTE: The encoding has already been validated. */ \
17406b2c1e49SXin LI break;
17416b2c1e49SXin LI LEAD_CASE(2)
17426b2c1e49SXin LI LEAD_CASE(3)
17436b2c1e49SXin LI LEAD_CASE(4)
17445bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
17455bb6a25fSPoul-Henning Kamp case BT_NONASCII:
17465bb6a25fSPoul-Henning Kamp case BT_NMSTRT:
17475bb6a25fSPoul-Henning Kamp # ifdef XML_NS
17485bb6a25fSPoul-Henning Kamp case BT_COLON:
17495bb6a25fSPoul-Henning Kamp # endif
17505bb6a25fSPoul-Henning Kamp case BT_HEX:
17515bb6a25fSPoul-Henning Kamp case BT_DIGIT:
17525bb6a25fSPoul-Henning Kamp case BT_NAME:
17535bb6a25fSPoul-Henning Kamp case BT_MINUS:
17545bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
17555bb6a25fSPoul-Henning Kamp break;
17565bb6a25fSPoul-Henning Kamp default:
1757220ed979SColeman Kane return (int)(ptr - start);
17585bb6a25fSPoul-Henning Kamp }
17595bb6a25fSPoul-Henning Kamp }
17605bb6a25fSPoul-Henning Kamp }
17615bb6a25fSPoul-Henning Kamp
1762220ed979SColeman Kane static const char *PTRFASTCALL
PREFIX(skipS)17636b2c1e49SXin LI PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
17645bb6a25fSPoul-Henning Kamp for (;;) {
17655bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
17665bb6a25fSPoul-Henning Kamp case BT_LF:
17675bb6a25fSPoul-Henning Kamp case BT_CR:
17685bb6a25fSPoul-Henning Kamp case BT_S:
17695bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
17705bb6a25fSPoul-Henning Kamp break;
17715bb6a25fSPoul-Henning Kamp default:
17725bb6a25fSPoul-Henning Kamp return ptr;
17735bb6a25fSPoul-Henning Kamp }
17745bb6a25fSPoul-Henning Kamp }
17755bb6a25fSPoul-Henning Kamp }
17765bb6a25fSPoul-Henning Kamp
1777220ed979SColeman Kane static void PTRCALL
PREFIX(updatePosition)17786b2c1e49SXin LI PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
17796b2c1e49SXin LI POSITION *pos) {
1780be8aff81SXin LI while (HAS_CHAR(enc, ptr, end)) {
17815bb6a25fSPoul-Henning Kamp switch (BYTE_TYPE(enc, ptr)) {
17825bb6a25fSPoul-Henning Kamp # define LEAD_CASE(n) \
17835bb6a25fSPoul-Henning Kamp case BT_LEAD##n: \
1784ac69e5d4SEric van Gyzen ptr += n; /* NOTE: The encoding has already been validated. */ \
1785cc68614dSXin LI pos->columnNumber++; \
17865bb6a25fSPoul-Henning Kamp break;
17876b2c1e49SXin LI LEAD_CASE(2)
17886b2c1e49SXin LI LEAD_CASE(3)
17896b2c1e49SXin LI LEAD_CASE(4)
17905bb6a25fSPoul-Henning Kamp # undef LEAD_CASE
17915bb6a25fSPoul-Henning Kamp case BT_LF:
1792cc68614dSXin LI pos->columnNumber = 0;
17935bb6a25fSPoul-Henning Kamp pos->lineNumber++;
17945bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
17955bb6a25fSPoul-Henning Kamp break;
17965bb6a25fSPoul-Henning Kamp case BT_CR:
17975bb6a25fSPoul-Henning Kamp pos->lineNumber++;
17985bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1799be8aff81SXin LI if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
18005bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1801cc68614dSXin LI pos->columnNumber = 0;
18025bb6a25fSPoul-Henning Kamp break;
18035bb6a25fSPoul-Henning Kamp default:
18045bb6a25fSPoul-Henning Kamp ptr += MINBPC(enc);
1805cc68614dSXin LI pos->columnNumber++;
18065bb6a25fSPoul-Henning Kamp break;
18075bb6a25fSPoul-Henning Kamp }
18085bb6a25fSPoul-Henning Kamp }
18095bb6a25fSPoul-Henning Kamp }
18105bb6a25fSPoul-Henning Kamp
18115bb6a25fSPoul-Henning Kamp # undef DO_LEAD_CASE
18125bb6a25fSPoul-Henning Kamp # undef MULTIBYTE_CASES
18135bb6a25fSPoul-Henning Kamp # undef INVALID_CASES
18145bb6a25fSPoul-Henning Kamp # undef CHECK_NAME_CASE
18155bb6a25fSPoul-Henning Kamp # undef CHECK_NAME_CASES
18165bb6a25fSPoul-Henning Kamp # undef CHECK_NMSTRT_CASE
18175bb6a25fSPoul-Henning Kamp # undef CHECK_NMSTRT_CASES
1818220ed979SColeman Kane
1819220ed979SColeman Kane #endif /* XML_TOK_IMPL_C */
1820