xref: /freebsd/contrib/expat/xmlwf/ct.c (revision cc68614da8232d8baaca0ae0d0dd8f890f06623e)
10a48773fSEric van Gyzen /*
20a48773fSEric van Gyzen                             __  __            _
30a48773fSEric van Gyzen                          ___\ \/ /_ __   __ _| |_
40a48773fSEric van Gyzen                         / _ \\  /| '_ \ / _` | __|
50a48773fSEric van Gyzen                        |  __//  \| |_) | (_| | |_
60a48773fSEric van Gyzen                         \___/_/\_\ .__/ \__,_|\__|
70a48773fSEric van Gyzen                                  |_| XML parser
80a48773fSEric van Gyzen 
90a48773fSEric van Gyzen    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10*cc68614dSXin LI    Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
11*cc68614dSXin LI    Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
120a48773fSEric van Gyzen    Licensed under the MIT license:
130a48773fSEric van Gyzen 
140a48773fSEric van Gyzen    Permission is  hereby granted,  free of charge,  to any  person obtaining
150a48773fSEric van Gyzen    a  copy  of  this  software   and  associated  documentation  files  (the
160a48773fSEric van Gyzen    "Software"),  to  deal in  the  Software  without restriction,  including
170a48773fSEric van Gyzen    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
180a48773fSEric van Gyzen    distribute, sublicense, and/or sell copies of the Software, and to permit
190a48773fSEric van Gyzen    persons  to whom  the Software  is  furnished to  do so,  subject to  the
200a48773fSEric van Gyzen    following conditions:
210a48773fSEric van Gyzen 
220a48773fSEric van Gyzen    The above copyright  notice and this permission notice  shall be included
230a48773fSEric van Gyzen    in all copies or substantial portions of the Software.
240a48773fSEric van Gyzen 
250a48773fSEric van Gyzen    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
260a48773fSEric van Gyzen    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
270a48773fSEric van Gyzen    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
280a48773fSEric van Gyzen    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
290a48773fSEric van Gyzen    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
300a48773fSEric van Gyzen    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
310a48773fSEric van Gyzen    USE OR OTHER DEALINGS IN THE SOFTWARE.
320a48773fSEric van Gyzen */
330a48773fSEric van Gyzen 
345bb6a25fSPoul-Henning Kamp #define CHARSET_MAX 41
355bb6a25fSPoul-Henning Kamp 
/*
 * Scan the next token of a Content-Type style header value.
 *
 * pp   in/out: *pp is the current position in a NUL-terminated string and
 *      is advanced past whatever was consumed.
 *
 * Returns a pointer to the first character of the token, with *pp left one
 * past its last character, so the token is the range [result, *pp).
 * Returns 0 when no token can be produced: end of input (including inside
 * an unterminated quoted string or comment), a ')' that is not closing a
 * comment and is not inside a quoted string, or a backslash that is the
 * last character of the input.
 *
 * Tokens are:
 *   - one of the single characters ';', '/' or '=';
 *   - a quoted string, returned with both '"' delimiters included;
 *   - an atom: a run of other characters ended by whitespace, '(', ';',
 *     '/', '=', '"' or the end of the string.
 * Parenthesised comments nest and are skipped.  A backslash causes the
 * character after it to be passed over without being examined.
 */
static const char *
getTok(const char **pp) {
  /* The order is significant: every value above `init` is a comment
     nesting depth (inComment is depth 1, inComment + 1 is depth 2, ...). */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* End of input terminates an atom just like a delimiter does;
         *pp stays on the NUL so the next call reports "no more tokens".
         An unfinished string or comment is still an error. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or go one level deeper in a nested one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* leave one comment level */
      else if (state != inString)
        return 0; /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++; /* single-character token */
      break;
    case '\\':
      /* Step over the escaped character; the shared increment at the
         bottom of the loop then moves past it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* include the closing quote in the token */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
1015bb6a25fSPoul-Henning Kamp 
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring the case of ASCII letters in the token.
 *
 * key must be lowercase ASCII: a lowercase letter in key matches either
 * case of that letter in the token; every other key character must match
 * exactly.
 *
 * Returns 1 when the token equals key in full, 0 otherwise.  A null start
 * (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key.  Checked explicitly so that no token byte
       can be taken for a match of the terminator and carry the scan past
       the end of key. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Fold case only for real letters; the 'A' + (c - 'a') arithmetic
       yields unrelated characters for anything else. */
    if (*key >= 'a' && *key <= 'z' && *start == 'A' + (*key - 'a'))
      continue;
    return 0;
  }
  return *key == '\0';
}
1135bb6a25fSPoul-Henning Kamp 
1145bb6a25fSPoul-Henning Kamp void
getXMLCharset(const char * buf,char * charset)1156b2c1e49SXin LI getXMLCharset(const char *buf, char *charset) {
1165bb6a25fSPoul-Henning Kamp   const char *next, *p;
1175bb6a25fSPoul-Henning Kamp 
1185bb6a25fSPoul-Henning Kamp   charset[0] = '\0';
1195bb6a25fSPoul-Henning Kamp   next = buf;
1205bb6a25fSPoul-Henning Kamp   p = getTok(&next);
1215bb6a25fSPoul-Henning Kamp   if (matchkey(p, next, "text"))
1225bb6a25fSPoul-Henning Kamp     strcpy(charset, "us-ascii");
1235bb6a25fSPoul-Henning Kamp   else if (! matchkey(p, next, "application"))
1245bb6a25fSPoul-Henning Kamp     return;
1255bb6a25fSPoul-Henning Kamp   p = getTok(&next);
1265bb6a25fSPoul-Henning Kamp   if (! p || *p != '/')
1275bb6a25fSPoul-Henning Kamp     return;
1285bb6a25fSPoul-Henning Kamp   p = getTok(&next);
1295bb6a25fSPoul-Henning Kamp   if (matchkey(p, next, "xml"))
1305bb6a25fSPoul-Henning Kamp     isXml = 1;
1315bb6a25fSPoul-Henning Kamp   p = getTok(&next);
1325bb6a25fSPoul-Henning Kamp   while (p) {
1335bb6a25fSPoul-Henning Kamp     if (*p == ';') {
1345bb6a25fSPoul-Henning Kamp       p = getTok(&next);
1355bb6a25fSPoul-Henning Kamp       if (matchkey(p, next, "charset")) {
1365bb6a25fSPoul-Henning Kamp         p = getTok(&next);
1375bb6a25fSPoul-Henning Kamp         if (p && *p == '=') {
1385bb6a25fSPoul-Henning Kamp           p = getTok(&next);
1395bb6a25fSPoul-Henning Kamp           if (p) {
1405bb6a25fSPoul-Henning Kamp             char *s = charset;
1415bb6a25fSPoul-Henning Kamp             if (*p == '"') {
1425bb6a25fSPoul-Henning Kamp               while (++p != next - 1) {
1435bb6a25fSPoul-Henning Kamp                 if (*p == '\\')
1445bb6a25fSPoul-Henning Kamp                   ++p;
1455bb6a25fSPoul-Henning Kamp                 if (s == charset + CHARSET_MAX - 1) {
1465bb6a25fSPoul-Henning Kamp                   charset[0] = '\0';
1475bb6a25fSPoul-Henning Kamp                   break;
1485bb6a25fSPoul-Henning Kamp                 }
1495bb6a25fSPoul-Henning Kamp                 *s++ = *p;
1505bb6a25fSPoul-Henning Kamp               }
1515bb6a25fSPoul-Henning Kamp               *s++ = '\0';
1526b2c1e49SXin LI             } else {
1535bb6a25fSPoul-Henning Kamp               if (next - p > CHARSET_MAX - 1)
1545bb6a25fSPoul-Henning Kamp                 break;
1555bb6a25fSPoul-Henning Kamp               while (p != next)
1565bb6a25fSPoul-Henning Kamp                 *s++ = *p++;
1575bb6a25fSPoul-Henning Kamp               *s = 0;
1585bb6a25fSPoul-Henning Kamp               break;
1595bb6a25fSPoul-Henning Kamp             }
1605bb6a25fSPoul-Henning Kamp           }
1615bb6a25fSPoul-Henning Kamp         }
1625bb6a25fSPoul-Henning Kamp       }
1636b2c1e49SXin LI     } else
1645bb6a25fSPoul-Henning Kamp       p = getTok(&next);
1655bb6a25fSPoul-Henning Kamp   }
1665bb6a25fSPoul-Henning Kamp }
1675bb6a25fSPoul-Henning Kamp 
1685bb6a25fSPoul-Henning Kamp int
main(int argc,char ** argv)1696b2c1e49SXin LI main(int argc, char **argv) {
1705bb6a25fSPoul-Henning Kamp   char buf[CHARSET_MAX];
1715bb6a25fSPoul-Henning Kamp   getXMLCharset(argv[1], buf);
1725bb6a25fSPoul-Henning Kamp   printf("charset = \"%s\"\n", buf);
1735bb6a25fSPoul-Henning Kamp   return 0;
1745bb6a25fSPoul-Henning Kamp }
175