/* xref: /freebsd/contrib/expat/xmlwf/ct.c (revision 5bb6a25f8f50862853de4bdb01ecb4152f10d0e7) */
#include <stdio.h>
#include <string.h>

/* Size in bytes, terminating NUL included, of the buffer that
   getXMLCharset() fills in. */
#define CHARSET_MAX 41
25bb6a25fSPoul-Henning Kamp 
/*
 * Scan the next token of a MIME-style header value.
 *
 * On entry *pp is the position to resume scanning from; on return it has
 * been advanced over the characters that were consumed.
 *
 * Three kinds of token are recognised:
 *   - one of the single characters ';', '/' or '=': the result points at
 *     that character and *pp is left one past it;
 *   - a double-quoted string: the result points at the opening quote and
 *     *pp is left one past the closing quote;
 *   - an atom, i.e. a run of any other characters: the result points at its
 *     first character and *pp is left on the character that ended it
 *     (white space, '(', one of the single-character tokens, '"', or the
 *     terminating NUL).
 *
 * White space between tokens and parenthesised comments (which may nest)
 * are skipped.  A backslash and the character following it are always
 * consumed as a pair, without either being interpreted.
 *
 * Returns 0 when there is no further token: end of input outside an atom
 * (including inside an unterminated string or comment), a backslash that
 * is the last character, or a ')' that does not close a comment.
 */
static const char *
getTok(const char **pp)
{
  /* Values above init count comment nesting: inComment is one level deep,
     inComment + 1 is two levels deep, and so on. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may run right up to the end of the input; hand it back
         instead of discarding it.  *pp stays on the NUL, so the next
         call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or nest one level deeper; inside a quoted
         string a parenthesis is ordinary text. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;                /* close one comment level */
      else if (state != inString)
        return 0;               /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside strings and comments these are tokens of their own. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step over the backslash here; the increment at the bottom of
         the loop then steps over the escaped character. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                  /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      /* Inside a comment a quote is ordinary text. */
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
695bb6a25fSPoul-Henning Kamp 
/*
 * Test whether the token [start, end) equals key, ignoring the case of
 * the token's letters.
 *
 * key must be lowercase ASCII: each token character is accepted if it is
 * the key character itself or the uppercase letter corresponding to it.
 * start may be 0 (as returned by getTok() when there is no token), in
 * which case nothing matches.
 *
 * Returns 1 when the token and the key match over their whole lengths,
 * 0 otherwise.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than the key.  Stop here: evaluating the
       uppercase mapping on the terminator yields a bogus value that a
       token byte could equal, and the loop would then walk past the end
       of key. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* The token is used up; it matches only if the key is used up too. */
  return *key == '\0';
}
825bb6a25fSPoul-Henning Kamp 
835bb6a25fSPoul-Henning Kamp void
845bb6a25fSPoul-Henning Kamp getXMLCharset(const char *buf, char *charset)
855bb6a25fSPoul-Henning Kamp {
865bb6a25fSPoul-Henning Kamp   const char *next, *p;
875bb6a25fSPoul-Henning Kamp 
885bb6a25fSPoul-Henning Kamp   charset[0] = '\0';
895bb6a25fSPoul-Henning Kamp   next = buf;
905bb6a25fSPoul-Henning Kamp   p = getTok(&next);
915bb6a25fSPoul-Henning Kamp   if (matchkey(p, next, "text"))
925bb6a25fSPoul-Henning Kamp     strcpy(charset, "us-ascii");
935bb6a25fSPoul-Henning Kamp   else if (!matchkey(p, next, "application"))
945bb6a25fSPoul-Henning Kamp     return;
955bb6a25fSPoul-Henning Kamp   p = getTok(&next);
965bb6a25fSPoul-Henning Kamp   if (!p || *p != '/')
975bb6a25fSPoul-Henning Kamp     return;
985bb6a25fSPoul-Henning Kamp   p = getTok(&next);
995bb6a25fSPoul-Henning Kamp   if (matchkey(p, next, "xml"))
1005bb6a25fSPoul-Henning Kamp     isXml = 1;
1015bb6a25fSPoul-Henning Kamp   p = getTok(&next);
1025bb6a25fSPoul-Henning Kamp   while (p) {
1035bb6a25fSPoul-Henning Kamp     if (*p == ';') {
1045bb6a25fSPoul-Henning Kamp       p = getTok(&next);
1055bb6a25fSPoul-Henning Kamp       if (matchkey(p, next, "charset")) {
1065bb6a25fSPoul-Henning Kamp         p = getTok(&next);
1075bb6a25fSPoul-Henning Kamp         if (p && *p == '=') {
1085bb6a25fSPoul-Henning Kamp           p = getTok(&next);
1095bb6a25fSPoul-Henning Kamp           if (p) {
1105bb6a25fSPoul-Henning Kamp             char *s = charset;
1115bb6a25fSPoul-Henning Kamp             if (*p == '"') {
1125bb6a25fSPoul-Henning Kamp               while (++p != next - 1) {
1135bb6a25fSPoul-Henning Kamp                 if (*p == '\\')
1145bb6a25fSPoul-Henning Kamp                   ++p;
1155bb6a25fSPoul-Henning Kamp                 if (s == charset + CHARSET_MAX - 1) {
1165bb6a25fSPoul-Henning Kamp                   charset[0] = '\0';
1175bb6a25fSPoul-Henning Kamp                   break;
1185bb6a25fSPoul-Henning Kamp                 }
1195bb6a25fSPoul-Henning Kamp                 *s++ = *p;
1205bb6a25fSPoul-Henning Kamp               }
1215bb6a25fSPoul-Henning Kamp               *s++ = '\0';
1225bb6a25fSPoul-Henning Kamp             }
1235bb6a25fSPoul-Henning Kamp             else {
1245bb6a25fSPoul-Henning Kamp               if (next - p > CHARSET_MAX - 1)
1255bb6a25fSPoul-Henning Kamp                 break;
1265bb6a25fSPoul-Henning Kamp               while (p != next)
1275bb6a25fSPoul-Henning Kamp                 *s++ = *p++;
1285bb6a25fSPoul-Henning Kamp               *s = 0;
1295bb6a25fSPoul-Henning Kamp               break;
1305bb6a25fSPoul-Henning Kamp             }
1315bb6a25fSPoul-Henning Kamp           }
1325bb6a25fSPoul-Henning Kamp         }
1335bb6a25fSPoul-Henning Kamp       }
1345bb6a25fSPoul-Henning Kamp     }
1355bb6a25fSPoul-Henning Kamp   else
1365bb6a25fSPoul-Henning Kamp     p = getTok(&next);
1375bb6a25fSPoul-Henning Kamp   }
1385bb6a25fSPoul-Henning Kamp }
1395bb6a25fSPoul-Henning Kamp 
1405bb6a25fSPoul-Henning Kamp int
1415bb6a25fSPoul-Henning Kamp main(int argc, char **argv)
1425bb6a25fSPoul-Henning Kamp {
1435bb6a25fSPoul-Henning Kamp   char buf[CHARSET_MAX];
1445bb6a25fSPoul-Henning Kamp   getXMLCharset(argv[1], buf);
1455bb6a25fSPoul-Henning Kamp   printf("charset = \"%s\"\n", buf);
1465bb6a25fSPoul-Henning Kamp   return 0;
1475bb6a25fSPoul-Henning Kamp }
148