xref: /freebsd/contrib/expat/xmlwf/ct.c (revision 8df8b2d3e51d1b816201d8a1fe8bc29fe192e562)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */

#include <stdio.h>
#include <string.h>

/* Size in bytes, including the terminating NUL, of the charset buffer that
   getXMLCharset() writes to; charset values that do not fit are not stored. */
#define CHARSET_MAX 41
34 
/* Scan the next token of a Content-Type style header value.

   *pp is the current scan position in a NUL-terminated string.  On a
   successful return the token occupies [return value, *pp).  Tokens are:
     - an atom: a run of characters ended by whitespace, '(', ';', '/', '=',
       '"' or the end of the input;
     - a quoted string, returned including both '"' characters;
     - one of the single characters ';', '/' or '='.
   Whitespace and parenthesised comments (which may nest) between tokens are
   skipped.  A backslash and the character following it are stepped over
   without being interpreted.

   Returns a pointer to the first character of the token, or 0 when there is
   no further token: the input is exhausted, a quoted string, comment or
   backslash escape is cut off by the end of the input, or a ')' appears
   outside a comment or quoted string. */
static const char *
getTok(const char **pp)
{
  /* Order matters: every value above init means "inside a comment", and each
     additional nesting level adds one (inComment, inComment + 1, ...). */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the terminator is still a complete token.
         *pp stays on the NUL, so the following call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Opens a comment, or one more nesting level of it. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* leave one comment level */
      else if (state != inString)
        return 0; /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside strings and comments these are one-character tokens. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step onto the escaped character; the increment at the bottom of the
         loop then moves past it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        /* Closing quote: it belongs to the token. */
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote is ordinary text. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
101 
/* Compare the token [start, end) with the NUL-terminated string key,
   ignoring the case of ASCII letters in the token.

   key must be lowercase ASCII.  Only key characters in 'a'..'z' are allowed
   to match their uppercase form; every other key character must match
   exactly.

   Returns 1 when the token equals key, 0 otherwise.  A null start (no token)
   never matches. */

static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key: stop before reading past key's NUL. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Case folding is only meaningful for letters. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Equal only if key has been consumed completely as well. */
  return *key == '\0';
}
114 
115 void
116 getXMLCharset(const char *buf, char *charset)
117 {
118   const char *next, *p;
119 
120   charset[0] = '\0';
121   next = buf;
122   p = getTok(&next);
123   if (matchkey(p, next, "text"))
124     strcpy(charset, "us-ascii");
125   else if (!matchkey(p, next, "application"))
126     return;
127   p = getTok(&next);
128   if (!p || *p != '/')
129     return;
130   p = getTok(&next);
131   if (matchkey(p, next, "xml"))
132     isXml = 1;
133   p = getTok(&next);
134   while (p) {
135     if (*p == ';') {
136       p = getTok(&next);
137       if (matchkey(p, next, "charset")) {
138         p = getTok(&next);
139         if (p && *p == '=') {
140           p = getTok(&next);
141           if (p) {
142             char *s = charset;
143             if (*p == '"') {
144               while (++p != next - 1) {
145                 if (*p == '\\')
146                   ++p;
147                 if (s == charset + CHARSET_MAX - 1) {
148                   charset[0] = '\0';
149                   break;
150                 }
151                 *s++ = *p;
152               }
153               *s++ = '\0';
154             }
155             else {
156               if (next - p > CHARSET_MAX - 1)
157                 break;
158               while (p != next)
159                 *s++ = *p++;
160               *s = 0;
161               break;
162             }
163           }
164         }
165       }
166     }
167   else
168     p = getTok(&next);
169   }
170 }
171 
172 int
173 main(int argc, char **argv)
174 {
175   char buf[CHARSET_MAX];
176   getXMLCharset(argv[1], buf);
177   printf("charset = \"%s\"\n", buf);
178   return 0;
179 }
180