1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2011 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24 * RE character class support
25 */
26
27 #include "reglib.h"
28
29 struct Ctype_s; typedef struct Ctype_s Ctype_t;
30
31 struct Ctype_s
32 {
33 const char* name;
34 size_t size;
35 regclass_t ctype;
36 Ctype_t* next;
37 #if _lib_wctype
38 wctype_t wtype;
39 #endif
40 };
41
42 static Ctype_t* ctypes;
43
/*
 * the wrappers below work around POSIX's failure to define isblank,
 * the fact that the ctype functions may be macros,
 * and local extensions that may have neither functions nor macros
 */
49
50 #if _need_iswblank
51
/*
 * iswblank() replacement: return non-zero if wc is in the "blank" class
 * of the current LC_CTYPE locale, 0 otherwise
 */

int
_reg_iswblank(wint_t wc)
{
	/*
	 * the class handle is looked up on every call rather than cached
	 * in a static: a wctype_t is only valid for the LC_CTYPE locale
	 * that was in effect when wctype() returned it, so a cached value
	 * would go stale after a setlocale() call
	 */
	return iswctype(wc, wctype("blank"));
}
65
66 #endif
67
/*
 * int-argument wrappers around the isw*() classification calls so that
 * each class has a real function whose address can be stored and returned
 * the Not*() variants are the complements used for negated classes
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return iswdigit(c) == 0;
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return iswspace(c) == 0;
}

static int
Isupper(int c)
{
	return iswupper(c);
}

/* word chars are the alphanumerics plus underscore */

static int
Isword(int c)
{
	return c == '_' || iswalnum(c);
}

static int
Notword(int c)
{
	return !(c == '_' || iswalnum(c));
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
84
85 #if _lib_wctype
86
/*
 * one test function per dynamic wctype slot
 * declared here because the ctype[] initializer takes their addresses
 * before they are defined
 */

static int	Is_wc_1(int c);
static int	Is_wc_2(int c);
static int	Is_wc_3(int c);
static int	Is_wc_4(int c);
static int	Is_wc_5(int c);
static int	Is_wc_6(int c);
static int	Is_wc_7(int c);
static int	Is_wc_8(int c);
static int	Is_wc_9(int c);
static int	Is_wc_10(int c);
static int	Is_wc_11(int c);
static int	Is_wc_12(int c);
static int	Is_wc_13(int c);
static int	Is_wc_14(int c);
static int	Is_wc_15(int c);
static int	Is_wc_16(int c);
103
104 #endif
105
106 #define SZ(s) s,(sizeof(s)-1)
107
108 static Ctype_t ctype[] =
109 {
110 { SZ("alnum"), Isalnum },
111 { SZ("alpha"), Isalpha },
112 { SZ("blank"), Isblank },
113 { SZ("cntrl"), Iscntrl },
114 { SZ("digit"), Isdigit },
115 { SZ("graph"), Isgraph },
116 { SZ("lower"), Islower },
117 { SZ("print"), Isprint },
118 { SZ("punct"), Ispunct },
119 { SZ("space"), Isspace },
120 { SZ("upper"), Isupper },
121 { SZ("word"), Isword },
122 { SZ("xdigit"),Isxdigit},
123
124 #define CTYPES 13
125
126 #if _lib_wctype
127 { 0, 0, Is_wc_1 },
128 { 0, 0, Is_wc_2 },
129 { 0, 0, Is_wc_3 },
130 { 0, 0, Is_wc_4 },
131 { 0, 0, Is_wc_5 },
132 { 0, 0, Is_wc_6 },
133 { 0, 0, Is_wc_7 },
134 { 0, 0, Is_wc_8 },
135 { 0, 0, Is_wc_9 },
136 { 0, 0, Is_wc_10 },
137 { 0, 0, Is_wc_11 },
138 { 0, 0, Is_wc_12 },
139 { 0, 0, Is_wc_13 },
140 { 0, 0, Is_wc_14 },
141 { 0, 0, Is_wc_15 },
142 { 0, 0, Is_wc_16 },
143
144 #define WTYPES 16
145
146 #else
147
148 #define WTYPES 0
149
150 #endif
151 };
152
153 #if _lib_wctype
154
Is_wc_1(int c)155 static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); }
Is_wc_2(int c)156 static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); }
Is_wc_3(int c)157 static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); }
Is_wc_4(int c)158 static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); }
Is_wc_5(int c)159 static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); }
Is_wc_6(int c)160 static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); }
Is_wc_7(int c)161 static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); }
Is_wc_8(int c)162 static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); }
Is_wc_9(int c)163 static int Is_wc_9(int c) { return iswctype(c, ctype[CTYPES+8].wtype); }
Is_wc_10(int c)164 static int Is_wc_10(int c) { return iswctype(c, ctype[CTYPES+9].wtype); }
Is_wc_11(int c)165 static int Is_wc_11(int c) { return iswctype(c, ctype[CTYPES+10].wtype); }
Is_wc_12(int c)166 static int Is_wc_12(int c) { return iswctype(c, ctype[CTYPES+11].wtype); }
Is_wc_13(int c)167 static int Is_wc_13(int c) { return iswctype(c, ctype[CTYPES+12].wtype); }
Is_wc_14(int c)168 static int Is_wc_14(int c) { return iswctype(c, ctype[CTYPES+13].wtype); }
Is_wc_15(int c)169 static int Is_wc_15(int c) { return iswctype(c, ctype[CTYPES+14].wtype); }
Is_wc_16(int c)170 static int Is_wc_16(int c) { return iswctype(c, ctype[CTYPES+15].wtype); }
171
172 #endif
173
174 /*
175 * return pointer to ctype function for :class:] in s
176 * s points to the first char after the initial [
177 * dynamic wctype classes are locale-specific
178 * dynamic entry locale is punned in Ctype_t.next
179 * the search does a lazy (one entry at a time) flush on locale mismatch
180 * if e!=0 it points to next char in s
181 * 0 returned on error
182 */
183
184 regclass_t
regclass(const char * s,char ** e)185 regclass(const char* s, char** e)
186 {
187 register Ctype_t* cp;
188 register int c;
189 register size_t n;
190 register const char* t;
191 Ctype_t* lc;
192 Ctype_t* xp;
193 Ctype_t* zp;
194
195 if (!(c = *s++))
196 return 0;
197 for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
198 if (*t != c || !(n = t - s))
199 return 0;
200 for (cp = ctypes; cp; cp = cp->next)
201 if (n == cp->size && strneq(s, cp->name, n))
202 goto found;
203 xp = zp = 0;
204 lc = (Ctype_t*)setlocale(LC_CTYPE, NiL);
205 for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
206 {
207 #if _lib_wctype
208 if (!zp)
209 {
210 if (!cp->size)
211 zp = cp;
212 else if (!xp && cp->next && cp->next != lc)
213 xp = cp;
214 }
215 #endif
216 if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc))
217 goto found;
218 }
219 #if _lib_wctype
220 if (!(cp = zp))
221 {
222 if (!(cp = xp))
223 return 0;
224 cp->size = 0;
225 if (!streq(cp->name, s))
226 {
227 free((char*)cp->name);
228 cp->name = 0;
229 }
230 }
231 if (!cp->name)
232 {
233 if (!(cp->name = (const char*)memdup(s, n + 1)))
234 return 0;
235 *((char*)cp->name + n) = 0;
236 }
237 /* mvs.390 needs the (char*) cast -- barf */
238 if (!(cp->wtype = wctype((char*)cp->name)))
239 {
240 free((char*)cp->name);
241 cp->name = 0;
242 return 0;
243 }
244 cp->size = n;
245 cp->next = lc;
246 #endif
247 found:
248 if (e)
249 *e = (char*)t + 2;
250 return cp->ctype;
251 }
252
253 /*
254 * associate the ctype function fun with name
255 */
256
257 int
regaddclass(const char * name,regclass_t fun)258 regaddclass(const char* name, regclass_t fun)
259 {
260 register Ctype_t* cp;
261 register Ctype_t* np;
262 register size_t n;
263
264 n = strlen(name);
265 for (cp = ctypes; cp; cp = cp->next)
266 if (cp->size == n && strneq(name, cp->name, n))
267 {
268 cp->ctype = fun;
269 return 0;
270 }
271 if (!(np = newof(0, Ctype_t, 1, n + 1)))
272 return REG_ESPACE;
273 np->size = n;
274 np->name = strcpy((char*)(np + 1), name);
275 np->ctype = fun;
276 np->next = ctypes;
277 ctypes = np;
278 return 0;
279 }
280
281 /*
282 * return pointer to ctype function for token
283 */
284
285 regclass_t
classfun(int type)286 classfun(int type)
287 {
288 switch (type)
289 {
290 case T_ALNUM: return Isword;
291 case T_ALNUM_NOT: return Notword;
292 case T_DIGIT: return Isdigit;
293 case T_DIGIT_NOT: return Notdigit;
294 case T_SPACE: return Isspace;
295 case T_SPACE_NOT: return Notspace;
296 }
297 return 0;
298 }
299