1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24 * RE character class support
25 */
26
27 #include "reglib.h"
28
struct Ctype_s; typedef struct Ctype_s Ctype_t;

/*
 * one character class: a name and the predicate that implements it
 * the initializers in ctype[] are positional, so the order of the
 * first three members matters
 */

struct Ctype_s
{
	const char*	name;	/* class name compared by regclass()	*/
	size_t		size;	/* length of name; 0 marks an unused	*/
				/* locale slot in ctype[]		*/
	regclass_t	ctype;	/* predicate handed back by regclass()	*/
	Ctype_t*	next;	/* next entry on the ctypes list	*/
#if _lib_wctype
	wctype_t	wtype;	/* wctype() result for a locale class	*/
#endif
};
41
/* head of the list of classes added by regaddclass(), newest first */
static Ctype_t*		ctypes;
43
/*
 * CTYPES is the number of named entries at the head of ctype[]
 * (alnum ... word, xdigit = 13); the Is_wc_N() functions locate their
 * locale slot as ctype[CTYPES+N-1], so this must track the table
 * exactly or each Is_wc_N() tests against the wrong slot
 * WTYPES is the number of locale-defined slots that follow
 */

#define CTYPES		13
#if _lib_wctype
#define WTYPES		8
#else
#define WTYPES		0
#endif
50
51 /*
52 * this stuff gets around posix failure to define isblank,
53 * and the fact that ctype functions are macros
54 * and any local extensions that may not even have functions or macros
55 */
56
57 #if _need_iswblank
58
/*
 * iswblank() replacement built on the "blank" wctype() class
 * the class handle is looked up on the first call and reused afterwards
 * NOTE(review): the handle is never refreshed; confirm that is
 * acceptable if LC_CTYPE can change after the first call
 */

int
_reg_iswblank(wint_t wc)
{
	static wctype_t	blank_class;
	static int	ready;

	if (ready == 0)
	{
		blank_class = wctype("blank");
		ready = 1;
	}
	return iswctype(wc, blank_class);
}
72
73 #endif
74
/*
 * function wrappers around the wide character classification calls
 * so that each class has an address that can be stored in a table;
 * the Not*() variants are the complements and Isword()/Notword()
 * treat '_' as a word character in addition to alphanumerics
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return iswdigit(c) == 0;
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return iswspace(c) == 0;
}

static int
Isupper(int c)
{
	return iswupper(c);
}

static int
Isword(int c)
{
	return c == '_' || iswalnum(c);
}

static int
Notword(int c)
{
	return !(c == '_' || iswalnum(c));
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
91
92 #if _lib_wctype
93
/*
 * forward declarations: the ctype[] table refers to these
 * functions before their definitions, which in turn refer to ctype[]
 */
static int	Is_wc_1(int);
static int	Is_wc_2(int);
static int	Is_wc_3(int);
static int	Is_wc_4(int);
static int	Is_wc_5(int);
static int	Is_wc_6(int);
static int	Is_wc_7(int);
static int	Is_wc_8(int);
102
103 #endif
104
/* expand a string literal into the name,size pair of a Ctype_t initializer */
#define SZ(s)		s,(sizeof(s)-1)

/*
 * builtin class table, searched by regclass() after the regaddclass() list
 * the named entries come first; the trailing { 0, 0, Is_wc_N } entries
 * are unused slots (size 0) that regclass() fills in on demand with the
 * name and wctype() handle of a locale-defined class
 * Is_wc_N() finds its slot as ctype[CTYPES+N-1], so CTYPES has to equal
 * the number of named entries listed here
 */

static Ctype_t ctype[] =
{
	{ SZ("alnum"), Isalnum },
	{ SZ("alpha"), Isalpha },
	{ SZ("blank"), Isblank },
	{ SZ("cntrl"), Iscntrl },
	{ SZ("digit"), Isdigit },
	{ SZ("graph"), Isgraph },
	{ SZ("lower"), Islower },
	{ SZ("print"), Isprint },
	{ SZ("punct"), Ispunct },
	{ SZ("space"), Isspace },
	{ SZ("upper"), Isupper },
	{ SZ("word"), Isword },
	{ SZ("xdigit"),Isxdigit},
#if _lib_wctype
	{ 0, 0, Is_wc_1 },
	{ 0, 0, Is_wc_2 },
	{ 0, 0, Is_wc_3 },
	{ 0, 0, Is_wc_4 },
	{ 0, 0, Is_wc_5 },
	{ 0, 0, Is_wc_6 },
	{ 0, 0, Is_wc_7 },
	{ 0, 0, Is_wc_8 },
#endif
};
133
134 #if _lib_wctype
135
Is_wc_1(int c)136 static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); }
Is_wc_2(int c)137 static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); }
Is_wc_3(int c)138 static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); }
Is_wc_4(int c)139 static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); }
Is_wc_5(int c)140 static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); }
Is_wc_6(int c)141 static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); }
Is_wc_7(int c)142 static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); }
Is_wc_8(int c)143 static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); }
144
145 #endif
146
147 /*
148 * return pointer to ctype function for :class:] in s
149 * s points to the first char after the initial [
150 * if e!=0 it points to next char in s
151 * 0 returned on error
152 */
153
154 regclass_t
regclass(const char * s,char ** e)155 regclass(const char* s, char** e)
156 {
157 register Ctype_t* cp;
158 register int c;
159 register size_t n;
160 register const char* t;
161
162 if (c = *s++)
163 {
164 for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
165 if (*t != c)
166 return 0;
167 n = t - s;
168 for (cp = ctypes; cp; cp = cp->next)
169 if (n == cp->size && strneq(s, cp->name, n))
170 goto found;
171 for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
172 {
173 #if _lib_wctype
174 if (!cp->size && (cp->name = (const char*)memdup(s, n + 1)))
175 {
176 *((char*)cp->name + n) = 0;
177 /* mvs.390 needs the (char*) cast -- barf */
178 if (!(cp->wtype = wctype((char*)cp->name)))
179 {
180 free((char*)cp->name);
181 return 0;
182 }
183 cp->size = n;
184 goto found;
185 }
186 #endif
187 if (n == cp->size && strneq(s, cp->name, n))
188 goto found;
189 }
190 }
191 return 0;
192 found:
193 if (e)
194 *e = (char*)t + 2;
195 return cp->ctype;
196 }
197
198 /*
199 * associate the ctype function fun with name
200 */
201
202 int
regaddclass(const char * name,regclass_t fun)203 regaddclass(const char* name, regclass_t fun)
204 {
205 register Ctype_t* cp;
206 register Ctype_t* np;
207 register size_t n;
208
209 n = strlen(name);
210 for (cp = ctypes; cp; cp = cp->next)
211 if (cp->size == n && strneq(name, cp->name, n))
212 {
213 cp->ctype = fun;
214 return 0;
215 }
216 if (!(np = newof(0, Ctype_t, 1, n + 1)))
217 return REG_ESPACE;
218 np->size = n;
219 np->name = strcpy((char*)(np + 1), name);
220 np->ctype = fun;
221 np->next = ctypes;
222 ctypes = np;
223 return 0;
224 }
225
226 /*
227 * return pointer to ctype function for token
228 */
229
230 regclass_t
classfun(int type)231 classfun(int type)
232 {
233 switch (type)
234 {
235 case T_ALNUM: return Isword;
236 case T_ALNUM_NOT: return Notword;
237 case T_DIGIT: return Isdigit;
238 case T_DIGIT_NOT: return Notdigit;
239 case T_SPACE: return Isspace;
240 case T_SPACE_NOT: return Notspace;
241 }
242 return 0;
243 }
244