xref: /titanic_52/usr/src/contrib/ast/src/lib/libast/regex/regclass.c (revision 906afcb89d0412cc073b95c2d701a804a8cdb62c)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * RE character class support
25  */
26 
27 #include "reglib.h"
28 
29 struct Ctype_s; typedef struct Ctype_s Ctype_t;
30 
31 struct Ctype_s
32 {
33 	const char*	name;
34 	size_t		size;
35 	regclass_t	ctype;
36 	Ctype_t*	next;
37 #if _lib_wctype
38 	wctype_t	wtype;
39 #endif
40 };
41 
42 static Ctype_t*		ctypes;
43 
44 /*
45  * this stuff gets around posix failure to define isblank,
46  * and the fact that ctype functions are macros
47  * and any local extensions that may not even have functions or macros
48  */
49 
50 #if _need_iswblank
51 
/*
 * iswblank() stand-in built on wctype("blank")
 * the class handle is looked up on the first call and reused after that
 */

int
_reg_iswblank(wint_t wc)
{
	static wctype_t	blank;
	static int	cached;

	if (cached == 0)
	{
		blank = wctype("blank");
		cached = 1;
	}
	return iswctype(wc, blank);
}
65 
66 #endif
67 
/*
 * real functions wrapping the isw*() classifiers so that each one
 * has an address that can be stored in a regclass_t
 * the Not* variants return the logical negation (0 or 1)
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return iswdigit(c) == 0;
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return iswspace(c) == 0;
}

static int
Isupper(int c)
{
	return iswupper(c);
}

/* word chars are the alphanumerics plus underscore */

static int
Isword(int c)
{
	return c == '_' || iswalnum(c);
}

static int
Notword(int c)
{
	return !(c == '_' || iswalnum(c));
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
84 
85 #if _lib_wctype
86 
87 static int Is_wc_1(int);
88 static int Is_wc_2(int);
89 static int Is_wc_3(int);
90 static int Is_wc_4(int);
91 static int Is_wc_5(int);
92 static int Is_wc_6(int);
93 static int Is_wc_7(int);
94 static int Is_wc_8(int);
95 static int Is_wc_9(int);
96 static int Is_wc_10(int);
97 static int Is_wc_11(int);
98 static int Is_wc_12(int);
99 static int Is_wc_13(int);
100 static int Is_wc_14(int);
101 static int Is_wc_15(int);
102 static int Is_wc_16(int);
103 
104 #endif
105 
106 #define SZ(s)		s,(sizeof(s)-1)
107 
108 static Ctype_t ctype[] =
109 {
110 	{ SZ("alnum"), Isalnum },
111 	{ SZ("alpha"), Isalpha },
112 	{ SZ("blank"), Isblank },
113 	{ SZ("cntrl"), Iscntrl },
114 	{ SZ("digit"), Isdigit },
115 	{ SZ("graph"), Isgraph },
116 	{ SZ("lower"), Islower },
117 	{ SZ("print"), Isprint },
118 	{ SZ("punct"), Ispunct },
119 	{ SZ("space"), Isspace },
120 	{ SZ("upper"), Isupper },
121 	{ SZ("word"),  Isword  },
122 	{ SZ("xdigit"),Isxdigit},
123 
124 #define CTYPES		13
125 
126 #if _lib_wctype
127 	{ 0, 0,        Is_wc_1 },
128 	{ 0, 0,        Is_wc_2 },
129 	{ 0, 0,        Is_wc_3 },
130 	{ 0, 0,        Is_wc_4 },
131 	{ 0, 0,        Is_wc_5 },
132 	{ 0, 0,        Is_wc_6 },
133 	{ 0, 0,        Is_wc_7 },
134 	{ 0, 0,        Is_wc_8 },
135 	{ 0, 0,        Is_wc_9 },
136 	{ 0, 0,        Is_wc_10 },
137 	{ 0, 0,        Is_wc_11 },
138 	{ 0, 0,        Is_wc_12 },
139 	{ 0, 0,        Is_wc_13 },
140 	{ 0, 0,        Is_wc_14 },
141 	{ 0, 0,        Is_wc_15 },
142 	{ 0, 0,        Is_wc_16 },
143 
144 #define WTYPES		16
145 
146 #else
147 
148 #define WTYPES		0
149 
150 #endif
151 };
152 
#if _lib_wctype

/*
 * Is_wc_N tests c against the wctype() handle stored in the
 * N'th dynamic slot of ctype[], i.e. ctype[CTYPES+N-1]
 */

static int
Is_wc_1(int c)
{
	return iswctype(c, ctype[CTYPES].wtype);
}

static int
Is_wc_2(int c)
{
	return iswctype(c, ctype[CTYPES + 1].wtype);
}

static int
Is_wc_3(int c)
{
	return iswctype(c, ctype[CTYPES + 2].wtype);
}

static int
Is_wc_4(int c)
{
	return iswctype(c, ctype[CTYPES + 3].wtype);
}

static int
Is_wc_5(int c)
{
	return iswctype(c, ctype[CTYPES + 4].wtype);
}

static int
Is_wc_6(int c)
{
	return iswctype(c, ctype[CTYPES + 5].wtype);
}

static int
Is_wc_7(int c)
{
	return iswctype(c, ctype[CTYPES + 6].wtype);
}

static int
Is_wc_8(int c)
{
	return iswctype(c, ctype[CTYPES + 7].wtype);
}

static int
Is_wc_9(int c)
{
	return iswctype(c, ctype[CTYPES + 8].wtype);
}

static int
Is_wc_10(int c)
{
	return iswctype(c, ctype[CTYPES + 9].wtype);
}

static int
Is_wc_11(int c)
{
	return iswctype(c, ctype[CTYPES + 10].wtype);
}

static int
Is_wc_12(int c)
{
	return iswctype(c, ctype[CTYPES + 11].wtype);
}

static int
Is_wc_13(int c)
{
	return iswctype(c, ctype[CTYPES + 12].wtype);
}

static int
Is_wc_14(int c)
{
	return iswctype(c, ctype[CTYPES + 13].wtype);
}

static int
Is_wc_15(int c)
{
	return iswctype(c, ctype[CTYPES + 14].wtype);
}

static int
Is_wc_16(int c)
{
	return iswctype(c, ctype[CTYPES + 15].wtype);
}

#endif
173 
174 /*
175  * return pointer to ctype function for :class:] in s
176  * s points to the first char after the initial [
177  * dynamic wctype classes are locale-specific
178  * dynamic entry locale is punned in Ctype_t.next
179  * the search does a lazy (one entry at a time) flush on locale mismatch
180  * if e!=0 it points to next char in s
181  * 0 returned on error
182  */
183 
184 regclass_t
185 regclass(const char* s, char** e)
186 {
187 	register Ctype_t*	cp;
188 	register int		c;
189 	register size_t		n;
190 	register const char*	t;
191 	Ctype_t*		lc;
192 	Ctype_t*		xp;
193 	Ctype_t*		zp;
194 
195 	if (!(c = *s++))
196 		return 0;
197 	for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
198 	if (*t != c || !(n = t - s))
199 		return 0;
200 	for (cp = ctypes; cp; cp = cp->next)
201 		if (n == cp->size && strneq(s, cp->name, n))
202 			goto found;
203 	xp = zp = 0;
204 	lc = (Ctype_t*)setlocale(LC_CTYPE, NiL);
205 	for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
206 	{
207 #if _lib_wctype
208 		if (!zp)
209 		{
210 			if (!cp->size)
211 				zp = cp;
212 			else if (!xp && cp->next && cp->next != lc)
213 				xp = cp;
214 		}
215 #endif
216 		if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc))
217 			goto found;
218 	}
219 #if _lib_wctype
220 	if (!(cp = zp))
221 	{
222 		if (!(cp = xp))
223 			return 0;
224 		cp->size = 0;
225 		if (!streq(cp->name, s))
226 		{
227 			free((char*)cp->name);
228 			cp->name = 0;
229 		}
230 	}
231 	if (!cp->name)
232 	{
233 		if (!(cp->name = (const char*)memdup(s, n + 1)))
234 			return 0;
235 		*((char*)cp->name + n) = 0;
236 	}
237 	/* mvs.390 needs the (char*) cast -- barf */
238 	if (!(cp->wtype = wctype((char*)cp->name)))
239 	{
240 		free((char*)cp->name);
241 		cp->name = 0;
242 		return 0;
243 	}
244 	cp->size = n;
245 	cp->next = lc;
246 #endif
247  found:
248 	if (e)
249 		*e = (char*)t + 2;
250 	return cp->ctype;
251 }
252 
253 /*
254  * associate the ctype function fun with name
255  */
256 
257 int
258 regaddclass(const char* name, regclass_t fun)
259 {
260 	register Ctype_t*	cp;
261 	register Ctype_t*	np;
262 	register size_t		n;
263 
264 	n = strlen(name);
265 	for (cp = ctypes; cp; cp = cp->next)
266 		if (cp->size == n && strneq(name, cp->name, n))
267 		{
268 			cp->ctype = fun;
269 			return 0;
270 		}
271 	if (!(np = newof(0, Ctype_t, 1, n + 1)))
272 		return REG_ESPACE;
273 	np->size = n;
274 	np->name = strcpy((char*)(np + 1), name);
275 	np->ctype = fun;
276 	np->next = ctypes;
277 	ctypes = np;
278 	return 0;
279 }
280 
281 /*
282  * return pointer to ctype function for token
283  */
284 
285 regclass_t
286 classfun(int type)
287 {
288 	switch (type)
289 	{
290 	case T_ALNUM:		return  Isword;
291 	case T_ALNUM_NOT:	return Notword;
292 	case T_DIGIT:		return  Isdigit;
293 	case T_DIGIT_NOT:	return Notdigit;
294 	case T_SPACE:		return  Isspace;
295 	case T_SPACE_NOT:	return Notspace;
296 	}
297 	return 0;
298 }
299