xref: /titanic_51/usr/src/lib/libast/common/regex/regclass.c (revision 99dda20867d903eec23291ba1ecb18a82d70096b)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * RE character class support
25  */
26 
27 #include "reglib.h"
28 
29 struct Ctype_s; typedef struct Ctype_s Ctype_t;
30 
31 struct Ctype_s
32 {
33 	const char*	name;
34 	size_t		size;
35 	regclass_t	ctype;
36 	Ctype_t*	next;
37 #if _lib_wctype
38 	wctype_t	wtype;
39 #endif
40 };
41 
42 static Ctype_t*		ctypes;
43 
/*
 * CTYPES is the number of named entries at the start of ctype[]
 * ("alnum" through "xdigit" -- 13 of them); Is_wc_N() addresses its
 * own table slot as ctype[CTYPES+N-1], so this value must match the
 * table exactly or each Is_wc_N() tests against the wrong wtype
 *
 * WTYPES is the number of wctype() slots that follow the named entries
 */

#define CTYPES		13
#if _lib_wctype
#define WTYPES		8
#else
#define WTYPES		0
#endif
50 
51 /*
52  * this stuff gets around posix failure to define isblank,
53  * and the fact that ctype functions are macros
54  * and any local extensions that may not even have functions or macros
55  */
56 
57 #if _need_iswblank
58 
/*
 * iswblank() replacement built on wctype("blank")
 *
 * the wctype() handle is looked up on the first call and cached in a
 * function-local static for all later calls
 * returns the iswctype() result for wc
 */

int
_reg_iswblank(wint_t wc)
{
	static wctype_t	blank;
	static int	cached;

	if (cached == 0)
	{
		blank = wctype("blank");
		cached = 1;
	}
	return iswctype(wc, blank);
}
72 
73 #endif
74 
/*
 * int(int) wrappers around the isw*() classifiers so that each class
 * has a real function whose address can be stored in ctype[] or
 * returned by classfun()
 *
 * the Not*() forms and the *word() forms yield exactly 0 or 1;
 * the others pass the isw*() result through unchanged
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return !iswdigit(c);
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return !iswspace(c);
}

static int
Isupper(int c)
{
	return iswupper(c);
}

/* word chars are the alphanumerics plus underscore */

static int
Isword(int c)
{
	return iswalnum(c) || c == '_';
}

static int
Notword(int c)
{
	return !(iswalnum(c) || c == '_');
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
91 
92 #if _lib_wctype
93 
94 static int Is_wc_1(int);
95 static int Is_wc_2(int);
96 static int Is_wc_3(int);
97 static int Is_wc_4(int);
98 static int Is_wc_5(int);
99 static int Is_wc_6(int);
100 static int Is_wc_7(int);
101 static int Is_wc_8(int);
102 
103 #endif
104 
105 #define SZ(s)		s,(sizeof(s)-1)
106 
107 static Ctype_t ctype[] =
108 {
109 	{ SZ("alnum"), Isalnum },
110 	{ SZ("alpha"), Isalpha },
111 	{ SZ("blank"), Isblank },
112 	{ SZ("cntrl"), Iscntrl },
113 	{ SZ("digit"), Isdigit },
114 	{ SZ("graph"), Isgraph },
115 	{ SZ("lower"), Islower },
116 	{ SZ("print"), Isprint },
117 	{ SZ("punct"), Ispunct },
118 	{ SZ("space"), Isspace },
119 	{ SZ("upper"), Isupper },
120 	{ SZ("word"),  Isword  },
121 	{ SZ("xdigit"),Isxdigit},
122 #if _lib_wctype
123 	{ 0, 0,        Is_wc_1 },
124 	{ 0, 0,        Is_wc_2 },
125 	{ 0, 0,        Is_wc_3 },
126 	{ 0, 0,        Is_wc_4 },
127 	{ 0, 0,        Is_wc_5 },
128 	{ 0, 0,        Is_wc_6 },
129 	{ 0, 0,        Is_wc_7 },
130 	{ 0, 0,        Is_wc_8 },
131 #endif
132 };
133 
#if _lib_wctype

/*
 * Is_wc_N() tests c with iswctype() against the wtype member of
 * ctype[CTYPES+N-1]
 *
 * regclass() stores the wctype() handle in the table entry whose ctype
 * member it returns, so each function only sees its own handle when
 * CTYPES equals the number of named entries that precede the Is_wc
 * slots in ctype[]
 */

static int
Is_wc_1(int c)
{
	return iswctype(c, ctype[CTYPES+0].wtype);
}

static int
Is_wc_2(int c)
{
	return iswctype(c, ctype[CTYPES+1].wtype);
}

static int
Is_wc_3(int c)
{
	return iswctype(c, ctype[CTYPES+2].wtype);
}

static int
Is_wc_4(int c)
{
	return iswctype(c, ctype[CTYPES+3].wtype);
}

static int
Is_wc_5(int c)
{
	return iswctype(c, ctype[CTYPES+4].wtype);
}

static int
Is_wc_6(int c)
{
	return iswctype(c, ctype[CTYPES+5].wtype);
}

static int
Is_wc_7(int c)
{
	return iswctype(c, ctype[CTYPES+6].wtype);
}

static int
Is_wc_8(int c)
{
	return iswctype(c, ctype[CTYPES+7].wtype);
}

#endif
146 
147 /*
148  * return pointer to ctype function for :class:] in s
149  * s points to the first char after the initial [
150  * if e!=0 it points to next char in s
151  * 0 returned on error
152  */
153 
154 regclass_t
155 regclass(const char* s, char** e)
156 {
157 	register Ctype_t*	cp;
158 	register int		c;
159 	register size_t		n;
160 	register const char*	t;
161 
162 	if (c = *s++)
163 	{
164 		for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
165 		if (*t != c)
166 			return 0;
167 		n = t - s;
168 		for (cp = ctypes; cp; cp = cp->next)
169 			if (n == cp->size && strneq(s, cp->name, n))
170 				goto found;
171 		for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
172 		{
173 #if _lib_wctype
174 			if (!cp->size && (cp->name = (const char*)memdup(s, n + 1)))
175 			{
176 				*((char*)cp->name + n) = 0;
177 				/* mvs.390 needs the (char*) cast -- barf */
178 				if (!(cp->wtype = wctype((char*)cp->name)))
179 				{
180 					free((char*)cp->name);
181 					return 0;
182 				}
183 				cp->size = n;
184 				goto found;
185 			}
186 #endif
187 			if (n == cp->size && strneq(s, cp->name, n))
188 				goto found;
189 		}
190 	}
191 	return 0;
192  found:
193 	if (e)
194 		*e = (char*)t + 2;
195 	return cp->ctype;
196 }
197 
198 /*
199  * associate the ctype function fun with name
200  */
201 
202 int
203 regaddclass(const char* name, regclass_t fun)
204 {
205 	register Ctype_t*	cp;
206 	register Ctype_t*	np;
207 	register size_t		n;
208 
209 	n = strlen(name);
210 	for (cp = ctypes; cp; cp = cp->next)
211 		if (cp->size == n && strneq(name, cp->name, n))
212 		{
213 			cp->ctype = fun;
214 			return 0;
215 		}
216 	if (!(np = newof(0, Ctype_t, 1, n + 1)))
217 		return REG_ESPACE;
218 	np->size = n;
219 	np->name = strcpy((char*)(np + 1), name);
220 	np->ctype = fun;
221 	np->next = ctypes;
222 	ctypes = np;
223 	return 0;
224 }
225 
226 /*
227  * return pointer to ctype function for token
228  */
229 
230 regclass_t
231 classfun(int type)
232 {
233 	switch (type)
234 	{
235 	case T_ALNUM:		return  Isword;
236 	case T_ALNUM_NOT:	return Notword;
237 	case T_DIGIT:		return  Isdigit;
238 	case T_DIGIT_NOT:	return Notdigit;
239 	case T_SPACE:		return  Isspace;
240 	case T_SPACE_NOT:	return Notspace;
241 	}
242 	return 0;
243 }
244