1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2011 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * RE character class support 25 */ 26 27 #include "reglib.h" 28 29 struct Ctype_s; typedef struct Ctype_s Ctype_t; 30 31 struct Ctype_s 32 { 33 const char* name; 34 size_t size; 35 regclass_t ctype; 36 Ctype_t* next; 37 #if _lib_wctype 38 wctype_t wtype; 39 #endif 40 }; 41 42 static Ctype_t* ctypes; 43 44 /* 45 * this stuff gets around posix failure to define isblank, 46 * and the fact that ctype functions are macros 47 * and any local extensions that may not even have functions or macros 48 */ 49 50 #if _need_iswblank 51 52 int 53 _reg_iswblank(wint_t wc) 54 { 55 static int initialized; 56 static wctype_t wt; 57 58 if (!initialized) 59 { 60 initialized = 1; 61 wt = wctype("blank"); 62 } 63 return iswctype(wc, wt); 64 } 65 66 #endif 67 68 static int Isalnum(int c) { return iswalnum(c); } 69 static int Isalpha(int c) { return iswalpha(c); } 70 static int Isblank(int c) { return iswblank(c); } 71 static int Iscntrl(int c) { return iswcntrl(c); } 72 static int Isdigit(int c) { return iswdigit(c); } 73 static int Notdigit(int c) { return !iswdigit(c); } 74 static int Isgraph(int c) { return iswgraph(c); } 75 static int Islower(int c) { return iswlower(c); } 76 static int Isprint(int c) { return iswprint(c); } 77 static int Ispunct(int c) { return iswpunct(c); } 78 static int Isspace(int c) { return iswspace(c); } 79 static int Notspace(int c) { return !iswspace(c); } 80 static int Isupper(int c) { return iswupper(c); } 81 static int Isword(int c) { return iswalnum(c) || c == '_'; } 82 static int Notword(int c) { return !iswalnum(c) && c != '_'; } 83 static int Isxdigit(int c) { return iswxdigit(c);} 84 85 #if _lib_wctype 86 87 static int Is_wc_1(int); 88 static int Is_wc_2(int); 89 static int Is_wc_3(int); 90 static int Is_wc_4(int); 91 static int Is_wc_5(int); 92 static int Is_wc_6(int); 93 static int Is_wc_7(int); 94 static int Is_wc_8(int); 95 static int Is_wc_9(int); 96 static int Is_wc_10(int); 97 static int Is_wc_11(int); 98 static int Is_wc_12(int); 99 static int Is_wc_13(int); 100 static int Is_wc_14(int); 101 static int Is_wc_15(int); 102 static int Is_wc_16(int); 103 104 #endif 105 106 #define SZ(s) s,(sizeof(s)-1) 107 108 static Ctype_t ctype[] = 109 { 110 { SZ("alnum"), Isalnum }, 111 { SZ("alpha"), Isalpha }, 112 { SZ("blank"), Isblank }, 113 { SZ("cntrl"), Iscntrl }, 114 { SZ("digit"), Isdigit }, 115 { SZ("graph"), Isgraph }, 116 { SZ("lower"), Islower }, 117 { SZ("print"), Isprint }, 118 { SZ("punct"), Ispunct }, 119 { SZ("space"), Isspace }, 120 { SZ("upper"), Isupper }, 121 { SZ("word"), Isword }, 122 { SZ("xdigit"),Isxdigit}, 123 124 #define CTYPES 13 125 126 #if _lib_wctype 127 { 0, 0, Is_wc_1 }, 128 { 0, 0, Is_wc_2 }, 129 { 0, 0, Is_wc_3 }, 130 { 0, 0, Is_wc_4 }, 131 { 0, 0, Is_wc_5 }, 132 { 0, 0, Is_wc_6 }, 133 { 0, 0, Is_wc_7 }, 134 { 0, 0, Is_wc_8 }, 135 { 0, 0, Is_wc_9 }, 136 { 0, 0, Is_wc_10 }, 137 { 0, 0, Is_wc_11 }, 138 { 0, 0, Is_wc_12 }, 139 { 0, 0, Is_wc_13 }, 140 { 0, 0, Is_wc_14 }, 141 { 0, 0, Is_wc_15 }, 142 { 0, 0, Is_wc_16 }, 143 144 #define WTYPES 16 145 146 #else 147 148 #define WTYPES 0 149 150 #endif 151 }; 152 153 #if _lib_wctype 154 155 static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); } 156 static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); } 157 static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); } 158 static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); } 159 static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); } 160 static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); } 161 static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); } 162 static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); } 163 static int Is_wc_9(int c) { return iswctype(c, ctype[CTYPES+8].wtype); } 164 static int Is_wc_10(int c) { return iswctype(c, ctype[CTYPES+9].wtype); } 165 static int Is_wc_11(int c) { return iswctype(c, ctype[CTYPES+10].wtype); } 166 static int Is_wc_12(int c) { return iswctype(c, ctype[CTYPES+11].wtype); } 167 static int Is_wc_13(int c) { return iswctype(c, ctype[CTYPES+12].wtype); } 168 static int Is_wc_14(int c) { return iswctype(c, ctype[CTYPES+13].wtype); } 169 static int Is_wc_15(int c) { return iswctype(c, ctype[CTYPES+14].wtype); } 170 static int Is_wc_16(int c) { return iswctype(c, ctype[CTYPES+15].wtype); } 171 172 #endif 173 174 /* 175 * return pointer to ctype function for :class:] in s 176 * s points to the first char after the initial [ 177 * dynamic wctype classes are locale-specific 178 * dynamic entry locale is punned in Ctype_t.next 179 * the search does a lazy (one entry at a time) flush on locale mismatch 180 * if e!=0 it points to next char in s 181 * 0 returned on error 182 */ 183 184 regclass_t 185 regclass(const char* s, char** e) 186 { 187 register Ctype_t* cp; 188 register int c; 189 register size_t n; 190 register const char* t; 191 Ctype_t* lc; 192 Ctype_t* xp; 193 Ctype_t* zp; 194 195 if (!(c = *s++)) 196 return 0; 197 for (t = s; *t && (*t != c || *(t + 1) != ']'); t++); 198 if (*t != c || !(n = t - s)) 199 return 0; 200 for (cp = ctypes; cp; cp = cp->next) 201 if (n == cp->size && strneq(s, cp->name, n)) 202 goto found; 203 xp = zp = 0; 204 lc = (Ctype_t*)setlocale(LC_CTYPE, NiL); 205 for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++) 206 { 207 #if _lib_wctype 208 if (!zp) 209 { 210 if (!cp->size) 211 zp = cp; 212 else if (!xp && cp->next && cp->next != lc) 213 xp = cp; 214 } 215 #endif 216 if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc)) 217 goto found; 218 } 219 #if _lib_wctype 220 if (!(cp = zp)) 221 { 222 if (!(cp = xp)) 223 return 0; 224 cp->size = 0; 225 if (!streq(cp->name, s)) 226 { 227 free((char*)cp->name); 228 cp->name = 0; 229 } 230 } 231 if (!cp->name) 232 { 233 if (!(cp->name = (const char*)memdup(s, n + 1))) 234 return 0; 235 *((char*)cp->name + n) = 0; 236 } 237 /* mvs.390 needs the (char*) cast -- barf */ 238 if (!(cp->wtype = wctype((char*)cp->name))) 239 { 240 free((char*)cp->name); 241 cp->name = 0; 242 return 0; 243 } 244 cp->size = n; 245 cp->next = lc; 246 #endif 247 found: 248 if (e) 249 *e = (char*)t + 2; 250 return cp->ctype; 251 } 252 253 /* 254 * associate the ctype function fun with name 255 */ 256 257 int 258 regaddclass(const char* name, regclass_t fun) 259 { 260 register Ctype_t* cp; 261 register Ctype_t* np; 262 register size_t n; 263 264 n = strlen(name); 265 for (cp = ctypes; cp; cp = cp->next) 266 if (cp->size == n && strneq(name, cp->name, n)) 267 { 268 cp->ctype = fun; 269 return 0; 270 } 271 if (!(np = newof(0, Ctype_t, 1, n + 1))) 272 return REG_ESPACE; 273 np->size = n; 274 np->name = strcpy((char*)(np + 1), name); 275 np->ctype = fun; 276 np->next = ctypes; 277 ctypes = np; 278 return 0; 279 } 280 281 /* 282 * return pointer to ctype function for token 283 */ 284 285 regclass_t 286 classfun(int type) 287 { 288 switch (type) 289 { 290 case T_ALNUM: return Isword; 291 case T_ALNUM_NOT: return Notword; 292 case T_DIGIT: return Isdigit; 293 case T_DIGIT_NOT: return Notdigit; 294 case T_SPACE: return Isspace; 295 case T_SPACE_NOT: return Notspace; 296 } 297 return 0; 298 } 299