1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2008 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * RE character class support 25 */ 26 27 #include "reglib.h" 28 29 struct Ctype_s; typedef struct Ctype_s Ctype_t; 30 31 struct Ctype_s 32 { 33 const char* name; 34 size_t size; 35 regclass_t ctype; 36 Ctype_t* next; 37 #if _lib_wctype 38 wctype_t wtype; 39 #endif 40 }; 41 42 static Ctype_t* ctypes; 43 44 #define CTYPES 12 45 #if _lib_wctype 46 #define WTYPES 8 47 #else 48 #define WTYPES 0 49 #endif 50 51 /* 52 * this stuff gets around posix failure to define isblank, 53 * and the fact that ctype functions are macros 54 * and any local extensions that may not even have functions or macros 55 */ 56 57 #if _need_iswblank 58 59 int 60 _reg_iswblank(wint_t wc) 61 { 62 static int initialized; 63 static wctype_t wt; 64 65 if (!initialized) 66 { 67 initialized = 1; 68 wt = wctype("blank"); 69 } 70 return iswctype(wc, wt); 71 } 72 73 #endif 74 75 static int Isalnum(int c) { return iswalnum(c); } 76 static int Isalpha(int c) { return iswalpha(c); } 77 static int Isblank(int c) { return iswblank(c); } 78 static int Iscntrl(int c) { return iswcntrl(c); } 79 static int Isdigit(int c) { return iswdigit(c); } 80 static int Notdigit(int c) { return !iswdigit(c); } 81 static int Isgraph(int c) { return iswgraph(c); } 82 static int Islower(int c) { return iswlower(c); } 83 static int Isprint(int c) { return iswprint(c); } 84 static int Ispunct(int c) { return iswpunct(c); } 85 static int Isspace(int c) { return iswspace(c); } 86 static int Notspace(int c) { return !iswspace(c); } 87 static int Isupper(int c) { return iswupper(c); } 88 static int Isword(int c) { return iswalnum(c) || c == '_'; } 89 static int Notword(int c) { return !iswalnum(c) && c != '_'; } 90 static int Isxdigit(int c) { return iswxdigit(c);} 91 92 #if _lib_wctype 93 94 static int Is_wc_1(int); 95 static int Is_wc_2(int); 96 static int Is_wc_3(int); 97 static int Is_wc_4(int); 98 static int Is_wc_5(int); 99 static int Is_wc_6(int); 100 static int Is_wc_7(int); 101 static int Is_wc_8(int); 102 103 #endif 104 105 #define SZ(s) s,(sizeof(s)-1) 106 107 static Ctype_t ctype[] = 108 { 109 { SZ("alnum"), Isalnum }, 110 { SZ("alpha"), Isalpha }, 111 { SZ("blank"), Isblank }, 112 { SZ("cntrl"), Iscntrl }, 113 { SZ("digit"), Isdigit }, 114 { SZ("graph"), Isgraph }, 115 { SZ("lower"), Islower }, 116 { SZ("print"), Isprint }, 117 { SZ("punct"), Ispunct }, 118 { SZ("space"), Isspace }, 119 { SZ("upper"), Isupper }, 120 { SZ("word"), Isword }, 121 { SZ("xdigit"),Isxdigit}, 122 #if _lib_wctype 123 { 0, 0, Is_wc_1 }, 124 { 0, 0, Is_wc_2 }, 125 { 0, 0, Is_wc_3 }, 126 { 0, 0, Is_wc_4 }, 127 { 0, 0, Is_wc_5 }, 128 { 0, 0, Is_wc_6 }, 129 { 0, 0, Is_wc_7 }, 130 { 0, 0, Is_wc_8 }, 131 #endif 132 }; 133 134 #if _lib_wctype 135 136 static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); } 137 static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); } 138 static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); } 139 static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); } 140 static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); } 141 static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); } 142 static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); } 143 static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); } 144 145 #endif 146 147 /* 148 * return pointer to ctype function for :class:] in s 149 * s points to the first char after the initial [ 150 * if e!=0 it points to next char in s 151 * 0 returned on error 152 */ 153 154 regclass_t 155 regclass(const char* s, char** e) 156 { 157 register Ctype_t* cp; 158 register int c; 159 register size_t n; 160 register const char* t; 161 162 if (c = *s++) 163 { 164 for (t = s; *t && (*t != c || *(t + 1) != ']'); t++); 165 if (*t != c) 166 return 0; 167 n = t - s; 168 for (cp = ctypes; cp; cp = cp->next) 169 if (n == cp->size && strneq(s, cp->name, n)) 170 goto found; 171 for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++) 172 { 173 #if _lib_wctype 174 if (!cp->size && (cp->name = (const char*)memdup(s, n + 1))) 175 { 176 *((char*)cp->name + n) = 0; 177 /* mvs.390 needs the (char*) cast -- barf */ 178 if (!(cp->wtype = wctype((char*)cp->name))) 179 { 180 free((char*)cp->name); 181 return 0; 182 } 183 cp->size = n; 184 goto found; 185 } 186 #endif 187 if (n == cp->size && strneq(s, cp->name, n)) 188 goto found; 189 } 190 } 191 return 0; 192 found: 193 if (e) 194 *e = (char*)t + 2; 195 return cp->ctype; 196 } 197 198 /* 199 * associate the ctype function fun with name 200 */ 201 202 int 203 regaddclass(const char* name, regclass_t fun) 204 { 205 register Ctype_t* cp; 206 register Ctype_t* np; 207 register size_t n; 208 209 n = strlen(name); 210 for (cp = ctypes; cp; cp = cp->next) 211 if (cp->size == n && strneq(name, cp->name, n)) 212 { 213 cp->ctype = fun; 214 return 0; 215 } 216 if (!(np = newof(0, Ctype_t, 1, n + 1))) 217 return REG_ESPACE; 218 np->size = n; 219 np->name = strcpy((char*)(np + 1), name); 220 np->ctype = fun; 221 np->next = ctypes; 222 ctypes = np; 223 return 0; 224 } 225 226 /* 227 * return pointer to ctype function for token 228 */ 229 230 regclass_t 231 classfun(int type) 232 { 233 switch (type) 234 { 235 case T_ALNUM: return Isword; 236 case T_ALNUM_NOT: return Notword; 237 case T_DIGIT: return Isdigit; 238 case T_DIGIT_NOT: return Notdigit; 239 case T_SPACE: return Isspace; 240 case T_SPACE_NOT: return Notspace; 241 } 242 return 0; 243 } 244