1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex state and alloc 26 */ 27 28 #include "reglib.h" 29 30 #include <ccode.h> 31 32 /* 33 * state shared by all threads 34 */ 35 36 State_t state = 37 { 38 { -1, -1 }, 39 40 /* 41 * escape code table 42 * the "funny" things get special treatment at ends of BRE 43 * 44 * BRE 0:normal 1:escaped 2:escaped-char-class 45 * ERE 3:normal 4:escaped 5:escaped-char-class 46 * ARE 6:normal 7:escaped 8:escaped-char-class 47 * SRE 9:normal 10:escaped 11:escaped-char-class 48 * KRE 12:normal 13:escaped 14:escaped-char-class 49 */ 50 51 '\\', 52 '\\', '\\', '\\', 53 '\\', '\\', '\\', 54 '\\', '\\', '\\', 55 '\\', '\\', '\\', 56 '\\', '\\', '\\', 57 '^', /* funny */ 58 '^', '^', '^', 59 T_CFLX, '^', '^', 60 T_CFLX, '^', '^', 61 '^', '^', '^', 62 '^', '^', '^', 63 '.', 64 T_DOT, '.', T_BAD, 65 T_DOT, '.', T_BAD, 66 T_DOT, '.', T_BAD, 67 '.', '.', T_BAD, 68 '.', '.', T_BAD, 69 '$', /* funny */ 70 '$', '$', T_BAD, 71 T_DOLL, '$', T_BAD, 72 T_DOLL, '$', T_BAD, 73 '$', '$', T_BAD, 74 '$', '$', T_BAD, 75 '*', 76 T_STAR, '*', T_BAD, 77 T_STAR, '*', T_BAD, 78 T_STAR, '*', T_BAD, 79 T_STAR, '*', '*', 80 T_STAR, '*', '*', 81 '[', 82 T_BRA, '[', '[', 83 T_BRA, '[', '[', 84 T_BRA, '[', '[', 85 T_BRA, '[', '[', 86 T_BRA, '[', '[', 87 '|', 88 '|', T_BAD, T_BAD, 89 T_BAR, '|', T_BAD, 90 T_BAR, '|', T_BAD, 91 '|', '|', T_BAD, 92 T_BAR, '|', T_BAD, 93 '+', 94 '+', T_BAD, T_BAD, 95 T_PLUS, '+', T_BAD, 96 T_PLUS, '+', T_BAD, 97 '+', '+', T_BAD, 98 T_PLUS, '+', T_BAD, 99 '?', 100 '?', T_BAD, T_BAD, 101 T_QUES, '?', T_BAD, 102 T_QUES, '?', T_BAD, 103 T_QUES, '?', '?', 104 T_QUES, '?', '?', 105 '(', 106 '(', T_OPEN, T_BAD, 107 T_OPEN, '(', T_BAD, 108 T_OPEN, '(', T_BAD, 109 '(', '(', '(', 110 T_OPEN, '(', '(', 111 ')', 112 ')', T_CLOSE, T_BAD, 113 T_CLOSE, ')', T_BAD, 114 T_CLOSE, ')', T_BAD, 115 ')', ')', ')', 116 T_CLOSE, ')', ')', 117 '{', 118 '{', T_LEFT, T_BAD, 119 T_LEFT, '{', T_BAD, 120 T_LEFT, '{', T_BAD, 121 '{', '{', '{', 122 T_LEFT, '{', '{', 123 '}', 124 '}', T_RIGHT, T_BAD, 125 '}', T_BAD, T_BAD, 126 '}', T_BAD, T_BAD, 127 '}', '}', '}', 128 '}', '}', '}', 129 '&', 130 '&', T_BAD, T_BAD, 131 '&', T_AND, T_BAD, 132 T_AND, '&', T_BAD, 133 '&', '&', T_BAD, 134 T_AND, '&', T_BAD, 135 '!', 136 '!', T_BAD, T_BAD, 137 '!', T_BANG, T_BAD, 138 T_BANG, '!', T_BAD, 139 '!', '!', T_BAD, 140 T_BANG, '!', T_BAD, 141 '@', 142 '@', T_BAD, T_BAD, 143 '@', T_BAD, T_BAD, 144 '@', T_BAD, T_BAD, 145 '@', '@', T_BAD, 146 T_AT, '@', T_BAD, 147 '~', 148 '~', T_BAD, T_BAD, 149 '~', T_BAD, T_BAD, 150 '~', T_BAD, T_BAD, 151 '~', '~', T_BAD, 152 T_TILDE, '~', T_BAD, 153 '%', 154 '%', T_BAD, T_BAD, 155 '%', T_BAD, T_BAD, 156 '%', T_BAD, T_BAD, 157 '%', '%', T_BAD, 158 T_PERCENT, '%', T_BAD, 159 '<', 160 '<', T_LT, T_BAD, 161 '<', T_LT, T_BAD, 162 T_LT, '<', T_BAD, 163 '<', '<', T_BAD, 164 '<', '<', T_BAD, 165 '>', 166 '>', T_GT, T_BAD, 167 '>', T_GT, T_BAD, 168 T_GT, '>', T_BAD, 169 '>', '>', T_BAD, 170 '>', '>', T_BAD, 171 172 /* backrefs */ 173 174 '0', 175 '0', T_BACK+0, T_ESCAPE, 176 '0', T_BACK+0, T_ESCAPE, 177 '0', T_BACK+0, T_ESCAPE, 178 '0', T_BACK+0, T_ESCAPE, 179 '0', T_BACK+0, T_ESCAPE, 180 '1', 181 '1', T_BACK+1, T_ESCAPE, 182 '1', T_BACK+1, T_ESCAPE, 183 '1', T_BACK+1, T_ESCAPE, 184 '1', T_BACK+1, T_ESCAPE, 185 '1', T_BACK+1, T_ESCAPE, 186 '2', 187 '2', T_BACK+2, T_ESCAPE, 188 '2', T_BACK+2, T_ESCAPE, 189 '2', T_BACK+2, T_ESCAPE, 190 '2', T_BACK+2, T_ESCAPE, 191 '2', T_BACK+2, T_ESCAPE, 192 '3', 193 '3', T_BACK+3, T_ESCAPE, 194 '3', T_BACK+3, T_ESCAPE, 195 '3', T_BACK+3, T_ESCAPE, 196 '3', T_BACK+3, T_ESCAPE, 197 '3', T_BACK+3, T_ESCAPE, 198 '4', 199 '4', T_BACK+4, T_ESCAPE, 200 '4', T_BACK+4, T_ESCAPE, 201 '4', T_BACK+4, T_ESCAPE, 202 '4', T_BACK+4, T_ESCAPE, 203 '4', T_BACK+4, T_ESCAPE, 204 '5', 205 '5', T_BACK+5, T_ESCAPE, 206 '5', T_BACK+5, T_ESCAPE, 207 '5', T_BACK+5, T_ESCAPE, 208 '5', T_BACK+5, T_ESCAPE, 209 '5', T_BACK+5, T_ESCAPE, 210 '6', 211 '6', T_BACK+6, T_ESCAPE, 212 '6', T_BACK+6, T_ESCAPE, 213 '6', T_BACK+6, T_ESCAPE, 214 '6', T_BACK+6, T_ESCAPE, 215 '6', T_BACK+6, T_ESCAPE, 216 '7', 217 '7', T_BACK+7, T_ESCAPE, 218 '7', T_BACK+7, T_ESCAPE, 219 '7', T_BACK+7, T_ESCAPE, 220 '7', T_BACK+7, T_ESCAPE, 221 '7', T_BACK+7, T_ESCAPE, 222 '8', 223 '8', T_BACK+8, T_ESCAPE, 224 '8', T_BACK+8, T_ESCAPE, 225 '8', T_BACK+8, T_ESCAPE, 226 '8', '8', T_ESCAPE, 227 '8', T_BACK+8, T_ESCAPE, 228 '9', 229 '9', T_BACK+9, T_ESCAPE, 230 '9', T_BACK+9, T_ESCAPE, 231 '9', T_BACK+9, T_ESCAPE, 232 '9', '9', T_ESCAPE, 233 '9', T_BACK+9, T_ESCAPE, 234 235 /* perl */ 236 237 'A', 238 'A', T_BEG_STR, T_BAD, 239 'A', T_BEG_STR, T_BAD, 240 'A', T_BEG_STR, T_BAD, 241 'A', T_BEG_STR, T_BAD, 242 'A', T_BEG_STR, T_BAD, 243 'b', 244 'b', T_WORD, '\b', 245 'b', T_WORD, '\b', 246 'b', T_WORD, '\b', 247 'b', T_WORD, '\b', 248 'b', T_WORD, '\b', 249 'B', 250 'B', T_WORD_NOT, T_BAD, 251 'B', T_WORD_NOT, T_BAD, 252 'B', T_WORD_NOT, T_BAD, 253 'B', T_WORD_NOT, T_BAD, 254 'B', T_WORD_NOT, T_BAD, 255 'd', 256 'd', T_DIGIT, T_DIGIT, 257 'd', T_DIGIT, T_DIGIT, 258 'd', T_DIGIT, T_DIGIT, 259 'd', T_DIGIT, T_DIGIT, 260 'd', T_DIGIT, T_DIGIT, 261 'D', 262 'D', T_DIGIT_NOT, T_DIGIT_NOT, 263 'D', T_DIGIT_NOT, T_DIGIT_NOT, 264 'D', T_DIGIT_NOT, T_DIGIT_NOT, 265 'D', T_DIGIT_NOT, T_DIGIT_NOT, 266 'D', T_DIGIT_NOT, T_DIGIT_NOT, 267 's', 268 's', T_SPACE, T_SPACE, 269 's', T_SPACE, T_SPACE, 270 's', T_SPACE, T_SPACE, 271 's', T_SPACE, T_SPACE, 272 's', T_SPACE, T_SPACE, 273 'S', 274 'S', T_SPACE_NOT, T_SPACE_NOT, 275 'S', T_SPACE_NOT, T_SPACE_NOT, 276 'S', T_SPACE_NOT, T_SPACE_NOT, 277 'S', T_SPACE_NOT, T_SPACE_NOT, 278 'S', T_SPACE_NOT, T_SPACE_NOT, 279 'w', 280 'w', T_ALNUM, T_ALNUM, 281 'w', T_ALNUM, T_ALNUM, 282 'w', T_ALNUM, T_ALNUM, 283 'w', T_ALNUM, T_ALNUM, 284 'w', T_ALNUM, T_ALNUM, 285 'W', 286 'W', T_ALNUM_NOT, T_ALNUM_NOT, 287 'W', T_ALNUM_NOT, T_ALNUM_NOT, 288 'W', T_ALNUM_NOT, T_ALNUM_NOT, 289 'W', T_ALNUM_NOT, T_ALNUM_NOT, 290 'W', T_ALNUM_NOT, T_ALNUM_NOT, 291 'z', 292 'z', T_FIN_STR, T_BAD, 293 'z', T_FIN_STR, T_BAD, 294 'z', T_FIN_STR, T_BAD, 295 'z', T_FIN_STR, T_BAD, 296 'z', T_FIN_STR, T_BAD, 297 'Z', 298 'Z', T_END_STR, T_BAD, 299 'Z', T_END_STR, T_BAD, 300 'Z', T_END_STR, T_BAD, 301 'Z', T_END_STR, T_BAD, 302 'Z', T_END_STR, T_BAD, 303 304 /* C escapes */ 305 306 'a', 307 'a', CC_bel, CC_bel, 308 'a', CC_bel, CC_bel, 309 'a', CC_bel, CC_bel, 310 'a', CC_bel, CC_bel, 311 'a', CC_bel, CC_bel, 312 'c', 313 'c', T_ESCAPE, T_ESCAPE, 314 'c', T_ESCAPE, T_ESCAPE, 315 'c', T_ESCAPE, T_ESCAPE, 316 'c', T_ESCAPE, T_ESCAPE, 317 'c', T_ESCAPE, T_ESCAPE, 318 'C', 319 'C', T_ESCAPE, T_ESCAPE, 320 'C', T_ESCAPE, T_ESCAPE, 321 'C', T_ESCAPE, T_ESCAPE, 322 'C', T_ESCAPE, T_ESCAPE, 323 'C', T_ESCAPE, T_ESCAPE, 324 'e', 325 'e', CC_esc, CC_esc, 326 'e', CC_esc, CC_esc, 327 'e', CC_esc, CC_esc, 328 'e', CC_esc, CC_esc, 329 'e', CC_esc, CC_esc, 330 'E', 331 'E', CC_esc, CC_esc, 332 'E', CC_esc, CC_esc, 333 'E', CC_esc, CC_esc, 334 'E', CC_esc, CC_esc, 335 'E', CC_esc, CC_esc, 336 'f', 337 'f', '\f', '\f', 338 'f', '\f', '\f', 339 'f', '\f', '\f', 340 'f', '\f', '\f', 341 'f', '\f', '\f', 342 'n', 343 'n', '\n', '\n', 344 'n', '\n', '\n', 345 'n', '\n', '\n', 346 'n', '\n', '\n', 347 'n', '\n', '\n', 348 'r', 349 'r', '\r', '\r', 350 'r', '\r', '\r', 351 'r', '\r', '\r', 352 'r', '\r', '\r', 353 'r', '\r', '\r', 354 't', 355 't', '\t', '\t', 356 't', '\t', '\t', 357 't', '\t', '\t', 358 't', '\t', '\t', 359 't', '\t', '\t', 360 'v', 361 'v', CC_vt, CC_vt, 362 'v', CC_vt, CC_vt, 363 'v', CC_vt, CC_vt, 364 'v', CC_vt, CC_vt, 365 'v', CC_vt, CC_vt, 366 'x', 367 'x', T_ESCAPE, T_ESCAPE, 368 'x', T_ESCAPE, T_ESCAPE, 369 'x', T_ESCAPE, T_ESCAPE, 370 'x', T_ESCAPE, T_ESCAPE, 371 'x', T_ESCAPE, T_ESCAPE, 372 }; 373 374 /* 375 * all allocation/free done here 376 * interface compatible with vmresize() 377 * 378 * malloc(n) alloc(0,n) 379 * realloc(p,n) alloc(p,n) 380 * free(p) alloc(p,0) 381 */ 382 383 void* 384 alloc(register regdisc_t* disc, void* p, size_t n) 385 { 386 if (disc->re_resizef) 387 { 388 if (!n && (disc->re_flags & REG_NOFREE)) 389 return 0; 390 return (*disc->re_resizef)(disc->re_resizehandle, p, n); 391 } 392 else if (!n) 393 { 394 if (!(disc->re_flags & REG_NOFREE)) 395 free(p); 396 return 0; 397 } 398 else if (p) 399 return realloc(p, n); 400 else 401 return malloc(n); 402 } 403