1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2010 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex state and alloc 26 */ 27 28 #include "reglib.h" 29 30 #if _PACKAGE_ast 31 32 #include <ccode.h> 33 34 #else 35 36 #define CC_bel '\a' 37 #define CC_esc '\033' 38 #define CC_vt '\v' 39 40 #endif 41 42 /* 43 * state shared by all threads 44 */ 45 46 State_t state = 47 { 48 { -1, -1 }, 49 50 /* 51 * escape code table 52 * the "funny" things get special treatment at ends of BRE 53 * 54 * BRE 0:normal 1:escaped 2:escaped-char-class 55 * ERE 3:normal 4:escaped 5:escaped-char-class 56 * ARE 6:normal 7:escaped 8:escaped-char-class 57 * SRE 9:normal 10:escaped 11:escaped-char-class 58 * KRE 12:normal 13:escaped 14:escaped-char-class 59 */ 60 61 '\\', 62 '\\', '\\', '\\', 63 '\\', '\\', '\\', 64 '\\', '\\', '\\', 65 '\\', '\\', '\\', 66 '\\', '\\', '\\', 67 '^', /* funny */ 68 '^', '^', '^', 69 T_CFLX, '^', '^', 70 T_CFLX, '^', '^', 71 '^', '^', '^', 72 '^', '^', '^', 73 '.', 74 T_DOT, '.', T_BAD, 75 T_DOT, '.', T_BAD, 76 T_DOT, '.', T_BAD, 77 '.', '.', T_BAD, 78 '.', '.', T_BAD, 79 '$', /* funny */ 80 '$', '$', T_BAD, 81 T_DOLL, '$', T_BAD, 82 T_DOLL, '$', T_BAD, 83 '$', '$', T_BAD, 84 '$', '$', T_BAD, 85 '*', 86 T_STAR, '*', T_BAD, 87 T_STAR, '*', T_BAD, 88 T_STAR, '*', T_BAD, 89 T_STAR, '*', '*', 90 T_STAR, '*', '*', 91 '[', 92 T_BRA, '[', '[', 93 T_BRA, '[', '[', 94 T_BRA, '[', '[', 95 T_BRA, '[', '[', 96 T_BRA, '[', '[', 97 '|', 98 '|', T_BAD, T_BAD, 99 T_BAR, '|', T_BAD, 100 T_BAR, '|', T_BAD, 101 '|', '|', T_BAD, 102 T_BAR, '|', T_BAD, 103 '+', 104 '+', T_BAD, T_BAD, 105 T_PLUS, '+', T_BAD, 106 T_PLUS, '+', T_BAD, 107 '+', '+', T_BAD, 108 T_PLUS, '+', T_BAD, 109 '?', 110 '?', T_BAD, T_BAD, 111 T_QUES, '?', T_BAD, 112 T_QUES, '?', T_BAD, 113 T_QUES, '?', '?', 114 T_QUES, '?', '?', 115 '(', 116 '(', T_OPEN, T_BAD, 117 T_OPEN, '(', T_BAD, 118 T_OPEN, '(', T_BAD, 119 '(', '(', '(', 120 T_OPEN, '(', '(', 121 ')', 122 ')', T_CLOSE, T_BAD, 123 T_CLOSE, ')', T_BAD, 124 T_CLOSE, ')', T_BAD, 125 ')', ')', ')', 126 T_CLOSE, ')', ')', 127 '{', 128 '{', T_LEFT, T_BAD, 129 T_LEFT, '{', T_BAD, 130 T_LEFT, '{', T_BAD, 131 '{', '{', '{', 132 T_LEFT, '{', '{', 133 '}', 134 '}', T_RIGHT, T_BAD, 135 '}', T_BAD, T_BAD, 136 '}', T_BAD, T_BAD, 137 '}', '}', '}', 138 '}', '}', '}', 139 '&', 140 '&', T_BAD, T_BAD, 141 '&', T_AND, T_BAD, 142 T_AND, '&', T_BAD, 143 '&', '&', T_BAD, 144 T_AND, '&', T_BAD, 145 '!', 146 '!', T_BAD, T_BAD, 147 '!', T_BANG, T_BAD, 148 T_BANG, '!', T_BAD, 149 '!', '!', T_BAD, 150 T_BANG, '!', T_BAD, 151 '@', 152 '@', T_BAD, T_BAD, 153 '@', T_BAD, T_BAD, 154 '@', T_BAD, T_BAD, 155 '@', '@', T_BAD, 156 T_AT, '@', T_BAD, 157 '~', 158 '~', T_BAD, T_BAD, 159 '~', T_BAD, T_BAD, 160 '~', T_BAD, T_BAD, 161 '~', '~', T_BAD, 162 T_TILDE, '~', T_BAD, 163 '%', 164 '%', T_BAD, T_BAD, 165 '%', T_BAD, T_BAD, 166 '%', T_BAD, T_BAD, 167 '%', '%', T_BAD, 168 T_PERCENT, '%', T_BAD, 169 '<', 170 '<', T_LT, T_BAD, 171 '<', T_LT, T_BAD, 172 T_LT, '<', T_BAD, 173 '<', '<', T_BAD, 174 '<', '<', T_BAD, 175 '>', 176 '>', T_GT, T_BAD, 177 '>', T_GT, T_BAD, 178 T_GT, '>', T_BAD, 179 '>', '>', T_BAD, 180 '>', '>', T_BAD, 181 182 /* backrefs */ 183 184 '0', 185 '0', T_BACK+0, T_ESCAPE, 186 '0', T_BACK+0, T_ESCAPE, 187 '0', T_BACK+0, T_ESCAPE, 188 '0', T_BACK+0, T_ESCAPE, 189 '0', T_BACK+0, T_ESCAPE, 190 '1', 191 '1', T_BACK+1, T_ESCAPE, 192 '1', T_BACK+1, T_ESCAPE, 193 '1', T_BACK+1, T_ESCAPE, 194 '1', T_BACK+1, T_ESCAPE, 195 '1', T_BACK+1, T_ESCAPE, 196 '2', 197 '2', T_BACK+2, T_ESCAPE, 198 '2', T_BACK+2, T_ESCAPE, 199 '2', T_BACK+2, T_ESCAPE, 200 '2', T_BACK+2, T_ESCAPE, 201 '2', T_BACK+2, T_ESCAPE, 202 '3', 203 '3', T_BACK+3, T_ESCAPE, 204 '3', T_BACK+3, T_ESCAPE, 205 '3', T_BACK+3, T_ESCAPE, 206 '3', T_BACK+3, T_ESCAPE, 207 '3', T_BACK+3, T_ESCAPE, 208 '4', 209 '4', T_BACK+4, T_ESCAPE, 210 '4', T_BACK+4, T_ESCAPE, 211 '4', T_BACK+4, T_ESCAPE, 212 '4', T_BACK+4, T_ESCAPE, 213 '4', T_BACK+4, T_ESCAPE, 214 '5', 215 '5', T_BACK+5, T_ESCAPE, 216 '5', T_BACK+5, T_ESCAPE, 217 '5', T_BACK+5, T_ESCAPE, 218 '5', T_BACK+5, T_ESCAPE, 219 '5', T_BACK+5, T_ESCAPE, 220 '6', 221 '6', T_BACK+6, T_ESCAPE, 222 '6', T_BACK+6, T_ESCAPE, 223 '6', T_BACK+6, T_ESCAPE, 224 '6', T_BACK+6, T_ESCAPE, 225 '6', T_BACK+6, T_ESCAPE, 226 '7', 227 '7', T_BACK+7, T_ESCAPE, 228 '7', T_BACK+7, T_ESCAPE, 229 '7', T_BACK+7, T_ESCAPE, 230 '7', T_BACK+7, T_ESCAPE, 231 '7', T_BACK+7, T_ESCAPE, 232 '8', 233 '8', T_BACK+8, T_ESCAPE, 234 '8', T_BACK+8, T_ESCAPE, 235 '8', T_BACK+8, T_ESCAPE, 236 '8', '8', T_ESCAPE, 237 '8', T_BACK+8, T_ESCAPE, 238 '9', 239 '9', T_BACK+9, T_ESCAPE, 240 '9', T_BACK+9, T_ESCAPE, 241 '9', T_BACK+9, T_ESCAPE, 242 '9', '9', T_ESCAPE, 243 '9', T_BACK+9, T_ESCAPE, 244 245 /* perl */ 246 247 'A', 248 'A', T_BEG_STR, T_BAD, 249 'A', T_BEG_STR, T_BAD, 250 'A', T_BEG_STR, T_BAD, 251 'A', T_BEG_STR, T_BAD, 252 'A', T_BEG_STR, T_BAD, 253 'b', 254 'b', T_WORD, '\b', 255 'b', T_WORD, '\b', 256 'b', T_WORD, '\b', 257 'b', T_WORD, '\b', 258 'b', T_WORD, '\b', 259 'B', 260 'B', T_WORD_NOT, T_BAD, 261 'B', T_WORD_NOT, T_BAD, 262 'B', T_WORD_NOT, T_BAD, 263 'B', T_WORD_NOT, T_BAD, 264 'B', T_WORD_NOT, T_BAD, 265 'd', 266 'd', T_DIGIT, T_DIGIT, 267 'd', T_DIGIT, T_DIGIT, 268 'd', T_DIGIT, T_DIGIT, 269 'd', T_DIGIT, T_DIGIT, 270 'd', T_DIGIT, T_DIGIT, 271 'D', 272 'D', T_DIGIT_NOT, T_DIGIT_NOT, 273 'D', T_DIGIT_NOT, T_DIGIT_NOT, 274 'D', T_DIGIT_NOT, T_DIGIT_NOT, 275 'D', T_DIGIT_NOT, T_DIGIT_NOT, 276 'D', T_DIGIT_NOT, T_DIGIT_NOT, 277 's', 278 's', T_SPACE, T_SPACE, 279 's', T_SPACE, T_SPACE, 280 's', T_SPACE, T_SPACE, 281 's', T_SPACE, T_SPACE, 282 's', T_SPACE, T_SPACE, 283 'S', 284 'S', T_SPACE_NOT, T_SPACE_NOT, 285 'S', T_SPACE_NOT, T_SPACE_NOT, 286 'S', T_SPACE_NOT, T_SPACE_NOT, 287 'S', T_SPACE_NOT, T_SPACE_NOT, 288 'S', T_SPACE_NOT, T_SPACE_NOT, 289 'w', 290 'w', T_ALNUM, T_ALNUM, 291 'w', T_ALNUM, T_ALNUM, 292 'w', T_ALNUM, T_ALNUM, 293 'w', T_ALNUM, T_ALNUM, 294 'w', T_ALNUM, T_ALNUM, 295 'W', 296 'W', T_ALNUM_NOT, T_ALNUM_NOT, 297 'W', T_ALNUM_NOT, T_ALNUM_NOT, 298 'W', T_ALNUM_NOT, T_ALNUM_NOT, 299 'W', T_ALNUM_NOT, T_ALNUM_NOT, 300 'W', T_ALNUM_NOT, T_ALNUM_NOT, 301 'z', 302 'z', T_FIN_STR, T_BAD, 303 'z', T_FIN_STR, T_BAD, 304 'z', T_FIN_STR, T_BAD, 305 'z', T_FIN_STR, T_BAD, 306 'z', T_FIN_STR, T_BAD, 307 'Z', 308 'Z', T_END_STR, T_BAD, 309 'Z', T_END_STR, T_BAD, 310 'Z', T_END_STR, T_BAD, 311 'Z', T_END_STR, T_BAD, 312 'Z', T_END_STR, T_BAD, 313 314 /* C escapes */ 315 316 'a', 317 'a', CC_bel, CC_bel, 318 'a', CC_bel, CC_bel, 319 'a', CC_bel, CC_bel, 320 'a', CC_bel, CC_bel, 321 'a', CC_bel, CC_bel, 322 'c', 323 'c', T_ESCAPE, T_ESCAPE, 324 'c', T_ESCAPE, T_ESCAPE, 325 'c', T_ESCAPE, T_ESCAPE, 326 'c', T_ESCAPE, T_ESCAPE, 327 'c', T_ESCAPE, T_ESCAPE, 328 'C', 329 'C', T_ESCAPE, T_ESCAPE, 330 'C', T_ESCAPE, T_ESCAPE, 331 'C', T_ESCAPE, T_ESCAPE, 332 'C', T_ESCAPE, T_ESCAPE, 333 'C', T_ESCAPE, T_ESCAPE, 334 'e', 335 'e', CC_esc, CC_esc, 336 'e', CC_esc, CC_esc, 337 'e', CC_esc, CC_esc, 338 'e', CC_esc, CC_esc, 339 'e', CC_esc, CC_esc, 340 'E', 341 'E', CC_esc, CC_esc, 342 'E', CC_esc, CC_esc, 343 'E', CC_esc, CC_esc, 344 'E', CC_esc, CC_esc, 345 'E', CC_esc, CC_esc, 346 'f', 347 'f', '\f', '\f', 348 'f', '\f', '\f', 349 'f', '\f', '\f', 350 'f', '\f', '\f', 351 'f', '\f', '\f', 352 'n', 353 'n', '\n', '\n', 354 'n', '\n', '\n', 355 'n', '\n', '\n', 356 'n', '\n', '\n', 357 'n', '\n', '\n', 358 'r', 359 'r', '\r', '\r', 360 'r', '\r', '\r', 361 'r', '\r', '\r', 362 'r', '\r', '\r', 363 'r', '\r', '\r', 364 't', 365 't', '\t', '\t', 366 't', '\t', '\t', 367 't', '\t', '\t', 368 't', '\t', '\t', 369 't', '\t', '\t', 370 'v', 371 'v', CC_vt, CC_vt, 372 'v', CC_vt, CC_vt, 373 'v', CC_vt, CC_vt, 374 'v', CC_vt, CC_vt, 375 'v', CC_vt, CC_vt, 376 'x', 377 'x', T_ESCAPE, T_ESCAPE, 378 'x', T_ESCAPE, T_ESCAPE, 379 'x', T_ESCAPE, T_ESCAPE, 380 'x', T_ESCAPE, T_ESCAPE, 381 'x', T_ESCAPE, T_ESCAPE, 382 }; 383 384 /* 385 * all allocation/free done here 386 * interface compatible with vmresize() 387 * 388 * malloc(n) alloc(0,n) 389 * realloc(p,n) alloc(p,n) 390 * free(p) alloc(p,0) 391 */ 392 393 void* 394 alloc(register regdisc_t* disc, void* p, size_t n) 395 { 396 if (disc->re_resizef) 397 { 398 if (!n && (disc->re_flags & REG_NOFREE)) 399 return 0; 400 return (*disc->re_resizef)(disc->re_resizehandle, p, n); 401 } 402 else if (!n) 403 { 404 if (!(disc->re_flags & REG_NOFREE)) 405 free(p); 406 return 0; 407 } 408 else if (p) 409 return realloc(p, n); 410 else 411 return malloc(n); 412 } 413