1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1988 AT&T */ 22 /* All Rights Reserved */ 23 24 /* 25 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 26 * 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #ifndef _REGEXP_H 32 #define _REGEXP_H 33 34 #include <string.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #define CBRA 2 41 #define CCHR 4 42 #define CDOT 8 43 #define CCL 12 44 #define CXCL 16 45 #define CDOL 20 46 #define CCEOF 22 47 #define CKET 24 48 #define CBACK 36 49 #define NCCL 40 50 51 #define STAR 01 52 #define RNGE 03 53 54 #define NBRA 9 55 56 #define PLACE(c) ep[c >> 3] |= bittab[c & 07] 57 #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) 58 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0) 59 60 static char *braslist[NBRA]; 61 static char *braelist[NBRA]; 62 int sed, nbra; 63 char *loc1, *loc2, *locs; 64 static int nodelim; 65 66 int circf; 67 static int low; 68 static int size; 69 70 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 71 72 int advance(const char *lp, const char *ep); 73 static void getrnge(const char *str); 74 75 char * 76 compile(char *instring, char *ep, const char *endbuf, int seof) 77 { 78 INIT /* Dependent declarations and initializations */ 79 register int c; 80 register int eof = seof; 81 char *lastep; 82 int cclcnt; 83 char bracket[NBRA], *bracketp; 84 int closed; 85 int neg; 86 int lc; 87 int i, cflg; 88 int iflag; /* used for non-ascii characters in brackets */ 89 90 #ifdef __lint 91 /* make lint happy */ 92 c = nodelim; 93 #endif 94 95 lastep = NULL; 96 if ((c = GETC()) == eof || c == '\n') { 97 if (c == '\n') { 98 UNGETC(c); 99 nodelim = 1; 100 } 101 if (*ep == 0 && !sed) 102 ERROR(41); 103 RETURN(ep); 104 } 105 bracketp = bracket; 106 circf = closed = nbra = 0; 107 if (c == '^') 108 circf++; 109 else 110 UNGETC(c); 111 for (;;) { 112 if (ep >= endbuf) 113 ERROR(50); 114 c = GETC(); 115 if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) 116 lastep = ep; 117 if (c == eof) { 118 *ep++ = CCEOF; 119 if (bracketp != bracket) 120 ERROR(42); 121 RETURN(ep); 122 } 123 switch (c) { 124 125 case '.': 126 *ep++ = CDOT; 127 continue; 128 129 case '\n': 130 if (!sed) { 131 UNGETC(c); 132 *ep++ = CCEOF; 133 nodelim = 1; 134 if (bracketp != bracket) 135 ERROR(42); 136 RETURN(ep); 137 } else ERROR(36); 138 case '*': 139 if (lastep == NULL || *lastep == CBRA || 140 *lastep == CKET) 141 goto defchar; 142 *lastep |= STAR; 143 continue; 144 145 case '$': 146 if (PEEKC() != eof && PEEKC() != '\n') 147 goto defchar; 148 *ep++ = CDOL; 149 continue; 150 151 case '[': 152 if (&ep[17] >= endbuf) 153 ERROR(50); 154 155 *ep++ = CCL; 156 lc = 0; 157 for (i = 0; i < 16; i++) 158 ep[i] = 0; 159 160 neg = 0; 161 if ((c = GETC()) == '^') { 162 neg = 1; 163 c = GETC(); 164 } 165 iflag = 1; 166 do { 167 c &= 0377; 168 if (c == '\0' || c == '\n') 169 ERROR(49); 170 if ((c & 0200) && iflag) { 171 iflag = 0; 172 if (&ep[32] >= endbuf) 173 ERROR(50); 174 ep[-1] = CXCL; 175 for (i = 16; i < 32; i++) 176 ep[i] = 0; 177 } 178 if (c == '-' && lc != 0) { 179 if ((c = GETC()) == ']') { 180 PLACE('-'); 181 break; 182 } 183 if ((c & 0200) && iflag) { 184 iflag = 0; 185 if (&ep[32] >= endbuf) 186 ERROR(50); 187 ep[-1] = CXCL; 188 for (i = 16; i < 32; i++) 189 ep[i] = 0; 190 } 191 while (lc < c) { 192 PLACE(lc); 193 lc++; 194 } 195 } 196 lc = c; 197 PLACE(c); 198 } while ((c = GETC()) != ']'); 199 200 if (iflag) 201 iflag = 16; 202 else 203 iflag = 32; 204 205 if (neg) { 206 if (iflag == 32) { 207 for (cclcnt = 0; cclcnt < iflag; 208 cclcnt++) 209 ep[cclcnt] ^= 0377; 210 ep[0] &= 0376; 211 } else { 212 ep[-1] = NCCL; 213 /* make nulls match so test fails */ 214 ep[0] |= 01; 215 } 216 } 217 218 ep += iflag; 219 220 continue; 221 222 case '\\': 223 switch (c = GETC()) { 224 225 case '(': 226 if (nbra >= NBRA) 227 ERROR(43); 228 *bracketp++ = (char)nbra; 229 *ep++ = CBRA; 230 *ep++ = (char)nbra++; 231 continue; 232 233 case ')': 234 if (bracketp <= bracket) 235 ERROR(42); 236 *ep++ = CKET; 237 *ep++ = *--bracketp; 238 closed++; 239 continue; 240 241 case '{': 242 if (lastep == NULL) 243 goto defchar; 244 *lastep |= RNGE; 245 cflg = 0; 246 nlim: 247 c = GETC(); 248 i = 0; 249 do { 250 if ('0' <= c && c <= '9') 251 i = 10 * i + c - '0'; 252 else 253 ERROR(16); 254 } while (((c = GETC()) != '\\') && (c != ',')); 255 if (i >= 255) 256 ERROR(11); 257 *ep++ = (char)i; 258 if (c == ',') { 259 if (cflg++) 260 ERROR(44); 261 if ((c = GETC()) == '\\') 262 *ep++ = (char)255; 263 else { 264 UNGETC(c); 265 goto nlim; 266 /* get 2'nd number */ 267 } 268 } 269 if (GETC() != '}') 270 ERROR(45); 271 if (!cflg) /* one number */ 272 *ep++ = (char)i; 273 else if ((ep[-1] & 0377) < (ep[-2] & 0377)) 274 ERROR(46); 275 continue; 276 277 case '\n': 278 ERROR(36); 279 280 case 'n': 281 c = '\n'; 282 goto defchar; 283 284 default: 285 if (c >= '1' && c <= '9') { 286 if ((c -= '1') >= closed) 287 ERROR(25); 288 *ep++ = CBACK; 289 *ep++ = (char)c; 290 continue; 291 } 292 /* FALLTHROUGH */ 293 } 294 /* FALLTHROUGH */ 295 /* Drop through to default to use \ to turn off special chars */ 296 297 defchar: 298 default: 299 lastep = ep; 300 *ep++ = CCHR; 301 *ep++ = (char)c; 302 } 303 } 304 /*NOTREACHED*/ 305 } 306 307 int 308 step(const char *p1, const char *p2) 309 { 310 char c; 311 312 313 if (circf) { 314 loc1 = (char *)p1; 315 return (advance(p1, p2)); 316 } 317 /* fast check for first character */ 318 if (*p2 == CCHR) { 319 c = p2[1]; 320 do { 321 if (*p1 != c) 322 continue; 323 if (advance(p1, p2)) { 324 loc1 = (char *)p1; 325 return (1); 326 } 327 } while (*p1++); 328 return (0); 329 } 330 /* regular algorithm */ 331 do { 332 if (advance(p1, p2)) { 333 loc1 = (char *)p1; 334 return (1); 335 } 336 } while (*p1++); 337 return (0); 338 } 339 340 int 341 advance(const char *lp, const char *ep) 342 { 343 const char *curlp; 344 int c; 345 char *bbeg; 346 register char neg; 347 size_t ct; 348 349 for (;;) { 350 neg = 0; 351 switch (*ep++) { 352 353 case CCHR: 354 if (*ep++ == *lp++) 355 continue; 356 return (0); 357 /*FALLTHRU*/ 358 359 case CDOT: 360 if (*lp++) 361 continue; 362 return (0); 363 /*FALLTHRU*/ 364 365 case CDOL: 366 if (*lp == 0) 367 continue; 368 return (0); 369 /*FALLTHRU*/ 370 371 case CCEOF: 372 loc2 = (char *)lp; 373 return (1); 374 /*FALLTHRU*/ 375 376 case CXCL: 377 c = (unsigned char)*lp++; 378 if (ISTHERE(c)) { 379 ep += 32; 380 continue; 381 } 382 return (0); 383 /*FALLTHRU*/ 384 385 case NCCL: 386 neg = 1; 387 /*FALLTHRU*/ 388 389 case CCL: 390 c = *lp++; 391 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { 392 ep += 16; 393 continue; 394 } 395 return (0); 396 /*FALLTHRU*/ 397 398 case CBRA: 399 braslist[*ep++] = (char *)lp; 400 continue; 401 /*FALLTHRU*/ 402 403 case CKET: 404 braelist[*ep++] = (char *)lp; 405 continue; 406 /*FALLTHRU*/ 407 408 case CCHR | RNGE: 409 c = *ep++; 410 getrnge(ep); 411 while (low--) 412 if (*lp++ != c) 413 return (0); 414 curlp = lp; 415 while (size--) 416 if (*lp++ != c) 417 break; 418 if (size < 0) 419 lp++; 420 ep += 2; 421 goto star; 422 /*FALLTHRU*/ 423 424 case CDOT | RNGE: 425 getrnge(ep); 426 while (low--) 427 if (*lp++ == '\0') 428 return (0); 429 curlp = lp; 430 while (size--) 431 if (*lp++ == '\0') 432 break; 433 if (size < 0) 434 lp++; 435 ep += 2; 436 goto star; 437 /*FALLTHRU*/ 438 439 case CXCL | RNGE: 440 getrnge(ep + 32); 441 while (low--) { 442 c = (unsigned char)*lp++; 443 if (!ISTHERE(c)) 444 return (0); 445 } 446 curlp = lp; 447 while (size--) { 448 c = (unsigned char)*lp++; 449 if (!ISTHERE(c)) 450 break; 451 } 452 if (size < 0) 453 lp++; 454 ep += 34; /* 32 + 2 */ 455 goto star; 456 /*FALLTHRU*/ 457 458 case NCCL | RNGE: 459 neg = 1; 460 /*FALLTHRU*/ 461 462 case CCL | RNGE: 463 getrnge(ep + 16); 464 while (low--) { 465 c = *lp++; 466 if (((c & 0200) || !ISTHERE(c)) ^ neg) 467 return (0); 468 } 469 curlp = lp; 470 while (size--) { 471 c = *lp++; 472 if (((c & 0200) || !ISTHERE(c)) ^ neg) 473 break; 474 } 475 if (size < 0) 476 lp++; 477 ep += 18; /* 16 + 2 */ 478 goto star; 479 /*FALLTHRU*/ 480 481 case CBACK: 482 bbeg = braslist[*ep]; 483 ct = braelist[*ep++] - bbeg; 484 485 if (ecmp(bbeg, lp, ct)) { 486 lp += ct; 487 continue; 488 } 489 return (0); 490 /*FALLTHRU*/ 491 492 case CBACK | STAR: 493 bbeg = braslist[*ep]; 494 ct = braelist[*ep++] - bbeg; 495 curlp = lp; 496 while (ecmp(bbeg, lp, ct)) 497 lp += ct; 498 499 while (lp >= curlp) { 500 if (advance(lp, ep)) 501 return (1); 502 lp -= ct; 503 } 504 return (0); 505 /*FALLTHRU*/ 506 507 case CDOT | STAR: 508 curlp = lp; 509 while (*lp++) 510 ; 511 goto star; 512 /*FALLTHRU*/ 513 514 case CCHR | STAR: 515 curlp = lp; 516 while (*lp++ == *ep) 517 ; 518 ep++; 519 goto star; 520 /*FALLTHRU*/ 521 522 case CXCL | STAR: 523 curlp = lp; 524 do { 525 c = (unsigned char)*lp++; 526 } while (ISTHERE(c)); 527 ep += 32; 528 goto star; 529 /*FALLTHRU*/ 530 531 case NCCL | STAR: 532 neg = 1; 533 /*FALLTHRU*/ 534 535 case CCL | STAR: 536 curlp = lp; 537 do { 538 c = *lp++; 539 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); 540 ep += 16; 541 goto star; 542 /*FALLTHRU*/ 543 544 star: 545 do { 546 if (--lp == locs) 547 break; 548 if (advance(lp, ep)) 549 return (1); 550 } while (lp > curlp); 551 return (0); 552 553 } 554 } 555 /*NOTREACHED*/ 556 } 557 558 static void 559 getrnge(const char *str) 560 { 561 low = *str++ & 0377; 562 size = ((*str & 0377) == 255)? 20000: (*str &0377) - low; 563 } 564 565 #ifdef __cplusplus 566 } 567 #endif 568 569 #endif /* _REGEXP_H */ 570