1 /* 2 * Copyright (c) 1980 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 */ 6 7 #pragma ident "%Z%%M% %I% %E% SMI" 8 9 #include <ctype.h> 10 11 typedef int boolean; 12 #define TRUE 1 13 #define FALSE 0 14 #define NIL 0 15 16 extern boolean l_onecase; /* true if upper and lower equivalent */ 17 extern char *l_idchars; /* set of characters legal in identifiers 18 in addition to letters and digits */ 19 20 extern char *strchr(); 21 static void expconv(void); 22 23 #define isidchr(c) \ 24 (isalnum(c) || ((c) != NIL && strchr(l_idchars, (c)) != NIL)) 25 #define makelower(c) (isupper((c)) ? tolower((c)) : (c)) 26 27 /* STRNCMP - like strncmp except that we convert the 28 * first string to lower case before comparing 29 * if l_onecase is set. 30 */ 31 32 int 33 STRNCMP(char *s1, char *s2, int len) 34 { 35 if (l_onecase) { 36 do 37 if (*s2 - makelower(*s1)) 38 return (*s2 - makelower(*s1)); 39 else { 40 s2++; 41 s1++; 42 } 43 while (--len); 44 } else { 45 do 46 if (*s2 - *s1) 47 return (*s2 - *s1); 48 else { 49 s2++; 50 s1++; 51 } 52 while (--len); 53 } 54 return(0); 55 } 56 57 /* The following routine converts an irregular expression to 58 * internal format. 59 * 60 * Either meta symbols (\a \d or \p) or character strings or 61 * operations ( alternation or parenthesizing ) can be 62 * specified. Each starts with a descriptor byte. The descriptor 63 * byte has STR set for strings, META set for meta symbols 64 * and OPER set for operations. 65 * The descriptor byte can also have the OPT bit set if the object 66 * defined is optional. Also ALT can be set to indicate an alternation. 67 * 68 * For metasymbols the byte following the descriptor byte identities 69 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For 70 * strings the byte after the descriptor is a character count for 71 * the string: 72 * 73 * meta symbols := descriptor 74 * symbol 75 * 76 * strings := descriptor 77 * character count 78 * the string 79 * 80 * operations := descriptor 81 * symbol 82 * character count 83 */ 84 85 /* 86 * handy macros for accessing parts of match blocks 87 */ 88 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ 89 #define MNEXT(A) (A+2) /* character following a metasymbol block */ 90 91 #define OSYM(A) (*(A+1)) /* symbol in an operation block */ 92 #define OCNT(A) (*(A+2)) /* character count */ 93 #define ONEXT(A) (A+3) /* next character after the operation */ 94 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ 95 96 #define SCNT(A) (*(A+1)) /* byte count of a string */ 97 #define SSTR(A) (A+2) /* address of the string */ 98 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */ 99 100 /* 101 * bit flags in the descriptor 102 */ 103 #define OPT 1 104 #define STR 2 105 #define META 4 106 #define ALT 8 107 #define OPER 16 108 109 char *ure; /* pointer current position in unconverted exp */ 110 char *ccre; /* pointer to current position in converted exp*/ 111 char *malloc(); 112 113 char * 114 convexp(char *re) 115 /* re - unconverted irregular expression */ 116 { 117 char *cre; /* pointer to converted regular expression */ 118 119 /* allocate room for the converted expression */ 120 if (re == NIL) 121 return (NIL); 122 if (*re == '\0') 123 return (NIL); 124 cre = malloc (4 * strlen(re) + 3); 125 ccre = cre; 126 ure = re; 127 128 /* start the conversion with a \a */ 129 *cre = META | OPT; 130 MSYM(cre) = 'a'; 131 ccre = MNEXT(cre); 132 133 /* start the conversion (its recursive) */ 134 expconv (); 135 *ccre = 0; 136 return (cre); 137 } 138 139 static void 140 expconv(void) 141 { 142 char *cs; /* pointer to current symbol in converted exp */ 143 char c; /* character being processed */ 144 char *acs; /* pinter to last alternate */ 145 int temp; 146 147 /* let the conversion begin */ 148 acs = NIL; 149 cs = NIL; 150 while (*ure != NIL) { 151 switch (c = *ure++) { 152 153 case '\\': 154 switch (c = *ure++) { 155 156 /* escaped characters are just characters */ 157 default: 158 if (cs == NIL || (*cs & STR) == 0) { 159 cs = ccre; 160 *cs = STR; 161 SCNT(cs) = 1; 162 ccre += 2; 163 } else 164 SCNT(cs)++; 165 *ccre++ = c; 166 break; 167 168 /* normal(?) metacharacters */ 169 case 'a': 170 case 'd': 171 case 'e': 172 case 'p': 173 if (acs != NIL && acs != cs) { 174 do { 175 temp = OCNT(acs); 176 OCNT(acs) = ccre - acs; 177 acs -= temp; 178 } while (temp != 0); 179 acs = NIL; 180 } 181 cs = ccre; 182 *cs = META; 183 MSYM(cs) = c; 184 ccre = MNEXT(cs); 185 break; 186 } 187 break; 188 189 /* just put the symbol in */ 190 case '^': 191 case '$': 192 if (acs != NIL && acs != cs) { 193 do { 194 temp = OCNT(acs); 195 OCNT(acs) = ccre - acs; 196 acs -= temp; 197 } while (temp != 0); 198 acs = NIL; 199 } 200 cs = ccre; 201 *cs = META; 202 MSYM(cs) = c; 203 ccre = MNEXT(cs); 204 break; 205 206 /* mark the last match sequence as optional */ 207 case '?': 208 if (cs) 209 *cs = *cs | OPT; 210 break; 211 212 /* recurse and define a subexpression */ 213 case '(': 214 if (acs != NIL && acs != cs) { 215 do { 216 temp = OCNT(acs); 217 OCNT(acs) = ccre - acs; 218 acs -= temp; 219 } while (temp != 0); 220 acs = NIL; 221 } 222 cs = ccre; 223 *cs = OPER; 224 OSYM(cs) = '('; 225 ccre = ONEXT(cs); 226 expconv (); 227 OCNT(cs) = ccre - cs; /* offset to next symbol */ 228 break; 229 230 /* return from a recursion */ 231 case ')': 232 if (acs != NIL) { 233 do { 234 temp = OCNT(acs); 235 OCNT(acs) = ccre - acs; 236 acs -= temp; 237 } while (temp != 0); 238 acs = NIL; 239 } 240 cs = ccre; 241 *cs = META; 242 MSYM(cs) = c; 243 ccre = MNEXT(cs); 244 return; 245 246 /* mark the last match sequence as having an alternate */ 247 /* the third byte will contain an offset to jump over the */ 248 /* alternate match in case the first did not fail */ 249 case '|': 250 if (acs != NIL && acs != cs) 251 OCNT(ccre) = ccre - acs; /* make a back pointer */ 252 else 253 OCNT(ccre) = 0; 254 *cs |= ALT; 255 cs = ccre; 256 *cs = OPER; 257 OSYM(cs) = '|'; 258 ccre = ONEXT(cs); 259 acs = cs; /* remember that the pointer is to be filles */ 260 break; 261 262 /* if its not a metasymbol just build a scharacter string */ 263 default: 264 if (cs == NIL || (*cs & STR) == 0) { 265 cs = ccre; 266 *cs = STR; 267 SCNT(cs) = 1; 268 ccre = SSTR(cs); 269 } else 270 SCNT(cs)++; 271 *ccre++ = c; 272 break; 273 } 274 } 275 if (acs != NIL) { 276 do { 277 temp = OCNT(acs); 278 OCNT(acs) = ccre - acs; 279 acs -= temp; 280 } while (temp != 0); 281 acs = NIL; 282 } 283 } 284 /* end of convertre */ 285 286 287 /* 288 * The following routine recognises an irregular expresion 289 * with the following special characters: 290 * 291 * \? - means last match was optional 292 * \a - matches any number of characters 293 * \d - matches any number of spaces and tabs 294 * \p - matches any number of alphanumeric 295 * characters. The 296 * characters matched will be copied into 297 * the area pointed to by 'name'. 298 * \| - alternation 299 * \( \) - grouping used mostly for alternation and 300 * optionality 301 * 302 * The irregular expression must be translated to internal form 303 * prior to calling this routine 304 * 305 * The value returned is the pointer to the first non \a 306 * character matched. 307 */ 308 309 boolean _escaped; /* true if we are currently _escaped */ 310 char *Start; /* start of string */ 311 312 char * 313 expmatch(char *s, char *re, char *mstring) 314 /* s - string to check for a match in */ 315 /* re - a converted irregular expression */ 316 /* mstring - where to put whatever matches a \p */ 317 { 318 char *cs; /* the current symbol */ 319 char *ptr, *s1; /* temporary pointer */ 320 boolean matched; /* a temporary boolean */ 321 322 /* initial conditions */ 323 if (re == NIL) 324 return (NIL); 325 cs = re; 326 matched = FALSE; 327 328 /* loop till expression string is exhausted (or at least pretty tired) */ 329 while (*cs) { 330 switch (*cs & (OPER | STR | META)) { 331 332 /* try to match a string */ 333 case STR: 334 matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); 335 if (matched) { 336 337 /* hoorah it matches */ 338 s += SCNT(cs); 339 cs = SNEXT(cs); 340 } else if (*cs & ALT) { 341 342 /* alternation, skip to next expression */ 343 cs = SNEXT(cs); 344 } else if (*cs & OPT) { 345 346 /* the match is optional */ 347 cs = SNEXT(cs); 348 matched = 1; /* indicate a successful match */ 349 } else { 350 351 /* no match, error return */ 352 return (NIL); 353 } 354 break; 355 356 /* an operator, do something fancy */ 357 case OPER: 358 switch (OSYM(cs)) { 359 360 /* this is an alternation */ 361 case '|': 362 if (matched) 363 364 /* last thing in the alternation was a match, skip ahead */ 365 cs = OPTR(cs); 366 else 367 368 /* no match, keep trying */ 369 cs = ONEXT(cs); 370 break; 371 372 /* this is a grouping, recurse */ 373 case '(': 374 ptr = expmatch (s, ONEXT(cs), mstring); 375 if (ptr != NIL) { 376 377 /* the subexpression matched */ 378 matched = 1; 379 s = ptr; 380 } else if (*cs & ALT) { 381 382 /* alternation, skip to next expression */ 383 matched = 0; 384 } else if (*cs & OPT) { 385 386 /* the match is optional */ 387 matched = 1; /* indicate a successful match */ 388 } else { 389 390 /* no match, error return */ 391 return (NIL); 392 } 393 cs = OPTR(cs); 394 break; 395 } 396 break; 397 398 /* try to match a metasymbol */ 399 case META: 400 switch (MSYM(cs)) { 401 402 /* try to match anything and remember what was matched */ 403 case 'p': 404 /* 405 * This is really the same as trying the match the 406 * remaining parts of the expression to any subset 407 * of the string. 408 */ 409 s1 = s; 410 do { 411 ptr = expmatch (s1, MNEXT(cs), mstring); 412 if (ptr != NIL && s1 != s) { 413 414 /* we have a match, remember the match */ 415 strncpy (mstring, s, s1 - s); 416 mstring[s1 - s] = '\0'; 417 return (ptr); 418 } else if (ptr != NIL && (*cs & OPT)) { 419 420 /* it was aoptional so no match is ok */ 421 return (ptr); 422 } else if (ptr != NIL) { 423 424 /* not optional and we still matched */ 425 return (NIL); 426 } 427 if (!isidchr(*s1)) 428 return (NIL); 429 if (*s1 == '\\') 430 _escaped = _escaped ? FALSE : TRUE; 431 else 432 _escaped = FALSE; 433 } while (*s1++); 434 return (NIL); 435 436 /* try to match anything */ 437 case 'a': 438 /* 439 * This is really the same as trying the match the 440 * remaining parts of the expression to any subset 441 * of the string. 442 */ 443 s1 = s; 444 do { 445 ptr = expmatch (s1, MNEXT(cs), mstring); 446 if (ptr != NIL && s1 != s) { 447 448 /* we have a match */ 449 return (ptr); 450 } else if (ptr != NIL && (*cs & OPT)) { 451 452 /* it was aoptional so no match is ok */ 453 return (ptr); 454 } else if (ptr != NIL) { 455 456 /* not optional and we still matched */ 457 return (NIL); 458 } 459 if (*s1 == '\\') 460 _escaped = _escaped ? FALSE : TRUE; 461 else 462 _escaped = FALSE; 463 } while (*s1++); 464 return (NIL); 465 466 /* fail if we are currently _escaped */ 467 case 'e': 468 if (_escaped) 469 return(NIL); 470 cs = MNEXT(cs); 471 break; 472 473 /* match any number of tabs and spaces */ 474 case 'd': 475 ptr = s; 476 while (*s == ' ' || *s == '\t') 477 s++; 478 if (s != ptr || s == Start) { 479 480 /* match, be happy */ 481 matched = 1; 482 cs = MNEXT(cs); 483 } else if (*s == '\n' || *s == '\0') { 484 485 /* match, be happy */ 486 matched = 1; 487 cs = MNEXT(cs); 488 } else if (*cs & ALT) { 489 490 /* try the next part */ 491 matched = 0; 492 cs = MNEXT(cs); 493 } else if (*cs & OPT) { 494 495 /* doesn't matter */ 496 matched = 1; 497 cs = MNEXT(cs); 498 } else 499 500 /* no match, error return */ 501 return (NIL); 502 break; 503 504 /* check for end of line */ 505 case '$': 506 if (*s == '\0' || *s == '\n') { 507 508 /* match, be happy */ 509 s++; 510 matched = 1; 511 cs = MNEXT(cs); 512 } else if (*cs & ALT) { 513 514 /* try the next part */ 515 matched = 0; 516 cs = MNEXT(cs); 517 } else if (*cs & OPT) { 518 519 /* doesn't matter */ 520 matched = 1; 521 cs = MNEXT(cs); 522 } else 523 524 /* no match, error return */ 525 return (NIL); 526 break; 527 528 /* check for start of line */ 529 case '^': 530 if (s == Start) { 531 532 /* match, be happy */ 533 matched = 1; 534 cs = MNEXT(cs); 535 } else if (*cs & ALT) { 536 537 /* try the next part */ 538 matched = 0; 539 cs = MNEXT(cs); 540 } else if (*cs & OPT) { 541 542 /* doesn't matter */ 543 matched = 1; 544 cs = MNEXT(cs); 545 } else 546 547 /* no match, error return */ 548 return (NIL); 549 break; 550 551 /* end of a subexpression, return success */ 552 case ')': 553 return (s); 554 } 555 break; 556 } 557 } 558 return (s); 559 } 560