1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <ctype.h> 34 #include <stdlib.h> 35 #include <stdbool.h> 36 #include <string.h> 37 38 #include "extern.h" 39 40 static void expconv(void); 41 42 bool _escaped; /* true if we are currently x_escaped */ 43 char *s_start; /* start of string */ 44 bool l_onecase; /* true if upper and lower equivalent */ 45 46 #define makelower(c) (isupper((c)) ? tolower((c)) : (c)) 47 48 /* STRNCMP - like strncmp except that we convert the 49 * first string to lower case before comparing 50 * if l_onecase is set. 51 */ 52 53 int 54 STRNCMP(register char *s1, register char *s2, register int len) 55 { 56 if (l_onecase) { 57 do 58 if (*s2 - makelower(*s1)) 59 return (*s2 - makelower(*s1)); 60 else { 61 s2++; 62 s1++; 63 } 64 while (--len); 65 } else { 66 do 67 if (*s2 - *s1) 68 return (*s2 - *s1); 69 else { 70 s2++; 71 s1++; 72 } 73 while (--len); 74 } 75 return(0); 76 } 77 78 /* The following routine converts an irregular expression to 79 * internal format. 80 * 81 * Either meta symbols (\a \d or \p) or character strings or 82 * operations ( alternation or parenthesizing ) can be 83 * specified. Each starts with a descriptor byte. The descriptor 84 * byte has STR set for strings, META set for meta symbols 85 * and OPER set for operations. 86 * The descriptor byte can also have the OPT bit set if the object 87 * defined is optional. Also ALT can be set to indicate an alternation. 88 * 89 * For metasymbols the byte following the descriptor byte identities 90 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For 91 * strings the byte after the descriptor is a character count for 92 * the string: 93 * 94 * meta symbols := descriptor 95 * symbol 96 * 97 * strings := descriptor 98 * character count 99 * the string 100 * 101 * operations := descriptor 102 * symbol 103 * character count 104 */ 105 106 /* 107 * handy macros for accessing parts of match blocks 108 */ 109 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ 110 #define MNEXT(A) (A+2) /* character following a metasymbol block */ 111 112 #define OSYM(A) (*(A+1)) /* symbol in an operation block */ 113 #define OCNT(A) (*(A+2)) /* character count */ 114 #define ONEXT(A) (A+3) /* next character after the operation */ 115 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ 116 117 #define SCNT(A) (*(A+1)) /* byte count of a string */ 118 #define SSTR(A) (A+2) /* address of the string */ 119 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */ 120 121 /* 122 * bit flags in the descriptor 123 */ 124 #define OPT 1 125 #define STR 2 126 #define META 4 127 #define ALT 8 128 #define OPER 16 129 130 static char *ccre; /* pointer to current position in converted exp*/ 131 static char *ure; /* pointer current position in unconverted exp */ 132 133 /* re: unconverted irregular expression */ 134 char * 135 convexp(char *re) 136 { 137 register char *cre; /* pointer to converted regular expression */ 138 139 /* allocate room for the converted expression */ 140 if (re == NULL) 141 return (NULL); 142 if (*re == '\0') 143 return (NULL); 144 cre = malloc(4 * strlen(re) + 3); 145 ccre = cre; 146 ure = re; 147 148 /* start the conversion with a \a */ 149 *cre = META | OPT; 150 MSYM(cre) = 'a'; 151 ccre = MNEXT(cre); 152 153 /* start the conversion (its recursive) */ 154 expconv (); 155 *ccre = 0; 156 return (cre); 157 } 158 159 static void 160 expconv() 161 { 162 register char *cs; /* pointer to current symbol in converted exp */ 163 register char c; /* character being processed */ 164 register char *acs; /* pinter to last alternate */ 165 register int temp; 166 167 /* let the conversion begin */ 168 acs = NULL; 169 cs = NULL; 170 while (*ure) { 171 switch (c = *ure++) { 172 173 case '\\': 174 switch (c = *ure++) { 175 176 /* escaped characters are just characters */ 177 default: 178 if (cs == NULL || (*cs & STR) == 0) { 179 cs = ccre; 180 *cs = STR; 181 SCNT(cs) = 1; 182 ccre += 2; 183 } else 184 SCNT(cs)++; 185 *ccre++ = c; 186 break; 187 188 /* normal(?) metacharacters */ 189 case 'a': 190 case 'd': 191 case 'e': 192 case 'p': 193 if (acs != NULL && acs != cs) { 194 do { 195 temp = OCNT(acs); 196 OCNT(acs) = ccre - acs; 197 acs -= temp; 198 } while (temp != 0); 199 acs = NULL; 200 } 201 cs = ccre; 202 *cs = META; 203 MSYM(cs) = c; 204 ccre = MNEXT(cs); 205 break; 206 } 207 break; 208 209 /* just put the symbol in */ 210 case '^': 211 case '$': 212 if (acs != NULL && acs != cs) { 213 do { 214 temp = OCNT(acs); 215 OCNT(acs) = ccre - acs; 216 acs -= temp; 217 } while (temp != 0); 218 acs = NULL; 219 } 220 cs = ccre; 221 *cs = META; 222 MSYM(cs) = c; 223 ccre = MNEXT(cs); 224 break; 225 226 /* mark the last match sequence as optional */ 227 case '?': 228 if (cs) 229 *cs = *cs | OPT; 230 break; 231 232 /* recurse and define a subexpression */ 233 case '(': 234 if (acs != NULL && acs != cs) { 235 do { 236 temp = OCNT(acs); 237 OCNT(acs) = ccre - acs; 238 acs -= temp; 239 } while (temp != 0); 240 acs = NULL; 241 } 242 cs = ccre; 243 *cs = OPER; 244 OSYM(cs) = '('; 245 ccre = ONEXT(cs); 246 expconv(); 247 OCNT(cs) = ccre - cs; /* offset to next symbol */ 248 break; 249 250 /* return from a recursion */ 251 case ')': 252 if (acs != NULL) { 253 do { 254 temp = OCNT(acs); 255 OCNT(acs) = ccre - acs; 256 acs -= temp; 257 } while (temp != 0); 258 acs = NULL; 259 } 260 cs = ccre; 261 *cs = META; 262 MSYM(cs) = c; 263 ccre = MNEXT(cs); 264 return; 265 266 /* mark the last match sequence as having an alternate */ 267 /* the third byte will contain an offset to jump over the */ 268 /* alternate match in case the first did not fail */ 269 case '|': 270 if (acs != NULL && acs != cs) 271 OCNT(ccre) = ccre - acs; /* make a back pointer */ 272 else 273 OCNT(ccre) = 0; 274 *cs |= ALT; 275 cs = ccre; 276 *cs = OPER; 277 OSYM(cs) = '|'; 278 ccre = ONEXT(cs); 279 acs = cs; /* remember that the pointer is to be filles */ 280 break; 281 282 /* if its not a metasymbol just build a scharacter string */ 283 default: 284 if (cs == NULL || (*cs & STR) == 0) { 285 cs = ccre; 286 *cs = STR; 287 SCNT(cs) = 1; 288 ccre = SSTR(cs); 289 } else 290 SCNT(cs)++; 291 *ccre++ = c; 292 break; 293 } 294 } 295 if (acs != NULL) { 296 do { 297 temp = OCNT(acs); 298 OCNT(acs) = ccre - acs; 299 acs -= temp; 300 } while (temp != 0); 301 acs = NULL; 302 } 303 return; 304 } 305 /* end of convertre */ 306 307 308 /* 309 * The following routine recognises an irregular expression 310 * with the following special characters: 311 * 312 * \? - means last match was optional 313 * \a - matches any number of characters 314 * \d - matches any number of spaces and tabs 315 * \p - matches any number of alphanumeric 316 * characters. The 317 * characters matched will be copied into 318 * the area pointed to by 'name'. 319 * \| - alternation 320 * \( \) - grouping used mostly for alternation and 321 * optionality 322 * 323 * The irregular expression must be translated to internal form 324 * prior to calling this routine 325 * 326 * The value returned is the pointer to the first non \a 327 * character matched. 328 */ 329 330 /* 331 * s: string to check for a match in 332 * re: a converted irregular expression 333 * mstring: where to put whatever matches a \p 334 */ 335 char * 336 expmatch (register char *s, register char *re, register char *mstring) 337 { 338 register char *cs; /* the current symbol */ 339 register char *ptr,*s1; /* temporary pointer */ 340 bool matched; /* a temporary bool */ 341 342 /* initial conditions */ 343 if (re == NULL) 344 return (NULL); 345 cs = re; 346 matched = false; 347 348 /* loop till expression string is exhausted (or at least pretty tired) */ 349 while (*cs) { 350 switch (*cs & (OPER | STR | META)) { 351 352 /* try to match a string */ 353 case STR: 354 matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); 355 if (matched) { 356 357 /* hoorah it matches */ 358 s += SCNT(cs); 359 cs = SNEXT(cs); 360 } else if (*cs & ALT) { 361 362 /* alternation, skip to next expression */ 363 cs = SNEXT(cs); 364 } else if (*cs & OPT) { 365 366 /* the match is optional */ 367 cs = SNEXT(cs); 368 matched = 1; /* indicate a successful match */ 369 } else { 370 371 /* no match, error return */ 372 return (NULL); 373 } 374 break; 375 376 /* an operator, do something fancy */ 377 case OPER: 378 switch (OSYM(cs)) { 379 380 /* this is an alternation */ 381 case '|': 382 if (matched) 383 384 /* last thing in the alternation was a match, skip ahead */ 385 cs = OPTR(cs); 386 else 387 388 /* no match, keep trying */ 389 cs = ONEXT(cs); 390 break; 391 392 /* this is a grouping, recurse */ 393 case '(': 394 ptr = expmatch(s, ONEXT(cs), mstring); 395 if (ptr != NULL) { 396 397 /* the subexpression matched */ 398 matched = 1; 399 s = ptr; 400 } else if (*cs & ALT) { 401 402 /* alternation, skip to next expression */ 403 matched = 0; 404 } else if (*cs & OPT) { 405 406 /* the match is optional */ 407 matched = 1; /* indicate a successful match */ 408 } else { 409 410 /* no match, error return */ 411 return (NULL); 412 } 413 cs = OPTR(cs); 414 break; 415 } 416 break; 417 418 /* try to match a metasymbol */ 419 case META: 420 switch (MSYM(cs)) { 421 422 /* try to match anything and remember what was matched */ 423 case 'p': 424 /* 425 * This is really the same as trying the match the 426 * remaining parts of the expression to any subset 427 * of the string. 428 */ 429 s1 = s; 430 do { 431 ptr = expmatch(s1, MNEXT(cs), mstring); 432 if (ptr != NULL && s1 != s) { 433 434 /* we have a match, remember the match */ 435 strncpy (mstring, s, s1 - s); 436 mstring[s1 - s] = '\0'; 437 return (ptr); 438 } else if (ptr != NULL && (*cs & OPT)) { 439 440 /* it was aoptional so no match is ok */ 441 return (ptr); 442 } else if (ptr != NULL) { 443 444 /* not optional and we still matched */ 445 return (NULL); 446 } 447 if (!(isalnum(*s1) || *s1 == '_' || 448 /* C++ destructor */ 449 *s1 == '~' || 450 /* C++ scope operator */ 451 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' && 452 (s1++, true)))) 453 return (NULL); 454 if (*s1 == '\\') 455 _escaped = _escaped ? false : true; 456 else 457 _escaped = false; 458 } while (*s1++); 459 return (NULL); 460 461 /* try to match anything */ 462 case 'a': 463 /* 464 * This is really the same as trying the match the 465 * remaining parts of the expression to any subset 466 * of the string. 467 */ 468 s1 = s; 469 do { 470 ptr = expmatch(s1, MNEXT(cs), mstring); 471 if (ptr != NULL && s1 != s) { 472 473 /* we have a match */ 474 return (ptr); 475 } else if (ptr != NULL && (*cs & OPT)) { 476 477 /* it was aoptional so no match is ok */ 478 return (ptr); 479 } else if (ptr != NULL) { 480 481 /* not optional and we still matched */ 482 return (NULL); 483 } 484 if (*s1 == '\\') 485 _escaped = _escaped ? false : true; 486 else 487 _escaped = false; 488 } while (*s1++); 489 return (NULL); 490 491 /* fail if we are currently _escaped */ 492 case 'e': 493 if (_escaped) 494 return(NULL); 495 cs = MNEXT(cs); 496 break; 497 498 /* match any number of tabs and spaces */ 499 case 'd': 500 ptr = s; 501 while (*s == ' ' || *s == '\t') 502 s++; 503 if (s != ptr || s == s_start) { 504 505 /* match, be happy */ 506 matched = 1; 507 cs = MNEXT(cs); 508 } else if (*s == '\n' || *s == '\0') { 509 510 /* match, be happy */ 511 matched = 1; 512 cs = MNEXT(cs); 513 } else if (*cs & ALT) { 514 515 /* try the next part */ 516 matched = 0; 517 cs = MNEXT(cs); 518 } else if (*cs & OPT) { 519 520 /* doesn't matter */ 521 matched = 1; 522 cs = MNEXT(cs); 523 } else 524 525 /* no match, error return */ 526 return (NULL); 527 break; 528 529 /* check for end of line */ 530 case '$': 531 if (*s == '\0' || *s == '\n') { 532 533 /* match, be happy */ 534 s++; 535 matched = 1; 536 cs = MNEXT(cs); 537 } else if (*cs & ALT) { 538 539 /* try the next part */ 540 matched = 0; 541 cs = MNEXT(cs); 542 } else if (*cs & OPT) { 543 544 /* doesn't matter */ 545 matched = 1; 546 cs = MNEXT(cs); 547 } else 548 549 /* no match, error return */ 550 return (NULL); 551 break; 552 553 /* check for start of line */ 554 case '^': 555 if (s == s_start) { 556 557 /* match, be happy */ 558 matched = 1; 559 cs = MNEXT(cs); 560 } else if (*cs & ALT) { 561 562 /* try the next part */ 563 matched = 0; 564 cs = MNEXT(cs); 565 } else if (*cs & OPT) { 566 567 /* doesn't matter */ 568 matched = 1; 569 cs = MNEXT(cs); 570 } else 571 572 /* no match, error return */ 573 return (NULL); 574 break; 575 576 /* end of a subexpression, return success */ 577 case ')': 578 return (s); 579 } 580 break; 581 } 582 } 583 return (s); 584 } 585