1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <string.h>
37
38 #include "extern.h"
39
40 static void expconv(void);
41
42 bool _escaped; /* true if we are currently x_escaped */
43 char *s_start; /* start of string */
44 bool l_onecase; /* true if upper and lower equivalent */
45
46 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
47
48 /* STRNCMP - like strncmp except that we convert the
49 * first string to lower case before comparing
50 * if l_onecase is set.
51 */
52
53 int
STRNCMP(register char * s1,register char * s2,register int len)54 STRNCMP(register char *s1, register char *s2, register int len)
55 {
56 if (l_onecase) {
57 do
58 if (*s2 - makelower(*s1))
59 return (*s2 - makelower(*s1));
60 else {
61 s2++;
62 s1++;
63 }
64 while (--len);
65 } else {
66 do
67 if (*s2 - *s1)
68 return (*s2 - *s1);
69 else {
70 s2++;
71 s1++;
72 }
73 while (--len);
74 }
75 return(0);
76 }
77
78 /* The following routine converts an irregular expression to
79 * internal format.
80 *
81 * Either meta symbols (\a \d or \p) or character strings or
82 * operations ( alternation or parenthesizing ) can be
83 * specified. Each starts with a descriptor byte. The descriptor
84 * byte has STR set for strings, META set for meta symbols
85 * and OPER set for operations.
86 * The descriptor byte can also have the OPT bit set if the object
87 * defined is optional. Also ALT can be set to indicate an alternation.
88 *
89 * For metasymbols the byte following the descriptor byte identities
90 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
91 * strings the byte after the descriptor is a character count for
92 * the string:
93 *
94 * meta symbols := descriptor
95 * symbol
96 *
97 * strings := descriptor
98 * character count
99 * the string
100 *
101 * operations := descriptor
102 * symbol
103 * character count
104 */
105
106 /*
107 * handy macros for accessing parts of match blocks
108 */
109 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
110 #define MNEXT(A) (A+2) /* character following a metasymbol block */
111
112 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
113 #define OCNT(A) (*(A+2)) /* character count */
114 #define ONEXT(A) (A+3) /* next character after the operation */
115 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
116
117 #define SCNT(A) (*(A+1)) /* byte count of a string */
118 #define SSTR(A) (A+2) /* address of the string */
119 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
120
121 /*
122 * bit flags in the descriptor
123 */
124 #define OPT 1
125 #define STR 2
126 #define META 4
127 #define ALT 8
128 #define OPER 16
129
130 static char *ccre; /* pointer to current position in converted exp*/
131 static char *ure; /* pointer current position in unconverted exp */
132
133 /* re: unconverted irregular expression */
134 char *
convexp(char * re)135 convexp(char *re)
136 {
137 register char *cre; /* pointer to converted regular expression */
138
139 /* allocate room for the converted expression */
140 if (re == NULL)
141 return (NULL);
142 if (*re == '\0')
143 return (NULL);
144 cre = malloc(4 * strlen(re) + 3);
145 ccre = cre;
146 ure = re;
147
148 /* start the conversion with a \a */
149 *cre = META | OPT;
150 MSYM(cre) = 'a';
151 ccre = MNEXT(cre);
152
153 /* start the conversion (its recursive) */
154 expconv ();
155 *ccre = 0;
156 return (cre);
157 }
158
159 static void
expconv()160 expconv()
161 {
162 register char *cs; /* pointer to current symbol in converted exp */
163 register char c; /* character being processed */
164 register char *acs; /* pinter to last alternate */
165 register int temp;
166
167 /* let the conversion begin */
168 acs = NULL;
169 cs = NULL;
170 while (*ure) {
171 switch (c = *ure++) {
172
173 case '\\':
174 switch (c = *ure++) {
175
176 /* escaped characters are just characters */
177 default:
178 if (cs == NULL || (*cs & STR) == 0) {
179 cs = ccre;
180 *cs = STR;
181 SCNT(cs) = 1;
182 ccre += 2;
183 } else
184 SCNT(cs)++;
185 *ccre++ = c;
186 break;
187
188 /* normal(?) metacharacters */
189 case 'a':
190 case 'd':
191 case 'e':
192 case 'p':
193 if (acs != NULL && acs != cs) {
194 do {
195 temp = OCNT(acs);
196 OCNT(acs) = ccre - acs;
197 acs -= temp;
198 } while (temp != 0);
199 acs = NULL;
200 }
201 cs = ccre;
202 *cs = META;
203 MSYM(cs) = c;
204 ccre = MNEXT(cs);
205 break;
206 }
207 break;
208
209 /* just put the symbol in */
210 case '^':
211 case '$':
212 if (acs != NULL && acs != cs) {
213 do {
214 temp = OCNT(acs);
215 OCNT(acs) = ccre - acs;
216 acs -= temp;
217 } while (temp != 0);
218 acs = NULL;
219 }
220 cs = ccre;
221 *cs = META;
222 MSYM(cs) = c;
223 ccre = MNEXT(cs);
224 break;
225
226 /* mark the last match sequence as optional */
227 case '?':
228 if (cs)
229 *cs = *cs | OPT;
230 break;
231
232 /* recurse and define a subexpression */
233 case '(':
234 if (acs != NULL && acs != cs) {
235 do {
236 temp = OCNT(acs);
237 OCNT(acs) = ccre - acs;
238 acs -= temp;
239 } while (temp != 0);
240 acs = NULL;
241 }
242 cs = ccre;
243 *cs = OPER;
244 OSYM(cs) = '(';
245 ccre = ONEXT(cs);
246 expconv();
247 OCNT(cs) = ccre - cs; /* offset to next symbol */
248 break;
249
250 /* return from a recursion */
251 case ')':
252 if (acs != NULL) {
253 do {
254 temp = OCNT(acs);
255 OCNT(acs) = ccre - acs;
256 acs -= temp;
257 } while (temp != 0);
258 acs = NULL;
259 }
260 cs = ccre;
261 *cs = META;
262 MSYM(cs) = c;
263 ccre = MNEXT(cs);
264 return;
265
266 /* mark the last match sequence as having an alternate */
267 /* the third byte will contain an offset to jump over the */
268 /* alternate match in case the first did not fail */
269 case '|':
270 if (acs != NULL && acs != cs)
271 OCNT(ccre) = ccre - acs; /* make a back pointer */
272 else
273 OCNT(ccre) = 0;
274 *cs |= ALT;
275 cs = ccre;
276 *cs = OPER;
277 OSYM(cs) = '|';
278 ccre = ONEXT(cs);
279 acs = cs; /* remember that the pointer is to be filles */
280 break;
281
282 /* if its not a metasymbol just build a scharacter string */
283 default:
284 if (cs == NULL || (*cs & STR) == 0) {
285 cs = ccre;
286 *cs = STR;
287 SCNT(cs) = 1;
288 ccre = SSTR(cs);
289 } else
290 SCNT(cs)++;
291 *ccre++ = c;
292 break;
293 }
294 }
295 if (acs != NULL) {
296 do {
297 temp = OCNT(acs);
298 OCNT(acs) = ccre - acs;
299 acs -= temp;
300 } while (temp != 0);
301 acs = NULL;
302 }
303 return;
304 }
305 /* end of convertre */
306
307
308 /*
309 * The following routine recognises an irregular expression
310 * with the following special characters:
311 *
312 * \? - means last match was optional
313 * \a - matches any number of characters
314 * \d - matches any number of spaces and tabs
315 * \p - matches any number of alphanumeric
316 * characters. The
317 * characters matched will be copied into
318 * the area pointed to by 'name'.
319 * \| - alternation
320 * \( \) - grouping used mostly for alternation and
321 * optionality
322 *
323 * The irregular expression must be translated to internal form
324 * prior to calling this routine
325 *
326 * The value returned is the pointer to the first non \a
327 * character matched.
328 */
329
330 /*
331 * s: string to check for a match in
332 * re: a converted irregular expression
333 * mstring: where to put whatever matches a \p
334 */
335 char *
expmatch(register char * s,register char * re,register char * mstring)336 expmatch (register char *s, register char *re, register char *mstring)
337 {
338 register char *cs; /* the current symbol */
339 register char *ptr,*s1; /* temporary pointer */
340 bool matched; /* a temporary bool */
341
342 /* initial conditions */
343 if (re == NULL)
344 return (NULL);
345 cs = re;
346 matched = false;
347
348 /* loop till expression string is exhausted (or at least pretty tired) */
349 while (*cs) {
350 switch (*cs & (OPER | STR | META)) {
351
352 /* try to match a string */
353 case STR:
354 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
355 if (matched) {
356
357 /* hoorah it matches */
358 s += SCNT(cs);
359 cs = SNEXT(cs);
360 } else if (*cs & ALT) {
361
362 /* alternation, skip to next expression */
363 cs = SNEXT(cs);
364 } else if (*cs & OPT) {
365
366 /* the match is optional */
367 cs = SNEXT(cs);
368 matched = 1; /* indicate a successful match */
369 } else {
370
371 /* no match, error return */
372 return (NULL);
373 }
374 break;
375
376 /* an operator, do something fancy */
377 case OPER:
378 switch (OSYM(cs)) {
379
380 /* this is an alternation */
381 case '|':
382 if (matched)
383
384 /* last thing in the alternation was a match, skip ahead */
385 cs = OPTR(cs);
386 else
387
388 /* no match, keep trying */
389 cs = ONEXT(cs);
390 break;
391
392 /* this is a grouping, recurse */
393 case '(':
394 ptr = expmatch(s, ONEXT(cs), mstring);
395 if (ptr != NULL) {
396
397 /* the subexpression matched */
398 matched = 1;
399 s = ptr;
400 } else if (*cs & ALT) {
401
402 /* alternation, skip to next expression */
403 matched = 0;
404 } else if (*cs & OPT) {
405
406 /* the match is optional */
407 matched = 1; /* indicate a successful match */
408 } else {
409
410 /* no match, error return */
411 return (NULL);
412 }
413 cs = OPTR(cs);
414 break;
415 }
416 break;
417
418 /* try to match a metasymbol */
419 case META:
420 switch (MSYM(cs)) {
421
422 /* try to match anything and remember what was matched */
423 case 'p':
424 /*
425 * This is really the same as trying the match the
426 * remaining parts of the expression to any subset
427 * of the string.
428 */
429 s1 = s;
430 do {
431 ptr = expmatch(s1, MNEXT(cs), mstring);
432 if (ptr != NULL && s1 != s) {
433
434 /* we have a match, remember the match */
435 strncpy (mstring, s, s1 - s);
436 mstring[s1 - s] = '\0';
437 return (ptr);
438 } else if (ptr != NULL && (*cs & OPT)) {
439
440 /* it was aoptional so no match is ok */
441 return (ptr);
442 } else if (ptr != NULL) {
443
444 /* not optional and we still matched */
445 return (NULL);
446 }
447 if (!(isalnum(*s1) || *s1 == '_' ||
448 /* C++ destructor */
449 *s1 == '~' ||
450 /* C++ scope operator */
451 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
452 (s1++, true))))
453 return (NULL);
454 if (*s1 == '\\')
455 _escaped = _escaped ? false : true;
456 else
457 _escaped = false;
458 } while (*s1++);
459 return (NULL);
460
461 /* try to match anything */
462 case 'a':
463 /*
464 * This is really the same as trying the match the
465 * remaining parts of the expression to any subset
466 * of the string.
467 */
468 s1 = s;
469 do {
470 ptr = expmatch(s1, MNEXT(cs), mstring);
471 if (ptr != NULL && s1 != s) {
472
473 /* we have a match */
474 return (ptr);
475 } else if (ptr != NULL && (*cs & OPT)) {
476
477 /* it was aoptional so no match is ok */
478 return (ptr);
479 } else if (ptr != NULL) {
480
481 /* not optional and we still matched */
482 return (NULL);
483 }
484 if (*s1 == '\\')
485 _escaped = _escaped ? false : true;
486 else
487 _escaped = false;
488 } while (*s1++);
489 return (NULL);
490
491 /* fail if we are currently _escaped */
492 case 'e':
493 if (_escaped)
494 return(NULL);
495 cs = MNEXT(cs);
496 break;
497
498 /* match any number of tabs and spaces */
499 case 'd':
500 ptr = s;
501 while (*s == ' ' || *s == '\t')
502 s++;
503 if (s != ptr || s == s_start) {
504
505 /* match, be happy */
506 matched = 1;
507 cs = MNEXT(cs);
508 } else if (*s == '\n' || *s == '\0') {
509
510 /* match, be happy */
511 matched = 1;
512 cs = MNEXT(cs);
513 } else if (*cs & ALT) {
514
515 /* try the next part */
516 matched = 0;
517 cs = MNEXT(cs);
518 } else if (*cs & OPT) {
519
520 /* doesn't matter */
521 matched = 1;
522 cs = MNEXT(cs);
523 } else
524
525 /* no match, error return */
526 return (NULL);
527 break;
528
529 /* check for end of line */
530 case '$':
531 if (*s == '\0' || *s == '\n') {
532
533 /* match, be happy */
534 s++;
535 matched = 1;
536 cs = MNEXT(cs);
537 } else if (*cs & ALT) {
538
539 /* try the next part */
540 matched = 0;
541 cs = MNEXT(cs);
542 } else if (*cs & OPT) {
543
544 /* doesn't matter */
545 matched = 1;
546 cs = MNEXT(cs);
547 } else
548
549 /* no match, error return */
550 return (NULL);
551 break;
552
553 /* check for start of line */
554 case '^':
555 if (s == s_start) {
556
557 /* match, be happy */
558 matched = 1;
559 cs = MNEXT(cs);
560 } else if (*cs & ALT) {
561
562 /* try the next part */
563 matched = 0;
564 cs = MNEXT(cs);
565 } else if (*cs & OPT) {
566
567 /* doesn't matter */
568 matched = 1;
569 cs = MNEXT(cs);
570 } else
571
572 /* no match, error return */
573 return (NULL);
574 break;
575
576 /* end of a subexpression, return success */
577 case ')':
578 return (s);
579 }
580 break;
581 }
582 }
583 return (s);
584 }
585