1 /*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
7 #include <ctype.h>
8
9 typedef int boolean;
10 #define TRUE 1
11 #define FALSE 0
12 #define NIL 0
13
14 extern boolean l_onecase; /* true if upper and lower equivalent */
15 extern char *l_idchars; /* set of characters legal in identifiers
16 in addition to letters and digits */
17
18 extern char *strchr();
19 static void expconv(void);
20
21 #define isidchr(c) \
22 (isalnum(c) || ((c) != NIL && strchr(l_idchars, (c)) != NIL))
23 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
24
25 /* STRNCMP - like strncmp except that we convert the
26 * first string to lower case before comparing
27 * if l_onecase is set.
28 */
29
30 int
STRNCMP(char * s1,char * s2,int len)31 STRNCMP(char *s1, char *s2, int len)
32 {
33 if (l_onecase) {
34 do
35 if (*s2 - makelower(*s1))
36 return (*s2 - makelower(*s1));
37 else {
38 s2++;
39 s1++;
40 }
41 while (--len);
42 } else {
43 do
44 if (*s2 - *s1)
45 return (*s2 - *s1);
46 else {
47 s2++;
48 s1++;
49 }
50 while (--len);
51 }
52 return(0);
53 }
54
55 /* The following routine converts an irregular expression to
56 * internal format.
57 *
58 * Either meta symbols (\a \d or \p) or character strings or
59 * operations ( alternation or parenthesizing ) can be
60 * specified. Each starts with a descriptor byte. The descriptor
61 * byte has STR set for strings, META set for meta symbols
62 * and OPER set for operations.
63 * The descriptor byte can also have the OPT bit set if the object
64 * defined is optional. Also ALT can be set to indicate an alternation.
65 *
66 * For metasymbols the byte following the descriptor byte identities
67 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
68 * strings the byte after the descriptor is a character count for
69 * the string:
70 *
71 * meta symbols := descriptor
72 * symbol
73 *
74 * strings := descriptor
75 * character count
76 * the string
77 *
78 * operations := descriptor
79 * symbol
80 * character count
81 */
82
83 /*
84 * handy macros for accessing parts of match blocks
85 */
86 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
87 #define MNEXT(A) (A+2) /* character following a metasymbol block */
88
89 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
90 #define OCNT(A) (*(A+2)) /* character count */
91 #define ONEXT(A) (A+3) /* next character after the operation */
92 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
93
94 #define SCNT(A) (*(A+1)) /* byte count of a string */
95 #define SSTR(A) (A+2) /* address of the string */
96 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
97
98 /*
99 * bit flags in the descriptor
100 */
101 #define OPT 1
102 #define STR 2
103 #define META 4
104 #define ALT 8
105 #define OPER 16
106
107 char *ure; /* pointer current position in unconverted exp */
108 char *ccre; /* pointer to current position in converted exp*/
109 char *malloc();
110
111 char *
convexp(char * re)112 convexp(char *re)
113 /* re - unconverted irregular expression */
114 {
115 char *cre; /* pointer to converted regular expression */
116
117 /* allocate room for the converted expression */
118 if (re == NIL)
119 return (NIL);
120 if (*re == '\0')
121 return (NIL);
122 cre = malloc (4 * strlen(re) + 3);
123 ccre = cre;
124 ure = re;
125
126 /* start the conversion with a \a */
127 *cre = META | OPT;
128 MSYM(cre) = 'a';
129 ccre = MNEXT(cre);
130
131 /* start the conversion (its recursive) */
132 expconv ();
133 *ccre = 0;
134 return (cre);
135 }
136
137 static void
expconv(void)138 expconv(void)
139 {
140 char *cs; /* pointer to current symbol in converted exp */
141 char c; /* character being processed */
142 char *acs; /* pinter to last alternate */
143 int temp;
144
145 /* let the conversion begin */
146 acs = NIL;
147 cs = NIL;
148 while (*ure != NIL) {
149 switch (c = *ure++) {
150
151 case '\\':
152 switch (c = *ure++) {
153
154 /* escaped characters are just characters */
155 default:
156 if (cs == NIL || (*cs & STR) == 0) {
157 cs = ccre;
158 *cs = STR;
159 SCNT(cs) = 1;
160 ccre += 2;
161 } else {
162 SCNT(cs)++;
163 }
164 *ccre++ = c;
165 break;
166
167 /* normal(?) metacharacters */
168 case 'a':
169 case 'd':
170 case 'e':
171 case 'p':
172 if (acs != NIL && acs != cs) {
173 do {
174 temp = OCNT(acs);
175 OCNT(acs) = ccre - acs;
176 acs -= temp;
177 } while (temp != 0);
178 acs = NIL;
179 }
180 cs = ccre;
181 *cs = META;
182 MSYM(cs) = c;
183 ccre = MNEXT(cs);
184 break;
185 }
186 break;
187
188 /* just put the symbol in */
189 case '^':
190 case '$':
191 if (acs != NIL && acs != cs) {
192 do {
193 temp = OCNT(acs);
194 OCNT(acs) = ccre - acs;
195 acs -= temp;
196 } while (temp != 0);
197 acs = NIL;
198 }
199 cs = ccre;
200 *cs = META;
201 MSYM(cs) = c;
202 ccre = MNEXT(cs);
203 break;
204
205 /* mark the last match sequence as optional */
206 case '?':
207 if (cs)
208 *cs = *cs | OPT;
209 break;
210
211 /* recurse and define a subexpression */
212 case '(':
213 if (acs != NIL && acs != cs) {
214 do {
215 temp = OCNT(acs);
216 OCNT(acs) = ccre - acs;
217 acs -= temp;
218 } while (temp != 0);
219 acs = NIL;
220 }
221 cs = ccre;
222 *cs = OPER;
223 OSYM(cs) = '(';
224 ccre = ONEXT(cs);
225 expconv ();
226 OCNT(cs) = ccre - cs; /* offset to next symbol */
227 break;
228
229 /* return from a recursion */
230 case ')':
231 if (acs != NIL) {
232 do {
233 temp = OCNT(acs);
234 OCNT(acs) = ccre - acs;
235 acs -= temp;
236 } while (temp != 0);
237 acs = NIL;
238 }
239 cs = ccre;
240 *cs = META;
241 MSYM(cs) = c;
242 ccre = MNEXT(cs);
243 return;
244
245 /* mark the last match sequence as having an alternate */
246 /* the third byte will contain an offset to jump over the */
247 /* alternate match in case the first did not fail */
248 case '|':
249 if (acs != NIL && acs != cs)
250 OCNT(ccre) = ccre - acs; /* make a back pointer */
251 else
252 OCNT(ccre) = 0;
253 *cs |= ALT;
254 cs = ccre;
255 *cs = OPER;
256 OSYM(cs) = '|';
257 ccre = ONEXT(cs);
258 acs = cs; /* remember that the pointer is to be filles */
259 break;
260
261 /* if its not a metasymbol just build a scharacter string */
262 default:
263 if (cs == NIL || (*cs & STR) == 0) {
264 cs = ccre;
265 *cs = STR;
266 SCNT(cs) = 1;
267 ccre = SSTR(cs);
268 } else
269 SCNT(cs)++;
270 *ccre++ = c;
271 break;
272 }
273 }
274 if (acs != NIL) {
275 do {
276 temp = OCNT(acs);
277 OCNT(acs) = ccre - acs;
278 acs -= temp;
279 } while (temp != 0);
280 acs = NIL;
281 }
282 }
283 /* end of convertre */
284
285
286 /*
287 * The following routine recognises an irregular expresion
288 * with the following special characters:
289 *
290 * \? - means last match was optional
291 * \a - matches any number of characters
292 * \d - matches any number of spaces and tabs
293 * \p - matches any number of alphanumeric
294 * characters. The
295 * characters matched will be copied into
296 * the area pointed to by 'name'.
297 * \| - alternation
298 * \( \) - grouping used mostly for alternation and
299 * optionality
300 *
301 * The irregular expression must be translated to internal form
302 * prior to calling this routine
303 *
304 * The value returned is the pointer to the first non \a
305 * character matched.
306 */
307
308 extern boolean _escaped; /* true if we are currently _escaped */
309 extern char *Start; /* start of string */
310
311 char *
expmatch(char * s,char * re,char * mstring)312 expmatch(char *s, char *re, char *mstring)
313 /* s - string to check for a match in */
314 /* re - a converted irregular expression */
315 /* mstring - where to put whatever matches a \p */
316 {
317 char *cs; /* the current symbol */
318 char *ptr, *s1; /* temporary pointer */
319 boolean matched; /* a temporary boolean */
320
321 /* initial conditions */
322 if (re == NIL)
323 return (NIL);
324 cs = re;
325 matched = FALSE;
326
327 /* loop till expression string is exhausted (or at least pretty tired) */
328 while (*cs) {
329 switch (*cs & (OPER | STR | META)) {
330
331 /* try to match a string */
332 case STR:
333 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
334 if (matched) {
335
336 /* hoorah it matches */
337 s += SCNT(cs);
338 cs = SNEXT(cs);
339 } else if (*cs & ALT) {
340
341 /* alternation, skip to next expression */
342 cs = SNEXT(cs);
343 } else if (*cs & OPT) {
344
345 /* the match is optional */
346 cs = SNEXT(cs);
347 matched = 1; /* indicate a successful match */
348 } else {
349
350 /* no match, error return */
351 return (NIL);
352 }
353 break;
354
355 /* an operator, do something fancy */
356 case OPER:
357 switch (OSYM(cs)) {
358
359 /* this is an alternation */
360 case '|':
361 if (matched)
362
363 /* last thing in the alternation was a match, skip ahead */
364 cs = OPTR(cs);
365 else
366
367 /* no match, keep trying */
368 cs = ONEXT(cs);
369 break;
370
371 /* this is a grouping, recurse */
372 case '(':
373 ptr = expmatch (s, ONEXT(cs), mstring);
374 if (ptr != NIL) {
375
376 /* the subexpression matched */
377 matched = 1;
378 s = ptr;
379 } else if (*cs & ALT) {
380
381 /* alternation, skip to next expression */
382 matched = 0;
383 } else if (*cs & OPT) {
384
385 /* the match is optional */
386 matched = 1; /* indicate a successful match */
387 } else {
388
389 /* no match, error return */
390 return (NIL);
391 }
392 cs = OPTR(cs);
393 break;
394 }
395 break;
396
397 /* try to match a metasymbol */
398 case META:
399 switch (MSYM(cs)) {
400
401 /* try to match anything and remember what was matched */
402 case 'p':
403 /*
404 * This is really the same as trying the match the
405 * remaining parts of the expression to any subset
406 * of the string.
407 */
408 s1 = s;
409 do {
410 ptr = expmatch (s1, MNEXT(cs), mstring);
411 if (ptr != NIL && s1 != s) {
412
413 /* we have a match, remember the match */
414 strncpy (mstring, s, s1 - s);
415 mstring[s1 - s] = '\0';
416 return (ptr);
417 } else if (ptr != NIL && (*cs & OPT)) {
418
419 /* it was aoptional so no match is ok */
420 return (ptr);
421 } else if (ptr != NIL) {
422
423 /* not optional and we still matched */
424 return (NIL);
425 }
426 if (!isidchr(*s1))
427 return (NIL);
428 if (*s1 == '\\')
429 _escaped = _escaped ? FALSE : TRUE;
430 else
431 _escaped = FALSE;
432 } while (*s1++);
433 return (NIL);
434
435 /* try to match anything */
436 case 'a':
437 /*
438 * This is really the same as trying the match the
439 * remaining parts of the expression to any subset
440 * of the string.
441 */
442 s1 = s;
443 do {
444 ptr = expmatch (s1, MNEXT(cs), mstring);
445 if (ptr != NIL && s1 != s) {
446
447 /* we have a match */
448 return (ptr);
449 } else if (ptr != NIL && (*cs & OPT)) {
450
451 /* it was aoptional so no match is ok */
452 return (ptr);
453 } else if (ptr != NIL) {
454
455 /* not optional and we still matched */
456 return (NIL);
457 }
458 if (*s1 == '\\')
459 _escaped = _escaped ? FALSE : TRUE;
460 else
461 _escaped = FALSE;
462 } while (*s1++);
463 return (NIL);
464
465 /* fail if we are currently _escaped */
466 case 'e':
467 if (_escaped)
468 return(NIL);
469 cs = MNEXT(cs);
470 break;
471
472 /* match any number of tabs and spaces */
473 case 'd':
474 ptr = s;
475 while (*s == ' ' || *s == '\t')
476 s++;
477 if (s != ptr || s == Start) {
478
479 /* match, be happy */
480 matched = 1;
481 cs = MNEXT(cs);
482 } else if (*s == '\n' || *s == '\0') {
483
484 /* match, be happy */
485 matched = 1;
486 cs = MNEXT(cs);
487 } else if (*cs & ALT) {
488
489 /* try the next part */
490 matched = 0;
491 cs = MNEXT(cs);
492 } else if (*cs & OPT) {
493
494 /* doesn't matter */
495 matched = 1;
496 cs = MNEXT(cs);
497 } else
498
499 /* no match, error return */
500 return (NIL);
501 break;
502
503 /* check for end of line */
504 case '$':
505 if (*s == '\0' || *s == '\n') {
506
507 /* match, be happy */
508 s++;
509 matched = 1;
510 cs = MNEXT(cs);
511 } else if (*cs & ALT) {
512
513 /* try the next part */
514 matched = 0;
515 cs = MNEXT(cs);
516 } else if (*cs & OPT) {
517
518 /* doesn't matter */
519 matched = 1;
520 cs = MNEXT(cs);
521 } else
522
523 /* no match, error return */
524 return (NIL);
525 break;
526
527 /* check for start of line */
528 case '^':
529 if (s == Start) {
530
531 /* match, be happy */
532 matched = 1;
533 cs = MNEXT(cs);
534 } else if (*cs & ALT) {
535
536 /* try the next part */
537 matched = 0;
538 cs = MNEXT(cs);
539 } else if (*cs & OPT) {
540
541 /* doesn't matter */
542 matched = 1;
543 cs = MNEXT(cs);
544 } else
545
546 /* no match, error return */
547 return (NIL);
548 break;
549
550 /* end of a subexpression, return success */
551 case ')':
552 return (s);
553 }
554 break;
555 }
556 }
557 return (s);
558 }
559