1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2012 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23
24 /*
25 * D. G. Korn
26 * G. S. Fowler
27 * AT&T Research
28 *
29 * match shell file patterns -- derived from Bourne and Korn shell gmatch()
30 *
31 * sh pattern egrep RE description
32 * ---------- -------- -----------
33 * * .* 0 or more chars
34 * ? . any single char
35 * [.] [.] char class
36 * [!.] [^.] negated char class
37 * [[:.:]] [[:.:]] ctype class
38 * [[=.=]] [[=.=]] equivalence class
39 * [[...]] [[...]] collation element
40 * *(.) (.)* 0 or more of
41 * +(.) (.)+ 1 or more of
42 * ?(.) (.)? 0 or 1 of
43 * (.) (.) 1 of
44 * @(.) (.) 1 of
45 * a|b a|b a or b
46 * \# () subgroup back reference [1-9]
47 * a&b a and b
48 * !(.) none of
49 *
50 * \ used to escape metacharacters
51 *
52 * *, ?, (, |, &, ), [, \ must be \'d outside of [...]
53 * only ] must be \'d inside [...]
54 *
55 * BUG: unbalanced ) terminates top level pattern
56 */
57
58 #include <ast.h>
59 #include <ctype.h>
60 #include <hashkey.h>
61
62 #ifndef isblank
63 #define isblank(x) ((x)==' '||(x)=='\t')
64 #endif
65
66 #ifndef isgraph
67 #define isgraph(x) (isprint(x)&&!isblank(x))
68 #endif
69
70 #define MAXGROUP 10
71
72 typedef struct
73 {
74 char* beg[MAXGROUP];
75 char* end[MAXGROUP];
76 char* next_s;
77 short groups;
78 } Group_t;
79
80 typedef struct
81 {
82 Group_t current;
83 Group_t best;
84 char* last_s;
85 char* next_p;
86 } Match_t;
87
88 #define mbgetchar(p) (*p++)
89
90 #ifndef isxdigit
91 #define isxdigit(c) ((c)>='0'&&(c)<='9'||(c)>='a'&&(c)<='f'||(c)>='A'&&(c)<='F')
92 #endif
93
94 #define getsource(s,e) (((s)>=(e))?0:mbgetchar(s))
95
96 #define COLL_MAX 3
97
98 /*
99 * gobble chars up to <sub> or ) keeping track of (...) and [...]
100 * sub must be one of { '|', '&', 0 }
101 * 0 returned if s runs out
102 */
103
104 static char*
gobble(Match_t * mp,register char * s,register int sub,int * g,int clear)105 gobble(Match_t* mp, register char* s, register int sub, int* g, int clear)
106 {
107 register int p = 0;
108 register char* b = 0;
109 int c = 0;
110 int n;
111
112 for (;;)
113 switch (mbgetchar(s))
114 {
115 case '\\':
116 if (mbgetchar(s))
117 break;
118 /*FALLTHROUGH*/
119 case 0:
120 return 0;
121 case '[':
122 if (!b)
123 {
124 if (*s == '!' || *s == '^')
125 mbgetchar(s);
126 b = s;
127 }
128 else if (*s == '.' || *s == '=' || *s == ':')
129 c = *s;
130 break;
131 case ']':
132 if (b)
133 {
134 if (*(s - 2) == c)
135 c = 0;
136 else if (b != (s - 1))
137 b = 0;
138 }
139 break;
140 case '(':
141 if (!b)
142 {
143 p++;
144 n = (*g)++;
145 if (clear)
146 {
147 if (!sub)
148 n++;
149 if (n < MAXGROUP)
150 mp->current.beg[n] = mp->current.end[n] = 0;
151 }
152 }
153 break;
154 case ')':
155 if (!b && p-- <= 0)
156 return sub ? 0 : s;
157 break;
158 case '|':
159 if (!b && !p && sub == '|')
160 return s;
161 break;
162 }
163 }
164
165 static int grpmatch(Match_t*, int, char*, register char*, char*, int);
166
167 /*
168 * match a single pattern
169 * e is the end (0) of the substring in s
170 * r marks the start of a repeated subgroup pattern
171 */
172
173 static int
onematch(Match_t * mp,int g,char * s,char * p,char * e,char * r,int flags)174 onematch(Match_t* mp, int g, char* s, char* p, char* e, char* r, int flags)
175 {
176 register int pc;
177 register int sc;
178 register int n;
179 register int icase;
180 char* olds;
181 char* oldp;
182
183 icase = flags & STR_ICASE;
184 do
185 {
186 olds = s;
187 sc = getsource(s, e);
188 if (icase && isupper(sc))
189 sc = tolower(sc);
190 oldp = p;
191 switch (pc = mbgetchar(p))
192 {
193 case '(':
194 case '*':
195 case '?':
196 case '+':
197 case '@':
198 case '!':
199 if (pc == '(' || *p == '(')
200 {
201 char* subp;
202 int oldg;
203
204 s = olds;
205 subp = p + (pc != '(');
206 oldg = g;
207 n = ++g;
208 if (g < MAXGROUP && (!r || g > mp->current.groups))
209 mp->current.beg[g] = mp->current.end[g] = 0;
210 if (!(p = gobble(mp, subp, 0, &g, !r)))
211 return 0;
212 if (pc == '*' || pc == '?' || pc == '+' && oldp == r)
213 {
214 if (onematch(mp, g, s, p, e, NiL, flags))
215 return 1;
216 if (!sc || !getsource(s, e))
217 {
218 mp->current.groups = oldg;
219 return 0;
220 }
221 }
222 if (pc == '*' || pc == '+')
223 {
224 p = oldp;
225 sc = n - 1;
226 }
227 else
228 sc = g;
229 pc = (pc != '!');
230 do
231 {
232 if (grpmatch(mp, n, olds, subp, s, flags) == pc)
233 {
234 if (n < MAXGROUP)
235 {
236 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
237 mp->current.beg[n] = olds;
238 if (s > mp->current.end[n])
239 mp->current.end[n] = s;
240 }
241 if (onematch(mp, sc, s, p, e, oldp, flags))
242 {
243 if (p == oldp && n < MAXGROUP)
244 {
245 if (!mp->current.beg[n] || mp->current.beg[n] > olds)
246 mp->current.beg[n] = olds;
247 if (s > mp->current.end[n])
248 mp->current.end[n] = s;
249 }
250 return 1;
251 }
252 }
253 } while (s < e && mbgetchar(s));
254 mp->current.groups = oldg;
255 return 0;
256 }
257 else if (pc == '*')
258 {
259 /*
260 * several stars are the same as one
261 */
262
263 while (*p == '*' && *(p + 1) != '(')
264 p++;
265 oldp = p;
266 switch (pc = mbgetchar(p))
267 {
268 case '@':
269 case '!':
270 case '+':
271 n = *p == '(';
272 break;
273 case '(':
274 case '[':
275 case '?':
276 case '*':
277 n = 1;
278 break;
279 case 0:
280 case '|':
281 case '&':
282 case ')':
283 mp->current.next_s = (flags & STR_MAXIMAL) ? e : olds;
284 mp->next_p = oldp;
285 mp->current.groups = g;
286 if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && mp->current.next_s > mp->best.next_s || !(flags & STR_MAXIMAL) && mp->current.next_s < mp->best.next_s))
287 mp->best = mp->current;
288 return 1;
289 case '\\':
290 if (!(pc = mbgetchar(p)))
291 return 0;
292 if (pc >= '0' && pc <= '9')
293 {
294 n = pc - '0';
295 if (n <= g && mp->current.beg[n])
296 pc = *mp->current.beg[n];
297 }
298 /*FALLTHROUGH*/
299 default:
300 if (icase && isupper(pc))
301 pc = tolower(pc);
302 n = 0;
303 break;
304 }
305 p = oldp;
306 for (;;)
307 {
308 if ((n || pc == sc) && onematch(mp, g, olds, p, e, NiL, flags))
309 return 1;
310 if (!sc)
311 return 0;
312 olds = s;
313 sc = getsource(s, e);
314 if ((flags & STR_ICASE) && isupper(sc))
315 sc = tolower(sc);
316 }
317 }
318 else if (pc != '?' && pc != sc)
319 return 0;
320 break;
321 case 0:
322 if (!(flags & STR_MAXIMAL))
323 sc = 0;
324 /*FALLTHROUGH*/
325 case '|':
326 case '&':
327 case ')':
328 if (!sc)
329 {
330 mp->current.next_s = olds;
331 mp->next_p = oldp;
332 mp->current.groups = g;
333 }
334 if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && olds > mp->best.next_s || !(flags & STR_MAXIMAL) && olds < mp->best.next_s))
335 {
336 mp->best = mp->current;
337 mp->best.next_s = olds;
338 mp->best.groups = g;
339 }
340 return !sc;
341 case '[':
342 {
343 /*UNDENT...*/
344
345 int invert;
346 int x;
347 int ok = 0;
348 char* range;
349
350 if (!sc)
351 return 0;
352 range = 0;
353 n = 0;
354 if (invert = *p == '!' || *p == '^')
355 p++;
356 for (;;)
357 {
358 oldp = p;
359 if (!(pc = mbgetchar(p)))
360 return 0;
361 else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.'))
362 {
363 x = 0;
364 n = mbgetchar(p);
365 oldp = p;
366 for (;;)
367 {
368 if (!(pc = mbgetchar(p)))
369 return 0;
370 if (pc == n && *p == ']')
371 break;
372 x++;
373 }
374 mbgetchar(p);
375 if (ok)
376 /*NOP*/;
377 else if (n == ':')
378 {
379 switch (HASHNKEY5(x, oldp[0], oldp[1], oldp[2], oldp[3], oldp[4]))
380 {
381 case HASHNKEY5(5,'a','l','n','u','m'):
382 if (isalnum(sc))
383 ok = 1;
384 break;
385 case HASHNKEY5(5,'a','l','p','h','a'):
386 if (isalpha(sc))
387 ok = 1;
388 break;
389 case HASHNKEY5(5,'b','l','a','n','k'):
390 if (isblank(sc))
391 ok = 1;
392 break;
393 case HASHNKEY5(5,'c','n','t','r','l'):
394 if (iscntrl(sc))
395 ok = 1;
396 break;
397 case HASHNKEY5(5,'d','i','g','i','t'):
398 if (isdigit(sc))
399 ok = 1;
400 break;
401 case HASHNKEY5(5,'g','r','a','p','h'):
402 if (isgraph(sc))
403 ok = 1;
404 break;
405 case HASHNKEY5(5,'l','o','w','e','r'):
406 if (islower(sc))
407 ok = 1;
408 break;
409 case HASHNKEY5(5,'p','r','i','n','t'):
410 if (isprint(sc))
411 ok = 1;
412 break;
413 case HASHNKEY5(5,'p','u','n','c','t'):
414 if (ispunct(sc))
415 ok = 1;
416 break;
417 case HASHNKEY5(5,'s','p','a','c','e'):
418 if (isspace(sc))
419 ok = 1;
420 break;
421 case HASHNKEY5(5,'u','p','p','e','r'):
422 if (icase ? islower(sc) : isupper(sc))
423 ok = 1;
424 break;
425 case HASHNKEY5(6,'x','d','i','g','i'):
426 if (oldp[5] == 't' && isxdigit(sc))
427 ok = 1;
428 break;
429 }
430 }
431 else if (range)
432 goto getrange;
433 else if (*p == '-' && *(p + 1) != ']')
434 {
435 mbgetchar(p);
436 range = oldp;
437 }
438 else if (isalpha(*oldp) && isalpha(*olds) && tolower(*oldp) == tolower(*olds) || sc == mbgetchar(oldp))
439 ok = 1;
440 n = 1;
441 }
442 else if (pc == ']' && n)
443 {
444 if (ok != invert)
445 break;
446 return 0;
447 }
448 else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p))))
449 return 0;
450 else if (ok)
451 /*NOP*/;
452 else if (range)
453 {
454 getrange:
455 if (icase && isupper(pc))
456 pc = tolower(pc);
457 x = mbgetchar(range);
458 if (icase && isupper(x))
459 x = tolower(x);
460 if (sc == x || sc == pc || sc > x && sc < pc)
461 ok = 1;
462 if (*p == '-' && *(p + 1) != ']')
463 {
464 mbgetchar(p);
465 range = oldp;
466 }
467 else
468 range = 0;
469 n = 1;
470 }
471 else if (*p == '-' && *(p + 1) != ']')
472 {
473 mbgetchar(p);
474 range = oldp;
475 n = 1;
476 }
477 else
478 {
479 if (icase && isupper(pc))
480 pc = tolower(pc);
481 if (sc == pc)
482 ok = 1;
483 n = pc;
484 }
485 }
486
487 /*...INDENT*/
488 }
489 break;
490 case '\\':
491 if (!(pc = mbgetchar(p)))
492 return 0;
493 if (pc >= '0' && pc <= '9')
494 {
495 n = pc - '0';
496 if (n <= g && (oldp = mp->current.beg[n]))
497 {
498 while (oldp < mp->current.end[n])
499 if (!*olds || *olds++ != *oldp++)
500 return 0;
501 s = olds;
502 break;
503 }
504 }
505 /*FALLTHROUGH*/
506 default:
507 if (icase && isupper(pc))
508 pc = tolower(pc);
509 if (pc != sc)
510 return 0;
511 break;
512 }
513 } while (sc);
514 return 0;
515 }
516
517 /*
518 * match any pattern in a group
519 * | and & subgroups are parsed here
520 */
521
522 static int
grpmatch(Match_t * mp,int g,char * s,register char * p,char * e,int flags)523 grpmatch(Match_t* mp, int g, char* s, register char* p, char* e, int flags)
524 {
525 register char* a;
526
527 do
528 {
529 for (a = p; onematch(mp, g, s, a, e, NiL, flags); a++)
530 if (*(a = mp->next_p) != '&')
531 return 1;
532 } while (p = gobble(mp, p, '|', &g, 1));
533 return 0;
534 }
535
536 /*
537 * subgroup match
538 * 0 returned if no match
539 * otherwise number of subgroups matched returned
540 * match group begin offsets are even elements of sub
541 * match group end offsets are odd elements of sub
542 * the matched string is from s+sub[0] up to but not
543 * including s+sub[1]
544 */
545
546 int
strgrpmatch(const char * b,const char * p,ssize_t * sub,int n,int flags)547 strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, int flags)
548 {
549 register int i;
550 register char* s;
551 char* e;
552 Match_t match;
553
554 s = (char*)b;
555 match.last_s = e = s + strlen(s);
556 for (;;)
557 {
558 match.best.next_s = 0;
559 match.current.groups = 0;
560 if ((i = grpmatch(&match, 0, s, (char*)p, e, flags)) || match.best.next_s)
561 {
562 if (!i)
563 match.current = match.best;
564 match.current.groups++;
565 match.current.end[0] = match.current.next_s;
566 break;
567 }
568 if ((flags & STR_LEFT) || s >= e)
569 return 0;
570 s++;
571 }
572 if ((flags & STR_RIGHT) && match.current.next_s != e)
573 return 0;
574 if (!sub)
575 return 1;
576 match.current.beg[0] = s;
577 s = (char*)b;
578 if (n > match.current.groups)
579 n = match.current.groups;
580 for (i = 0; i < n; i++)
581 {
582 sub[i * 2] = match.current.end[i] ? match.current.beg[i] - s : 0;
583 sub[i * 2 + 1] = match.current.end[i] ? match.current.end[i] - s : 0;
584 }
585 return n;
586 }
587
588 /*
589 * compare the string s with the shell pattern p
590 * returns 1 for match 0 otherwise
591 */
592
593 int
strmatch(const char * s,const char * p)594 strmatch(const char* s, const char* p)
595 {
596 return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
597 }
598