1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin * *
3da2e3ebdSchin * This software is part of the ast package *
4*3e14f97fSRoger A. Faulkner * Copyright (c) 1985-2010 AT&T Intellectual Property *
5da2e3ebdSchin * and is licensed under the *
6da2e3ebdSchin * Common Public License, Version 1.0 *
77c2fbfb3SApril Chin * by AT&T Intellectual Property *
8da2e3ebdSchin * *
9da2e3ebdSchin * A copy of the License is available at *
10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt *
11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12da2e3ebdSchin * *
13da2e3ebdSchin * Information and Software Systems Research *
14da2e3ebdSchin * AT&T Research *
15da2e3ebdSchin * Florham Park NJ *
16da2e3ebdSchin * *
17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> *
18da2e3ebdSchin * David Korn <dgk@research.att.com> *
19da2e3ebdSchin * Phong Vo <kpv@research.att.com> *
20da2e3ebdSchin * *
21da2e3ebdSchin ***********************************************************************/
22da2e3ebdSchin #pragma prototyped
/*
 * regcmp() and regex() implemented on top of regcomp() and regexec()
 */
26da2e3ebdSchin
27da2e3ebdSchin #include <ast.h>
28da2e3ebdSchin #include <libgen.h>
29da2e3ebdSchin #include <regex.h>
30da2e3ebdSchin #include <align.h>
31da2e3ebdSchin
32da2e3ebdSchin #define INC (2*1024)
33da2e3ebdSchin #define TOT (16*1024)
34da2e3ebdSchin #define SUB 10
35da2e3ebdSchin
36da2e3ebdSchin typedef struct
37da2e3ebdSchin {
38da2e3ebdSchin char* cur;
39da2e3ebdSchin regex_t re;
40da2e3ebdSchin unsigned char sub[SUB];
41da2e3ebdSchin int nsub;
42da2e3ebdSchin size_t size;
43da2e3ebdSchin char buf[ALIGN_BOUND2];
44da2e3ebdSchin } Regex_t;
45da2e3ebdSchin
46da2e3ebdSchin __DEFINE__(char*, __loc1, 0);
47da2e3ebdSchin
48da2e3ebdSchin static void*
block(void * handle,void * data,size_t size)49da2e3ebdSchin block(void* handle, void* data, size_t size)
50da2e3ebdSchin {
51da2e3ebdSchin register Regex_t* re = (Regex_t*)handle;
52da2e3ebdSchin
53da2e3ebdSchin if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54da2e3ebdSchin return 0;
55da2e3ebdSchin data = (void*)re->cur;
56da2e3ebdSchin re->cur += size;
57da2e3ebdSchin return data;
58da2e3ebdSchin }
59da2e3ebdSchin
60da2e3ebdSchin char*
regcmp(const char * pattern,...)61da2e3ebdSchin regcmp(const char* pattern, ...)
62da2e3ebdSchin {
63da2e3ebdSchin register char* s;
64da2e3ebdSchin register Regex_t* re;
65da2e3ebdSchin register size_t n;
66da2e3ebdSchin register int c;
67da2e3ebdSchin register int p;
68da2e3ebdSchin int b;
69da2e3ebdSchin int i;
70da2e3ebdSchin int j;
71da2e3ebdSchin int nsub;
72da2e3ebdSchin register Sfio_t* sp;
73da2e3ebdSchin unsigned char paren[128];
74da2e3ebdSchin unsigned char sub[SUB];
75da2e3ebdSchin va_list ap;
76da2e3ebdSchin
77da2e3ebdSchin va_start(ap, pattern);
78da2e3ebdSchin if (!pattern || !*pattern || !(sp = sfstropen()))
79da2e3ebdSchin return 0;
80da2e3ebdSchin memset(paren, 0, sizeof(paren));
81da2e3ebdSchin n = 0;
82da2e3ebdSchin p = -1;
83da2e3ebdSchin b = 0;
84da2e3ebdSchin nsub = 0;
85da2e3ebdSchin s = (char*)pattern;
86da2e3ebdSchin do
87da2e3ebdSchin {
88da2e3ebdSchin while (c = *s++)
89da2e3ebdSchin {
90da2e3ebdSchin if (c == '\\')
91da2e3ebdSchin {
92da2e3ebdSchin sfputc(sp, c);
93da2e3ebdSchin if (!(c = *s++))
94da2e3ebdSchin break;
95da2e3ebdSchin }
96da2e3ebdSchin else if (b)
97da2e3ebdSchin {
98da2e3ebdSchin if (c == ']')
99da2e3ebdSchin b = 0;
100da2e3ebdSchin }
101da2e3ebdSchin else if (c == '[')
102da2e3ebdSchin {
103da2e3ebdSchin b = 1;
104da2e3ebdSchin if (*s == '^')
105da2e3ebdSchin {
106da2e3ebdSchin sfputc(sp, c);
107da2e3ebdSchin c = *s++;
108da2e3ebdSchin }
109da2e3ebdSchin if (*s == ']')
110da2e3ebdSchin {
111da2e3ebdSchin sfputc(sp, c);
112da2e3ebdSchin c = *s++;
113da2e3ebdSchin }
114da2e3ebdSchin }
115da2e3ebdSchin else if (c == '(')
116da2e3ebdSchin {
117da2e3ebdSchin /*
118da2e3ebdSchin * someone explain in one sentence why
119da2e3ebdSchin * a cast is needed to make this work
120da2e3ebdSchin */
121da2e3ebdSchin
122da2e3ebdSchin if (p < (int)(elementsof(paren) - 1))
123da2e3ebdSchin p++;
124da2e3ebdSchin paren[p] = ++n;
125da2e3ebdSchin }
126da2e3ebdSchin else if (c == ')' && p >= 0)
127da2e3ebdSchin {
128da2e3ebdSchin for (i = p; i > 0; i--)
129da2e3ebdSchin if (paren[i])
130da2e3ebdSchin break;
131da2e3ebdSchin if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
132da2e3ebdSchin {
133da2e3ebdSchin s += 2;
134da2e3ebdSchin j -= '0';
135da2e3ebdSchin if (nsub <= j)
136da2e3ebdSchin {
137da2e3ebdSchin if (!nsub)
138da2e3ebdSchin memset(sub, 0, sizeof(sub));
139da2e3ebdSchin nsub = j + 1;
140da2e3ebdSchin }
141da2e3ebdSchin sub[j] = paren[i] + 1;
142da2e3ebdSchin }
143da2e3ebdSchin paren[i] = 0;
144da2e3ebdSchin }
145da2e3ebdSchin sfputc(sp, c);
146da2e3ebdSchin }
147da2e3ebdSchin } while (s = va_arg(ap, char*));
148da2e3ebdSchin va_end(ap);
149da2e3ebdSchin if (!(s = sfstruse(sp)))
150da2e3ebdSchin {
151da2e3ebdSchin sfstrclose(sp);
152da2e3ebdSchin return 0;
153da2e3ebdSchin }
154da2e3ebdSchin re = 0;
155da2e3ebdSchin n = 0;
156da2e3ebdSchin do
157da2e3ebdSchin {
158da2e3ebdSchin if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
159da2e3ebdSchin {
160da2e3ebdSchin if (re)
161da2e3ebdSchin free(re);
162da2e3ebdSchin sfstrclose(sp);
163da2e3ebdSchin return 0;
164da2e3ebdSchin }
165da2e3ebdSchin re->cur = re->buf;
166da2e3ebdSchin re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
167da2e3ebdSchin regalloc(re, block, REG_NOFREE);
168da2e3ebdSchin c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
169da2e3ebdSchin regalloc(NiL, NiL, 0);
170da2e3ebdSchin } while (c == REG_ESPACE);
171da2e3ebdSchin sfstrclose(sp);
172da2e3ebdSchin if (c)
173da2e3ebdSchin {
174da2e3ebdSchin free(re);
175da2e3ebdSchin return 0;
176da2e3ebdSchin }
177da2e3ebdSchin if (re->nsub = nsub)
178da2e3ebdSchin memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
179da2e3ebdSchin return (char*)re;
180da2e3ebdSchin }
181da2e3ebdSchin
182da2e3ebdSchin char*
regex(const char * handle,const char * subject,...)183da2e3ebdSchin regex(const char* handle, const char* subject, ...)
184da2e3ebdSchin {
185da2e3ebdSchin register Regex_t* re;
186da2e3ebdSchin register int n;
187da2e3ebdSchin register int i;
188da2e3ebdSchin register int k;
189da2e3ebdSchin char* sub[SUB + 1];
190da2e3ebdSchin regmatch_t match[SUB + 1];
191da2e3ebdSchin va_list ap;
192da2e3ebdSchin
193da2e3ebdSchin va_start(ap, subject);
194da2e3ebdSchin if (!(re = (Regex_t*)handle) || !subject)
195da2e3ebdSchin return 0;
196da2e3ebdSchin for (n = 0; n < re->nsub; n++)
197da2e3ebdSchin sub[n] = va_arg(ap, char*);
198da2e3ebdSchin va_end(ap);
199da2e3ebdSchin if (regexec(&re->re, subject, SUB + 1, match, 0))
200da2e3ebdSchin return 0;
201da2e3ebdSchin for (n = 0; n < re->nsub; n++)
202da2e3ebdSchin if (i = re->sub[n])
203da2e3ebdSchin {
204da2e3ebdSchin i--;
205da2e3ebdSchin k = match[i].rm_eo - match[i].rm_so;
206da2e3ebdSchin strncpy(sub[n], subject + match[i].rm_so, k);
207da2e3ebdSchin *(sub[n] + k) = 0;
208da2e3ebdSchin }
209da2e3ebdSchin __loc1 = (char*)subject + match[0].rm_so;
210da2e3ebdSchin return (char*)subject + match[0].rm_eo;
211da2e3ebdSchin }
212