1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2011 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * regcmp implementation 25 */ 26 27 #include <ast.h> 28 #include <libgen.h> 29 #include <regex.h> 30 #include <align.h> 31 32 #define INC (2*1024) 33 #define TOT (16*1024) 34 #define SUB 10 35 36 typedef struct 37 { 38 char* cur; 39 regex_t re; 40 unsigned char sub[SUB]; 41 int nsub; 42 size_t size; 43 char buf[ALIGN_BOUND2]; 44 } Regex_t; 45 46 __DEFINE__(char*, __loc1, 0); 47 48 static void* 49 block(void* handle, void* data, size_t size) 50 { 51 register Regex_t* re = (Regex_t*)handle; 52 53 if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur)) 54 return 0; 55 data = (void*)re->cur; 56 re->cur += size; 57 return data; 58 } 59 60 char* 61 regcmp(const char* pattern, ...) 62 { 63 register char* s; 64 register Regex_t* re; 65 register size_t n; 66 register int c; 67 register int p; 68 int b; 69 int e; 70 int i; 71 int j; 72 int nsub; 73 register Sfio_t* sp; 74 unsigned char paren[128]; 75 unsigned char sub[SUB]; 76 va_list ap; 77 78 va_start(ap, pattern); 79 if (pattern || !*pattern || !(sp = sfstropen())) 80 e = 1; 81 else 82 { 83 e = 0; 84 memset(paren, 0, sizeof(paren)); 85 n = 0; 86 p = -1; 87 b = 0; 88 nsub = 0; 89 s = (char*)pattern; 90 do 91 { 92 while (c = *s++) 93 { 94 if (c == '\\') 95 { 96 sfputc(sp, c); 97 if (!(c = *s++)) 98 break; 99 } 100 else if (b) 101 { 102 if (c == ']') 103 b = 0; 104 } 105 else if (c == '[') 106 { 107 b = 1; 108 if (*s == '^') 109 { 110 sfputc(sp, c); 111 c = *s++; 112 } 113 if (*s == ']') 114 { 115 sfputc(sp, c); 116 c = *s++; 117 } 118 } 119 else if (c == '(') 120 { 121 /* 122 * someone explain in one sentence why 123 * a cast is needed to make this work 124 */ 125 126 if (p < (int)(elementsof(paren) - 1)) 127 p++; 128 paren[p] = ++n; 129 } 130 else if (c == ')' && p >= 0) 131 { 132 for (i = p; i > 0; i--) 133 if (paren[i]) 134 break; 135 if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9') 136 { 137 s += 2; 138 j -= '0'; 139 if (nsub <= j) 140 { 141 if (!nsub) 142 memset(sub, 0, sizeof(sub)); 143 nsub = j + 1; 144 } 145 sub[j] = paren[i] + 1; 146 } 147 paren[i] = 0; 148 } 149 sfputc(sp, c); 150 } 151 } while (s = va_arg(ap, char*)); 152 } 153 va_end(ap); 154 if (e) 155 return 0; 156 if (!(s = sfstruse(sp))) 157 { 158 sfstrclose(sp); 159 return 0; 160 } 161 re = 0; 162 n = 0; 163 do 164 { 165 if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n))) 166 { 167 if (re) 168 free(re); 169 sfstrclose(sp); 170 return 0; 171 } 172 re->cur = re->buf; 173 re->size = n + ALIGN_BOUND2 - sizeof(Regex_t); 174 regalloc(re, block, REG_NOFREE); 175 c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL); 176 regalloc(NiL, NiL, 0); 177 } while (c == REG_ESPACE); 178 sfstrclose(sp); 179 if (c) 180 { 181 free(re); 182 return 0; 183 } 184 if (re->nsub = nsub) 185 memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0])); 186 return (char*)re; 187 } 188 189 char* 190 regex(const char* handle, const char* subject, ...) 191 { 192 register Regex_t* re; 193 register int n; 194 register int i; 195 register int k; 196 char* sub[SUB + 1]; 197 regmatch_t match[SUB + 1]; 198 va_list ap; 199 200 va_start(ap, subject); 201 if (!(re = (Regex_t*)handle) || !subject) 202 k = 1; 203 else 204 { 205 k = 0; 206 for (n = 0; n < re->nsub; n++) 207 sub[n] = va_arg(ap, char*); 208 } 209 va_end(ap); 210 if (k) 211 return 0; 212 if (regexec(&re->re, subject, SUB + 1, match, 0)) 213 return 0; 214 for (n = 0; n < re->nsub; n++) 215 if (i = re->sub[n]) 216 { 217 i--; 218 k = match[i].rm_eo - match[i].rm_so; 219 strlcpy(sub[n], subject + match[i].rm_so, k); 220 *(sub[n] + k) = 0; 221 } 222 __loc1 = (char*)subject + match[0].rm_so; 223 return (char*)subject + match[0].rm_eo; 224 } 225