1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * regcmp implementation 25 */ 26 27 #include <ast.h> 28 #include <libgen.h> 29 #include <regex.h> 30 #include <align.h> 31 32 #define INC (2*1024) 33 #define TOT (16*1024) 34 #define SUB 10 35 36 typedef struct 37 { 38 char* cur; 39 regex_t re; 40 unsigned char sub[SUB]; 41 int nsub; 42 size_t size; 43 char buf[ALIGN_BOUND2]; 44 } Regex_t; 45 46 __DEFINE__(char*, __loc1, 0); 47 48 static void* 49 block(void* handle, void* data, size_t size) 50 { 51 register Regex_t* re = (Regex_t*)handle; 52 53 if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur)) 54 return 0; 55 data = (void*)re->cur; 56 re->cur += size; 57 return data; 58 } 59 60 char* 61 regcmp(const char* pattern, ...) 62 { 63 register char* s; 64 register Regex_t* re; 65 register size_t n; 66 register int c; 67 register int p; 68 int b; 69 int i; 70 int j; 71 int nsub; 72 register Sfio_t* sp; 73 unsigned char paren[128]; 74 unsigned char sub[SUB]; 75 va_list ap; 76 77 va_start(ap, pattern); 78 if (!pattern || !*pattern || !(sp = sfstropen())) 79 return 0; 80 memset(paren, 0, sizeof(paren)); 81 n = 0; 82 p = -1; 83 b = 0; 84 nsub = 0; 85 s = (char*)pattern; 86 do 87 { 88 while (c = *s++) 89 { 90 if (c == '\\') 91 { 92 sfputc(sp, c); 93 if (!(c = *s++)) 94 break; 95 } 96 else if (b) 97 { 98 if (c == ']') 99 b = 0; 100 } 101 else if (c == '[') 102 { 103 b = 1; 104 if (*s == '^') 105 { 106 sfputc(sp, c); 107 c = *s++; 108 } 109 if (*s == ']') 110 { 111 sfputc(sp, c); 112 c = *s++; 113 } 114 } 115 else if (c == '(') 116 { 117 /* 118 * someone explain in one sentence why 119 * a cast is needed to make this work 120 */ 121 122 if (p < (int)(elementsof(paren) - 1)) 123 p++; 124 paren[p] = ++n; 125 } 126 else if (c == ')' && p >= 0) 127 { 128 for (i = p; i > 0; i--) 129 if (paren[i]) 130 break; 131 if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9') 132 { 133 s += 2; 134 j -= '0'; 135 if (nsub <= j) 136 { 137 if (!nsub) 138 memset(sub, 0, sizeof(sub)); 139 nsub = j + 1; 140 } 141 sub[j] = paren[i] + 1; 142 } 143 paren[i] = 0; 144 } 145 sfputc(sp, c); 146 } 147 } while (s = va_arg(ap, char*)); 148 va_end(ap); 149 if (!(s = sfstruse(sp))) 150 { 151 sfstrclose(sp); 152 return 0; 153 } 154 re = 0; 155 n = 0; 156 do 157 { 158 if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n))) 159 { 160 if (re) 161 free(re); 162 sfstrclose(sp); 163 return 0; 164 } 165 re->cur = re->buf; 166 re->size = n + ALIGN_BOUND2 - sizeof(Regex_t); 167 regalloc(re, block, REG_NOFREE); 168 c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL); 169 regalloc(NiL, NiL, 0); 170 } while (c == REG_ESPACE); 171 sfstrclose(sp); 172 if (c) 173 { 174 free(re); 175 return 0; 176 } 177 if (re->nsub = nsub) 178 memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0])); 179 return (char*)re; 180 } 181 182 char* 183 regex(const char* handle, const char* subject, ...) 184 { 185 register Regex_t* re; 186 register int n; 187 register int i; 188 register int k; 189 char* sub[SUB + 1]; 190 regmatch_t match[SUB + 1]; 191 va_list ap; 192 193 va_start(ap, subject); 194 if (!(re = (Regex_t*)handle) || !subject) 195 return 0; 196 for (n = 0; n < re->nsub; n++) 197 sub[n] = va_arg(ap, char*); 198 va_end(ap); 199 if (regexec(&re->re, subject, SUB + 1, match, 0)) 200 return 0; 201 for (n = 0; n < re->nsub; n++) 202 if (i = re->sub[n]) 203 { 204 i--; 205 k = match[i].rm_eo - match[i].rm_so; 206 strncpy(sub[n], subject + match[i].rm_so, k); 207 *(sub[n] + k) = 0; 208 } 209 __loc1 = (char*)subject + match[0].rm_so; 210 return (char*)subject + match[0].rm_eo; 211 } 212