/*********************************************************************** * * * This software is part of the ast package * * Copyright (c) 1985-2009 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * * * * A copy of the License is available at * * http://www.opensource.org/licenses/cpl1.0.txt * * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * * * * Information and Software Systems Research * * AT&T Research * * Florham Park NJ * * * * Glenn Fowler * * David Korn * * Phong Vo * * * ***********************************************************************/ #pragma prototyped /* * regcmp implementation */ #include #include #include #include #define INC (2*1024) #define TOT (16*1024) #define SUB 10 typedef struct { char* cur; regex_t re; unsigned char sub[SUB]; int nsub; size_t size; char buf[ALIGN_BOUND2]; } Regex_t; __DEFINE__(char*, __loc1, 0); static void* block(void* handle, void* data, size_t size) { register Regex_t* re = (Regex_t*)handle; if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur)) return 0; data = (void*)re->cur; re->cur += size; return data; } char* regcmp(const char* pattern, ...) { register char* s; register Regex_t* re; register size_t n; register int c; register int p; int b; int i; int j; int nsub; register Sfio_t* sp; unsigned char paren[128]; unsigned char sub[SUB]; va_list ap; va_start(ap, pattern); if (!pattern || !*pattern || !(sp = sfstropen())) return 0; memset(paren, 0, sizeof(paren)); n = 0; p = -1; b = 0; nsub = 0; s = (char*)pattern; do { while (c = *s++) { if (c == '\\') { sfputc(sp, c); if (!(c = *s++)) break; } else if (b) { if (c == ']') b = 0; } else if (c == '[') { b = 1; if (*s == '^') { sfputc(sp, c); c = *s++; } if (*s == ']') { sfputc(sp, c); c = *s++; } } else if (c == '(') { /* * someone explain in one sentence why * a cast is needed to make this work */ if (p < (int)(elementsof(paren) - 1)) p++; paren[p] = ++n; } else if (c == ')' && p >= 0) { for (i = p; i > 0; i--) if (paren[i]) break; if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9') { s += 2; j -= '0'; if (nsub <= j) { if (!nsub) memset(sub, 0, sizeof(sub)); nsub = j + 1; } sub[j] = paren[i] + 1; } paren[i] = 0; } sfputc(sp, c); } } while (s = va_arg(ap, char*)); va_end(ap); if (!(s = sfstruse(sp))) { sfstrclose(sp); return 0; } re = 0; n = 0; do { if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n))) { if (re) free(re); sfstrclose(sp); return 0; } re->cur = re->buf; re->size = n + ALIGN_BOUND2 - sizeof(Regex_t); regalloc(re, block, REG_NOFREE); c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL); regalloc(NiL, NiL, 0); } while (c == REG_ESPACE); sfstrclose(sp); if (c) { free(re); return 0; } if (re->nsub = nsub) memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0])); return (char*)re; } char* regex(const char* handle, const char* subject, ...) { register Regex_t* re; register int n; register int i; register int k; char* sub[SUB + 1]; regmatch_t match[SUB + 1]; va_list ap; va_start(ap, subject); if (!(re = (Regex_t*)handle) || !subject) return 0; for (n = 0; n < re->nsub; n++) sub[n] = va_arg(ap, char*); va_end(ap); if (regexec(&re->re, subject, SUB + 1, match, 0)) return 0; for (n = 0; n < re->nsub; n++) if (i = re->sub[n]) { i--; k = match[i].rm_eo - match[i].rm_so; strncpy(sub[n], subject + match[i].rm_so, k); *(sub[n] + k) = 0; } __loc1 = (char*)subject + match[0].rm_so; return (char*)subject + match[0].rm_eo; }