1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2009 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * regex library interface 26 */ 27 28 #ifdef _AST_STD_I 29 #define _REGEX_H -1 30 #define regex_t int 31 #define regmatch_t int 32 #endif 33 #ifndef _REGEX_H 34 #define _REGEX_H 1 35 #undef regex_t 36 #undef regmatch_t 37 38 #include <ast_common.h> 39 40 #define REG_VERSION 20030916L 41 42 /* regcomp flags */ 43 44 #define REG_AUGMENTED 0x00000001 /* enable ! & < > */ 45 #define REG_EXTENDED 0x00000002 /* enable ( | ) */ 46 #define REG_ICASE 0x00000004 /* ignore case in match */ 47 #define REG_NEWLINE 0x00000008 /* ^/$ match embedded \n */ 48 #define REG_NOSUB 0x00000010 /* don't report subexp matches */ 49 #define REG_SHELL 0x00000020 /* shell pattern syntax */ 50 51 /* nonstandard regcomp flags */ 52 53 #define REG_LEFT 0x00000100 /* implicit ^... */ 54 #define REG_LITERAL 0x00000200 /* no operators */ 55 #define REG_MINIMAL 0x00000400 /* minimal match */ 56 #define REG_NULL 0x00000800 /* allow null patterns */ 57 #define REG_RIGHT 0x00001000 /* implicit ...$ */ 58 #define REG_LENIENT 0x00002000 /* look the other way */ 59 #define REG_ESCAPE 0x00004000 /* \ escapes delimiter in [...] */ 60 #define REG_FIRST 0x00008000 /* first match found will do */ 61 #define REG_MULTIPLE 0x00010000 /* multiple \n sep patterns */ 62 #define REG_DISCIPLINE 0x00020000 /* regex_t.re_disc is valid */ 63 #define REG_SPAN 0x00040000 /* . matches \n */ 64 #define REG_COMMENT 0x00080000 /* ignore pattern space & #...\n*/ 65 #define REG_MULTIREF 0x00100000 /* multiple digit backrefs */ 66 #define REG_MUSTDELIM 0x08000000 /* all delimiters required */ 67 #define REG_DELIMITED 0x10000000 /* pattern[0] is delimiter */ 68 #define REG_SHELL_GROUP 0x20000000 /* (|&) inside [@|&](...) only */ 69 70 #define REG_SHELL_DOT 0x00200000 /* explicit leading . match */ 71 #define REG_SHELL_ESCAPED 0x00400000 /* \ not special */ 72 #define REG_SHELL_PATH 0x00800000 /* explicit / match */ 73 74 #define REG_REGEXP 0x40000000 /* <regexp.h> compatibility */ 75 76 /* regexec flags */ 77 78 #define REG_NOTBOL 0x00000040 /* ^ is not a special char */ 79 #define REG_NOTEOL 0x00000080 /* $ is not a special char */ 80 81 /* nonstandard regexec flags */ 82 83 #define REG_INVERT 0x01000000 /* invert regrexec match sense */ 84 #define REG_STARTEND 0x02000000 /* subject==match[0].rm_{so,eo} */ 85 #define REG_ADVANCE 0x04000000 /* advance match[0].rm_{so,eo} */ 86 87 /* regalloc flags */ 88 89 #define REG_NOFREE 0x00000001 /* don't free */ 90 91 /* regsub flags */ 92 93 #define REG_SUB_ALL 0x00000001 /* substitute all occurrences */ 94 #define REG_SUB_LOWER 0x00000002 /* substitute to lower case */ 95 #define REG_SUB_UPPER 0x00000004 /* substitute to upper case */ 96 #define REG_SUB_PRINT 0x00000010 /* internal no-op */ 97 #define REG_SUB_NUMBER 0x00000020 /* internal no-op */ 98 #define REG_SUB_STOP 0x00000040 /* internal no-op */ 99 #define REG_SUB_WRITE 0x00000080 /* internal no-op */ 100 #define REG_SUB_LAST 0x00000100 /* last substitution option */ 101 #define REG_SUB_FULL 0x00000200 /* fully delimited */ 102 #define REG_SUB_USER 0x00001000 /* first user flag bit */ 103 104 /* regex error codes */ 105 106 #define REG_ENOSYS (-1) /* not supported */ 107 #define REG_NOMATCH 1 /* regexec didn't match */ 108 #define REG_BADPAT 2 /* invalid regular expression */ 109 #define REG_ECOLLATE 3 /* invalid collation element */ 110 #define REG_ECTYPE 4 /* invalid character class */ 111 #define REG_EESCAPE 5 /* trailing \ in pattern */ 112 #define REG_ESUBREG 6 /* invalid \digit backreference */ 113 #define REG_EBRACK 7 /* [...] imbalance */ 114 #define REG_EPAREN 8 /* \(...\) or (...) imbalance */ 115 #define REG_EBRACE 9 /* \{...\} or {...} imbalance */ 116 #define REG_BADBR 10 /* invalid {...} digits */ 117 #define REG_ERANGE 11 /* invalid [...] range endpoint */ 118 #define REG_ESPACE 12 /* out of space */ 119 #define REG_BADRPT 13 /* unary op not preceeded by re */ 120 #define REG_ENULL 14 /* empty subexpr in pattern */ 121 #define REG_ECOUNT 15 /* re component count overflow */ 122 #define REG_BADESC 16 /* invalid \char escape */ 123 #define REG_VERSIONID 17 /* version id (pseudo error) */ 124 #define REG_EFLAGS 18 /* flags conflict */ 125 #define REG_EDELIM 19 /* invalid or omitted delimiter */ 126 #define REG_PANIC 20 /* unrecoverable internal error */ 127 128 struct regex_s; typedef struct regex_s regex_t; 129 struct regdisc_s; typedef struct regdisc_s regdisc_t; 130 131 typedef int (*regclass_t)(int); 132 typedef int32_t regflags_t; 133 typedef int regoff_t; 134 typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...); 135 typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*); 136 typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*); 137 typedef void* (*regresize_t)(void*, void*, size_t); 138 typedef int (*regrecord_t)(void*, const char*, size_t); 139 140 typedef struct regmatch_s 141 { 142 regoff_t rm_so; /* offset of start */ 143 regoff_t rm_eo; /* offset of end */ 144 } regmatch_t; 145 146 typedef struct regsub_s 147 { 148 regflags_t re_flags; /* regsubcomp() flags */ 149 char* re_buf; /* regsubexec() output buffer */ 150 size_t re_len; /* re_buf length */ 151 int re_min; /* regsubcomp() min matches */ 152 #ifdef _REG_SUB_PRIVATE_ 153 _REG_SUB_PRIVATE_ 154 #endif 155 } regsub_t; 156 157 struct regdisc_s 158 { 159 unsigned long re_version; /* discipline version */ 160 regflags_t re_flags; /* discipline flags */ 161 regerror_t re_errorf; /* error function */ 162 int re_errorlevel; /* errorf level */ 163 regresize_t re_resizef; /* alloc/free function */ 164 void* re_resizehandle;/* resizef handle */ 165 regcomp_t re_compf; /* (?{...}) compile function */ 166 regexec_t re_execf; /* (?{...}) execute function */ 167 unsigned char* re_map; /* external to native ccode map */ 168 }; 169 170 typedef struct regstat_s 171 { 172 regflags_t re_flags; /* REG_LEFT|REG_RIGHT */ 173 ssize_t re_min; /* min anchored match length */ 174 ssize_t re_max; /* max anchored match length */ 175 ssize_t re_record; /* regrexec() match length */ 176 } regstat_t; 177 178 struct regex_s 179 { 180 size_t re_nsub; /* number of subexpressions */ 181 struct reglib_s*re_info; /* library private info */ 182 size_t re_npat; /* number of pattern chars used */ 183 regdisc_t* re_disc; /* REG_DISCIPLINE discipline */ 184 regsub_t* re_sub; /* regsubcomp() data */ 185 }; 186 187 #define reginit(disc) (memset(disc,0,sizeof(*(disc))),(disc)->re_version=REG_VERSION) 188 189 #if _BLD_ast && defined(__EXPORT__) 190 #define extern __EXPORT__ 191 #endif 192 193 extern int regcomp(regex_t*, const char*, regflags_t); 194 extern size_t regerror(int, const regex_t*, char*, size_t); 195 extern int regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t); 196 extern void regfree(regex_t*); 197 198 /* nonstandard hooks */ 199 200 #define _REG_cache 1 /* have regcache() */ 201 #define _REG_class 1 /* have regclass() */ 202 #define _REG_collate 1 /* have regcollate(), regclass() */ 203 #define _REG_comb 1 /* have regcomb() */ 204 #define _REG_decomp 1 /* have regdecomp() */ 205 #define _REG_dup 1 /* have regdup() */ 206 #define _REG_fatal 1 /* have regfatal(), regfatalpat() */ 207 #define _REG_ncomp 1 /* have regncomp() */ 208 #define _REG_nexec 1 /* have regnexec() */ 209 #define _REG_rexec 1 /* have regrexec(), regrecord() */ 210 #define _REG_stat 1 /* have regstat() */ 211 #define _REG_subcomp 1 /* have regsubcomp(), regsubexec() */ 212 213 extern regclass_t regclass(const char*, char**); 214 extern int regaddclass(const char*, regclass_t); 215 extern int regcollate(const char*, char**, char*, int); 216 extern int regcomb(regex_t*, regex_t*); 217 extern size_t regdecomp(regex_t*, regflags_t, char*, size_t); 218 extern int regdup(regex_t*, regex_t*); 219 extern int regncomp(regex_t*, const char*, size_t, regflags_t); 220 extern int regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t); 221 extern void regfatal(regex_t*, int, int); 222 extern void regfatalpat(regex_t*, int, int, const char*); 223 extern int regrecord(const regex_t*); 224 extern int regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t); 225 extern regstat_t* regstat(const regex_t*); 226 227 extern regex_t* regcache(const char*, regflags_t, int*); 228 229 extern int regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t); 230 extern int regsubexec(const regex_t*, const char*, size_t, regmatch_t*); 231 extern int regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*); 232 extern void regsubfree(regex_t*); 233 234 /* obsolete hooks */ 235 236 #ifndef _SFIO_H 237 struct _sfio_s; 238 #endif 239 240 extern void regalloc(void*, regresize_t, regflags_t); 241 extern int regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t); 242 243 #undef extern 244 245 #endif 246