xref: /titanic_51/usr/src/lib/libast/common/include/regex.h (revision 29493bd8e037cbaea9095b34172305abb589cb6b)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * regex library interface
26  */
27 
28 #ifdef	_AST_STD_I
29 #define _REGEX_H	-1	/* pre-set the guard so the #ifndef _REGEX_H body is skipped */
30 #define regex_t		int	/* stand-in macro; the real typedef is not declared in this mode */
31 #define regmatch_t	int	/* stand-in macro; the real typedef is not declared in this mode */
32 #endif
33 #ifndef _REGEX_H
34 #define _REGEX_H	1
35 #undef	regex_t		/* drop any macro of this name before the typedef below */
36 #undef	regmatch_t	/* drop any macro of this name before the typedef below */
37 
38 #include <ast_common.h>
39 
40 #define REG_VERSION	20030916L	/* stored in regdisc_t.re_version by reginit() */
41 
42 /* regcomp flags (0x40 and 0x80 are skipped here: REG_NOTBOL/REG_NOTEOL use them) */
43 
44 #define REG_AUGMENTED	0x00000001	/* enable ! & < >		*/
45 #define REG_EXTENDED	0x00000002	/* enable ( | )			*/
46 #define REG_ICASE	0x00000004	/* ignore case in match		*/
47 #define REG_NEWLINE	0x00000008	/* ^/$ match embedded \n	*/
48 #define REG_NOSUB	0x00000010	/* don't report subexp matches	*/
49 #define REG_SHELL	0x00000020	/* shell pattern syntax		*/
50 
51 /* nonstandard regcomp flags */
52 
53 #define REG_LEFT	0x00000100	/* implicit ^...		*/
54 #define REG_LITERAL	0x00000200	/* no operators			*/
55 #define REG_MINIMAL	0x00000400	/* minimal match		*/
56 #define REG_NULL	0x00000800	/* allow null patterns		*/
57 #define REG_RIGHT	0x00001000	/* implicit ...$		*/
58 #define REG_LENIENT	0x00002000	/* look the other way		*/
59 #define REG_ESCAPE	0x00004000	/* \ escapes delimiter in [...]	*/
60 #define REG_FIRST	0x00008000	/* first match found will do	*/
61 #define REG_MULTIPLE	0x00010000	/* multiple \n sep patterns	*/
62 #define REG_DISCIPLINE	0x00020000	/* regex_t.re_disc is valid	*/
63 #define REG_SPAN	0x00040000	/* . matches \n			*/
64 #define REG_COMMENT	0x00080000	/* ignore pattern space & #...\n */
65 #define REG_MULTIREF	0x00100000	/* multiple digit backrefs	*/
66 #define REG_MUSTDELIM	0x08000000	/* all delimiters required	*/
67 #define REG_DELIMITED	0x10000000	/* pattern[0] is delimiter	*/
68 #define REG_SHELL_GROUP	0x20000000	/* (|&) inside [@|&](...) only	*/
69 /* REG_SHELL_* values below sit numerically between REG_MULTIREF and REG_INVERT */
70 #define REG_SHELL_DOT	0x00200000	/* explicit leading . match	*/
71 #define REG_SHELL_ESCAPED 0x00400000	/* \ not special		*/
72 #define REG_SHELL_PATH	0x00800000	/* explicit / match		*/
73 
74 /* regexec flags (bit values are disjoint from every regcomp flag above) */
75 
76 #define REG_NOTBOL	0x00000040	/* ^ is not a special char	*/
77 #define REG_NOTEOL	0x00000080	/* $ is not a special char	*/
78 
79 /* nonstandard regexec flags */
80 
81 #define REG_INVERT	0x01000000	/* invert regrexec match sense	*/
82 #define REG_STARTEND	0x02000000	/* subject==match[0].rm_{so,eo} */
83 #define REG_ADVANCE	0x04000000	/* advance match[0].rm_{so,eo}	*/
84 
85 /* regalloc flags (value reuses the REG_AUGMENTED bit; presumably a separate flag space) */
86 
87 #define REG_NOFREE	0x00000001	/* don't free			*/
88 
89 /* regsub flags (values reuse regcomp/regexec bits; presumably a separate flag space) */
90 
91 #define REG_SUB_ALL	0x00000001	/* substitute all occurrences	*/
92 #define REG_SUB_LOWER	0x00000002	/* substitute to lower case	*/
93 #define REG_SUB_UPPER	0x00000004	/* substitute to upper case	*/
94 #define REG_SUB_PRINT	0x00000010	/* internal no-op		*/
95 #define REG_SUB_NUMBER	0x00000020	/* internal no-op		*/
96 #define REG_SUB_STOP	0x00000040	/* internal no-op		*/
97 #define REG_SUB_WRITE	0x00000080	/* internal no-op		*/
98 #define REG_SUB_LAST	0x00000100	/* last substitution option	*/
99 #define REG_SUB_FULL	0x00000200	/* fully delimited		*/
100 #define REG_SUB_USER	0x00001000	/* first user flag bit		*/
101 
102 /* regex error codes */
103 
104 #define REG_ENOSYS	(-1)		/* not supported		*/
105 #define REG_NOMATCH	1		/* regexec didn't match		*/
106 #define REG_BADPAT	2		/* invalid regular expression	*/
107 #define REG_ECOLLATE	3		/* invalid collation element	*/
108 #define REG_ECTYPE	4		/* invalid character class	*/
109 #define REG_EESCAPE	5		/* trailing \ in pattern	*/
110 #define REG_ESUBREG	6		/* invalid \digit backreference	*/
111 #define REG_EBRACK	7		/* [...] imbalance		*/
112 #define REG_EPAREN	8		/* \(...\) or (...) imbalance	*/
113 #define REG_EBRACE	9		/* \{...\} or {...} imbalance	*/
114 #define REG_BADBR	10		/* invalid {...} digits		*/
115 #define REG_ERANGE	11		/* invalid [...] range endpoint	*/
116 #define REG_ESPACE	12		/* out of space			*/
117 #define REG_BADRPT	13		/* unary op not preceded by re	*/
118 #define REG_ENULL	14		/* empty subexpr in pattern	*/
119 #define REG_ECOUNT	15		/* re component count overflow	*/
120 #define REG_BADESC	16		/* invalid \char escape		*/
121 #define REG_VERSIONID	17		/* version id (pseudo error)	*/
122 #define REG_EFLAGS	18		/* flags conflict		*/
123 #define REG_EDELIM	19		/* invalid or omitted delimiter	*/
124 #define REG_PANIC	20		/* unrecoverable internal error	*/
125 
126 struct regex_s; typedef struct regex_s regex_t;		/* forward declaration; body is defined below */
127 struct regdisc_s; typedef struct regdisc_s regdisc_t;	/* forward declaration; body is defined below */
128 
129 typedef int (*regclass_t)(int);	/* returned by regclass(), accepted by regaddclass() */
130 typedef int32_t regflags_t;	/* carries the REG_* flag bits */
131 typedef int regoff_t;		/* type of regmatch_t.rm_so and rm_eo */
132 typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...);	/* type of regdisc_t.re_errorf */
133 typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*);	/* type of regdisc_t.re_compf */
134 typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*);	/* type of regdisc_t.re_execf */
135 typedef void* (*regresize_t)(void*, void*, size_t);	/* type of regdisc_t.re_resizef; also accepted by regalloc() */
136 typedef int (*regrecord_t)(void*, const char*, size_t);	/* type of the last argument of regrexec() */
137 
138 typedef struct regmatch_s	/* one match span, as a pair of regoff_t (int) offsets */
139 {
140 	regoff_t	rm_so;		/* offset of start		*/
141 	regoff_t	rm_eo;		/* offset of end		*/
142 } regmatch_t;
143 
144 typedef struct regsub_s		/* substitution data; regex_t.re_sub points at one of these */
145 {
146 	regflags_t	re_flags;	/* regsubcomp() flags		*/
147 	char*		re_buf;		/* regsubexec() output buffer	*/
148 	size_t		re_len;		/* re_buf length		*/
149 	int		re_min;		/* regsubcomp() min matches	*/
150 #ifdef _REG_SUB_PRIVATE_	/* when defined, its expansion is spliced in as trailing members */
151 	_REG_SUB_PRIVATE_
152 #endif
153 } regsub_t;
154 
155 struct regdisc_s		/* discipline (regdisc_t); reginit() zeroes it and sets re_version */
156 {
157 	unsigned long	re_version;	/* discipline version		*/
158 	regflags_t	re_flags;	/* discipline flags		*/
159 	regerror_t	re_errorf;	/* error function		*/
160 	int		re_errorlevel;	/* errorf level			*/
161 	regresize_t	re_resizef;	/* alloc/free function		*/
162 	void*		re_resizehandle;/* resizef handle		*/
163 	regcomp_t	re_compf;	/* (?{...}) compile function	*/
164 	regexec_t	re_execf;	/* (?{...}) execute function	*/
165 	unsigned char*	re_map;		/* external to native ccode map	*/
166 };
167 
168 typedef struct regstat_s	/* regstat() returns a pointer to one of these */
169 {
170 	regflags_t	re_flags;	/* REG_LEFT|REG_RIGHT		*/
171 	ssize_t		re_min;		/* min anchored match length	*/
172 	ssize_t		re_max;		/* max anchored match length	*/
173 	ssize_t		re_record;	/* regrexec() match length	*/
174 } regstat_t;
175 
176 struct regex_s			/* body of regex_t, the handle passed to the reg*() functions */
177 {
178 	size_t		re_nsub;	/* number of subexpressions	*/
179 	struct reglib_s*re_info;	/* library private info		*/
180 	size_t		re_npat;	/* number of pattern chars used	*/
181 	regdisc_t*	re_disc;	/* discipline; valid when REG_DISCIPLINE is set */
182 	regsub_t*	re_sub;		/* regsubcomp() data		*/
183 };
184 
185 #define reginit(disc)	(memset(disc,0,sizeof(*(disc))),(disc)->re_version=REG_VERSION)	/* zero *disc, then set re_version; disc is expanded more than once, so pass no side effects; memset is not declared in this file -- TODO confirm <ast_common.h> provides it */
186 
186 #if _BLD_ast && defined(__EXPORT__)
187 #define extern		__EXPORT__	/* declarations below expand to __EXPORT__ instead of extern, until the #undef extern near the end */
188 #endif
190 
190 extern int	regcomp(regex_t*, const char*, regflags_t);	/* flags: the regcomp flags above */
191 extern size_t	regerror(int, const regex_t*, char*, size_t);	/* int is presumably one of the REG_* error codes -- confirm */
192 extern int	regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t);	/* flags: the regexec flags above */
193 extern void	regfree(regex_t*);
195 
195 /* nonstandard hooks: each _REG_xxx is 1 and names the extension(s) declared below */
196 
197 #define _REG_cache	1	/* have regcache()			*/
198 #define _REG_class	1	/* have regclass()			*/
199 #define _REG_collate	1	/* have regcollate(), regclass()	*/
200 #define _REG_comb	1	/* have regcomb()			*/
201 #define _REG_decomp	1	/* have regdecomp()			*/
202 #define _REG_dup	1	/* have regdup()			*/
203 #define _REG_fatal	1	/* have regfatal(), regfatalpat()	*/
204 #define _REG_ncomp	1	/* have regncomp()			*/
205 #define _REG_nexec	1	/* have regnexec()			*/
206 #define _REG_rexec	1	/* have regrexec(), regrecord()		*/
207 #define _REG_stat	1	/* have regstat()			*/
208 #define _REG_subcomp	1	/* have regsubcomp(), regsubexec()	*/
210 
210 extern regclass_t regclass(const char*, char**);	/* see _REG_class, _REG_collate */
211 extern int	regaddclass(const char*, regclass_t);	/* no dedicated _REG_* macro above */
212 extern int	regcollate(const char*, char**, char*, int);	/* see _REG_collate */
213 extern int	regcomb(regex_t*, regex_t*);	/* see _REG_comb */
214 extern size_t	regdecomp(regex_t*, regflags_t, char*, size_t);	/* see _REG_decomp */
215 extern int	regdup(regex_t*, regex_t*);	/* see _REG_dup */
216 extern int	regncomp(regex_t*, const char*, size_t, regflags_t);	/* see _REG_ncomp */
217 extern int	regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t);	/* see _REG_nexec */
218 extern void	regfatal(regex_t*, int, int);	/* see _REG_fatal */
219 extern void	regfatalpat(regex_t*, int, int, const char*);	/* see _REG_fatal */
220 extern int	regrecord(const regex_t*);	/* see _REG_rexec */
221 extern int	regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t);	/* see _REG_rexec */
222 extern regstat_t* regstat(const regex_t*);	/* see _REG_stat */
223 
224 extern regex_t*	regcache(const char*, regflags_t, int*);	/* see _REG_cache */
225 
226 extern int	regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t);	/* see _REG_subcomp */
227 extern int	regsubexec(const regex_t*, const char*, size_t, regmatch_t*);	/* see _REG_subcomp */
228 extern int	regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*);	/* no dedicated _REG_* macro above */
229 extern void	regsubfree(regex_t*);	/* no dedicated _REG_* macro above */
231 
231 /* obsolete hooks */
232 
233 #ifndef _SFIO_H
234 struct _sfio_s;		/* forward declaration so regsub() below can name the pointer type */
235 #endif
236 
237 extern void	regalloc(void*, regresize_t, regflags_t);	/* flags: the regalloc flags above (REG_NOFREE) */
238 extern int	regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t);
240 
240 #undef	extern		/* undo the optional __EXPORT__ redefinition made above */
241 
242 #endif	/* _REGEX_H */
244