xref: /titanic_44/usr/src/cmd/expr/compile.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 1995-2003 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*
28*7c478bd9Sstevel@tonic-gate  * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
29*7c478bd9Sstevel@tonic-gate  *	using regcomp(3c), regexec(3c) interfaces. This is an XCU4
30*7c478bd9Sstevel@tonic-gate  *	porting aid. switches out to libgen compile/step if collation
31*7c478bd9Sstevel@tonic-gate  *	table not present.
32*7c478bd9Sstevel@tonic-gate  *
33*7c478bd9Sstevel@tonic-gate  *	Goal is to work with vi and sed/ed.
34*7c478bd9Sstevel@tonic-gate  * 	Returns expbuf in dhl format (encoding of first two bytes).
35*7c478bd9Sstevel@tonic-gate  * 	Note also that this is profoundly single threaded.  You
36*7c478bd9Sstevel@tonic-gate  *	cannot call compile twice with two separate search strings
37*7c478bd9Sstevel@tonic-gate  *	because the second call will wipe out the earlier stored string.
38*7c478bd9Sstevel@tonic-gate  *	This must be fixed, plus a general cleanup should be performed
39*7c478bd9Sstevel@tonic-gate  *	if this is to be integrated into libc.
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  */
42*7c478bd9Sstevel@tonic-gate 
43*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
44*7c478bd9Sstevel@tonic-gate 
45*7c478bd9Sstevel@tonic-gate #include <stdio.h>
46*7c478bd9Sstevel@tonic-gate #include <widec.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
48*7c478bd9Sstevel@tonic-gate #include <regex.h>
49*7c478bd9Sstevel@tonic-gate #include <locale.h>
50*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
51*7c478bd9Sstevel@tonic-gate #include <locale.h>
52*7c478bd9Sstevel@tonic-gate #include <string.h>
53*7c478bd9Sstevel@tonic-gate #include <unistd.h>
54*7c478bd9Sstevel@tonic-gate #include <regexpr.h>
55*7c478bd9Sstevel@tonic-gate 
/*
 * Pseudo compile/step/advance global variables.
 */
extern int nbra;		/* subexpression count of the last compiled RE */
extern char *locs;		/* for stopping excessive recursion */
extern char *loc1;		/* first character which matched the RE */
extern char *loc2;		/* character after the last one matched */
extern char *braslist[];	/* start of each of the nbra subexpressions */
extern char *braelist[];	/* end of each of the nbra subexpressions */
extern int regerrno;		/* compile/step/advance style error number */
extern int reglength;		/* size of the last compiled expression */

int regcomp_flags;		/* interface to specify cflags for regcomp */

/*
 * Routines implemented in this file.
 */
void regex_comp_free(void *a);
static char *dhl_compile(const char *instr, char *expbuf, char *endbuf);
static int dhl_step(const char *str, const char *ep);
static int dhl_advance(const char *str, const char *ep);
static int dhl_doit(const char *str, const regex_t *rep, const int flags);
static int map_errnos(int Errno);	/* Convert regcomp error */
76*7c478bd9Sstevel@tonic-gate 
/*
 * Number of regmatch_t slots handed to regexec(): one for the whole match
 * plus one for each sub RE.
 * NOTE: for now the limit on the bra list is defined here, but the fix is
 * to add a maxbra define to regex.h.
 * One problem is that a bigger number is a performance hit since
 * regexec() has a slow initialization loop that goes around SEPSIZE times.
 */
#define	SEPSIZE 20

/* Match offsets filled in by regexec() on behalf of step/advance */
static regmatch_t rm[SEPSIZE];
85*7c478bd9Sstevel@tonic-gate 
/*
 * Layout of a compiled expression as stored in the caller's expbuf:
 * the two DL-encoded header bytes that vi inspects, followed by two
 * regex structures, one for step and one for advance.
 */
struct regex_comp {
	char	r_head[2];		/* Header for DL encoding for vi */
	regex_t	r_stp;			/* For use by step */
	regex_t	r_adv;			/* For use by advance */
};

/*
 * Private working copy; compiled expressions are copied between this
 * object and the caller's buffer.
 */
static struct regex_comp reg_comp;

/*
 * global value for the size of a regex_comp structure:
 */
size_t regexc_size = sizeof (struct regex_comp);
100*7c478bd9Sstevel@tonic-gate 
101*7c478bd9Sstevel@tonic-gate 
/*
 * compile: public entry point with the compile(3g) calling sequence.
 * All of the work is done by dhl_compile(); its result is returned as is.
 */
char *
compile(const char *instr, char *expbuf, char *endbuf)
{
	char *result;

	result = dhl_compile(instr, expbuf, endbuf);
	return (result);
}
107*7c478bd9Sstevel@tonic-gate 
/*
 * step: public entry point with the step(3g) calling sequence.
 * All of the work is done by dhl_step(); its result is returned as is.
 */
int
step(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_step(instr, expbuf);
	return (matched);
}
113*7c478bd9Sstevel@tonic-gate 
/*
 * advance: public entry point with the advance(3g) calling sequence.
 * All of the work is done by dhl_advance(); its result is returned as is.
 */
int
advance(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_advance(instr, expbuf);
	return (matched);
}
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate 
121*7c478bd9Sstevel@tonic-gate /*
122*7c478bd9Sstevel@tonic-gate  * the compile and step routines here simulate the old libgen routines of
123*7c478bd9Sstevel@tonic-gate  * compile/step Re: regexpr(3G). in order to do this, we must assume
124*7c478bd9Sstevel@tonic-gate  * that expbuf[] consists of the following format:
125*7c478bd9Sstevel@tonic-gate  *	1) the first two bytes consist of a special encoding - see below.
126*7c478bd9Sstevel@tonic-gate  *	2) the next part is a regex_t used by regexec()/regcomp() for step
127*7c478bd9Sstevel@tonic-gate  *	3) the final part is a regex_t used by regexec()/regcomp() for advance
128*7c478bd9Sstevel@tonic-gate  *
129*7c478bd9Sstevel@tonic-gate  * the special encoding of the first two bytes is referenced throughout
130*7c478bd9Sstevel@tonic-gate  * vi. apparently expbuf[0] is set to:
131*7c478bd9Sstevel@tonic-gate  *	= 0 upon initialization
132*7c478bd9Sstevel@tonic-gate  *	= 1 if the first char of the RE is a ^
133*7c478bd9Sstevel@tonic-gate  *	= 0 if the first char of the RE isn't a ^
134*7c478bd9Sstevel@tonic-gate  * and expbuf[1-35+]	= bitmap of the type of RE chars in the expression.
135*7c478bd9Sstevel@tonic-gate  * this is apparently 0 if there's no RE.
136*7c478bd9Sstevel@tonic-gate  * Here, we use expbuf[0] in a similar fashion; and expbuf[1] is non-zero
137*7c478bd9Sstevel@tonic-gate  * if there's at least 1 RE in the string.
138*7c478bd9Sstevel@tonic-gate  * I say "apparently" as the code to compile()/step() is poorly written.
139*7c478bd9Sstevel@tonic-gate  */
140*7c478bd9Sstevel@tonic-gate static char *
dhl_compile(instr,expbuf,endbuf)141*7c478bd9Sstevel@tonic-gate dhl_compile(instr, expbuf, endbuf)
142*7c478bd9Sstevel@tonic-gate const char *instr;		/* the regular expression		*/
143*7c478bd9Sstevel@tonic-gate char *expbuf;			/* where the compiled RE gets placed	*/
144*7c478bd9Sstevel@tonic-gate char *endbuf;			/* ending addr of expbuf		*/
145*7c478bd9Sstevel@tonic-gate {
146*7c478bd9Sstevel@tonic-gate 	int rv;
147*7c478bd9Sstevel@tonic-gate 	int alloc = 0;
148*7c478bd9Sstevel@tonic-gate 	char adv_instr[4096];	/* PLENTY big temp buffer */
149*7c478bd9Sstevel@tonic-gate 	char *instrp;		/* PLENTY big temp buffer */
150*7c478bd9Sstevel@tonic-gate 
151*7c478bd9Sstevel@tonic-gate 	if (*instr == (char) NULL) {
152*7c478bd9Sstevel@tonic-gate 		regerrno = 41;
153*7c478bd9Sstevel@tonic-gate 		return (NULL);
154*7c478bd9Sstevel@tonic-gate 	}
155*7c478bd9Sstevel@tonic-gate 
156*7c478bd9Sstevel@tonic-gate 	/*
157*7c478bd9Sstevel@tonic-gate 	 * Check values of expbuf and endbuf
158*7c478bd9Sstevel@tonic-gate 	 */
159*7c478bd9Sstevel@tonic-gate 	if (expbuf == NULL) {
160*7c478bd9Sstevel@tonic-gate 		if ((expbuf = malloc(regexc_size)) == NULL) {
161*7c478bd9Sstevel@tonic-gate 			regerrno = 50;
162*7c478bd9Sstevel@tonic-gate 			return (NULL);
163*7c478bd9Sstevel@tonic-gate 		}
164*7c478bd9Sstevel@tonic-gate 		memset(&reg_comp, 0, regexc_size);
165*7c478bd9Sstevel@tonic-gate 		alloc = 1;
166*7c478bd9Sstevel@tonic-gate 		endbuf = expbuf + regexc_size;
167*7c478bd9Sstevel@tonic-gate 	} else {		/* Check if enough memory was allocated */
168*7c478bd9Sstevel@tonic-gate 		if (expbuf + regexc_size > endbuf) {
169*7c478bd9Sstevel@tonic-gate 			regerrno = 50;
170*7c478bd9Sstevel@tonic-gate 			return (NULL);
171*7c478bd9Sstevel@tonic-gate 		}
172*7c478bd9Sstevel@tonic-gate 		memcpy(&reg_comp, expbuf, regexc_size);
173*7c478bd9Sstevel@tonic-gate 	}
174*7c478bd9Sstevel@tonic-gate 
175*7c478bd9Sstevel@tonic-gate 	/*
176*7c478bd9Sstevel@tonic-gate 	 * Clear global flags
177*7c478bd9Sstevel@tonic-gate 	 */
178*7c478bd9Sstevel@tonic-gate 	nbra = 0;
179*7c478bd9Sstevel@tonic-gate 	regerrno = 0;
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate 	/*
182*7c478bd9Sstevel@tonic-gate 	 * Free any data being held for previous search strings
183*7c478bd9Sstevel@tonic-gate 	 */
184*7c478bd9Sstevel@tonic-gate 	regex_comp_free(&reg_comp);
185*7c478bd9Sstevel@tonic-gate 
186*7c478bd9Sstevel@tonic-gate 	/*
187*7c478bd9Sstevel@tonic-gate 	 * We call regcomp twice, once to get a regex_t for use by step()
188*7c478bd9Sstevel@tonic-gate 	 * and then again with for use by advance()
189*7c478bd9Sstevel@tonic-gate 	 */
190*7c478bd9Sstevel@tonic-gate 	if ((rv = regcomp(&reg_comp.r_stp, instr, regcomp_flags)) != 0) {
191*7c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);	/* Convert regcomp error */
192*7c478bd9Sstevel@tonic-gate 		goto out;
193*7c478bd9Sstevel@tonic-gate 	}
194*7c478bd9Sstevel@tonic-gate 	/*
195*7c478bd9Sstevel@tonic-gate 	 * To support advance, which assumes an implicit ^ to match at start
196*7c478bd9Sstevel@tonic-gate 	 * of line we prepend a ^ to the pattern by copying to a temp buffer
197*7c478bd9Sstevel@tonic-gate 	 */
198*7c478bd9Sstevel@tonic-gate 
199*7c478bd9Sstevel@tonic-gate 	if (instr[0] == '^')
200*7c478bd9Sstevel@tonic-gate 		instrp = (char *) instr; /* String already has leading ^ */
201*7c478bd9Sstevel@tonic-gate 	else {
202*7c478bd9Sstevel@tonic-gate 		adv_instr[0] = '^';
203*7c478bd9Sstevel@tonic-gate 		strncpy(&adv_instr[1], instr, 2048);
204*7c478bd9Sstevel@tonic-gate 		instrp = adv_instr;
205*7c478bd9Sstevel@tonic-gate 	}
206*7c478bd9Sstevel@tonic-gate 
207*7c478bd9Sstevel@tonic-gate 	if ((rv = regcomp(&reg_comp.r_adv, instrp, regcomp_flags)) != 0) {
208*7c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);	/* Convert regcomp error */
209*7c478bd9Sstevel@tonic-gate 		goto out;
210*7c478bd9Sstevel@tonic-gate 	}
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate 	/*
213*7c478bd9Sstevel@tonic-gate 	 * update global variables
214*7c478bd9Sstevel@tonic-gate 	 */
215*7c478bd9Sstevel@tonic-gate 	nbra = (int) reg_comp.r_adv.re_nsub > 0 ?
216*7c478bd9Sstevel@tonic-gate 	    (int) reg_comp.r_adv.re_nsub : 0;
217*7c478bd9Sstevel@tonic-gate 	regerrno = 0;
218*7c478bd9Sstevel@tonic-gate 
219*7c478bd9Sstevel@tonic-gate 	/*
220*7c478bd9Sstevel@tonic-gate 	 * Set the header flags for use by vi
221*7c478bd9Sstevel@tonic-gate 	 */
222*7c478bd9Sstevel@tonic-gate 	if (instr[0] == '^') 		/* if beginning of string,	*/
223*7c478bd9Sstevel@tonic-gate 		reg_comp.r_head[0] = 1;	/* set special flag		*/
224*7c478bd9Sstevel@tonic-gate 	else
225*7c478bd9Sstevel@tonic-gate 		reg_comp.r_head[0] = 0;	/* clear special flag		*/
226*7c478bd9Sstevel@tonic-gate 	/*
227*7c478bd9Sstevel@tonic-gate 	 * note that for a single BRE, nbra will be 0 here.
228*7c478bd9Sstevel@tonic-gate 	 * we're guaranteed that, at this point, a RE has been found.
229*7c478bd9Sstevel@tonic-gate 	 */
230*7c478bd9Sstevel@tonic-gate 	reg_comp.r_head[1] = 1;	/* set special flag		*/
231*7c478bd9Sstevel@tonic-gate 	/*
232*7c478bd9Sstevel@tonic-gate 	 * Copy our reg_comp structure to expbuf
233*7c478bd9Sstevel@tonic-gate 	 */
234*7c478bd9Sstevel@tonic-gate 	(void) memcpy(expbuf, (char *) &reg_comp, regexc_size);
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate out:
237*7c478bd9Sstevel@tonic-gate 	/*
238*7c478bd9Sstevel@tonic-gate 	 * Return code from libgen regcomp with mods.  Note weird return
239*7c478bd9Sstevel@tonic-gate 	 * value - if space is malloc'd return pointer to start of space,
240*7c478bd9Sstevel@tonic-gate 	 * if user provided his own space, return pointer to 1+last byte
241*7c478bd9Sstevel@tonic-gate 	 * of his space.
242*7c478bd9Sstevel@tonic-gate 	 */
243*7c478bd9Sstevel@tonic-gate 	if (regerrno != 0) {
244*7c478bd9Sstevel@tonic-gate 		if (alloc)
245*7c478bd9Sstevel@tonic-gate 			free(expbuf);
246*7c478bd9Sstevel@tonic-gate 		return (NULL);
247*7c478bd9Sstevel@tonic-gate 	}
248*7c478bd9Sstevel@tonic-gate 	reglength = regexc_size;
249*7c478bd9Sstevel@tonic-gate 
250*7c478bd9Sstevel@tonic-gate 	if (alloc)
251*7c478bd9Sstevel@tonic-gate 		return (expbuf);
252*7c478bd9Sstevel@tonic-gate 	else
253*7c478bd9Sstevel@tonic-gate 		return (expbuf + regexc_size);
254*7c478bd9Sstevel@tonic-gate }
255*7c478bd9Sstevel@tonic-gate 
256*7c478bd9Sstevel@tonic-gate 
257*7c478bd9Sstevel@tonic-gate /*
258*7c478bd9Sstevel@tonic-gate  * dhl_step: step through a string until a RE match is found, or end of str
259*7c478bd9Sstevel@tonic-gate  */
260*7c478bd9Sstevel@tonic-gate static int
dhl_step(str,ep)261*7c478bd9Sstevel@tonic-gate dhl_step(str, ep)
262*7c478bd9Sstevel@tonic-gate const char *str;		/* characters to be checked for a match	*/
263*7c478bd9Sstevel@tonic-gate const char *ep;			/* compiled RE from dhl_compile()	*/
264*7c478bd9Sstevel@tonic-gate {
265*7c478bd9Sstevel@tonic-gate 	/*
266*7c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null ep
267*7c478bd9Sstevel@tonic-gate 	 */
268*7c478bd9Sstevel@tonic-gate 	if (ep == NULL) {
269*7c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
270*7c478bd9Sstevel@tonic-gate 		return (0);
271*7c478bd9Sstevel@tonic-gate 	}
272*7c478bd9Sstevel@tonic-gate 	/*
273*7c478bd9Sstevel@tonic-gate 	 * Call common routine with r_stp (step) structure
274*7c478bd9Sstevel@tonic-gate 	 */
275*7c478bd9Sstevel@tonic-gate 	return (dhl_doit(str, &(((struct regex_comp *) ep)->r_stp),
276*7c478bd9Sstevel@tonic-gate 	    ((locs != NULL) ? REG_NOTBOL : 0)));
277*7c478bd9Sstevel@tonic-gate }
278*7c478bd9Sstevel@tonic-gate 
279*7c478bd9Sstevel@tonic-gate /*
280*7c478bd9Sstevel@tonic-gate  * dhl_advance: implement advance
281*7c478bd9Sstevel@tonic-gate  */
282*7c478bd9Sstevel@tonic-gate static int
dhl_advance(str,ep)283*7c478bd9Sstevel@tonic-gate dhl_advance(str, ep)
284*7c478bd9Sstevel@tonic-gate const char *str;		/* characters to be checked for a match	*/
285*7c478bd9Sstevel@tonic-gate const char *ep;			/* compiled RE from dhl_compile()	*/
286*7c478bd9Sstevel@tonic-gate {
287*7c478bd9Sstevel@tonic-gate 	int rv;
288*7c478bd9Sstevel@tonic-gate 	/*
289*7c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null ep
290*7c478bd9Sstevel@tonic-gate 	 */
291*7c478bd9Sstevel@tonic-gate 	if (ep == NULL) {
292*7c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
293*7c478bd9Sstevel@tonic-gate 		return (0);
294*7c478bd9Sstevel@tonic-gate 	}
295*7c478bd9Sstevel@tonic-gate 	/*
296*7c478bd9Sstevel@tonic-gate 	 * Call common routine with r_adv (advance) structure
297*7c478bd9Sstevel@tonic-gate 	 */
298*7c478bd9Sstevel@tonic-gate 	rv = dhl_doit(str, &(((struct regex_comp *) ep)->r_adv), 0);
299*7c478bd9Sstevel@tonic-gate 	loc1 = NULL;		/* Clear it per the compile man page */
300*7c478bd9Sstevel@tonic-gate 	return (rv);
301*7c478bd9Sstevel@tonic-gate }
302*7c478bd9Sstevel@tonic-gate 
303*7c478bd9Sstevel@tonic-gate /*
304*7c478bd9Sstevel@tonic-gate  * dhl_doit - common code for step and advance
305*7c478bd9Sstevel@tonic-gate  */
306*7c478bd9Sstevel@tonic-gate static int
dhl_doit(str,rep,flags)307*7c478bd9Sstevel@tonic-gate dhl_doit(str, rep, flags)
308*7c478bd9Sstevel@tonic-gate const char *str;		/* characters to be checked for a match	*/
309*7c478bd9Sstevel@tonic-gate const regex_t *rep;
310*7c478bd9Sstevel@tonic-gate const int flags;		/* flags to be passed to regexec directly */
311*7c478bd9Sstevel@tonic-gate {
312*7c478bd9Sstevel@tonic-gate 	int rv;
313*7c478bd9Sstevel@tonic-gate 	int i;
314*7c478bd9Sstevel@tonic-gate 	regmatch_t *prm;	/* ptr to current regmatch_t		*/
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 	/*
317*7c478bd9Sstevel@tonic-gate 	 * Check if we're passed a null regex_t
318*7c478bd9Sstevel@tonic-gate 	 */
319*7c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
320*7c478bd9Sstevel@tonic-gate 		regerrno = 41;	/* No remembered search string error */
321*7c478bd9Sstevel@tonic-gate 		return (0);
322*7c478bd9Sstevel@tonic-gate 	}
323*7c478bd9Sstevel@tonic-gate 
324*7c478bd9Sstevel@tonic-gate 	regerrno = 0;
325*7c478bd9Sstevel@tonic-gate 	prm = &rm[0];
326*7c478bd9Sstevel@tonic-gate 
327*7c478bd9Sstevel@tonic-gate 	if ((rv = regexec(rep, str, SEPSIZE, prm, flags)) != REG_OK) {
328*7c478bd9Sstevel@tonic-gate 		if (rv == REG_NOMATCH)
329*7c478bd9Sstevel@tonic-gate 			return (0);
330*7c478bd9Sstevel@tonic-gate 		regerrno = map_errnos(rv);
331*7c478bd9Sstevel@tonic-gate 		return (0);
332*7c478bd9Sstevel@tonic-gate 	}
333*7c478bd9Sstevel@tonic-gate 
334*7c478bd9Sstevel@tonic-gate 	loc1 = (char *)str + prm->rm_so;
335*7c478bd9Sstevel@tonic-gate 	loc2 = (char *)str + prm->rm_eo;
336*7c478bd9Sstevel@tonic-gate 
337*7c478bd9Sstevel@tonic-gate 	/*
338*7c478bd9Sstevel@tonic-gate 	 * Now we need to fill up the bra lists with all of the sub re's
339*7c478bd9Sstevel@tonic-gate 	 * Note we subtract nsub -1, and preincrement prm.
340*7c478bd9Sstevel@tonic-gate 	 */
341*7c478bd9Sstevel@tonic-gate 	for (i = 0; i <= rep->re_nsub; i++) {
342*7c478bd9Sstevel@tonic-gate 		prm++;		/* XXX inc past first subexp */
343*7c478bd9Sstevel@tonic-gate 		braslist[i] = (char *)str + prm->rm_so;
344*7c478bd9Sstevel@tonic-gate 		braelist[i] = (char *)str + prm->rm_eo;
345*7c478bd9Sstevel@tonic-gate 		if (i >= SEPSIZE) {
346*7c478bd9Sstevel@tonic-gate 			regerrno = 50; 	/* regex overflow */
347*7c478bd9Sstevel@tonic-gate 			return (0);
348*7c478bd9Sstevel@tonic-gate 		}
349*7c478bd9Sstevel@tonic-gate 	}
350*7c478bd9Sstevel@tonic-gate 
351*7c478bd9Sstevel@tonic-gate 	/*
352*7c478bd9Sstevel@tonic-gate 	 * Inverse logic, a zero from regexec - success, is a 1
353*7c478bd9Sstevel@tonic-gate 	 * from advance/step.
354*7c478bd9Sstevel@tonic-gate 	 */
355*7c478bd9Sstevel@tonic-gate 
356*7c478bd9Sstevel@tonic-gate 	return (rv == 0);
357*7c478bd9Sstevel@tonic-gate }
358*7c478bd9Sstevel@tonic-gate 
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate /*
361*7c478bd9Sstevel@tonic-gate  *	regerrno to compile/step error mapping:
362*7c478bd9Sstevel@tonic-gate  *	This is really a big compromise.  Some errors don't map at all
363*7c478bd9Sstevel@tonic-gate  *	like regcomp error 15 is generated by both compile() error types
364*7c478bd9Sstevel@tonic-gate  *  	44 & 46.  So which one should we map to?
365*7c478bd9Sstevel@tonic-gate  *	Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
366*7c478bd9Sstevel@tonic-gate  *	To do your errors right use xregerr() to get the regcomp error
367*7c478bd9Sstevel@tonic-gate  *	string and print that.
368*7c478bd9Sstevel@tonic-gate  *
369*7c478bd9Sstevel@tonic-gate  * |	regcomp/regexec		     | 	Compile/step/advance		    |
370*7c478bd9Sstevel@tonic-gate  * +---------------------------------+--------------------------------------+
371*7c478bd9Sstevel@tonic-gate  * 0 REG_OK	  Pattern matched	1  - Pattern matched
372*7c478bd9Sstevel@tonic-gate  * 1 REG_NOMATCH  No match		0  - Pattern didn't match
373*7c478bd9Sstevel@tonic-gate  * 2 REG_ECOLLATE Bad collation elmnt.	67 - Returned by compile on mbtowc err
374*7c478bd9Sstevel@tonic-gate  * 3 REG_EESCAPE  trailing \ in patrn	45 - } expected after \.
375*7c478bd9Sstevel@tonic-gate  * 4 REG_ENEWLINE \n before end pattrn	36 - Illegal or missing delimiter.
376*7c478bd9Sstevel@tonic-gate  * 5 REG_ENSUB	  Over 9 \( \) pairs 	43 - Too many \(
377*7c478bd9Sstevel@tonic-gate  * 6 REG_ESUBREG  Bad number in \[0-9]  25 - ``\digit'' out of range.
378*7c478bd9Sstevel@tonic-gate  * 7 REG_EBRACK   [ ] inbalance		49 - [ ] imbalance.
379*7c478bd9Sstevel@tonic-gate  * 8 REG_EPAREN   ( ) inbalance         42 - \(~\) imbalance.
380*7c478bd9Sstevel@tonic-gate  * 9 REG_EBRACE   \{ \} inbalance       45 - } expected after \.
381*7c478bd9Sstevel@tonic-gate  * 10 REG_ERANGE  bad range endpoint	11 - Range endpoint too large.
382*7c478bd9Sstevel@tonic-gate  * 11 REG_ESPACE  no memory for pattern 50 - Regular expression overflow.
383*7c478bd9Sstevel@tonic-gate  * 12 REG_BADRPT  invalid repetition	36 - Illegal or missing delimiter.
384*7c478bd9Sstevel@tonic-gate  * 13 REG_ECTYPE  invalid char-class    67 - illegal byte sequence
385*7c478bd9Sstevel@tonic-gate  * 14 REG_BADPAT  syntax error		50 - Regular expression overflow.
386*7c478bd9Sstevel@tonic-gate  * 15 REG_BADBR   \{ \} contents bad	46 - First number exceeds 2nd in \{~\}
387*7c478bd9Sstevel@tonic-gate  * 16 REG_EFATAL  internal error	50 - Regular expression overflow.
388*7c478bd9Sstevel@tonic-gate  * 17 REG_ECHAR   bad mulitbyte char	67 - illegal byte sequence
389*7c478bd9Sstevel@tonic-gate  * 18 REG_STACK   stack overflow	50 - Regular expression overflow.
390*7c478bd9Sstevel@tonic-gate  * 19 REG_ENOSYS  function not supported 50- Regular expression overflow.
391*7c478bd9Sstevel@tonic-gate  *
392*7c478bd9Sstevel@tonic-gate  *	For reference here's the compile/step errno's. We don't generate
393*7c478bd9Sstevel@tonic-gate  *	41 here - it's done earlier, nor 44 since we can't tell if from 46.
394*7c478bd9Sstevel@tonic-gate  *
395*7c478bd9Sstevel@tonic-gate  *	11 - Range endpoint too large.
396*7c478bd9Sstevel@tonic-gate  *	16 - Bad number.
397*7c478bd9Sstevel@tonic-gate  *	25 - ``\digit'' out of range.
398*7c478bd9Sstevel@tonic-gate  *	36 - Illegal or missing delimiter.
399*7c478bd9Sstevel@tonic-gate  *	41 - No remembered search string.
400*7c478bd9Sstevel@tonic-gate  *	42 - \(~\) imbalance.
401*7c478bd9Sstevel@tonic-gate  *	43 - Too many \(.
402*7c478bd9Sstevel@tonic-gate  *	44 - More than 2 numbers given in "\{~\}"
403*7c478bd9Sstevel@tonic-gate  *	45 - } expected after \.
404*7c478bd9Sstevel@tonic-gate  *	46 - First number exceeds 2nd in "\{~\}"
405*7c478bd9Sstevel@tonic-gate  *	49 - [ ] imbalance.
406*7c478bd9Sstevel@tonic-gate  *	50 - Regular expression overflow.
407*7c478bd9Sstevel@tonic-gate  */
408*7c478bd9Sstevel@tonic-gate 
409*7c478bd9Sstevel@tonic-gate static int
map_errnos(int Errno)410*7c478bd9Sstevel@tonic-gate map_errnos(int Errno)
411*7c478bd9Sstevel@tonic-gate {
412*7c478bd9Sstevel@tonic-gate 	switch (Errno) {
413*7c478bd9Sstevel@tonic-gate 	case REG_ECOLLATE:
414*7c478bd9Sstevel@tonic-gate 		regerrno = 67;
415*7c478bd9Sstevel@tonic-gate 		break;
416*7c478bd9Sstevel@tonic-gate 	case REG_EESCAPE:
417*7c478bd9Sstevel@tonic-gate 		regerrno = 45;
418*7c478bd9Sstevel@tonic-gate 		break;
419*7c478bd9Sstevel@tonic-gate 	case REG_ENEWLINE:
420*7c478bd9Sstevel@tonic-gate 		regerrno = 36;
421*7c478bd9Sstevel@tonic-gate 		break;
422*7c478bd9Sstevel@tonic-gate 	case REG_ENSUB:
423*7c478bd9Sstevel@tonic-gate 		regerrno = 43;
424*7c478bd9Sstevel@tonic-gate 		break;
425*7c478bd9Sstevel@tonic-gate 	case REG_ESUBREG:
426*7c478bd9Sstevel@tonic-gate 		regerrno = 25;
427*7c478bd9Sstevel@tonic-gate 		break;
428*7c478bd9Sstevel@tonic-gate 	case REG_EBRACK:
429*7c478bd9Sstevel@tonic-gate 		regerrno = 49;
430*7c478bd9Sstevel@tonic-gate 		break;
431*7c478bd9Sstevel@tonic-gate 	case REG_EPAREN:
432*7c478bd9Sstevel@tonic-gate 		regerrno = 42;
433*7c478bd9Sstevel@tonic-gate 		break;
434*7c478bd9Sstevel@tonic-gate 	case REG_EBRACE:
435*7c478bd9Sstevel@tonic-gate 		regerrno = 45;
436*7c478bd9Sstevel@tonic-gate 		break;
437*7c478bd9Sstevel@tonic-gate 	case REG_ERANGE:
438*7c478bd9Sstevel@tonic-gate 		regerrno = 11;
439*7c478bd9Sstevel@tonic-gate 		break;
440*7c478bd9Sstevel@tonic-gate 	case REG_ESPACE:
441*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
442*7c478bd9Sstevel@tonic-gate 		break;
443*7c478bd9Sstevel@tonic-gate 	case REG_BADRPT:
444*7c478bd9Sstevel@tonic-gate 		regerrno = 36;
445*7c478bd9Sstevel@tonic-gate 		break;
446*7c478bd9Sstevel@tonic-gate 	case REG_ECTYPE:
447*7c478bd9Sstevel@tonic-gate 		regerrno = 67;
448*7c478bd9Sstevel@tonic-gate 		break;
449*7c478bd9Sstevel@tonic-gate 	case REG_BADPAT:
450*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
451*7c478bd9Sstevel@tonic-gate 		break;
452*7c478bd9Sstevel@tonic-gate 	case REG_BADBR:
453*7c478bd9Sstevel@tonic-gate 		regerrno = 46;
454*7c478bd9Sstevel@tonic-gate 		break;
455*7c478bd9Sstevel@tonic-gate 	case REG_EFATAL:
456*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
457*7c478bd9Sstevel@tonic-gate 		break;
458*7c478bd9Sstevel@tonic-gate 	case REG_ECHAR:
459*7c478bd9Sstevel@tonic-gate 		regerrno = 67;
460*7c478bd9Sstevel@tonic-gate 		break;
461*7c478bd9Sstevel@tonic-gate 	case REG_STACK:
462*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
463*7c478bd9Sstevel@tonic-gate 		break;
464*7c478bd9Sstevel@tonic-gate 	case REG_ENOSYS:
465*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
466*7c478bd9Sstevel@tonic-gate 		break;
467*7c478bd9Sstevel@tonic-gate 	default:
468*7c478bd9Sstevel@tonic-gate 		regerrno = 50;
469*7c478bd9Sstevel@tonic-gate 		break;
470*7c478bd9Sstevel@tonic-gate 	}
471*7c478bd9Sstevel@tonic-gate 	return (regerrno);
472*7c478bd9Sstevel@tonic-gate }
473*7c478bd9Sstevel@tonic-gate 
474*7c478bd9Sstevel@tonic-gate /*
475*7c478bd9Sstevel@tonic-gate  *  This is a routine to clean up the subtle substructure of the struct
476*7c478bd9Sstevel@tonic-gate  *  regex_comp type for use by clients of this module.  Since the struct
477*7c478bd9Sstevel@tonic-gate  *  type is private, we use a generic interface, and trust the
478*7c478bd9Sstevel@tonic-gate  *  application to be damn sure that this operation is valid for the
479*7c478bd9Sstevel@tonic-gate  *  named memory.
480*7c478bd9Sstevel@tonic-gate  */
481*7c478bd9Sstevel@tonic-gate 
482*7c478bd9Sstevel@tonic-gate void
regex_comp_free(void * a)483*7c478bd9Sstevel@tonic-gate regex_comp_free(void * a)
484*7c478bd9Sstevel@tonic-gate {
485*7c478bd9Sstevel@tonic-gate 	/*
486*7c478bd9Sstevel@tonic-gate 	 * Free any data being held for previous search strings
487*7c478bd9Sstevel@tonic-gate 	 */
488*7c478bd9Sstevel@tonic-gate 
489*7c478bd9Sstevel@tonic-gate 	if (((struct regex_comp *) a) == NULL) {
490*7c478bd9Sstevel@tonic-gate 		return;
491*7c478bd9Sstevel@tonic-gate 	}
492*7c478bd9Sstevel@tonic-gate 
493*7c478bd9Sstevel@tonic-gate 	regfree(&((struct regex_comp *)a)->r_stp);
494*7c478bd9Sstevel@tonic-gate 	regfree(&((struct regex_comp *)a)->r_adv);
495*7c478bd9Sstevel@tonic-gate }
496