1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate *
4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate * with the License.
8*7c478bd9Sstevel@tonic-gate *
9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate *
14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate *
20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved.
24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate */
26*7c478bd9Sstevel@tonic-gate
27*7c478bd9Sstevel@tonic-gate /*
28*7c478bd9Sstevel@tonic-gate * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
29*7c478bd9Sstevel@tonic-gate * using regcomp(3c), regexec(3c) interfaces. This is an XCU4
30*7c478bd9Sstevel@tonic-gate * porting aid. switches out to libgen compile/step if collation
31*7c478bd9Sstevel@tonic-gate * table not present.
32*7c478bd9Sstevel@tonic-gate *
33*7c478bd9Sstevel@tonic-gate * Goal is to work with vi and sed/ed.
34*7c478bd9Sstevel@tonic-gate * Returns expbuf in dhl format (encoding of first two bytes).
35*7c478bd9Sstevel@tonic-gate * Note also that this is profoundly single threaded. You
36*7c478bd9Sstevel@tonic-gate * cannot call compile twice with two separate search strings
37*7c478bd9Sstevel@tonic-gate * because the second call will wipe out the earlier stored string.
38*7c478bd9Sstevel@tonic-gate * This must be fixed, plus a general cleanup should be performed
39*7c478bd9Sstevel@tonic-gate * if this is to be integrated into libc.
40*7c478bd9Sstevel@tonic-gate *
41*7c478bd9Sstevel@tonic-gate */
42*7c478bd9Sstevel@tonic-gate
43*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
44*7c478bd9Sstevel@tonic-gate
45*7c478bd9Sstevel@tonic-gate #include <stdio.h>
46*7c478bd9Sstevel@tonic-gate #include <widec.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
48*7c478bd9Sstevel@tonic-gate #include <regex.h>
49*7c478bd9Sstevel@tonic-gate #include <locale.h>
50*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
51*7c478bd9Sstevel@tonic-gate #include <locale.h>
52*7c478bd9Sstevel@tonic-gate #include <string.h>
53*7c478bd9Sstevel@tonic-gate #include <unistd.h>
54*7c478bd9Sstevel@tonic-gate #include <regexpr.h>
55*7c478bd9Sstevel@tonic-gate
/*
 * pseudo compile/step/advance global variables
 */
59*7c478bd9Sstevel@tonic-gate extern int nbra;
60*7c478bd9Sstevel@tonic-gate extern char *locs; /* for stopping execess recursion */
61*7c478bd9Sstevel@tonic-gate extern char *loc1; /* 1st character which matched RE */
62*7c478bd9Sstevel@tonic-gate extern char *loc2; /* char after lst char in matched RE */
63*7c478bd9Sstevel@tonic-gate extern char *braslist[]; /* start of nbra subexp */
64*7c478bd9Sstevel@tonic-gate extern char *braelist[]; /* end of nbra subexp */
65*7c478bd9Sstevel@tonic-gate extern int regerrno;
66*7c478bd9Sstevel@tonic-gate extern int reglength;
67*7c478bd9Sstevel@tonic-gate
68*7c478bd9Sstevel@tonic-gate int regcomp_flags; /* interface to specify cflags for regcomp */
69*7c478bd9Sstevel@tonic-gate
70*7c478bd9Sstevel@tonic-gate void regex_comp_free(void *a);
71*7c478bd9Sstevel@tonic-gate static int dhl_step(const char *str, const char *ep);
72*7c478bd9Sstevel@tonic-gate static int dhl_advance(const char *str, const char *ep);
73*7c478bd9Sstevel@tonic-gate static int map_errnos(int); /* Convert regcomp error */
74*7c478bd9Sstevel@tonic-gate static int dhl_doit(const char *, const regex_t *, const int flags);
75*7c478bd9Sstevel@tonic-gate static char * dhl_compile(const char *instr, char *ep, char *endbuf);
76*7c478bd9Sstevel@tonic-gate
/*
 * # of sub re's: NOTE: For now the limit on the bra list is defined here,
 * but the fix is to add a maxbra define to regex.h.
 * One problem is that a bigger number is a performance hit since
 * regexec() has a slow initialization loop that goes around SEPSIZE times.
 */
83*7c478bd9Sstevel@tonic-gate #define SEPSIZE 20
84*7c478bd9Sstevel@tonic-gate static regmatch_t rm[SEPSIZE]; /* ptr to list of RE matches */
85*7c478bd9Sstevel@tonic-gate
/*
 * In-memory form of a compiled expression as it is copied to and from
 * the caller's expbuf: a two byte header that vi inspects, followed by
 * one regex_t for step() and one for advance().
 */
struct regex_comp {
	char	r_head[2];	/* [0]: RE starts with ^; [1]: RE present */
	regex_t	r_stp;		/* pattern as given, used by step */
	regex_t	r_adv;		/* pattern anchored with ^, used by advance */
};

/* Single file-wide working copy used while compiling */
static struct regex_comp reg_comp;
95*7c478bd9Sstevel@tonic-gate
96*7c478bd9Sstevel@tonic-gate /*
97*7c478bd9Sstevel@tonic-gate * global value for the size of a regex_comp structure:
98*7c478bd9Sstevel@tonic-gate */
99*7c478bd9Sstevel@tonic-gate size_t regexc_size = sizeof (reg_comp);
100*7c478bd9Sstevel@tonic-gate
101*7c478bd9Sstevel@tonic-gate
/*
 * compile - public entry point; forwards unchanged to dhl_compile()
 * and hands back whatever it returns.
 */
char *
compile(const char *instr, char *expbuf, char *endbuf)
{
	char *result;

	result = dhl_compile(instr, expbuf, endbuf);
	return (result);
}
107*7c478bd9Sstevel@tonic-gate
/*
 * step - public entry point; forwards unchanged to dhl_step()
 * and hands back whatever it returns.
 */
int
step(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_step(instr, expbuf);
	return (matched);
}
113*7c478bd9Sstevel@tonic-gate
/*
 * advance - public entry point; forwards unchanged to dhl_advance()
 * and hands back whatever it returns.
 */
int
advance(const char *instr, const char *expbuf)
{
	int matched;

	matched = dhl_advance(instr, expbuf);
	return (matched);
}
119*7c478bd9Sstevel@tonic-gate
120*7c478bd9Sstevel@tonic-gate
121*7c478bd9Sstevel@tonic-gate /*
122*7c478bd9Sstevel@tonic-gate * the compile and step routines here simulate the old libgen routines of
123*7c478bd9Sstevel@tonic-gate * compile/step Re: regexpr(3G). in order to do this, we must assume
124*7c478bd9Sstevel@tonic-gate * that expbuf[] consists of the following format:
125*7c478bd9Sstevel@tonic-gate * 1) the first two bytes consist of a special encoding - see below.
126*7c478bd9Sstevel@tonic-gate * 2) the next part is a regex_t used by regexec()/regcomp() for step
127*7c478bd9Sstevel@tonic-gate * 3) the final part is a regex_t used by regexec()/regcomp() for advance
128*7c478bd9Sstevel@tonic-gate *
129*7c478bd9Sstevel@tonic-gate * the special encoding of the first two bytes is referenced throughout
130*7c478bd9Sstevel@tonic-gate * vi. apparently expbuf[0] is set to:
131*7c478bd9Sstevel@tonic-gate * = 0 upon initialization
132*7c478bd9Sstevel@tonic-gate * = 1 if the first char of the RE is a ^
133*7c478bd9Sstevel@tonic-gate * = 0 if the first char of the RE isn't a ^
134*7c478bd9Sstevel@tonic-gate * and expbuf[1-35+] = bitmap of the type of RE chars in the expression.
135*7c478bd9Sstevel@tonic-gate * this is apparently 0 if there's no RE.
136*7c478bd9Sstevel@tonic-gate * Here, we use expbuf[0] in a similar fashion; and expbuf[1] is non-zero
137*7c478bd9Sstevel@tonic-gate * if there's at least 1 RE in the string.
138*7c478bd9Sstevel@tonic-gate * I say "apparently" as the code to compile()/step() is poorly written.
139*7c478bd9Sstevel@tonic-gate */
140*7c478bd9Sstevel@tonic-gate static char *
dhl_compile(instr,expbuf,endbuf)141*7c478bd9Sstevel@tonic-gate dhl_compile(instr, expbuf, endbuf)
142*7c478bd9Sstevel@tonic-gate const char *instr; /* the regular expression */
143*7c478bd9Sstevel@tonic-gate char *expbuf; /* where the compiled RE gets placed */
144*7c478bd9Sstevel@tonic-gate char *endbuf; /* ending addr of expbuf */
145*7c478bd9Sstevel@tonic-gate {
146*7c478bd9Sstevel@tonic-gate int rv;
147*7c478bd9Sstevel@tonic-gate int alloc = 0;
148*7c478bd9Sstevel@tonic-gate char adv_instr[4096]; /* PLENTY big temp buffer */
149*7c478bd9Sstevel@tonic-gate char *instrp; /* PLENTY big temp buffer */
150*7c478bd9Sstevel@tonic-gate
151*7c478bd9Sstevel@tonic-gate if (*instr == (char) NULL) {
152*7c478bd9Sstevel@tonic-gate regerrno = 41;
153*7c478bd9Sstevel@tonic-gate return (NULL);
154*7c478bd9Sstevel@tonic-gate }
155*7c478bd9Sstevel@tonic-gate
156*7c478bd9Sstevel@tonic-gate /*
157*7c478bd9Sstevel@tonic-gate * Check values of expbuf and endbuf
158*7c478bd9Sstevel@tonic-gate */
159*7c478bd9Sstevel@tonic-gate if (expbuf == NULL) {
160*7c478bd9Sstevel@tonic-gate if ((expbuf = malloc(regexc_size)) == NULL) {
161*7c478bd9Sstevel@tonic-gate regerrno = 50;
162*7c478bd9Sstevel@tonic-gate return (NULL);
163*7c478bd9Sstevel@tonic-gate }
164*7c478bd9Sstevel@tonic-gate memset(®_comp, 0, regexc_size);
165*7c478bd9Sstevel@tonic-gate alloc = 1;
166*7c478bd9Sstevel@tonic-gate endbuf = expbuf + regexc_size;
167*7c478bd9Sstevel@tonic-gate } else { /* Check if enough memory was allocated */
168*7c478bd9Sstevel@tonic-gate if (expbuf + regexc_size > endbuf) {
169*7c478bd9Sstevel@tonic-gate regerrno = 50;
170*7c478bd9Sstevel@tonic-gate return (NULL);
171*7c478bd9Sstevel@tonic-gate }
172*7c478bd9Sstevel@tonic-gate memcpy(®_comp, expbuf, regexc_size);
173*7c478bd9Sstevel@tonic-gate }
174*7c478bd9Sstevel@tonic-gate
175*7c478bd9Sstevel@tonic-gate /*
176*7c478bd9Sstevel@tonic-gate * Clear global flags
177*7c478bd9Sstevel@tonic-gate */
178*7c478bd9Sstevel@tonic-gate nbra = 0;
179*7c478bd9Sstevel@tonic-gate regerrno = 0;
180*7c478bd9Sstevel@tonic-gate
181*7c478bd9Sstevel@tonic-gate /*
182*7c478bd9Sstevel@tonic-gate * Free any data being held for previous search strings
183*7c478bd9Sstevel@tonic-gate */
184*7c478bd9Sstevel@tonic-gate regex_comp_free(®_comp);
185*7c478bd9Sstevel@tonic-gate
186*7c478bd9Sstevel@tonic-gate /*
187*7c478bd9Sstevel@tonic-gate * We call regcomp twice, once to get a regex_t for use by step()
188*7c478bd9Sstevel@tonic-gate * and then again with for use by advance()
189*7c478bd9Sstevel@tonic-gate */
190*7c478bd9Sstevel@tonic-gate if ((rv = regcomp(®_comp.r_stp, instr, regcomp_flags)) != 0) {
191*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv); /* Convert regcomp error */
192*7c478bd9Sstevel@tonic-gate goto out;
193*7c478bd9Sstevel@tonic-gate }
194*7c478bd9Sstevel@tonic-gate /*
195*7c478bd9Sstevel@tonic-gate * To support advance, which assumes an implicit ^ to match at start
196*7c478bd9Sstevel@tonic-gate * of line we prepend a ^ to the pattern by copying to a temp buffer
197*7c478bd9Sstevel@tonic-gate */
198*7c478bd9Sstevel@tonic-gate
199*7c478bd9Sstevel@tonic-gate if (instr[0] == '^')
200*7c478bd9Sstevel@tonic-gate instrp = (char *) instr; /* String already has leading ^ */
201*7c478bd9Sstevel@tonic-gate else {
202*7c478bd9Sstevel@tonic-gate adv_instr[0] = '^';
203*7c478bd9Sstevel@tonic-gate strncpy(&adv_instr[1], instr, 2048);
204*7c478bd9Sstevel@tonic-gate instrp = adv_instr;
205*7c478bd9Sstevel@tonic-gate }
206*7c478bd9Sstevel@tonic-gate
207*7c478bd9Sstevel@tonic-gate if ((rv = regcomp(®_comp.r_adv, instrp, regcomp_flags)) != 0) {
208*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv); /* Convert regcomp error */
209*7c478bd9Sstevel@tonic-gate goto out;
210*7c478bd9Sstevel@tonic-gate }
211*7c478bd9Sstevel@tonic-gate
212*7c478bd9Sstevel@tonic-gate /*
213*7c478bd9Sstevel@tonic-gate * update global variables
214*7c478bd9Sstevel@tonic-gate */
215*7c478bd9Sstevel@tonic-gate nbra = (int) reg_comp.r_adv.re_nsub > 0 ?
216*7c478bd9Sstevel@tonic-gate (int) reg_comp.r_adv.re_nsub : 0;
217*7c478bd9Sstevel@tonic-gate regerrno = 0;
218*7c478bd9Sstevel@tonic-gate
219*7c478bd9Sstevel@tonic-gate /*
220*7c478bd9Sstevel@tonic-gate * Set the header flags for use by vi
221*7c478bd9Sstevel@tonic-gate */
222*7c478bd9Sstevel@tonic-gate if (instr[0] == '^') /* if beginning of string, */
223*7c478bd9Sstevel@tonic-gate reg_comp.r_head[0] = 1; /* set special flag */
224*7c478bd9Sstevel@tonic-gate else
225*7c478bd9Sstevel@tonic-gate reg_comp.r_head[0] = 0; /* clear special flag */
226*7c478bd9Sstevel@tonic-gate /*
227*7c478bd9Sstevel@tonic-gate * note that for a single BRE, nbra will be 0 here.
228*7c478bd9Sstevel@tonic-gate * we're guaranteed that, at this point, a RE has been found.
229*7c478bd9Sstevel@tonic-gate */
230*7c478bd9Sstevel@tonic-gate reg_comp.r_head[1] = 1; /* set special flag */
231*7c478bd9Sstevel@tonic-gate /*
232*7c478bd9Sstevel@tonic-gate * Copy our reg_comp structure to expbuf
233*7c478bd9Sstevel@tonic-gate */
234*7c478bd9Sstevel@tonic-gate (void) memcpy(expbuf, (char *) ®_comp, regexc_size);
235*7c478bd9Sstevel@tonic-gate
236*7c478bd9Sstevel@tonic-gate out:
237*7c478bd9Sstevel@tonic-gate /*
238*7c478bd9Sstevel@tonic-gate * Return code from libgen regcomp with mods. Note weird return
239*7c478bd9Sstevel@tonic-gate * value - if space is malloc'd return pointer to start of space,
240*7c478bd9Sstevel@tonic-gate * if user provided his own space, return pointer to 1+last byte
241*7c478bd9Sstevel@tonic-gate * of his space.
242*7c478bd9Sstevel@tonic-gate */
243*7c478bd9Sstevel@tonic-gate if (regerrno != 0) {
244*7c478bd9Sstevel@tonic-gate if (alloc)
245*7c478bd9Sstevel@tonic-gate free(expbuf);
246*7c478bd9Sstevel@tonic-gate return (NULL);
247*7c478bd9Sstevel@tonic-gate }
248*7c478bd9Sstevel@tonic-gate reglength = regexc_size;
249*7c478bd9Sstevel@tonic-gate
250*7c478bd9Sstevel@tonic-gate if (alloc)
251*7c478bd9Sstevel@tonic-gate return (expbuf);
252*7c478bd9Sstevel@tonic-gate else
253*7c478bd9Sstevel@tonic-gate return (expbuf + regexc_size);
254*7c478bd9Sstevel@tonic-gate }
255*7c478bd9Sstevel@tonic-gate
256*7c478bd9Sstevel@tonic-gate
257*7c478bd9Sstevel@tonic-gate /*
258*7c478bd9Sstevel@tonic-gate * dhl_step: step through a string until a RE match is found, or end of str
259*7c478bd9Sstevel@tonic-gate */
260*7c478bd9Sstevel@tonic-gate static int
dhl_step(str,ep)261*7c478bd9Sstevel@tonic-gate dhl_step(str, ep)
262*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */
263*7c478bd9Sstevel@tonic-gate const char *ep; /* compiled RE from dhl_compile() */
264*7c478bd9Sstevel@tonic-gate {
265*7c478bd9Sstevel@tonic-gate /*
266*7c478bd9Sstevel@tonic-gate * Check if we're passed a null ep
267*7c478bd9Sstevel@tonic-gate */
268*7c478bd9Sstevel@tonic-gate if (ep == NULL) {
269*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */
270*7c478bd9Sstevel@tonic-gate return (0);
271*7c478bd9Sstevel@tonic-gate }
272*7c478bd9Sstevel@tonic-gate /*
273*7c478bd9Sstevel@tonic-gate * Call common routine with r_stp (step) structure
274*7c478bd9Sstevel@tonic-gate */
275*7c478bd9Sstevel@tonic-gate return (dhl_doit(str, &(((struct regex_comp *) ep)->r_stp),
276*7c478bd9Sstevel@tonic-gate ((locs != NULL) ? REG_NOTBOL : 0)));
277*7c478bd9Sstevel@tonic-gate }
278*7c478bd9Sstevel@tonic-gate
279*7c478bd9Sstevel@tonic-gate /*
280*7c478bd9Sstevel@tonic-gate * dhl_advance: implement advance
281*7c478bd9Sstevel@tonic-gate */
282*7c478bd9Sstevel@tonic-gate static int
dhl_advance(str,ep)283*7c478bd9Sstevel@tonic-gate dhl_advance(str, ep)
284*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */
285*7c478bd9Sstevel@tonic-gate const char *ep; /* compiled RE from dhl_compile() */
286*7c478bd9Sstevel@tonic-gate {
287*7c478bd9Sstevel@tonic-gate int rv;
288*7c478bd9Sstevel@tonic-gate /*
289*7c478bd9Sstevel@tonic-gate * Check if we're passed a null ep
290*7c478bd9Sstevel@tonic-gate */
291*7c478bd9Sstevel@tonic-gate if (ep == NULL) {
292*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */
293*7c478bd9Sstevel@tonic-gate return (0);
294*7c478bd9Sstevel@tonic-gate }
295*7c478bd9Sstevel@tonic-gate /*
296*7c478bd9Sstevel@tonic-gate * Call common routine with r_adv (advance) structure
297*7c478bd9Sstevel@tonic-gate */
298*7c478bd9Sstevel@tonic-gate rv = dhl_doit(str, &(((struct regex_comp *) ep)->r_adv), 0);
299*7c478bd9Sstevel@tonic-gate loc1 = NULL; /* Clear it per the compile man page */
300*7c478bd9Sstevel@tonic-gate return (rv);
301*7c478bd9Sstevel@tonic-gate }
302*7c478bd9Sstevel@tonic-gate
303*7c478bd9Sstevel@tonic-gate /*
304*7c478bd9Sstevel@tonic-gate * dhl_doit - common code for step and advance
305*7c478bd9Sstevel@tonic-gate */
306*7c478bd9Sstevel@tonic-gate static int
dhl_doit(str,rep,flags)307*7c478bd9Sstevel@tonic-gate dhl_doit(str, rep, flags)
308*7c478bd9Sstevel@tonic-gate const char *str; /* characters to be checked for a match */
309*7c478bd9Sstevel@tonic-gate const regex_t *rep;
310*7c478bd9Sstevel@tonic-gate const int flags; /* flags to be passed to regexec directly */
311*7c478bd9Sstevel@tonic-gate {
312*7c478bd9Sstevel@tonic-gate int rv;
313*7c478bd9Sstevel@tonic-gate int i;
314*7c478bd9Sstevel@tonic-gate regmatch_t *prm; /* ptr to current regmatch_t */
315*7c478bd9Sstevel@tonic-gate
316*7c478bd9Sstevel@tonic-gate /*
317*7c478bd9Sstevel@tonic-gate * Check if we're passed a null regex_t
318*7c478bd9Sstevel@tonic-gate */
319*7c478bd9Sstevel@tonic-gate if (rep == NULL) {
320*7c478bd9Sstevel@tonic-gate regerrno = 41; /* No remembered search string error */
321*7c478bd9Sstevel@tonic-gate return (0);
322*7c478bd9Sstevel@tonic-gate }
323*7c478bd9Sstevel@tonic-gate
324*7c478bd9Sstevel@tonic-gate regerrno = 0;
325*7c478bd9Sstevel@tonic-gate prm = &rm[0];
326*7c478bd9Sstevel@tonic-gate
327*7c478bd9Sstevel@tonic-gate if ((rv = regexec(rep, str, SEPSIZE, prm, flags)) != REG_OK) {
328*7c478bd9Sstevel@tonic-gate if (rv == REG_NOMATCH)
329*7c478bd9Sstevel@tonic-gate return (0);
330*7c478bd9Sstevel@tonic-gate regerrno = map_errnos(rv);
331*7c478bd9Sstevel@tonic-gate return (0);
332*7c478bd9Sstevel@tonic-gate }
333*7c478bd9Sstevel@tonic-gate
334*7c478bd9Sstevel@tonic-gate loc1 = (char *)str + prm->rm_so;
335*7c478bd9Sstevel@tonic-gate loc2 = (char *)str + prm->rm_eo;
336*7c478bd9Sstevel@tonic-gate
337*7c478bd9Sstevel@tonic-gate /*
338*7c478bd9Sstevel@tonic-gate * Now we need to fill up the bra lists with all of the sub re's
339*7c478bd9Sstevel@tonic-gate * Note we subtract nsub -1, and preincrement prm.
340*7c478bd9Sstevel@tonic-gate */
341*7c478bd9Sstevel@tonic-gate for (i = 0; i <= rep->re_nsub; i++) {
342*7c478bd9Sstevel@tonic-gate prm++; /* XXX inc past first subexp */
343*7c478bd9Sstevel@tonic-gate braslist[i] = (char *)str + prm->rm_so;
344*7c478bd9Sstevel@tonic-gate braelist[i] = (char *)str + prm->rm_eo;
345*7c478bd9Sstevel@tonic-gate if (i >= SEPSIZE) {
346*7c478bd9Sstevel@tonic-gate regerrno = 50; /* regex overflow */
347*7c478bd9Sstevel@tonic-gate return (0);
348*7c478bd9Sstevel@tonic-gate }
349*7c478bd9Sstevel@tonic-gate }
350*7c478bd9Sstevel@tonic-gate
351*7c478bd9Sstevel@tonic-gate /*
352*7c478bd9Sstevel@tonic-gate * Inverse logic, a zero from regexec - success, is a 1
353*7c478bd9Sstevel@tonic-gate * from advance/step.
354*7c478bd9Sstevel@tonic-gate */
355*7c478bd9Sstevel@tonic-gate
356*7c478bd9Sstevel@tonic-gate return (rv == 0);
357*7c478bd9Sstevel@tonic-gate }
358*7c478bd9Sstevel@tonic-gate
359*7c478bd9Sstevel@tonic-gate
360*7c478bd9Sstevel@tonic-gate /*
361*7c478bd9Sstevel@tonic-gate * regerrno to compile/step error mapping:
362*7c478bd9Sstevel@tonic-gate * This is really a big compromise. Some errors don't map at all
363*7c478bd9Sstevel@tonic-gate * like regcomp error 15 is generated by both compile() error types
364*7c478bd9Sstevel@tonic-gate * 44 & 46. So which one should we map to?
365*7c478bd9Sstevel@tonic-gate * Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
366*7c478bd9Sstevel@tonic-gate * To do your errors right use xregerr() to get the regcomp error
367*7c478bd9Sstevel@tonic-gate * string and print that.
368*7c478bd9Sstevel@tonic-gate *
369*7c478bd9Sstevel@tonic-gate * | regcomp/regexec | Compile/step/advance |
370*7c478bd9Sstevel@tonic-gate * +---------------------------------+--------------------------------------+
371*7c478bd9Sstevel@tonic-gate * 0 REG_OK Pattern matched 1 - Pattern matched
372*7c478bd9Sstevel@tonic-gate * 1 REG_NOMATCH No match 0 - Pattern didn't match
373*7c478bd9Sstevel@tonic-gate * 2 REG_ECOLLATE Bad collation elmnt. 67 - Returned by compile on mbtowc err
374*7c478bd9Sstevel@tonic-gate * 3 REG_EESCAPE trailing \ in patrn 45 - } expected after \.
375*7c478bd9Sstevel@tonic-gate * 4 REG_ENEWLINE \n before end pattrn 36 - Illegal or missing delimiter.
376*7c478bd9Sstevel@tonic-gate * 5 REG_ENSUB Over 9 \( \) pairs 43 - Too many \(
377*7c478bd9Sstevel@tonic-gate * 6 REG_ESUBREG Bad number in \[0-9] 25 - ``\digit'' out of range.
378*7c478bd9Sstevel@tonic-gate * 7 REG_EBRACK [ ] inbalance 49 - [ ] imbalance.
379*7c478bd9Sstevel@tonic-gate * 8 REG_EPAREN ( ) inbalance 42 - \(~\) imbalance.
380*7c478bd9Sstevel@tonic-gate * 9 REG_EBRACE \{ \} inbalance 45 - } expected after \.
381*7c478bd9Sstevel@tonic-gate * 10 REG_ERANGE bad range endpoint 11 - Range endpoint too large.
382*7c478bd9Sstevel@tonic-gate * 11 REG_ESPACE no memory for pattern 50 - Regular expression overflow.
383*7c478bd9Sstevel@tonic-gate * 12 REG_BADRPT invalid repetition 36 - Illegal or missing delimiter.
384*7c478bd9Sstevel@tonic-gate * 13 REG_ECTYPE invalid char-class 67 - illegal byte sequence
385*7c478bd9Sstevel@tonic-gate * 14 REG_BADPAT syntax error 50 - Regular expression overflow.
386*7c478bd9Sstevel@tonic-gate * 15 REG_BADBR \{ \} contents bad 46 - First number exceeds 2nd in \{~\}
387*7c478bd9Sstevel@tonic-gate * 16 REG_EFATAL internal error 50 - Regular expression overflow.
388*7c478bd9Sstevel@tonic-gate * 17 REG_ECHAR bad mulitbyte char 67 - illegal byte sequence
389*7c478bd9Sstevel@tonic-gate * 18 REG_STACK stack overflow 50 - Regular expression overflow.
390*7c478bd9Sstevel@tonic-gate * 19 REG_ENOSYS function not supported 50- Regular expression overflow.
391*7c478bd9Sstevel@tonic-gate *
392*7c478bd9Sstevel@tonic-gate * For reference here's the compile/step errno's. We don't generate
393*7c478bd9Sstevel@tonic-gate * 41 here - it's done earlier, nor 44 since we can't tell if from 46.
394*7c478bd9Sstevel@tonic-gate *
395*7c478bd9Sstevel@tonic-gate * 11 - Range endpoint too large.
396*7c478bd9Sstevel@tonic-gate * 16 - Bad number.
397*7c478bd9Sstevel@tonic-gate * 25 - ``\digit'' out of range.
398*7c478bd9Sstevel@tonic-gate * 36 - Illegal or missing delimiter.
399*7c478bd9Sstevel@tonic-gate * 41 - No remembered search string.
400*7c478bd9Sstevel@tonic-gate * 42 - \(~\) imbalance.
401*7c478bd9Sstevel@tonic-gate * 43 - Too many \(.
402*7c478bd9Sstevel@tonic-gate * 44 - More than 2 numbers given in "\{~\}"
403*7c478bd9Sstevel@tonic-gate * 45 - } expected after \.
404*7c478bd9Sstevel@tonic-gate * 46 - First number exceeds 2nd in "\{~\}"
405*7c478bd9Sstevel@tonic-gate * 49 - [ ] imbalance.
406*7c478bd9Sstevel@tonic-gate * 50 - Regular expression overflow.
407*7c478bd9Sstevel@tonic-gate */
408*7c478bd9Sstevel@tonic-gate
409*7c478bd9Sstevel@tonic-gate static int
map_errnos(int Errno)410*7c478bd9Sstevel@tonic-gate map_errnos(int Errno)
411*7c478bd9Sstevel@tonic-gate {
412*7c478bd9Sstevel@tonic-gate switch (Errno) {
413*7c478bd9Sstevel@tonic-gate case REG_ECOLLATE:
414*7c478bd9Sstevel@tonic-gate regerrno = 67;
415*7c478bd9Sstevel@tonic-gate break;
416*7c478bd9Sstevel@tonic-gate case REG_EESCAPE:
417*7c478bd9Sstevel@tonic-gate regerrno = 45;
418*7c478bd9Sstevel@tonic-gate break;
419*7c478bd9Sstevel@tonic-gate case REG_ENEWLINE:
420*7c478bd9Sstevel@tonic-gate regerrno = 36;
421*7c478bd9Sstevel@tonic-gate break;
422*7c478bd9Sstevel@tonic-gate case REG_ENSUB:
423*7c478bd9Sstevel@tonic-gate regerrno = 43;
424*7c478bd9Sstevel@tonic-gate break;
425*7c478bd9Sstevel@tonic-gate case REG_ESUBREG:
426*7c478bd9Sstevel@tonic-gate regerrno = 25;
427*7c478bd9Sstevel@tonic-gate break;
428*7c478bd9Sstevel@tonic-gate case REG_EBRACK:
429*7c478bd9Sstevel@tonic-gate regerrno = 49;
430*7c478bd9Sstevel@tonic-gate break;
431*7c478bd9Sstevel@tonic-gate case REG_EPAREN:
432*7c478bd9Sstevel@tonic-gate regerrno = 42;
433*7c478bd9Sstevel@tonic-gate break;
434*7c478bd9Sstevel@tonic-gate case REG_EBRACE:
435*7c478bd9Sstevel@tonic-gate regerrno = 45;
436*7c478bd9Sstevel@tonic-gate break;
437*7c478bd9Sstevel@tonic-gate case REG_ERANGE:
438*7c478bd9Sstevel@tonic-gate regerrno = 11;
439*7c478bd9Sstevel@tonic-gate break;
440*7c478bd9Sstevel@tonic-gate case REG_ESPACE:
441*7c478bd9Sstevel@tonic-gate regerrno = 50;
442*7c478bd9Sstevel@tonic-gate break;
443*7c478bd9Sstevel@tonic-gate case REG_BADRPT:
444*7c478bd9Sstevel@tonic-gate regerrno = 36;
445*7c478bd9Sstevel@tonic-gate break;
446*7c478bd9Sstevel@tonic-gate case REG_ECTYPE:
447*7c478bd9Sstevel@tonic-gate regerrno = 67;
448*7c478bd9Sstevel@tonic-gate break;
449*7c478bd9Sstevel@tonic-gate case REG_BADPAT:
450*7c478bd9Sstevel@tonic-gate regerrno = 50;
451*7c478bd9Sstevel@tonic-gate break;
452*7c478bd9Sstevel@tonic-gate case REG_BADBR:
453*7c478bd9Sstevel@tonic-gate regerrno = 46;
454*7c478bd9Sstevel@tonic-gate break;
455*7c478bd9Sstevel@tonic-gate case REG_EFATAL:
456*7c478bd9Sstevel@tonic-gate regerrno = 50;
457*7c478bd9Sstevel@tonic-gate break;
458*7c478bd9Sstevel@tonic-gate case REG_ECHAR:
459*7c478bd9Sstevel@tonic-gate regerrno = 67;
460*7c478bd9Sstevel@tonic-gate break;
461*7c478bd9Sstevel@tonic-gate case REG_STACK:
462*7c478bd9Sstevel@tonic-gate regerrno = 50;
463*7c478bd9Sstevel@tonic-gate break;
464*7c478bd9Sstevel@tonic-gate case REG_ENOSYS:
465*7c478bd9Sstevel@tonic-gate regerrno = 50;
466*7c478bd9Sstevel@tonic-gate break;
467*7c478bd9Sstevel@tonic-gate default:
468*7c478bd9Sstevel@tonic-gate regerrno = 50;
469*7c478bd9Sstevel@tonic-gate break;
470*7c478bd9Sstevel@tonic-gate }
471*7c478bd9Sstevel@tonic-gate return (regerrno);
472*7c478bd9Sstevel@tonic-gate }
473*7c478bd9Sstevel@tonic-gate
474*7c478bd9Sstevel@tonic-gate /*
475*7c478bd9Sstevel@tonic-gate * This is a routine to clean up the subtle substructure of the struct
476*7c478bd9Sstevel@tonic-gate * regex_comp type for use by clients of this module. Since the struct
477*7c478bd9Sstevel@tonic-gate * type is private, we use a generic interface, and trust the
478*7c478bd9Sstevel@tonic-gate * application to be damn sure that this operation is valid for the
479*7c478bd9Sstevel@tonic-gate * named memory.
480*7c478bd9Sstevel@tonic-gate */
481*7c478bd9Sstevel@tonic-gate
482*7c478bd9Sstevel@tonic-gate void
regex_comp_free(void * a)483*7c478bd9Sstevel@tonic-gate regex_comp_free(void * a)
484*7c478bd9Sstevel@tonic-gate {
485*7c478bd9Sstevel@tonic-gate /*
486*7c478bd9Sstevel@tonic-gate * Free any data being held for previous search strings
487*7c478bd9Sstevel@tonic-gate */
488*7c478bd9Sstevel@tonic-gate
489*7c478bd9Sstevel@tonic-gate if (((struct regex_comp *) a) == NULL) {
490*7c478bd9Sstevel@tonic-gate return;
491*7c478bd9Sstevel@tonic-gate }
492*7c478bd9Sstevel@tonic-gate
493*7c478bd9Sstevel@tonic-gate regfree(&((struct regex_comp *)a)->r_stp);
494*7c478bd9Sstevel@tonic-gate regfree(&((struct regex_comp *)a)->r_adv);
495*7c478bd9Sstevel@tonic-gate }
496