xref: /freebsd/stand/common/interp_parse.c (revision ca987d4641cdcd7f27e153db17c5bf064934faf5)
1*ca987d46SWarner Losh /*-
2*ca987d46SWarner Losh  * Redistribution and use in source and binary forms, with or without
3*ca987d46SWarner Losh  * modification, are permitted provided that the following conditions
4*ca987d46SWarner Losh  * are met:
5*ca987d46SWarner Losh  * 1. Redistributions of source code must retain the above copyright
6*ca987d46SWarner Losh  *    notice, this list of conditions and the following disclaimer.
7*ca987d46SWarner Losh  * 2. Redistributions in binary form must reproduce the above copyright
8*ca987d46SWarner Losh  *    notice, this list of conditions and the following disclaimer in the
9*ca987d46SWarner Losh  *    documentation and/or other materials provided with the distribution.
10*ca987d46SWarner Losh  *
11*ca987d46SWarner Losh  * Jordan K. Hubbard
12*ca987d46SWarner Losh  * 29 August 1998
13*ca987d46SWarner Losh  *
14*ca987d46SWarner Losh  * The meat of the simple parser.
15*ca987d46SWarner Losh  */
16*ca987d46SWarner Losh 
17*ca987d46SWarner Losh #include <sys/cdefs.h>
18*ca987d46SWarner Losh __FBSDID("$FreeBSD$");
19*ca987d46SWarner Losh 
20*ca987d46SWarner Losh #include <stand.h>
21*ca987d46SWarner Losh #include <string.h>
22*ca987d46SWarner Losh #include "bootstrap.h"
23*ca987d46SWarner Losh 
/* Forward declarations of the file-local helpers defined further down. */
static char	*variable_lookup(char *name);
static int	 insert(int *argcp, char *buf);
static void	 clean(void);

/* Parser limits. */
#define MAXARGS		20	/* maximum number of elements */
#define PARSE_BUFSIZE	1024	/* maximum size of one element */

/*
 * Argument strings collected by parse().  Entries are filled in by insert()
 * and released by clean().
 */
static char		*args[MAXARGS];
31*ca987d46SWarner Losh 
32*ca987d46SWarner Losh /*
33*ca987d46SWarner Losh  * parse: accept a string of input and "parse" it for backslash
34*ca987d46SWarner Losh  * substitutions and environment variable expansions (${var}),
35*ca987d46SWarner Losh  * returning an argc/argv style vector of whitespace separated
36*ca987d46SWarner Losh  * arguments.  Returns 0 on success, 1 on failure (ok, ok, so I
37*ca987d46SWarner Losh  * wimped-out on the error codes! :).
38*ca987d46SWarner Losh  *
39*ca987d46SWarner Losh  * Note that the argv array returned must be freed by the caller, but
40*ca987d46SWarner Losh  * we own the space allocated for arguments and will free that on next
41*ca987d46SWarner Losh  * invocation.  This allows argv consumers to modify the array if
42*ca987d46SWarner Losh  * required.
43*ca987d46SWarner Losh  *
44*ca987d46SWarner Losh  * NB: environment variables that expand to more than one whitespace
45*ca987d46SWarner Losh  * separated token will be returned as a single argv[] element, not
46*ca987d46SWarner Losh  * split in turn.  Expanded text is also immune to further backslash
47*ca987d46SWarner Losh  * elimination or expansion since this is a one-pass, non-recursive
48*ca987d46SWarner Losh  * parser.  You didn't specify more than this so if you want more, ask
49*ca987d46SWarner Losh  * me. - jkh
50*ca987d46SWarner Losh  */
51*ca987d46SWarner Losh 
/*
 * PARSE_FAIL: abandon the enclosing function with a failure status when
 * `expr' is true.
 *
 * On failure it reports the source line, releases the argument vector via
 * clean(), frees the locals `copy' and `buf' (both must be in scope at the
 * point of use; free(NULL) is harmless) and returns 1.
 *
 * The body is wrapped in do { } while (0) so that "PARSE_FAIL(x);" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define PARSE_FAIL(expr) \
do { \
    if (expr) { \
	printf("fail at line %d\n", __LINE__); \
	clean(); \
	free(copy); \
	free(buf); \
	return 1; \
    } \
} while (0)
60*ca987d46SWarner Losh 
/*
 * isdelim: map an opening variable delimiter to its closing counterpart.
 * Returns '}' for '{', ')' for '(' and '\0' for any other character.
 */
static char
isdelim(int ch)
{
    switch (ch) {
    case '{':
	return '}';
    case '(':
	return ')';
    default:
	return '\0';
    }
}
71*ca987d46SWarner Losh 
/* isquote: return 1 when ch is the single-quote character, 0 otherwise. */
static int
isquote(int ch)
{
    if (ch == '\'')
	return (1);
    return (0);
}
77*ca987d46SWarner Losh 
/* isdquote: return 1 when ch is the double-quote character, 0 otherwise. */
static int
isdquote(int ch)
{
    if (ch == '"')
	return (1);
    return (0);
}
83*ca987d46SWarner Losh 
84*ca987d46SWarner Losh int
85*ca987d46SWarner Losh parse(int *argc, char ***argv, char *str)
86*ca987d46SWarner Losh {
87*ca987d46SWarner Losh     int ac;
88*ca987d46SWarner Losh     char *val, *p, *q, *copy = NULL;
89*ca987d46SWarner Losh     size_t i = 0;
90*ca987d46SWarner Losh     char token, tmp, quote, dquote, *buf;
91*ca987d46SWarner Losh     enum { STR, VAR, WHITE } state;
92*ca987d46SWarner Losh 
93*ca987d46SWarner Losh     ac = *argc = 0;
94*ca987d46SWarner Losh     dquote = quote = 0;
95*ca987d46SWarner Losh     if (!str || (p = copy = backslash(str)) == NULL)
96*ca987d46SWarner Losh 	return 1;
97*ca987d46SWarner Losh 
98*ca987d46SWarner Losh     /* Initialize vector and state */
99*ca987d46SWarner Losh     clean();
100*ca987d46SWarner Losh     state = STR;
101*ca987d46SWarner Losh     buf = (char *)malloc(PARSE_BUFSIZE);
102*ca987d46SWarner Losh     token = 0;
103*ca987d46SWarner Losh 
104*ca987d46SWarner Losh     /* And awaaaaaaaaay we go! */
105*ca987d46SWarner Losh     while (*p) {
106*ca987d46SWarner Losh 	switch (state) {
107*ca987d46SWarner Losh 	case STR:
108*ca987d46SWarner Losh 	    if ((*p == '\\') && p[1]) {
109*ca987d46SWarner Losh 		p++;
110*ca987d46SWarner Losh 		PARSE_FAIL(i == (PARSE_BUFSIZE - 1));
111*ca987d46SWarner Losh 		buf[i++] = *p++;
112*ca987d46SWarner Losh 	    } else if (isquote(*p)) {
113*ca987d46SWarner Losh 		quote = quote ? 0 : *p;
114*ca987d46SWarner Losh 		if (dquote) { /* keep quote */
115*ca987d46SWarner Losh 			PARSE_FAIL(i == (PARSE_BUFSIZE - 1));
116*ca987d46SWarner Losh 			buf[i++] = *p++;
117*ca987d46SWarner Losh 		} else
118*ca987d46SWarner Losh 			++p;
119*ca987d46SWarner Losh 	    } else if (isdquote(*p)) {
120*ca987d46SWarner Losh 		dquote = dquote ? 0 : *p;
121*ca987d46SWarner Losh 		if (quote) { /* keep dquote */
122*ca987d46SWarner Losh 			PARSE_FAIL(i == (PARSE_BUFSIZE - 1));
123*ca987d46SWarner Losh 			buf[i++] = *p++;
124*ca987d46SWarner Losh 		} else
125*ca987d46SWarner Losh 			++p;
126*ca987d46SWarner Losh 	    } else if (isspace(*p) && !quote && !dquote) {
127*ca987d46SWarner Losh 		state = WHITE;
128*ca987d46SWarner Losh 		if (i) {
129*ca987d46SWarner Losh 		    buf[i] = '\0';
130*ca987d46SWarner Losh 		    PARSE_FAIL(insert(&ac, buf));
131*ca987d46SWarner Losh 		    i = 0;
132*ca987d46SWarner Losh 		}
133*ca987d46SWarner Losh 		++p;
134*ca987d46SWarner Losh 	    } else if (*p == '$' && !quote) {
135*ca987d46SWarner Losh 		token = isdelim(*(p + 1));
136*ca987d46SWarner Losh 		if (token)
137*ca987d46SWarner Losh 		    p += 2;
138*ca987d46SWarner Losh 		else
139*ca987d46SWarner Losh 		    ++p;
140*ca987d46SWarner Losh 		state = VAR;
141*ca987d46SWarner Losh 	    } else {
142*ca987d46SWarner Losh 		PARSE_FAIL(i == (PARSE_BUFSIZE - 1));
143*ca987d46SWarner Losh 		buf[i++] = *p++;
144*ca987d46SWarner Losh 	    }
145*ca987d46SWarner Losh 	    break;
146*ca987d46SWarner Losh 
147*ca987d46SWarner Losh 	case WHITE:
148*ca987d46SWarner Losh 	    if (isspace(*p))
149*ca987d46SWarner Losh 		++p;
150*ca987d46SWarner Losh 	    else
151*ca987d46SWarner Losh 		state = STR;
152*ca987d46SWarner Losh 	    break;
153*ca987d46SWarner Losh 
154*ca987d46SWarner Losh 	case VAR:
155*ca987d46SWarner Losh 	    if (token) {
156*ca987d46SWarner Losh 		PARSE_FAIL((q = strchr(p, token)) == NULL);
157*ca987d46SWarner Losh 	    } else {
158*ca987d46SWarner Losh 		q = p;
159*ca987d46SWarner Losh 		while (*q && !isspace(*q))
160*ca987d46SWarner Losh 		    ++q;
161*ca987d46SWarner Losh 	    }
162*ca987d46SWarner Losh 	    tmp = *q;
163*ca987d46SWarner Losh 	    *q = '\0';
164*ca987d46SWarner Losh 	    if ((val = variable_lookup(p)) != NULL) {
165*ca987d46SWarner Losh 		size_t len = strlen(val);
166*ca987d46SWarner Losh 
167*ca987d46SWarner Losh 		strncpy(buf + i, val, PARSE_BUFSIZE - (i + 1));
168*ca987d46SWarner Losh 		i += min(len, PARSE_BUFSIZE - 1);
169*ca987d46SWarner Losh 	    }
170*ca987d46SWarner Losh 	    *q = tmp;	/* restore value */
171*ca987d46SWarner Losh 	    p = q + (token ? 1 : 0);
172*ca987d46SWarner Losh 	    state = STR;
173*ca987d46SWarner Losh 	    break;
174*ca987d46SWarner Losh 	}
175*ca987d46SWarner Losh     }
176*ca987d46SWarner Losh     /* missing terminating ' or " */
177*ca987d46SWarner Losh     PARSE_FAIL(quote || dquote);
178*ca987d46SWarner Losh     /* If at end of token, add it */
179*ca987d46SWarner Losh     if (i && state == STR) {
180*ca987d46SWarner Losh 	buf[i] = '\0';
181*ca987d46SWarner Losh 	PARSE_FAIL(insert(&ac, buf));
182*ca987d46SWarner Losh     }
183*ca987d46SWarner Losh     args[ac] = NULL;
184*ca987d46SWarner Losh     *argc = ac;
185*ca987d46SWarner Losh     *argv = (char **)malloc((sizeof(char *) * ac + 1));
186*ca987d46SWarner Losh     bcopy(args, *argv, sizeof(char *) * ac + 1);
187*ca987d46SWarner Losh     free(buf);
188*ca987d46SWarner Losh     free(copy);
189*ca987d46SWarner Losh     return 0;
190*ca987d46SWarner Losh }
191*ca987d46SWarner Losh 
192*ca987d46SWarner Losh #define MAXARGS	20
193*ca987d46SWarner Losh 
194*ca987d46SWarner Losh /* Clean vector space */
195*ca987d46SWarner Losh static void
196*ca987d46SWarner Losh clean(void)
197*ca987d46SWarner Losh {
198*ca987d46SWarner Losh     int		i;
199*ca987d46SWarner Losh 
200*ca987d46SWarner Losh     for (i = 0; i < MAXARGS; i++) {
201*ca987d46SWarner Losh 	if (args[i] != NULL) {
202*ca987d46SWarner Losh 	    free(args[i]);
203*ca987d46SWarner Losh 	    args[i] = NULL;
204*ca987d46SWarner Losh 	}
205*ca987d46SWarner Losh     }
206*ca987d46SWarner Losh }
207*ca987d46SWarner Losh 
208*ca987d46SWarner Losh static int
209*ca987d46SWarner Losh insert(int *argcp, char *buf)
210*ca987d46SWarner Losh {
211*ca987d46SWarner Losh     if (*argcp >= MAXARGS)
212*ca987d46SWarner Losh 	return 1;
213*ca987d46SWarner Losh     args[(*argcp)++] = strdup(buf);
214*ca987d46SWarner Losh     return 0;
215*ca987d46SWarner Losh }
216*ca987d46SWarner Losh 
/*
 * variable_lookup: resolve a variable name to its value.
 * Currently just the result of getenv(name), which is NULL when the
 * variable is not set.
 */
static char *
variable_lookup(char *name)
{
    char *value;

    /* XXX search "special variable" space first? */
    value = (char *)getenv(name);
    return (value);
}
223