1ca987d46SWarner Losh /*- 2ca987d46SWarner Losh * Redistribution and use in source and binary forms, with or without 3ca987d46SWarner Losh * modification, are permitted provided that the following conditions 4ca987d46SWarner Losh * are met: 5ca987d46SWarner Losh * 1. Redistributions of source code must retain the above copyright 6ca987d46SWarner Losh * notice, this list of conditions and the following disclaimer. 7ca987d46SWarner Losh * 2. Redistributions in binary form must reproduce the above copyright 8ca987d46SWarner Losh * notice, this list of conditions and the following disclaimer in the 9ca987d46SWarner Losh * documentation and/or other materials provided with the distribution. 10ca987d46SWarner Losh * 11ca987d46SWarner Losh * Jordan K. Hubbard 12ca987d46SWarner Losh * 29 August 1998 13ca987d46SWarner Losh * 14ca987d46SWarner Losh * The meat of the simple parser. 15ca987d46SWarner Losh */ 16ca987d46SWarner Losh 17ca987d46SWarner Losh #include <sys/cdefs.h> 18ca987d46SWarner Losh __FBSDID("$FreeBSD$"); 19ca987d46SWarner Losh 20ca987d46SWarner Losh #include <stand.h> 21ca987d46SWarner Losh #include <string.h> 22ca987d46SWarner Losh #include "bootstrap.h" 23ca987d46SWarner Losh 24ca987d46SWarner Losh static void clean(void); 25ca987d46SWarner Losh static int insert(int *argcp, char *buf); 26ca987d46SWarner Losh static char *variable_lookup(char *name); 27ca987d46SWarner Losh 28ca987d46SWarner Losh #define PARSE_BUFSIZE 1024 /* maximum size of one element */ 29ca987d46SWarner Losh #define MAXARGS 20 /* maximum number of elements */ 30ca987d46SWarner Losh static char *args[MAXARGS]; 31ca987d46SWarner Losh 32ca987d46SWarner Losh /* 33ca987d46SWarner Losh * parse: accept a string of input and "parse" it for backslash 34ca987d46SWarner Losh * substitutions and environment variable expansions (${var}), 35ca987d46SWarner Losh * returning an argc/argv style vector of whitespace separated 36ca987d46SWarner Losh * arguments. Returns 0 on success, 1 on failure (ok, ok, so I 37ca987d46SWarner Losh * wimped-out on the error codes! :). 38ca987d46SWarner Losh * 39ca987d46SWarner Losh * Note that the argv array returned must be freed by the caller, but 40ca987d46SWarner Losh * we own the space allocated for arguments and will free that on next 41ca987d46SWarner Losh * invocation. This allows argv consumers to modify the array if 42ca987d46SWarner Losh * required. 43ca987d46SWarner Losh * 44ca987d46SWarner Losh * NB: environment variables that expand to more than one whitespace 45ca987d46SWarner Losh * separated token will be returned as a single argv[] element, not 46ca987d46SWarner Losh * split in turn. Expanded text is also immune to further backslash 47ca987d46SWarner Losh * elimination or expansion since this is a one-pass, non-recursive 48ca987d46SWarner Losh * parser. You didn't specify more than this so if you want more, ask 49ca987d46SWarner Losh * me. - jkh 50ca987d46SWarner Losh */ 51ca987d46SWarner Losh 52ca987d46SWarner Losh #define PARSE_FAIL(expr) \ 53ca987d46SWarner Losh if (expr) { \ 54ca987d46SWarner Losh printf("fail at line %d\n", __LINE__); \ 55ca987d46SWarner Losh clean(); \ 56ca987d46SWarner Losh free(copy); \ 57ca987d46SWarner Losh free(buf); \ 58ca987d46SWarner Losh return 1; \ 59ca987d46SWarner Losh } 60ca987d46SWarner Losh 61ca987d46SWarner Losh /* Accept the usual delimiters for a variable, returning counterpart */ 62ca987d46SWarner Losh static char 63ca987d46SWarner Losh isdelim(int ch) 64ca987d46SWarner Losh { 65*16bb6523SWarner Losh 66ca987d46SWarner Losh if (ch == '{') 67ca987d46SWarner Losh return '}'; 68ca987d46SWarner Losh else if (ch == '(') 69ca987d46SWarner Losh return ')'; 70ca987d46SWarner Losh return '\0'; 71ca987d46SWarner Losh } 72ca987d46SWarner Losh 73ca987d46SWarner Losh static int 74ca987d46SWarner Losh isquote(int ch) 75ca987d46SWarner Losh { 76*16bb6523SWarner Losh 77ca987d46SWarner Losh return (ch == '\''); 78ca987d46SWarner Losh } 79ca987d46SWarner Losh 80ca987d46SWarner Losh static int 81ca987d46SWarner Losh isdquote(int ch) 82ca987d46SWarner Losh { 83*16bb6523SWarner Losh 84ca987d46SWarner Losh return (ch == '"'); 85ca987d46SWarner Losh } 86ca987d46SWarner Losh 87ca987d46SWarner Losh int 88bd04a914SWarner Losh parse(int *argc, char ***argv, const char *str) 89ca987d46SWarner Losh { 90ca987d46SWarner Losh int ac; 91ca987d46SWarner Losh char *val, *p, *q, *copy = NULL; 92ca987d46SWarner Losh size_t i = 0; 93ca987d46SWarner Losh char token, tmp, quote, dquote, *buf; 94ca987d46SWarner Losh enum { STR, VAR, WHITE } state; 95ca987d46SWarner Losh 96ca987d46SWarner Losh ac = *argc = 0; 97ca987d46SWarner Losh dquote = quote = 0; 98ca987d46SWarner Losh if (!str || (p = copy = backslash(str)) == NULL) 99ca987d46SWarner Losh return 1; 100ca987d46SWarner Losh 101ca987d46SWarner Losh /* Initialize vector and state */ 102ca987d46SWarner Losh clean(); 103ca987d46SWarner Losh state = STR; 104ca987d46SWarner Losh buf = (char *)malloc(PARSE_BUFSIZE); 105ca987d46SWarner Losh token = 0; 106ca987d46SWarner Losh 107ca987d46SWarner Losh /* And awaaaaaaaaay we go! */ 108ca987d46SWarner Losh while (*p) { 109ca987d46SWarner Losh switch (state) { 110ca987d46SWarner Losh case STR: 111ca987d46SWarner Losh if ((*p == '\\') && p[1]) { 112ca987d46SWarner Losh p++; 113ca987d46SWarner Losh PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 114ca987d46SWarner Losh buf[i++] = *p++; 115ca987d46SWarner Losh } else if (isquote(*p)) { 116ca987d46SWarner Losh quote = quote ? 0 : *p; 117ca987d46SWarner Losh if (dquote) { /* keep quote */ 118ca987d46SWarner Losh PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 119ca987d46SWarner Losh buf[i++] = *p++; 120ca987d46SWarner Losh } else 121ca987d46SWarner Losh ++p; 122ca987d46SWarner Losh } else if (isdquote(*p)) { 123ca987d46SWarner Losh dquote = dquote ? 0 : *p; 124ca987d46SWarner Losh if (quote) { /* keep dquote */ 125ca987d46SWarner Losh PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 126ca987d46SWarner Losh buf[i++] = *p++; 127ca987d46SWarner Losh } else 128ca987d46SWarner Losh ++p; 129ca987d46SWarner Losh } else if (isspace(*p) && !quote && !dquote) { 130ca987d46SWarner Losh state = WHITE; 131ca987d46SWarner Losh if (i) { 132ca987d46SWarner Losh buf[i] = '\0'; 133ca987d46SWarner Losh PARSE_FAIL(insert(&ac, buf)); 134ca987d46SWarner Losh i = 0; 135ca987d46SWarner Losh } 136ca987d46SWarner Losh ++p; 137ca987d46SWarner Losh } else if (*p == '$' && !quote) { 138ca987d46SWarner Losh token = isdelim(*(p + 1)); 139ca987d46SWarner Losh if (token) 140ca987d46SWarner Losh p += 2; 141ca987d46SWarner Losh else 142ca987d46SWarner Losh ++p; 143ca987d46SWarner Losh state = VAR; 144ca987d46SWarner Losh } else { 145ca987d46SWarner Losh PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); 146ca987d46SWarner Losh buf[i++] = *p++; 147ca987d46SWarner Losh } 148ca987d46SWarner Losh break; 149ca987d46SWarner Losh 150ca987d46SWarner Losh case WHITE: 151ca987d46SWarner Losh if (isspace(*p)) 152ca987d46SWarner Losh ++p; 153ca987d46SWarner Losh else 154ca987d46SWarner Losh state = STR; 155ca987d46SWarner Losh break; 156ca987d46SWarner Losh 157ca987d46SWarner Losh case VAR: 158ca987d46SWarner Losh if (token) { 159ca987d46SWarner Losh PARSE_FAIL((q = strchr(p, token)) == NULL); 160ca987d46SWarner Losh } else { 161ca987d46SWarner Losh q = p; 162ca987d46SWarner Losh while (*q && !isspace(*q)) 163ca987d46SWarner Losh ++q; 164ca987d46SWarner Losh } 165ca987d46SWarner Losh tmp = *q; 166ca987d46SWarner Losh *q = '\0'; 167ca987d46SWarner Losh if ((val = variable_lookup(p)) != NULL) { 168ca987d46SWarner Losh size_t len = strlen(val); 169ca987d46SWarner Losh 170ca987d46SWarner Losh strncpy(buf + i, val, PARSE_BUFSIZE - (i + 1)); 171ca987d46SWarner Losh i += min(len, PARSE_BUFSIZE - 1); 172ca987d46SWarner Losh } 173ca987d46SWarner Losh *q = tmp; /* restore value */ 174ca987d46SWarner Losh p = q + (token ? 1 : 0); 175ca987d46SWarner Losh state = STR; 176ca987d46SWarner Losh break; 177ca987d46SWarner Losh } 178ca987d46SWarner Losh } 179ca987d46SWarner Losh /* missing terminating ' or " */ 180ca987d46SWarner Losh PARSE_FAIL(quote || dquote); 181ca987d46SWarner Losh /* If at end of token, add it */ 182ca987d46SWarner Losh if (i && state == STR) { 183ca987d46SWarner Losh buf[i] = '\0'; 184ca987d46SWarner Losh PARSE_FAIL(insert(&ac, buf)); 185ca987d46SWarner Losh } 186ca987d46SWarner Losh args[ac] = NULL; 187ca987d46SWarner Losh *argc = ac; 188ca987d46SWarner Losh *argv = (char **)malloc((sizeof(char *) * ac + 1)); 189ca987d46SWarner Losh bcopy(args, *argv, sizeof(char *) * ac + 1); 190ca987d46SWarner Losh free(buf); 191ca987d46SWarner Losh free(copy); 192ca987d46SWarner Losh return 0; 193ca987d46SWarner Losh } 194ca987d46SWarner Losh 195ca987d46SWarner Losh #define MAXARGS 20 196ca987d46SWarner Losh 197ca987d46SWarner Losh /* Clean vector space */ 198ca987d46SWarner Losh static void 199ca987d46SWarner Losh clean(void) 200ca987d46SWarner Losh { 201ca987d46SWarner Losh int i; 202ca987d46SWarner Losh 203ca987d46SWarner Losh for (i = 0; i < MAXARGS; i++) { 204ca987d46SWarner Losh if (args[i] != NULL) { 205ca987d46SWarner Losh free(args[i]); 206ca987d46SWarner Losh args[i] = NULL; 207ca987d46SWarner Losh } 208ca987d46SWarner Losh } 209ca987d46SWarner Losh } 210ca987d46SWarner Losh 211ca987d46SWarner Losh static int 212ca987d46SWarner Losh insert(int *argcp, char *buf) 213ca987d46SWarner Losh { 214*16bb6523SWarner Losh 215ca987d46SWarner Losh if (*argcp >= MAXARGS) 216ca987d46SWarner Losh return 1; 217ca987d46SWarner Losh args[(*argcp)++] = strdup(buf); 218ca987d46SWarner Losh return 0; 219ca987d46SWarner Losh } 220ca987d46SWarner Losh 221ca987d46SWarner Losh static char * 222ca987d46SWarner Losh variable_lookup(char *name) 223ca987d46SWarner Losh { 224*16bb6523SWarner Losh 225ca987d46SWarner Losh /* XXX search "special variable" space first? */ 226ca987d46SWarner Losh return (char *)getenv(name); 227ca987d46SWarner Losh } 228