xref: /titanic_50/usr/src/lib/libcmd/common/expr.c (revision 3db3491215579980a91e230cf21b20608fbb8259)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1992-2008 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 
23 /*
24  * expr.c
25  * Written by David Korn
26  * Tue Oct 31 08:48:11 EST 1995
27  */
28 
/*
 * Usage/help text for the expr builtin.  This is a single string literal
 * assembled by adjacent-literal concatenation (with USAGE_LICENSE spliced
 * in) and is passed to optget() by b_expr() when parsing options.
 * NOTE(review): the [+...?...] sections and the \a / \b escapes look like
 * markup interpreted by optget -- confirm against the optget documentation
 * before editing, since every character here is runtime data.
 */
29 static const char usage[] =
30 "[-?\n@(#)$Id: expr (AT&T Research) 2008-01-30 $\n]"
31 USAGE_LICENSE
32 "[+NAME?expr - evaluate arguments as an expression]"
33 "[+DESCRIPTION?\bexpr\b evaluates an expression given as arguments and writes "
34 	"the result to standard output.  The character \b0\b will be written "
35 	"to indicate a zero value and nothing will be written to indicate an "
36 	"empty string.]"
37 "[+?Most of the functionality of \bexpr\b is provided in a more natural "
38 	"way by the shell, \bsh\b(1), and \bexpr\b is provided primarily "
39 	"for backward compatibility.]"
40 "[+?Terms of the expression must be separate arguments.  A string argument is "
41 	"one that can not be identified as an integer.  Integer-valued "
42 	"arguments may be preceded by a unary plus or minus sign.  Because "
43 	"many of the operators use characters that have special meaning to "
44 	"the shell, they must be quoted when entered from the shell.]"
45 
46 "[+?Expressions are formed from the operators listed below in order "
47 	"of increasing precedence within groups.  All of the operators are "
48 	"left associative. The symbols \aexpr1\a and \aexpr2\a represent "
49 	"expressions formed from strings and integers and the following "
50 	"operators:]{"
51 	"[+\aexpr1\a \b|\b \aexpr2\a?Returns the evaluation of \aexpr1\a if "
52 	"it is neither null nor 0, otherwise returns the evaluation of expr2.]"
53 
54 	"[+\aexpr1\a \b&\b \aexpr2\a?Returns the evaluation of \aexpr1\a if "
55 	"neither expression evaluates to null or 0, otherwise returns 0.]"
56 
57 	"[+\aexpr1\a \aop\a \aexpr2\a?Returns the result of a decimal integer "
58 	"comparison if both arguments are integers; otherwise, returns the "
59 	"result of a string comparison using the locale-specific collation "
60 	"sequence. The result of each comparison will be 1 if the specified "
61 	"relationship is true, or 0 if the relationship is false.  \aop\a "
62 	"can be one of the following:]{"
63 		"[+=?Equal.]"
64 		"[+==?Equal.]"
65 		"[+>?Greater than.]"
66 		"[+>=?Greater than or equal to.]"
67 		"[+<?Less than.]"
68 		"[+<=?Less than or equal to.]"
69 		"[+!=?Not equal to.]"
70 		"}"
71 
72 	"[+\aexpr1\a \aop\a \aexpr2\a?Where \aop\a is \b+\b or \b-\b; "
73 		"addition or subtraction of decimal integer-valued arguments.]"
74 	"[+\aexpr1\a \aop\a \aexpr2\a?Where \aop\a is \b*\b, \b/\b or \b%\b; "
75 		"multiplication, division, or remainder of the	decimal	"
76 		"integer-valued arguments.]"
77 	"[+\aexpr1\a \b::\b \aexpr2\a?The matching operator : compares "
78 		"\aexpr1\a with \aexpr2\a, which must be a BRE.  Normally, "
79 		"the matching operator returns the number of bytes matched "
80 		"and 0 on failure.  However, if the pattern contains at "
81 		"least one sub-expression [\\( . . .\\)]], the string "
82 		"corresponding to \\1 will be returned.]"
83 	"[+( \aexpr1\a )?Grouping symbols.  An expression can "
84 		"be placed within parenthesis to change precedence.]"
85 	"[+match\b \astring\a \aexpr\a?Equivalent to \astring\a \b:\b "
86 		"\aexpr\a.]"
87 	"[+substr\b \astring\a \apos\a \alength\a?\alength\a character "
88 		"substring of \astring\a starting at \apos\a "
89 		"(counting from 1).]"
90 	"[+index\b \astring\a \achars\a?The position in \astring\a "
91 		"(counting from 1) of the leftmost occurrence of any "
92 		"character in \achars\a.]"
93 	"[+length\b \astring\a?The number of characters in \astring\a.]"
94 	"[+quote\b \atoken\a?Treat \atoken\a as a string operand.]"
95 	"}"
96 "[+?For backwards compatibility, unrecognized options beginning with "
97 	"a \b-\b will be treated as operands.  Portable applications "
98 	"should use \b--\b to indicate end of options.]"
99 
100 "\n"
101 "\n operand ...\n"
102 "\n"
103 
104 "[+EXIT STATUS?]{"
105 	"[+0?The expression is neither null nor	0.]"
106 	"[+1?The expression is null or 0.]"
107 	"[+2?Invalid expressions.]"
108 	"[+>2?An error occurred.]"
109 	"}"
110 "[+SEE ALSO?\bregcomp\b(5), \bgrep\b(1), \bsh\b(1)]"
111 ;
112 
113 #include	<cmd.h>
114 #include	<regex.h>
115 
/*
 * Token encoding.  An operator token is a class bit (T_ADD, T_MULT,
 * T_CMP, T_FUN) or'ed with a small index that selects the operator
 * within its class; T_OP masks that index back out.
 */
#define T_ADD		0x100	/* additive operator class */
#define T_MULT		0x200	/* multiplicative operator class */
#define T_CMP		0x400	/* comparison operator class */
#define T_FUN		0x800	/* function class (OP_INDEX ... OP_SUBSTR) */
#define T_OP		7	/* mask for the operator index within a class */

/* Node_t.type flags: a node may carry a number, a string, or both */
#define T_NUM		1
#define T_STR		2

/* comparison operators */
#define OP_EQ		(T_CMP|0)
#define OP_GT		(T_CMP|1)
#define OP_LT		(T_CMP|2)
#define OP_GE		(T_CMP|3)
#define OP_LE		(T_CMP|4)
#define OP_NE		(T_CMP|5)

/* additive operators */
#define OP_PLUS		(T_ADD|0)
#define OP_MINUS	(T_ADD|1)

/* multiplicative operators */
#define OP_MULT		(T_MULT|0)
#define OP_DIV		(T_MULT|1)
#define OP_MOD		(T_MULT|2)

/* function tokens */
#define OP_INDEX	(T_FUN|0)
#define OP_LENGTH	(T_FUN|1)
#define OP_MATCH	(T_FUN|2)
#define OP_QUOTE	(T_FUN|3)
#define OP_SUBSTR	(T_FUN|4)

/* non-zero when node np holds a valid integer value in np->num */
#define numeric(np)	((np)->type&T_NUM)
142 
143 static const struct Optable_s
144 {
145 	const char	opname[3];
146 	int		op;
147 }
148 optable[] =
149 {
150 	"|",	'|',
151 	"&",	'&',
152 	"=",	OP_EQ,
153 	"==",	OP_EQ,
154 	">",	OP_GT,
155 	"<",	OP_LT,
156 	">=",	OP_GE,
157 	"<=",	OP_LE,
158 	"!=",	OP_NE,
159 	"+",	OP_PLUS,
160 	"-",	OP_MINUS,
161 	"*",	OP_MULT,
162 	"/",	OP_DIV,
163 	"%",	OP_MOD,
164 	":",	':',
165 	"(",	'(',
166 	")",	')'
167 };
168 
/*
 * Value of a (sub)expression.  type is a combination of T_NUM and T_STR
 * and says which of num and str are meaningful.
 */
typedef struct Node_s
{
	int	type;	/* T_NUM and/or T_STR */
	long	num;	/* integer value, valid when type has T_NUM */
	char	*str;	/* string value, valid when type has T_STR */
} Node_t;
175 
/*
 * Evaluator state shared by the recursive-descent functions.
 */
typedef struct State_s
{
	int	standard;	/* non-zero: getnode() skips the keyword functions */
	char	**arglist;	/* next unread argument */
	char	buf[36];	/* scratch space used by expr_cond() to format a number */
} State_t;
182 
183 static int expr_or(State_t*, Node_t*);
184 
185 static int getnode(State_t* state, Node_t *np)
186 {
187 	register char*	sp;
188 	register char*	cp;
189 	register int	i;
190 	register int	j;
191 	register int	k;
192 	register int	tok;
193 	char*		ep;
194 
195 	if (!(cp = *state->arglist++))
196 		error(ERROR_exit(2), "argument expected");
197 	if (!state->standard)
198 		switch (cp[0])
199 		{
200 		case 'i':
201 			if (cp[1] == 'n' && !strcmp(cp, "index"))
202 			{
203 				if (!(cp = *state->arglist++))
204 					error(ERROR_exit(2), "string argument expected");
205 				if (!(ep = *state->arglist++))
206 					error(ERROR_exit(2), "chars argument expected");
207 				np->num = (ep = strpbrk(cp, ep)) ? (ep - cp + 1) : 0;
208 				np->type = T_NUM;
209 				goto next;
210 			}
211 			break;
212 		case 'l':
213 			if (cp[1] == 'e' && !strcmp(cp, "length"))
214 			{
215 				if (!(cp = *state->arglist++))
216 					error(ERROR_exit(2), "string argument expected");
217 				np->num = strlen(cp);
218 				np->type = T_NUM;
219 				goto next;
220 			}
221 			break;
222 		case 'm':
223 			if (cp[1] == 'a' && !strcmp(cp, "match"))
224 			{
225 				if (!(np->str = *state->arglist++))
226 					error(ERROR_exit(2), "pattern argument expected");
227 				np->type = T_STR;
228 				return ':';
229 			}
230 			break;
231 		case 'q':
232 			if (cp[1] == 'u' && !strcmp(cp, "quote") && !(cp = *state->arglist++))
233 				error(ERROR_exit(2), "string argument expected");
234 			break;
235 		case 's':
236 			if (cp[1] == 'u' && !strcmp(cp, "substr"))
237 			{
238 				if (!(sp = *state->arglist++))
239 					error(ERROR_exit(2), "string argument expected");
240 				if (!(cp = *state->arglist++))
241 					error(ERROR_exit(2), "position argument expected");
242 				i = strtol(cp, &ep, 10);
243 				if (*ep || --i < 0)
244 					i = -1;
245 				if (!(cp = *state->arglist++))
246 					error(ERROR_exit(2), "length argument expected");
247 				j = strtol(cp, &ep, 10);
248 				if (*ep)
249 					j = -1;
250 				k = strlen(sp);
251 				if (i < 0 || i >= k || j < 0)
252 					sp = "";
253 				else
254 				{
255 					sp += i;
256 					k -= i;
257 					if (j < k)
258 						sp[j] = 0;
259 				}
260 				np->type = T_STR;
261 				np->str = sp;
262 				goto next;
263 			}
264 			break;
265 		}
266 	if (*cp=='(' && cp[1]==0)
267 	{
268 		tok = expr_or(state, np);
269 		if (tok != ')')
270 			error(ERROR_exit(2),"closing parenthesis missing");
271 	}
272 	else
273 	{
274 		np->type = T_STR;
275 		np->str = cp;
276 		if (*cp)
277 		{
278 			np->num = strtol(np->str,&ep,10);
279 			if (!*ep)
280 				np->type |= T_NUM;
281 		}
282 	}
283  next:
284 	if (!(cp = *state->arglist))
285 		return 0;
286 	state->arglist++;
287 	for (i=0; i < sizeof(optable)/sizeof(*optable); i++)
288 		if (*cp==optable[i].opname[0] && cp[1]==optable[i].opname[1])
289 			return optable[i].op;
290 	error(ERROR_exit(2),"%s: unknown operator argument",cp);
291 	return 0;
292 }
293 
294 static int expr_cond(State_t* state, Node_t *np)
295 {
296 	register int	tok = getnode(state, np);
297 
298 	while (tok==':')
299 	{
300 		regex_t re;
301 		regmatch_t match[2];
302 		int n;
303 		Node_t rp;
304 		char *cp;
305 		tok = getnode(state, &rp);
306 		if (np->type&T_STR)
307 			cp = np->str;
308 		else
309 			sfsprintf(cp=state->buf,sizeof(state->buf),"%d",np->num);
310 		np->num = 0;
311 		np->type = T_NUM;
312 		if (n = regcomp(&re, rp.str, REG_LEFT|REG_LENIENT))
313 			regfatal(&re, ERROR_exit(2), n);
314 		if (!(n = regexec(&re, cp, elementsof(match), match, 0)))
315 		{
316 			if (re.re_nsub > 0)
317 			{
318 				np->type = T_STR;
319 				if (match[1].rm_so >= 0)
320 				{
321 					np->str = cp + match[1].rm_so;
322 					np->str[match[1].rm_eo - match[1].rm_so] = 0;
323 					np->num = strtol(np->str,&cp,10);
324 					if (cp!=np->str && *cp==0)
325 						np->type |= T_NUM;
326 				}
327 				else
328 					np->str = "";
329 			}
330 			else
331 				np->num = match[0].rm_eo - match[0].rm_so;
332 		}
333 		else if (n != REG_NOMATCH)
334 			regfatal(&re, ERROR_exit(2), n);
335 		else if (re.re_nsub)
336 		{
337 			np->str = "";
338 			np->type = T_STR;
339 		}
340 		regfree(&re);
341 	}
342 	return tok;
343 }
344 
345 static int expr_mult(State_t* state, Node_t *np)
346 {
347 	register int	tok = expr_cond(state, np);
348 
349 	while ((tok&~T_OP)==T_MULT)
350 	{
351 		Node_t rp;
352 		int op = (tok&T_OP);
353 		tok = expr_cond(state, &rp);
354 		if (!numeric(np) || !numeric(&rp))
355 			error(ERROR_exit(2),"non-numeric argument");
356 		if (op && rp.num==0)
357 			error(ERROR_exit(2),"division by zero");
358 		switch(op)
359 		{
360 		    case 0:
361 			np->num *= rp.num;
362 			break;
363 		    case 1:
364 			np->num /= rp.num;
365 			break;
366 		    case 2:
367 			np->num %= rp.num;
368 		}
369 		np->type = T_NUM;
370 	}
371 	return tok;
372 }
373 
374 static int expr_add(State_t* state, Node_t *np)
375 {
376 	register int	tok = expr_mult(state, np);
377 
378 	while ((tok&~T_OP)==T_ADD)
379 	{
380 		Node_t rp;
381 		int op = (tok&T_OP);
382 		tok = expr_mult(state, &rp);
383 		if (!numeric(np) || !numeric(&rp))
384 			error(ERROR_exit(2),"non-numeric argument");
385 		if (op)
386 			np->num -= rp.num;
387 		else
388 			np->num += rp.num;
389 		np->type = T_NUM;
390 	}
391 	return tok;
392 }
393 
394 static int expr_cmp(State_t* state, Node_t *np)
395 {
396 	register int	tok = expr_add(state, np);
397 
398 	while ((tok&~T_OP)==T_CMP)
399 	{
400 		Node_t rp;
401 		register char *left,*right;
402 		char buff1[36],buff2[36];
403 		int op = (tok&T_OP);
404 		tok = expr_add(state, &rp);
405 		if (numeric(&rp) && numeric(np))
406 			op |= 010;
407 		else
408 		{
409 			if (np->type&T_STR)
410 				left = np->str;
411 			else
412 				sfsprintf(left=buff1,sizeof(buff1),"%d",np->num);
413 			if (rp.type&T_STR)
414 				right = rp.str;
415 			else
416 				sfsprintf(right=buff2,sizeof(buff2),"%d",rp.num);
417 		}
418 		switch(op)
419 		{
420 		    case 0:
421 			np->num = streq(left,right);
422 			break;
423 		    case 1:
424 			np->num = (strcoll(left,right)>0);
425 			break;
426 		    case 2:
427 			np->num = (strcoll(left,right)<0);
428 			break;
429 		    case 3:
430 			np->num = (strcoll(left,right)>=0);
431 			break;
432 		    case 4:
433 			np->num = (strcoll(left,right)<=0);
434 			break;
435 		    case 5:
436 			np->num = !streq(left,right);
437 			break;
438 		    case 010:
439 			np->num = (np->num==rp.num);
440 			break;
441 		    case 011:
442 			np->num = (np->num>rp.num);
443 			break;
444 		    case 012:
445 			np->num = (np->num<rp.num);
446 			break;
447 		    case 013:
448 			np->num = (np->num>=rp.num);
449 			break;
450 		    case 014:
451 			np->num = (np->num<=rp.num);
452 			break;
453 		    case 015:
454 			np->num = (np->num!=rp.num);
455 			break;
456 		}
457 		np->type = T_NUM;
458 	}
459 	return tok;
460 }
461 
462 static int expr_and(State_t* state, Node_t *np)
463 {
464 	register int	tok = expr_cmp(state, np);
465 	while (tok=='&')
466 	{
467 		Node_t rp;
468 		tok = expr_cmp(state, &rp);
469 		if ((numeric(&rp) && rp.num==0) || *rp.str==0)
470 		{
471 			np->num = 0;
472 			np->type=T_NUM;
473 		}
474 	}
475 	return tok;
476 }
477 
478 static int expr_or(State_t* state, Node_t *np)
479 {
480 	register int	tok = expr_and(state, np);
481 	while (tok=='|')
482 	{
483 		Node_t rp;
484 		tok = expr_and(state, &rp);
485 		if ((numeric(np) && np->num==0) || *np->str==0)
486 			*np = rp;
487 	}
488 	return tok;
489 }
490 
491 int
492 b_expr(int argc, char *argv[], void *context)
493 {
494 	State_t	state;
495 	Node_t	node;
496 	int	n;
497 
498 	cmdinit(argc, argv,context, ERROR_CATALOG, 0);
499 	state.standard = !strcmp(astconf("CONFORMANCE", NiL, NiL), "standard");
500 #if 0
501 	if (state.standard)
502 		state.arglist = argv+1;
503 	else
504 #endif
505 	{
506 		while (n=optget(argv, usage))
507 		{
508 			/*
509 			 * NOTE: this loop ignores all but literal -- and -?
510 			 *	 out of kindness for obsolescent usage
511 			 *	 (and is ok with the standard) but strict
512 			 *	 getopt conformance would give usage for all
513 			 *	 unknown - options
514 			 */
515 			if(n=='?')
516 				error(ERROR_usage(2), "%s", opt_info.arg);
517 			if (opt_info.option[1] != '?')
518 				break;
519 			error(ERROR_usage(2), "%s", opt_info.arg);
520 		}
521 		if (error_info.errors)
522 			error(ERROR_usage(2),"%s",optusage((char*)0));
523 		state.arglist = argv+opt_info.index;
524 	}
525 	if (expr_or(&state, &node))
526 		error(ERROR_exit(2),"syntax error");
527 	if (node.type&T_STR)
528 	{
529 		if (*node.str)
530 			sfprintf(sfstdout,"%s\n",node.str);
531 	}
532 	else
533 		sfprintf(sfstdout,"%d\n",node.num);
534 	return numeric(&node)?node.num==0:*node.str==0;
535 }
536