xref: /illumos-gate/usr/src/cmd/expr/expr.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 
25 /*
26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27  */
28 
29 #include <stdlib.h>
30 #include <regexpr.h>
31 #include <locale.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <regex.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <errno.h>
39 
/*
 * Token codes handed back by yylex().  They start above the range of a
 * single character because '(' and ')' are returned as their own
 * character codes.  The numeric order is significant: expres() classifies
 * tokens with range comparisons and indexes pri[] with (token - OR).
 */
enum {
	A_STRING = 258,	/* argument that is neither operator nor paren */
	NOARG = 259,	/* argument list exhausted */
	OR = 260,
	AND = 261,
	EQ = 262,
	LT = 263,
	GT = 264,
	GEQ = 265,
	LEQ = 266,
	NEQ = 267,
	ADD = 268,
	SUBT = 269,
	MULT = 270,
	DIV = 271,
	REM = 272,
	MCH = 273,	/* the ":" operator */
	MATCH = 274,	/* keywords: match, substr, length, index */
	SUBSTR = 275,
	LENGTH = 276,
	INDEX = 277
};

/* size of subexpression array */
#define	MSIZE	LINE_MAX

/* true when the two strings compare equal */
#define	EQL(x, y) (strcmp((x), (y)) == 0)

/*
 * NOTE(review): error(), ERROR() and MAX_MATCH are not referenced by the
 * code visible in this file; presumably they are left over from an older
 * regular expression interface -- confirm before removing.
 */
#define	error(c)	errxx()
#define	ERROR(c)	errxx()
#define	MAX_MATCH 20
/*
 * Forward declarations.  Every helper is given a full prototype so the
 * compiler checks argument types at each call; an empty parameter list
 * would leave the calls unchecked.
 */
static int ematch(char *, char *);
static void yyerror(char *);
static void errxx(void);
static void *exprmalloc(size_t size);

static char *ltoa(long);
static char *lltoa(long long);

/* Parser state shared by yylex(), expres() and main(). */
static char	**Av;		/* argument vector being evaluated */
static char *buf;		/* result string that main() prints */
static int	Ac;		/* number of entries in Av */
static int	Argi;		/* index in Av of the token yylex() looks at */
static int noarg;		/* set to 1 once expres() has seen NOARG */
static int paren;		/* '(' seen minus ')' consumed; 0 at the end */
81 /*
82  *	Array used to store subexpressions in regular expressions
83  *	Only one subexpression allowed per regular expression currently
84  */
85 static char Mstring[1][MSIZE];
86 
87 
88 static char *operator[] = {
89 	"|", "&", "+", "-", "*", "/", "%", ":",
90 	"=", "==", "<", "<=", ">", ">=", "!=",
91 	"match",
92 	"substr", "length", "index",
93 	"\0" };
94 static	int op[] = {
95 	OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
96 	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
97 	MATCH,
98 	SUBSTR, LENGTH, INDEX
99 	};
100 static	int pri[] = {
101 	1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7,
102 	7, 7, 7
103 	};
104 
105 
/*
 * clean_buf - XCU4 mod to remove leading zeros from negative signed
 *		numeric output, e.g., -00001 becomes -1
 *
 * buf is rewritten in place and never grows.  Only a string made of a
 * '-' followed by one or more decimal digits is touched; anything else
 * (including a lone "-") is left as it is.  A negative zero such as
 * "-000" becomes "0".
 *
 * The zeros are removed textually rather than by converting to an
 * integer and back, so a digit string too long for a long long keeps
 * its value instead of being replaced by whatever the conversion yields.
 */
static void
clean_buf(char *buf)
{
	char *digits;
	char *p;

	if (buf[0] != '-' || buf[1] == '\0')
		return;

	/* Everything after the sign has to be a decimal digit. */
	for (p = buf + 1; *p != '\0'; p++) {
		/* the cast keeps isdigit() defined for bytes >= 0x80 */
		if (!isdigit((unsigned char)*p))
			return;
	}

	digits = buf + 1;
	while (*digits == '0')
		digits++;

	if (*digits == '\0') {
		/* nothing but zeros: the value is 0 and carries no sign */
		buf[0] = '0';
		buf[1] = '\0';
	} else if (digits != buf + 1) {
		/* slide the significant digits (and the NUL) up to the sign */
		(void) memmove(buf + 1, digits, strlen(digits) + 1);
	}
}
142 
143 /*
144  * End XCU4 mods.
145  */
146 
147 static int
148 yylex(void)
149 {
150 	char *p;
151 	int i;
152 
153 	if (Argi >= Ac)
154 		return (NOARG);
155 
156 	p = Av[Argi];
157 
158 	if ((*p == '(' || *p == ')') && p[1] == '\0')
159 		return ((int)*p);
160 	for (i = 0; *operator[i]; ++i)
161 		if (EQL(operator[i], p))
162 			return (op[i]);
163 
164 
165 	return (A_STRING);
166 }
167 
168 static char *
169 rel(int oper, char *r1, char *r2)
170 {
171 	long long i, l1, l2;
172 
173 	if (ematch(r1, "-\\{0,1\\}[0-9]*$") &&
174 	    ematch(r2, "-\\{0,1\\}[0-9]*$")) {
175 		errno = 0;
176 		l1 = strtoll(r1, (char **)NULL, 10);
177 		l2 = strtoll(r2, (char **)NULL, 10);
178 		if (errno) {
179 #ifdef XPG6
180 		/* XPG6: stdout will always contain newline even on error */
181 			(void) write(1, "\n", 1);
182 #endif
183 			if (errno == ERANGE) {
184 				(void) fprintf(stderr, gettext(
185 				    "expr: Integer argument too large\n"));
186 				exit(3);
187 			} else {
188 				perror("expr");
189 				exit(3);
190 			}
191 		}
192 		switch (oper) {
193 		case EQ:
194 			i = (l1 == l2);
195 			break;
196 		case GT:
197 			i = (l1 > l2);
198 			break;
199 		case GEQ:
200 			i = (l1 >= l2);
201 			break;
202 		case LT:
203 			i = (l1 < l2);
204 			break;
205 		case LEQ:
206 			i = (l1 <= l2);
207 			break;
208 		case NEQ:
209 			i = (l1 != l2);
210 			break;
211 		}
212 	}
213 	else
214 	{
215 			i = strcoll(r1, r2);
216 		switch (oper) {
217 		case EQ:
218 			i = i == 0;
219 			break;
220 		case GT:
221 			i = i > 0;
222 			break;
223 		case GEQ:
224 			i = i >= 0;
225 			break;
226 		case LT:
227 			i = i < 0;
228 			break;
229 		case LEQ:
230 			i = i <= 0;
231 			break;
232 		case NEQ:
233 			i = i != 0;
234 			break;
235 		}
236 	}
237 	return (i ? "1": "0");
238 }
239 
240 static char *
241 arith(int oper, char *r1, char *r2)
242 {
243 	long long i1, i2;
244 	char *rv;
245 
246 	if (!(ematch(r1, "-\\{0,1\\}[0-9]*$") &&
247 	    ematch(r2, "-\\{0,1\\}[0-9]*$")))
248 		yyerror("non-numeric argument");
249 	errno = 0;
250 	i1 = strtoll(r1, (char **)NULL, 10);
251 	i2 = strtoll(r2, (char **)NULL, 10);
252 	if (errno) {
253 #ifdef XPG6
254 	/* XPG6: stdout will always contain newline even on error */
255 		(void) write(1, "\n", 1);
256 #endif
257 		if (errno == ERANGE) {
258 			(void) fprintf(stderr, gettext(
259 			    "expr: Integer argument too large\n"));
260 			exit(3);
261 		} else {
262 			perror("expr");
263 			exit(3);
264 		}
265 	}
266 
267 	switch (oper) {
268 	case ADD:
269 		i1 = i1 + i2;
270 		break;
271 	case SUBT:
272 		i1 = i1 - i2;
273 		break;
274 	case MULT:
275 		i1 = i1 * i2;
276 		break;
277 	case DIV:
278 		if (i2 == 0)
279 			yyerror("division by zero");
280 		i1 = i1 / i2;
281 		break;
282 	case REM:
283 		if (i2 == 0)
284 			yyerror("division by zero");
285 		i1 = i1 % i2;
286 		break;
287 	}
288 	rv = exprmalloc(25);
289 	(void) strcpy(rv, lltoa(i1));
290 	return (rv);
291 }
292 
293 static char
294 *conj(int oper, char *r1, char *r2)
295 {
296 	char *rv;
297 
298 	switch (oper) {
299 
300 	case OR:
301 		if (EQL(r1, "0") || EQL(r1, "")) {
302 			if (EQL(r2, "0") || EQL(r2, ""))
303 				rv = "0";
304 			else
305 				rv = r2;
306 		} else
307 			rv = r1;
308 		break;
309 	case AND:
310 		if (EQL(r1, "0") || EQL(r1, ""))
311 			rv = "0";
312 		else if (EQL(r2, "0") || EQL(r2, ""))
313 			rv = "0";
314 		else
315 			rv = r1;
316 		break;
317 	}
318 	return (rv);
319 }
320 
/*
 * substr - return the part of v that starts at character position s
 * (counted from 1) and is at most w characters long.
 *
 * s and w are decimal strings.  A start or length below 1, which
 * includes non-numeric text, yields the empty string.  The previous
 * countdown loops only stopped on such values after the int counter
 * had wrapped around.
 *
 * The result points into v.  v is cut short in place only when
 * something follows the selected part; no write happens when the
 * terminator is already where it belongs, so a constant operand such as
 * the "1" returned by rel() is not written to when it is used whole.
 */
char *
substr(char *v, char *s, char *w)
{
	long start = strtol(s, (char **)NULL, 10);
	long width = strtol(w, (char **)NULL, 10);
	char *res;

	if (start < 1 || width < 1)
		return (v + strlen(v));

	/* skip start - 1 characters, stopping early at the end of v */
	while (--start > 0 && *v != '\0')
		++v;

	res = v;

	while (width-- > 0 && *v != '\0')
		++v;

	if (*v != '\0')
		*v = '\0';
	return (res);
}
340 
/*
 * index - position (counted from 1) of the first character of s that
 * also occurs in t.
 *
 * Returns the constant string "0" when s contains no character of t,
 * otherwise a newly allocated decimal string that is never freed.  The
 * buffer is large enough for any unsigned long, so a very long operand
 * cannot overrun it.
 */
char *
index(char *s, char *t)
{
	enum { POS_BUFSZ = 24 };
	size_t pos = strcspn(s, t);	/* chars before the first hit */
	char *rv;

	if (s[pos] == '\0')
		return ("0");

	rv = exprmalloc(POS_BUFSZ);
	(void) snprintf(rv, POS_BUFSZ, "%lu", (unsigned long)pos + 1);
	return (rv);
}
355 
/*
 * length - length of s in bytes, as a decimal string.
 *
 * Returns a newly allocated string that is never freed.  The buffer is
 * large enough for any unsigned long, so a very long operand cannot
 * overrun it.
 */
char *
length(char *s)
{
	enum { LEN_BUFSZ = 24 };
	char *rv = exprmalloc(LEN_BUFSZ);

	(void) snprintf(rv, LEN_BUFSZ, "%lu", (unsigned long)strlen(s));
	return (rv);
}
368 
369 static char *
370 match(char *s, char *p)
371 {
372 	char *rv;
373 	long val;			/* XCU4 */
374 
375 	(void) strcpy(rv = exprmalloc(8), ltoa(val = (long)ematch(s, p)));
376 	if (nbra /* && val != 0 */) {
377 		rv = exprmalloc((unsigned)strlen(Mstring[0]) + 1);
378 		(void) strcpy(rv, Mstring[0]);
379 	}
380 	return (rv);
381 }
382 
383 
384 /*
385  * ematch	- XCU4 mods involve calling compile/advance which simulate
386  *		  the obsolete compile/advance functions using regcomp/regexec
387  */
388 static int
389 ematch(char *s, char *p)
390 {
391 	static char *expbuf;
392 	char *nexpbuf;
393 	int num;
394 #ifdef XPG4
395 	int nmatch;		/* number of matched bytes */
396 	char tempbuf[256];
397 	char *tmptr1 = 0;	/* If tempbuf is not large enough */
398 	char *tmptr;
399 	int nmbchars;		/* number characters in multibyte string */
400 #endif
401 
402 	nexpbuf = compile(p, (char *)0, (char *)0);	/* XCU4 regex mod */
403 	if (0 /* XXX nbra > 1*/)
404 		yyerror("Too many '\\('s");
405 	if (regerrno) {
406 		if (regerrno != 41 || expbuf == NULL)
407 			errxx();
408 	} else {
409 		if (expbuf)
410 			free(expbuf);
411 		expbuf = nexpbuf;
412 	}
413 	if (advance(s, expbuf)) {
414 		if (nbra > 0) {
415 			p = braslist[0];
416 			num = braelist[0] - p;
417 			if ((num > MSIZE - 1) || (num < 0))
418 				yyerror("string too long");
419 			(void) strncpy(Mstring[0], p, num);
420 			Mstring[0][num] = '\0';
421 		}
422 #ifdef XPG4
423 		/*
424 		 *  Use mbstowcs to find the number of multibyte characters
425 		 *  in the multibyte string beginning at s, and
426 		 *  ending at loc2.  Create a separate string
427 		 *  of the substring, so it can be passed to mbstowcs.
428 		 */
429 		nmatch = loc2 - s;
430 		if (nmatch > ((sizeof (tempbuf) / sizeof (char)) - 1)) {
431 			tmptr1 = exprmalloc(nmatch + 1);
432 			tmptr = tmptr1;
433 		} else {
434 			tmptr = tempbuf;
435 		}
436 		memcpy(tmptr, s, nmatch);
437 		*(tmptr + nmatch) = '\0';
438 		if ((nmbchars = mbstowcs(NULL, tmptr, 0)) == -1) {
439 			yyerror("invalid multibyte character encountered");
440 			if (tmptr1 != NULL)
441 				free(tmptr1);
442 			return (0);
443 		}
444 		if (tmptr1 != NULL)
445 			free(tmptr1);
446 		return (nmbchars);
447 #else
448 		return (loc2-s);
449 #endif
450 	}
451 	return (0);
452 }
453 
/*
 * errxx - report a regular expression error; yyerror() does not return.
 *
 * Defined with an explicit (void) parameter list so the definition is a
 * prototype rather than an old-style empty list.
 */
static void
errxx(void)
{
	yyerror("RE error");
}
459 
/*
 * yyerror - write "expr: <message>" and a newline to file descriptor 2,
 * then exit with status 2.  The message s is passed through gettext().
 * Never returns.
 */
static void
yyerror(char *s)
{
	char *msg;

#ifdef XPG6
	/* XPG6: stdout will always contain newline even on error */
	(void) write(1, "\n", 1);
#endif
	(void) write(2, "expr: ", 6);
	msg = gettext(s);
	(void) write(2, msg, (unsigned)strlen(msg));
	(void) write(2, "\n", 1);
	exit(2);
	/* NOTREACHED */
}
473 
/*
 * ltoa - convert a long to its decimal representation.
 *
 * Returns a pointer into a static buffer that is overwritten by the next
 * call, so the caller has to copy the text if it must survive.
 *
 * The digits are produced from the unsigned magnitude, which makes
 * LONG_MIN an ordinary case for any width of long; negating it as a
 * signed value would overflow.  The buffer is sized from the width of
 * long: one digit per three bits rounded up, plus sign and terminator.
 */
static char *
ltoa(long l)
{
	static char str[sizeof (long) * CHAR_BIT / 3 + 3];
	char *sp = &str[sizeof (str) - 1];
	unsigned long mag;

	/* 0UL - x is the magnitude of a negative x, LONG_MIN included */
	mag = (l < 0) ? 0UL - (unsigned long)l : (unsigned long)l;

	*sp = '\0';
	do {
		*--sp = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);
	if (l < 0)
		*--sp = '-';
	return (sp);
}
496 
/*
 * lltoa - convert a long long to its decimal representation.
 *
 * The most negative value cannot be negated, so it is answered with a
 * constant string.  Every other value is written backwards into a static
 * buffer that the next call overwrites.
 */
static char *
lltoa(long long l)
{
	static char str[25];
	char *cursor = &str[24];
	int negative;

	if (l == LLONG_MIN)
		return ("-9223372036854775808");

	negative = (l < 0);
	if (negative)
		l = -l;

	*cursor = '\0';
	do {
		int digit = l % 10;

		*--cursor = '0' + digit;
		l /= 10;
	} while (l);

	if (negative)
		*--cursor = '-';
	return (cursor);
}
519 
520 static char *
521 expres(int prior, int par)
522 {
523 	int ylex, temp, op1;
524 	char *r1, *ra, *rb, *rc;
525 	ylex = yylex();
526 	if (ylex >= NOARG && ylex < MATCH) {
527 		yyerror("syntax error");
528 	}
529 	if (ylex == A_STRING) {
530 		r1 = Av[Argi++];
531 		temp = Argi;
532 	} else {
533 		if (ylex == '(') {
534 			paren++;
535 			Argi++;
536 			r1 = expres(0, Argi);
537 			Argi--;
538 		}
539 	}
540 lop:
541 	ylex = yylex();
542 	if (ylex > NOARG && ylex < MATCH) {
543 		op1 = ylex;
544 		Argi++;
545 		if (pri[op1-OR] <= prior)
546 			return (r1);
547 		else {
548 			switch (op1) {
549 			case OR:
550 			case AND:
551 				r1 = conj(op1, r1, expres(pri[op1-OR], 0));
552 				break;
553 			case EQ:
554 			case LT:
555 			case GT:
556 			case LEQ:
557 			case GEQ:
558 			case NEQ:
559 				r1 = rel(op1, r1, expres(pri[op1-OR], 0));
560 				break;
561 			case ADD:
562 			case SUBT:
563 			case MULT:
564 			case DIV:
565 			case REM:
566 				r1 = arith(op1, r1, expres(pri[op1-OR], 0));
567 				break;
568 			case MCH:
569 				r1 = match(r1, expres(pri[op1-OR], 0));
570 				break;
571 			}
572 			if (noarg == 1) {
573 				return (r1);
574 			}
575 			Argi--;
576 			goto lop;
577 		}
578 	}
579 	ylex = yylex();
580 	if (ylex == ')') {
581 		if (par == Argi) {
582 			yyerror("syntax error");
583 		}
584 		if (par != 0) {
585 			paren--;
586 			Argi++;
587 		}
588 		Argi++;
589 		return (r1);
590 	}
591 	ylex = yylex();
592 	if (ylex > MCH && ylex <= INDEX) {
593 		if (Argi == temp) {
594 			return (r1);
595 		}
596 		op1 = ylex;
597 		Argi++;
598 		switch (op1) {
599 		case MATCH:
600 			rb = expres(pri[op1-OR], 0);
601 			ra = expres(pri[op1-OR], 0);
602 			break;
603 		case SUBSTR:
604 			rc = expres(pri[op1-OR], 0);
605 			rb = expres(pri[op1-OR], 0);
606 			ra = expres(pri[op1-OR], 0);
607 			break;
608 		case LENGTH:
609 			ra = expres(pri[op1-OR], 0);
610 			break;
611 		case INDEX:
612 			rb = expres(pri[op1-OR], 0);
613 			ra = expres(pri[op1-OR], 0);
614 			break;
615 		}
616 		switch (op1) {
617 		case MATCH:
618 			r1 = match(rb, ra);
619 			break;
620 		case SUBSTR:
621 			r1 = substr(rc, rb, ra);
622 			break;
623 		case LENGTH:
624 			r1 = length(ra);
625 			break;
626 		case INDEX:
627 			r1 = index(rb, ra);
628 			break;
629 		}
630 		if (noarg == 1) {
631 			return (r1);
632 		}
633 		Argi--;
634 		goto lop;
635 	}
636 	ylex = yylex();
637 	if (ylex == NOARG) {
638 		noarg = 1;
639 	}
640 	return (r1);
641 }
642 
/*
 * exprmalloc - malloc() that does not return on failure.
 *
 * When the allocation fails, "expr: <malloc error text>" and a newline
 * are written to file descriptor 2 and the program exits with status 3.
 */
void *
exprmalloc(size_t size)
{
	void *rv = malloc(size);

	if (rv == NULL) {
		static const char prefix[] = "expr: ";
		char *msg = gettext("malloc error");

		(void) write(2, prefix, sizeof (prefix) - 1);
		(void) write(2, msg, (unsigned)strlen(msg));
		(void) write(2, "\n", 1);
		exit(3);
	}
	return (rv);
}
658 
659 int
660 main(int argc, char **argv)
661 {
662 	/*
663 	 * XCU4 allow "--" as argument
664 	 */
665 	if (argc > 1 && strcmp(argv[1], "--") == 0)
666 		argv++, argc--;
667 	/*
668 	 * XCU4 - print usage message when invoked without args
669 	 */
670 	if (argc < 2) {
671 #ifdef XPG6
672 	/* XPG6: stdout will always contain newline even on error */
673 		(void) write(1, "\n", 1);
674 #endif
675 		(void) fprintf(stderr, gettext("Usage: expr expression\n"));
676 		exit(3);
677 	}
678 	Ac = argc;
679 	Argi = 1;
680 	noarg = 0;
681 	paren = 0;
682 	Av = argv;
683 
684 	(void) setlocale(LC_ALL, "");
685 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
686 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
687 #endif
688 	(void) textdomain(TEXT_DOMAIN);
689 	buf = expres(0, 1);
690 	if (Ac != Argi || paren != 0) {
691 		yyerror("syntax error");
692 	}
693 	/*
694 	 * XCU4 - strip leading zeros from numeric output
695 	 */
696 	clean_buf(buf);
697 	(void) write(1, buf, (unsigned)strlen(buf));
698 	(void) write(1, "\n", 1);
699 	return ((strcmp(buf, "0") == 0 || buf[0] == 0) ? 1 : 0);
700 }
701