xref: /illumos-gate/usr/src/cmd/expr/expr.c (revision 8b80e8cb6855118d46f605e91b5ed4ce83417395)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdlib.h>
34 #include <regexpr.h>
35 #include <locale.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <regex.h>
39 #include <limits.h>
40 #include <stdio.h>
41 #include <ctype.h>
42 #include <errno.h>
43 
/*
 * Token values returned by yylex().  A single-character token ('(' or
 * ')') is returned as its character value, so these start above the
 * char range.  The operator tokens are kept consecutive from OR because
 * expres() uses (token - OR) as an index into pri[].
 */
#define	A_STRING	258	/* ordinary operand */
#define	NOARG		259	/* argument list exhausted */
#define	OR		260	/* | */
#define	AND		261	/* & */
#define	EQ		262	/* = and == */
#define	LT		263	/* < */
#define	GT		264	/* > */
#define	GEQ		265	/* >= */
#define	LEQ		266	/* <= */
#define	NEQ		267	/* != */
#define	ADD		268	/* + */
#define	SUBT		269	/* - */
#define	MULT		270	/* * */
#define	DIV		271	/* / */
#define	REM		272	/* % */
#define	MCH		273	/* infix ":" */
#define	MATCH		274	/* prefix "match" keyword */
#ifdef  _iBCS2
#define	SUBSTR		276	/* prefix "substr" keyword */
#define	LENGTH		277	/* prefix "length" keyword */
#define	INDEX		278	/* prefix "index" keyword */
#endif  /* _iBCS2 */

/* size of subexpression array */
#define	MSIZE		LINE_MAX
#define	MAX_MATCH	20

/* True when the two strings are byte-for-byte equal. */
#define	EQL(x, y) (strcmp(x, y) == 0)

/*
 * Both spellings ignore their argument and report a generic RE error.
 * NOTE(review): no use of either macro is visible in this file;
 * presumably left over from the macro-based regexp interface -- confirm
 * before removing.
 */
#define	error(c)	errxx()
#define	ERROR(c)	errxx()
74 static int ematch(char *, char *);
75 static void yyerror(char *);
76 static void errxx();
77 static void *exprmalloc(size_t size);
78 
79 long atol();
80 char *strcpy(), *strncpy();
81 void exit();
82 
83 static char *ltoa();
84 static char *lltoa();
85 static char	**Av;
86 static char *buf;
87 static int	Ac;
88 static int	Argi;
89 static int noarg;
90 static int paren;
91 #ifdef  _iBCS2
92 char    *sysv3_set;
93 #endif  /* _iBCS2 */
94 /*
95  *	Array used to store subexpressions in regular expressions
96  *	Only one subexpression allowed per regular expression currently
97  */
98 static char Mstring[1][MSIZE];
99 
100 
101 static char *operator[] = {
102 	"|", "&", "+", "-", "*", "/", "%", ":",
103 	"=", "==", "<", "<=", ">", ">=", "!=",
104 	"match",
105 #ifdef	_iBCS2
106 	"substr", "length", "index",
107 #endif	/* _iBCS2 */
108 	"\0" };
109 static	int op[] = {
110 	OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
111 	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
112 	MATCH
113 #ifdef	_iBCS2
114 , SUBSTR, LENGTH, INDEX
115 #endif	/* _iBCS2 */
116 	};
117 static	int pri[] = {
118 	1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7
119 #ifdef	_iBCS2
120 , 7, 7, 7
121 #endif	/* _iBCS2 */
122 	};
123 
124 
/*
 * clean_buf - XCU4 mod to remove leading zeros from negative signed
 *		numeric output, e.g., -00001 becomes -1
 *
 * The string is rewritten in place and is only touched when it is a '-'
 * followed by one or more decimal digits; anything else (including a
 * lone "-" and unsigned numbers) is left alone.  A negative zero such
 * as "-000" becomes "0".
 *
 * The zeros are stripped textually rather than by converting to a
 * long long and back, so digit strings too long for a long long keep
 * their value instead of overflowing the conversion.
 */
static void
clean_buf(char *buf)
{
	char *digits;
	char *p;

	if (buf[0] != '-' || buf[1] == '\0')
		return;

	digits = buf + 1;
	for (p = digits; *p != '\0'; p++) {
		/* cast: <ctype.h> functions are undefined for negative char */
		if (!isdigit((unsigned char)*p))
			return;		/* not a number, leave it alone */
	}

	/* Skip leading zeros, but always keep the last digit. */
	p = digits;
	while (*p == '0' && p[1] != '\0')
		p++;

	if (*p == '0') {
		/* Every digit was zero: the value is 0, which has no sign. */
		buf[0] = '0';
		buf[1] = '\0';
		return;
	}

	if (p != digits)
		(void) memmove(digits, p, strlen(p) + 1);
}
162 
163 /*
164  * End XCU4 mods.
165  */
166 
167 static int
168 yylex()
169 {
170 	char *p;
171 	int i;
172 
173 	if (Argi >= Ac)
174 		return (NOARG);
175 
176 	p = Av[Argi];
177 
178 	if ((*p == '(' || *p == ')') && p[1] == '\0')
179 		return ((int)*p);
180 	for (i = 0; *operator[i]; ++i)
181 		if (EQL(operator[i], p))
182 			return (op[i]);
183 
184 
185 	return (A_STRING);
186 }
187 
188 static char
189 *rel(oper, r1, r2) register char *r1, *r2;
190 {
191 	long long i, l1, l2;
192 
193 	if (ematch(r1, "-\\{0,1\\}[0-9]*$") &&
194 	    ematch(r2, "-\\{0,1\\}[0-9]*$")) {
195 		errno = 0;
196 		l1 = strtoll(r1, (char **)NULL, 10);
197 		l2 = strtoll(r2, (char **)NULL, 10);
198 		if (errno) {
199 #ifdef XPG6
200 		/* XPG6: stdout will always contain newline even on error */
201 			(void) write(1, "\n", 1);
202 #endif
203 			if (errno == ERANGE) {
204 				(void) fprintf(stderr, gettext(
205 				    "expr: Integer argument too large\n"));
206 				exit(3);
207 			} else {
208 				perror("expr");
209 				exit(3);
210 			}
211 		}
212 		switch (oper) {
213 		case EQ:
214 			i = (l1 == l2);
215 			break;
216 		case GT:
217 			i = (l1 > l2);
218 			break;
219 		case GEQ:
220 			i = (l1 >= l2);
221 			break;
222 		case LT:
223 			i = (l1 < l2);
224 			break;
225 		case LEQ:
226 			i = (l1 <= l2);
227 			break;
228 		case NEQ:
229 			i = (l1 != l2);
230 			break;
231 		}
232 	}
233 	else
234 	{
235 			i = strcoll(r1, r2);
236 		switch (oper) {
237 		case EQ:
238 			i = i == 0;
239 			break;
240 		case GT:
241 			i = i > 0;
242 			break;
243 		case GEQ:
244 			i = i >= 0;
245 			break;
246 		case LT:
247 			i = i < 0;
248 			break;
249 		case LEQ:
250 			i = i <= 0;
251 			break;
252 		case NEQ:
253 			i = i != 0;
254 			break;
255 		}
256 	}
257 	return (i ? "1": "0");
258 }
259 
260 static char
261 *arith(oper, r1, r2) char *r1, *r2;
262 {
263 	long long i1, i2;
264 	register char *rv;
265 
266 	if (!(ematch(r1, "-\\{0,1\\}[0-9]*$") &&
267 	    ematch(r2, "-\\{0,1\\}[0-9]*$")))
268 		yyerror("non-numeric argument");
269 	errno = 0;
270 	i1 = strtoll(r1, (char **)NULL, 10);
271 	i2 = strtoll(r2, (char **)NULL, 10);
272 	if (errno) {
273 #ifdef XPG6
274 	/* XPG6: stdout will always contain newline even on error */
275 		(void) write(1, "\n", 1);
276 #endif
277 		if (errno == ERANGE) {
278 			(void) fprintf(stderr, gettext(
279 			    "expr: Integer argument too large\n"));
280 			exit(3);
281 		} else {
282 			perror("expr");
283 			exit(3);
284 		}
285 	}
286 
287 	switch (oper) {
288 	case ADD:
289 		i1 = i1 + i2;
290 		break;
291 	case SUBT:
292 		i1 = i1 - i2;
293 		break;
294 	case MULT:
295 		i1 = i1 * i2;
296 		break;
297 	case DIV:
298 		if (i2 == 0)
299 			yyerror("division by zero");
300 		i1 = i1 / i2;
301 		break;
302 	case REM:
303 		if (i2 == 0)
304 			yyerror("division by zero");
305 		i1 = i1 % i2;
306 		break;
307 	}
308 	rv = exprmalloc(25);
309 	(void) strcpy(rv, lltoa(i1));
310 	return (rv);
311 }
312 
313 static char
314 *conj(oper, r1, r2)
315 	char *r1, *r2;
316 {
317 	register char *rv;
318 
319 	switch (oper) {
320 
321 	case OR:
322 		if (EQL(r1, "0") || EQL(r1, "")) {
323 			if (EQL(r2, "0") || EQL(r2, ""))
324 				rv = "0";
325 			else
326 				rv = r2;
327 		} else
328 			rv = r1;
329 		break;
330 	case AND:
331 		if (EQL(r1, "0") || EQL(r1, ""))
332 			rv = "0";
333 		else if (EQL(r2, "0") || EQL(r2, ""))
334 			rv = "0";
335 		else
336 			rv = r1;
337 		break;
338 	}
339 	return (rv);
340 }
341 
342 #ifdef	_iBCS2
/*
 * substr - extract part of a string in place.
 *
 *	v	string to cut; it is modified (a NUL is stored after
 *		the selected range)
 *	s	decimal 1-based start position
 *	w	decimal number of bytes to keep
 *
 * Returns a pointer into v at the start of the selection.  A start
 * beyond the end of the string gives the empty string, and a width
 * larger than what is left keeps everything up to the end.
 *
 * A start of zero or less gives the empty string and a negative width
 * keeps the rest of the string.  In practice that is where the old
 * "while (--si)" / "while (wi--)" loops ended up for such values, but
 * only after about 2^32 iterations and a signed overflow; the offsets
 * are computed directly here.
 */
char *
substr(char *v, char *s, char *w)
{
	long start = strtol(s, (char **)NULL, 10);
	long width = strtol(w, (char **)NULL, 10);
	size_t len = strlen(v);
	size_t skip;
	size_t take;
	char *res;

	if (start <= 0 || (unsigned long)start - 1 > len)
		skip = len;
	else
		skip = (size_t)start - 1;
	res = v + skip;

	if (width < 0 || (unsigned long)width > len - skip)
		take = len - skip;
	else
		take = (size_t)width;

	res[take] = '\0';
	return (res);
}
362 
/*
 * index - find the first byte of s that also occurs in t.
 *
 * Returns the 1-based position of that byte as a freshly allocated
 * decimal string, or the constant string "0" when s contains none of
 * the bytes in t.  The result buffer is sized from the formatted
 * number rather than assuming it fits in 8 bytes.
 */
char *
index(char *s, char *t)
{
	char *hit = strpbrk(s, t);
	char *num;
	char *rv;

	if (hit == NULL)
		return ("0");

	num = ltoa((long)(hit - s) + 1);
	rv = exprmalloc(strlen(num) + 1);
	(void) strcpy(rv, num);
	return (rv);
}
377 
/*
 * length - return the length of s in bytes as a freshly allocated
 * decimal string.  The result buffer is sized from the formatted
 * number rather than assuming it fits in 8 bytes.
 */
char *
length(char *s)
{
	char *num = ltoa((long)strlen(s));
	char *rv;

	rv = exprmalloc(strlen(num) + 1);
	(void) strcpy(rv, num);
	return (rv);
}
390 #endif	/* _iBCS2 */
391 
392 static char *
393 match(char *s, char *p)
394 {
395 	char *rv;
396 	long val;			/* XCU4 */
397 
398 	(void) strcpy(rv = exprmalloc(8), ltoa(val = (long)ematch(s, p)));
399 	if (nbra /* && val != 0 */) {
400 		rv = exprmalloc((unsigned)strlen(Mstring[0]) + 1);
401 		(void) strcpy(rv, Mstring[0]);
402 	}
403 	return (rv);
404 }
405 
406 
407 /*
408  * ematch 	- XCU4 mods involve calling compile/advance which simulate
409  *		  the obsolete compile/advance functions using regcomp/regexec
410  */
411 static int
412 ematch(char *s, char *p)
413 {
414 	static char *expbuf;
415 	char *nexpbuf;
416 	int num;
417 #ifdef XPG4
418 	int nmatch;		/* number of matched bytes */
419 	char tempbuf[256];
420 	char *tmptr1 = 0;	/* If tempbuf is not large enough */
421 	char *tmptr;
422 	int nmbchars;		/* number characters in multibyte string */
423 #endif
424 
425 	nexpbuf = compile(p, (char *)0, (char *)0);	/* XCU4 regex mod */
426 	if (0 /* XXX nbra > 1*/)
427 		yyerror("Too many '\\('s");
428 	if (regerrno) {
429 		if (regerrno != 41 || expbuf == NULL)
430 			errxx();
431 	} else {
432 		if (expbuf)
433 			free(expbuf);
434 		expbuf = nexpbuf;
435 	}
436 	if (advance(s, expbuf)) {
437 		if (nbra > 0) {
438 			p = braslist[0];
439 			num = braelist[0] - p;
440 			if ((num > MSIZE - 1) || (num < 0))
441 				yyerror("string too long");
442 			(void) strncpy(Mstring[0], p, num);
443 			Mstring[0][num] = '\0';
444 		}
445 #ifdef XPG4
446 		/*
447 		 *  Use mbstowcs to find the number of multibyte characters
448 		 *  in the multibyte string beginning at s, and
449 		 *  ending at loc2.  Create a separate string
450 		 *  of the substring, so it can be passed to mbstowcs.
451 		 */
452 		nmatch = loc2 - s;
453 		if (nmatch > ((sizeof (tempbuf) / sizeof (char)) - 1)) {
454 			tmptr1 = exprmalloc(nmatch + 1);
455 			tmptr = tmptr1;
456 		} else {
457 			tmptr = tempbuf;
458 		}
459 		memcpy(tmptr, s, nmatch);
460 		*(tmptr + nmatch) = '\0';
461 		if ((nmbchars = mbstowcs(NULL, tmptr, NULL)) == -1) {
462 			yyerror("invalid multibyte character encountered");
463 			if (tmptr1 != NULL)
464 				free(tmptr1);
465 			return (0);
466 		}
467 		if (tmptr1 != NULL)
468 			free(tmptr1);
469 		return (nmbchars);
470 #else
471 		return (loc2-s);
472 #endif
473 	}
474 	return (0);
475 }
476 
/*
 * errxx - report a regular expression failure.  The work is done by
 * yyerror(), which prints the message and exits.
 */
static void
errxx(void)
{
	yyerror("RE error");
}
482 
/*
 * yyerror - print "expr: <message>" on standard error and exit with
 * status 2.  The message is passed through gettext() first.  When
 * built for XPG6 a newline is written to standard output before the
 * diagnostic.  Never returns.
 */
static void
yyerror(char *s)
{
	char *msg;

#ifdef XPG6
	/* XPG6: stdout will always contain newline even on error */
	(void) write(STDOUT_FILENO, "\n", 1);
#endif
	msg = gettext(s);
	(void) write(STDERR_FILENO, "expr: ", 6);
	(void) write(STDERR_FILENO, msg, (unsigned)strlen(msg));
	(void) write(STDERR_FILENO, "\n", 1);
	exit(2);
	/* NOTREACHED */
}
496 
/*
 * ltoa - format a long as a decimal string.
 *
 * Returns a pointer into a static buffer that is overwritten by the
 * next call, so callers must copy the result before calling again.
 *
 * The digits are produced from the unsigned magnitude of the value.
 * That handles LONG_MIN without negating it (which would overflow) and
 * without assuming a particular width for long; the buffer is large
 * enough for a 64-bit long including sign and terminator.
 */
static char *
ltoa(long l)
{
	static char str[24];
	char *sp = &str[sizeof (str) - 1];
	unsigned long mag;

	/* 0 - (unsigned)l is well defined even for LONG_MIN */
	mag = (l < 0) ? 0UL - (unsigned long)l : (unsigned long)l;

	*sp = '\0';
	do {
		*--sp = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);
	if (l < 0)
		*--sp = '-';
	return (sp);
}
519 
/*
 * lltoa - format a long long as a decimal string.
 *
 * The text is built backwards from the end of a static buffer, so the
 * returned pointer is only valid until the next call.
 */
static char *
lltoa(long long l)
{
	static char str[25];
	char *cursor = &str[24];
	int negative = (l < 0);
	unsigned long long mag;

	/* unsigned magnitude; also covers the most negative value */
	if (negative)
		mag = 0ULL - (unsigned long long)l;
	else
		mag = (unsigned long long)l;

	*cursor = '\0';
	do {
		cursor--;
		*cursor = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);

	if (negative) {
		cursor--;
		*cursor = '-';
	}
	return (cursor);
}
542 
543 static char *
544 expres(int prior, int par)
545 {
546 	int ylex, temp, op1;
547 	char *r1, *ra, *rb, *rc;
548 	ylex = yylex();
549 	if (ylex >= NOARG && ylex < MATCH) {
550 		yyerror("syntax error");
551 	}
552 	if (ylex == A_STRING) {
553 		r1 = Av[Argi++];
554 		temp = Argi;
555 	} else {
556 		if (ylex == '(') {
557 			paren++;
558 			Argi++;
559 			r1 = expres(0, Argi);
560 			Argi--;
561 		}
562 	}
563 lop:
564 	ylex = yylex();
565 	if (ylex > NOARG && ylex < MATCH) {
566 		op1 = ylex;
567 		Argi++;
568 		if (pri[op1-OR] <= prior)
569 			return (r1);
570 		else {
571 			switch (op1) {
572 			case OR:
573 			case AND:
574 				r1 = conj(op1, r1, expres(pri[op1-OR], 0));
575 				break;
576 			case EQ:
577 			case LT:
578 			case GT:
579 			case LEQ:
580 			case GEQ:
581 			case NEQ:
582 				r1 = rel(op1, r1, expres(pri[op1-OR], 0));
583 				break;
584 			case ADD:
585 			case SUBT:
586 			case MULT:
587 			case DIV:
588 			case REM:
589 				r1 = arith(op1, r1, expres(pri[op1-OR], 0));
590 				break;
591 			case MCH:
592 				r1 = match(r1, expres(pri[op1-OR], 0));
593 				break;
594 			}
595 			if (noarg == 1) {
596 				return (r1);
597 			}
598 			Argi--;
599 			goto lop;
600 		}
601 	}
602 	ylex = yylex();
603 	if (ylex == ')') {
604 		if (par == Argi) {
605 			yyerror("syntax error");
606 		}
607 		if (par != 0) {
608 			paren--;
609 			Argi++;
610 		}
611 		Argi++;
612 		return (r1);
613 	}
614 	ylex = yylex();
615 #ifdef	_iBCS2
616 	if (ylex > MCH && ((sysv3_set && ylex <= INDEX) || ylex <= MATCH)) {
617 #else
618 	if (ylex > MCH && ylex <= MATCH) {
619 #endif	/* _iBCS2 */
620 		if (Argi == temp) {
621 			return (r1);
622 		}
623 		op1 = ylex;
624 		Argi++;
625 		switch (op1) {
626 		case MATCH:
627 			rb = expres(pri[op1-OR], 0);
628 			ra = expres(pri[op1-OR], 0);
629 			break;
630 #ifdef	_iBCS2
631 		case SUBSTR:
632 			rc = expres(pri[op1-OR], 0);
633 			rb = expres(pri[op1-OR], 0);
634 			ra = expres(pri[op1-OR], 0);
635 			break;
636 		case LENGTH:
637 			ra = expres(pri[op1-OR], 0);
638 			break;
639 		case INDEX:
640 			rb = expres(pri[op1-OR], 0);
641 			ra = expres(pri[op1-OR], 0);
642 			break;
643 #endif	/* _iBCS2 */
644 		}
645 		switch (op1) {
646 		case MATCH:
647 			r1 = match(rb, ra);
648 			break;
649 #ifdef	_iBCS2
650 		case SUBSTR:
651 			r1 = substr(rc, rb, ra);
652 			break;
653 		case LENGTH:
654 			r1 = length(ra);
655 			break;
656 		case INDEX:
657 			r1 = index(rb, ra);
658 			break;
659 #endif	/* _iBCS2 */
660 		}
661 		if (noarg == 1) {
662 			return (r1);
663 		}
664 		Argi--;
665 		goto lop;
666 	}
667 	ylex = yylex();
668 	if (ylex == NOARG) {
669 		noarg = 1;
670 	}
671 	return (r1);
672 }
673 
/*
 * exprmalloc - malloc() wrapper that never returns NULL.
 *
 * If the allocation fails, "expr: malloc error" (after gettext()) is
 * written to standard error and the program exits with status 3.
 */
void *
exprmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL) {
		char *msg = gettext("malloc error");

		(void) write(STDERR_FILENO, "expr: ", 6);
		(void) write(STDERR_FILENO, msg, (unsigned)strlen(msg));
		(void) write(STDERR_FILENO, "\n", 1);
		exit(3);
	}
	return (mem);
}
689 
690 int
691 main(int argc, char **argv)
692 {
693 	/*
694 	 * XCU4 allow "--" as argument
695 	 */
696 	if (argc > 1 && strcmp(argv[1], "--") == 0)
697 		argv++, argc--;
698 	/*
699 	 * XCU4 - print usage message when invoked without args
700 	 */
701 	if (argc < 2) {
702 #ifdef XPG6
703 	/* XPG6: stdout will always contain newline even on error */
704 		(void) write(1, "\n", 1);
705 #endif
706 		(void) fprintf(stderr, gettext("Usage: expr expression\n"));
707 		exit(3);
708 	}
709 	Ac = argc;
710 	Argi = 1;
711 	noarg = 0;
712 	paren = 0;
713 	Av = argv;
714 #ifdef	_iBCS2
715 	sysv3_set = getenv("SYSV3");
716 #endif	/* _iBCS2 */
717 
718 	(void) setlocale(LC_ALL, "");
719 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
720 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
721 #endif
722 	(void) textdomain(TEXT_DOMAIN);
723 	buf = expres(0, 1);
724 	if (Ac != Argi || paren != 0) {
725 		yyerror("syntax error");
726 	}
727 	/*
728 	 * XCU4 - strip leading zeros from numeric output
729 	 */
730 	clean_buf(buf);
731 	(void) write(1, buf, (unsigned)strlen(buf));
732 	(void) write(1, "\n", 1);
733 	return ((strcmp(buf, "0") == 0 || buf[0] == 0) ? 1 : 0);
734 }
735