xref: /illumos-gate/usr/src/cmd/expr/expr.c (revision 2e837a72011f54762249b6612c2a64f171efcd43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 
25 /*
26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27  */
28 
29 #include <stdlib.h>
30 #include <regexpr.h>
31 #include <locale.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <regex.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <errno.h>
39 
/*
 * Token codes produced by yylex().  They start above the range of a
 * single character because "(" and ")" are returned as themselves.
 * OR through INDEX must stay contiguous and in this order: expres()
 * looks priorities up with pri[token - OR] and classifies tokens with
 * range comparisons against NOARG, MATCH, MCH and INDEX.
 */
enum {
	A_STRING = 258,
	NOARG = 259,
	OR = 260,
	AND = 261,
	EQ = 262,
	LT = 263,
	GT = 264,
	GEQ = 265,
	LEQ = 266,
	NEQ = 267,
	ADD = 268,
	SUBT = 269,
	MULT = 270,
	DIV = 271,
	REM = 272,
	MCH = 273,
	MATCH = 274,
	SUBSTR = 275,
	LENGTH = 276,
	INDEX = 277
};

/* size of subexpression array */
#define	MSIZE	LINE_MAX
/* both error macros ignore their argument and report a generic RE error */
#define	error(c)	errxx()
/* true when the two strings are byte-for-byte equal */
#define	EQL(x, y) (strcmp(x, y) == 0)

#define	ERROR(c)	errxx()
#define	MAX_MATCH 20
68 static int ematch(char *, char *);
69 static void yyerror(char *);
70 static void errxx();
71 static void *exprmalloc(size_t size);
72 
73 static char *ltoa();
74 static char *lltoa();
75 static char	**Av;
76 static char *buf;
77 static int	Ac;
78 static int	Argi;
79 static int noarg;
80 static int paren;
81 /*
82  *	Array used to store subexpressions in regular expressions
83  *	Only one subexpression allowed per regular expression currently
84  */
85 static char Mstring[1][MSIZE];
86 
87 
88 static char *operator[] = {
89 	"|", "&", "+", "-", "*", "/", "%", ":",
90 	"=", "==", "<", "<=", ">", ">=", "!=",
91 	"match",
92 	"substr", "length", "index",
93 	"\0" };
94 static	int op[] = {
95 	OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
96 	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
97 	MATCH,
98 	SUBSTR, LENGTH, INDEX
99 	};
100 static	int pri[] = {
101 	1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7,
102 	7, 7, 7
103 	};
104 
105 
/*
 * clean_buf - XCU4 mod to remove leading zeros from negative signed
 *		numeric output, e.g., -00001 becomes -1
 *
 * Only a string made of '-' followed by one or more decimal digits is
 * rewritten; anything else (including unsigned numbers and a lone "-")
 * is left untouched.  A negative zero such as "-0" or "-000" becomes
 * plain "0".  The edit is done in place on the text itself, so it never
 * lengthens the string and it cannot corrupt a value that is too large
 * to fit in an integer type.
 */
static void
clean_buf(char *buf)
{
	char *digits;
	size_t ndigits;
	size_t nzeros;

	if (buf[0] != '-' || buf[1] == '\0')
		return;

	digits = buf + 1;
	ndigits = strspn(digits, "0123456789");
	if (digits[ndigits] != '\0')
		return;		/* something other than digits follows */

	nzeros = strspn(digits, "0");
	if (nzeros == ndigits) {
		/* every digit is zero: the value is 0, so drop the sign too */
		buf[0] = '0';
		buf[1] = '\0';
		return;
	}
	if (nzeros > 0) {
		/* slide the significant digits (and the NUL) down over the zeros */
		(void) memmove(digits, digits + nzeros, ndigits - nzeros + 1);
	}
}
142 
143 /*
144  * End XCU4 mods.
145  */
146 
147 static int
148 yylex()
149 {
150 	char *p;
151 	int i;
152 
153 	if (Argi >= Ac)
154 		return (NOARG);
155 
156 	p = Av[Argi];
157 
158 	if ((*p == '(' || *p == ')') && p[1] == '\0')
159 		return ((int)*p);
160 	for (i = 0; *operator[i]; ++i)
161 		if (EQL(operator[i], p))
162 			return (op[i]);
163 
164 
165 	return (A_STRING);
166 }
167 
168 static char
169 *rel(oper, r1, r2) register char *r1, *r2;
170 {
171 	long long i, l1, l2;
172 
173 	if (ematch(r1, "-\\{0,1\\}[0-9]*$") &&
174 	    ematch(r2, "-\\{0,1\\}[0-9]*$")) {
175 		errno = 0;
176 		l1 = strtoll(r1, (char **)NULL, 10);
177 		l2 = strtoll(r2, (char **)NULL, 10);
178 		if (errno) {
179 #ifdef XPG6
180 		/* XPG6: stdout will always contain newline even on error */
181 			(void) write(1, "\n", 1);
182 #endif
183 			if (errno == ERANGE) {
184 				(void) fprintf(stderr, gettext(
185 				    "expr: Integer argument too large\n"));
186 				exit(3);
187 			} else {
188 				perror("expr");
189 				exit(3);
190 			}
191 		}
192 		switch (oper) {
193 		case EQ:
194 			i = (l1 == l2);
195 			break;
196 		case GT:
197 			i = (l1 > l2);
198 			break;
199 		case GEQ:
200 			i = (l1 >= l2);
201 			break;
202 		case LT:
203 			i = (l1 < l2);
204 			break;
205 		case LEQ:
206 			i = (l1 <= l2);
207 			break;
208 		case NEQ:
209 			i = (l1 != l2);
210 			break;
211 		}
212 	}
213 	else
214 	{
215 			i = strcoll(r1, r2);
216 		switch (oper) {
217 		case EQ:
218 			i = i == 0;
219 			break;
220 		case GT:
221 			i = i > 0;
222 			break;
223 		case GEQ:
224 			i = i >= 0;
225 			break;
226 		case LT:
227 			i = i < 0;
228 			break;
229 		case LEQ:
230 			i = i <= 0;
231 			break;
232 		case NEQ:
233 			i = i != 0;
234 			break;
235 		}
236 	}
237 	return (i ? "1": "0");
238 }
239 
240 static char
241 *arith(oper, r1, r2) char *r1, *r2;
242 {
243 	long long i1, i2;
244 	register char *rv;
245 
246 	if (!(ematch(r1, "-\\{0,1\\}[0-9]*$") &&
247 	    ematch(r2, "-\\{0,1\\}[0-9]*$")))
248 		yyerror("non-numeric argument");
249 	errno = 0;
250 	i1 = strtoll(r1, (char **)NULL, 10);
251 	i2 = strtoll(r2, (char **)NULL, 10);
252 	if (errno) {
253 #ifdef XPG6
254 	/* XPG6: stdout will always contain newline even on error */
255 		(void) write(1, "\n", 1);
256 #endif
257 		if (errno == ERANGE) {
258 			(void) fprintf(stderr, gettext(
259 			    "expr: Integer argument too large\n"));
260 			exit(3);
261 		} else {
262 			perror("expr");
263 			exit(3);
264 		}
265 	}
266 
267 	switch (oper) {
268 	case ADD:
269 		i1 = i1 + i2;
270 		break;
271 	case SUBT:
272 		i1 = i1 - i2;
273 		break;
274 	case MULT:
275 		i1 = i1 * i2;
276 		break;
277 	case DIV:
278 		if (i2 == 0)
279 			yyerror("division by zero");
280 		i1 = i1 / i2;
281 		break;
282 	case REM:
283 		if (i2 == 0)
284 			yyerror("division by zero");
285 		i1 = i1 % i2;
286 		break;
287 	}
288 	rv = exprmalloc(25);
289 	(void) strcpy(rv, lltoa(i1));
290 	return (rv);
291 }
292 
293 static char
294 *conj(oper, r1, r2)
295 	char *r1, *r2;
296 {
297 	register char *rv;
298 
299 	switch (oper) {
300 
301 	case OR:
302 		if (EQL(r1, "0") || EQL(r1, "")) {
303 			if (EQL(r2, "0") || EQL(r2, ""))
304 				rv = "0";
305 			else
306 				rv = r2;
307 		} else
308 			rv = r1;
309 		break;
310 	case AND:
311 		if (EQL(r1, "0") || EQL(r1, ""))
312 			rv = "0";
313 		else if (EQL(r2, "0") || EQL(r2, ""))
314 			rv = "0";
315 		else
316 			rv = r1;
317 		break;
318 	}
319 	return (rv);
320 }
321 
/*
 * substr - implement "substr string position length".
 *
 *	v	the string to cut from
 *	s	decimal text of the 1-based starting position
 *	w	decimal text of the number of bytes wanted
 *
 * Returns a pointer into v.  The result is the empty string when the
 * position is less than 1 or beyond the end of v, or when the length is
 * 0.  A length that reaches past the end of v, or a negative length,
 * selects everything from the position to the end.  v is written to
 * only when the selection has to be cut short in the middle of the
 * string; a string that needs no shortening is never modified, so it
 * is safe to pass one that lives in read-only storage in that case.
 * Positions and lengths count bytes, not multibyte characters.
 */
char *
substr(char *v, char *s, char *w)
{
	long start = strtol(s, (char **)NULL, 10);
	long width = strtol(w, (char **)NULL, 10);
	size_t len = strlen(v);
	size_t avail;
	char *res;

	/* nothing to select: answer with the existing terminator */
	if (start < 1 || (unsigned long)start > len || width == 0)
		return (v + len);

	res = v + (start - 1);
	avail = len - (size_t)(start - 1);

	if (width > 0 && (unsigned long)width < avail)
		res[width] = '\0';

	return (res);
}
341 
/*
 * index - implement "index string chars".
 *
 * Returns the 1-based position of the first byte of s that also occurs
 * in t, as a freshly allocated decimal string, or the constant string
 * "0" when s contains none of the bytes of t.
 */
char *
index(char *s, char *t)
{
	size_t pos = strcspn(s, t);	/* bytes of s before the first hit */
	char *digits;
	char *rv;

	if (s[pos] == '\0')
		return ("0");

	digits = ltoa((long)(pos + 1));
	/* size the copy from the text itself rather than a fixed guess */
	rv = exprmalloc(strlen(digits) + 1);
	(void) strcpy(rv, digits);
	return (rv);
}
356 
/*
 * length - implement "length string".
 *
 * Returns the number of bytes in s (not multibyte characters) as a
 * freshly allocated decimal string.
 */
char *
length(char *s)
{
	char *digits = ltoa((long)strlen(s));
	char *rv;

	/* size the copy from the text itself rather than a fixed guess */
	rv = exprmalloc(strlen(digits) + 1);
	(void) strcpy(rv, digits);
	return (rv);
}
369 
370 static char *
371 match(char *s, char *p)
372 {
373 	char *rv;
374 	long val;			/* XCU4 */
375 
376 	(void) strcpy(rv = exprmalloc(8), ltoa(val = (long)ematch(s, p)));
377 	if (nbra /* && val != 0 */) {
378 		rv = exprmalloc((unsigned)strlen(Mstring[0]) + 1);
379 		(void) strcpy(rv, Mstring[0]);
380 	}
381 	return (rv);
382 }
383 
384 
385 /*
386  * ematch	- XCU4 mods involve calling compile/advance which simulate
387  *		  the obsolete compile/advance functions using regcomp/regexec
388  */
389 static int
390 ematch(char *s, char *p)
391 {
392 	static char *expbuf;
393 	char *nexpbuf;
394 	int num;
395 #ifdef XPG4
396 	int nmatch;		/* number of matched bytes */
397 	char tempbuf[256];
398 	char *tmptr1 = 0;	/* If tempbuf is not large enough */
399 	char *tmptr;
400 	int nmbchars;		/* number characters in multibyte string */
401 #endif
402 
403 	nexpbuf = compile(p, (char *)0, (char *)0);	/* XCU4 regex mod */
404 	if (0 /* XXX nbra > 1*/)
405 		yyerror("Too many '\\('s");
406 	if (regerrno) {
407 		if (regerrno != 41 || expbuf == NULL)
408 			errxx();
409 	} else {
410 		if (expbuf)
411 			free(expbuf);
412 		expbuf = nexpbuf;
413 	}
414 	if (advance(s, expbuf)) {
415 		if (nbra > 0) {
416 			p = braslist[0];
417 			num = braelist[0] - p;
418 			if ((num > MSIZE - 1) || (num < 0))
419 				yyerror("string too long");
420 			(void) strncpy(Mstring[0], p, num);
421 			Mstring[0][num] = '\0';
422 		}
423 #ifdef XPG4
424 		/*
425 		 *  Use mbstowcs to find the number of multibyte characters
426 		 *  in the multibyte string beginning at s, and
427 		 *  ending at loc2.  Create a separate string
428 		 *  of the substring, so it can be passed to mbstowcs.
429 		 */
430 		nmatch = loc2 - s;
431 		if (nmatch > ((sizeof (tempbuf) / sizeof (char)) - 1)) {
432 			tmptr1 = exprmalloc(nmatch + 1);
433 			tmptr = tmptr1;
434 		} else {
435 			tmptr = tempbuf;
436 		}
437 		memcpy(tmptr, s, nmatch);
438 		*(tmptr + nmatch) = '\0';
439 		if ((nmbchars = mbstowcs(NULL, tmptr, 0)) == -1) {
440 			yyerror("invalid multibyte character encountered");
441 			if (tmptr1 != NULL)
442 				free(tmptr1);
443 			return (0);
444 		}
445 		if (tmptr1 != NULL)
446 			free(tmptr1);
447 		return (nmbchars);
448 #else
449 		return (loc2-s);
450 #endif
451 	}
452 	return (0);
453 }
454 
/*
 * errxx - report a regular expression failure.  Hands the fixed message
 * to yyerror(), which prints it and ends the program.
 */
static void
errxx()
{
	yyerror("RE error");
}
460 
/*
 * yyerror - print "expr: <message>" on standard error and exit with
 * status 2.  The message is passed through gettext() first.  Under XPG6
 * a newline is written to standard output before the diagnostic.
 */
static void
yyerror(char *s)
{
	char *msg;

#ifdef XPG6
	/* XPG6: stdout will always contain newline even on error */
	(void) write(1, "\n", 1);
#endif
	(void) write(2, "expr: ", 6);
	msg = gettext(s);
	(void) write(2, msg, (unsigned)strlen(msg));
	(void) write(2, "\n", 1);
	exit(2);
	/* NOTREACHED */
}
474 
/*
 * ltoa - convert a long to decimal text.
 *
 * Returns a pointer into a static buffer that is overwritten by the
 * next call, so callers must copy the text if they need to keep it.
 * The digits are produced from the unsigned magnitude, which makes the
 * most negative value safe to convert whatever the width of long, and
 * the buffer is sized from that width.
 */
static char *
ltoa(long l)
{
	/* ~1 digit per 3 bits, plus room for the sign and the NUL */
	static char str[sizeof (long) * CHAR_BIT / 3 + 3];
	char *sp = &str[sizeof (str) - 1];
	unsigned long mag;

	mag = (l < 0) ? 0UL - (unsigned long)l : (unsigned long)l;

	*sp = '\0';
	do {
		*--sp = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);
	if (l < 0)
		*--sp = '-';
	return (sp);
}
497 
/*
 * lltoa - convert a long long to decimal text.
 *
 * Returns either a pointer into a static buffer, which the next call
 * overwrites, or a constant string for the one value whose magnitude
 * cannot be negated.  Callers should copy the text before calling again.
 */
static char *
lltoa(long long l)
{
	static char str[25];
	char *sp = str + 23;
	int negative;

	/* the most negative value has no positive counterpart */
	if (l == 0x8000000000000000ULL)
		return ("-9223372036854775808");

	negative = (l < 0);
	if (negative)
		l = -l;

	str[24] = '\0';
	/* emit digits right to left; always at least one, so 0 prints "0" */
	for (;;) {
		int digit = l % 10;

		*sp-- = '0' + digit;
		l /= 10;
		if (l == 0)
			break;
	}
	if (negative)
		*sp-- = '-';
	return (sp + 1);
}
520 
521 static char *
522 expres(int prior, int par)
523 {
524 	int ylex, temp, op1;
525 	char *r1, *ra, *rb, *rc;
526 	ylex = yylex();
527 	if (ylex >= NOARG && ylex < MATCH) {
528 		yyerror("syntax error");
529 	}
530 	if (ylex == A_STRING) {
531 		r1 = Av[Argi++];
532 		temp = Argi;
533 	} else {
534 		if (ylex == '(') {
535 			paren++;
536 			Argi++;
537 			r1 = expres(0, Argi);
538 			Argi--;
539 		}
540 	}
541 lop:
542 	ylex = yylex();
543 	if (ylex > NOARG && ylex < MATCH) {
544 		op1 = ylex;
545 		Argi++;
546 		if (pri[op1-OR] <= prior)
547 			return (r1);
548 		else {
549 			switch (op1) {
550 			case OR:
551 			case AND:
552 				r1 = conj(op1, r1, expres(pri[op1-OR], 0));
553 				break;
554 			case EQ:
555 			case LT:
556 			case GT:
557 			case LEQ:
558 			case GEQ:
559 			case NEQ:
560 				r1 = rel(op1, r1, expres(pri[op1-OR], 0));
561 				break;
562 			case ADD:
563 			case SUBT:
564 			case MULT:
565 			case DIV:
566 			case REM:
567 				r1 = arith(op1, r1, expres(pri[op1-OR], 0));
568 				break;
569 			case MCH:
570 				r1 = match(r1, expres(pri[op1-OR], 0));
571 				break;
572 			}
573 			if (noarg == 1) {
574 				return (r1);
575 			}
576 			Argi--;
577 			goto lop;
578 		}
579 	}
580 	ylex = yylex();
581 	if (ylex == ')') {
582 		if (par == Argi) {
583 			yyerror("syntax error");
584 		}
585 		if (par != 0) {
586 			paren--;
587 			Argi++;
588 		}
589 		Argi++;
590 		return (r1);
591 	}
592 	ylex = yylex();
593 	if (ylex > MCH && ylex <= INDEX) {
594 		if (Argi == temp) {
595 			return (r1);
596 		}
597 		op1 = ylex;
598 		Argi++;
599 		switch (op1) {
600 		case MATCH:
601 			rb = expres(pri[op1-OR], 0);
602 			ra = expres(pri[op1-OR], 0);
603 			break;
604 		case SUBSTR:
605 			rc = expres(pri[op1-OR], 0);
606 			rb = expres(pri[op1-OR], 0);
607 			ra = expres(pri[op1-OR], 0);
608 			break;
609 		case LENGTH:
610 			ra = expres(pri[op1-OR], 0);
611 			break;
612 		case INDEX:
613 			rb = expres(pri[op1-OR], 0);
614 			ra = expres(pri[op1-OR], 0);
615 			break;
616 		}
617 		switch (op1) {
618 		case MATCH:
619 			r1 = match(rb, ra);
620 			break;
621 		case SUBSTR:
622 			r1 = substr(rc, rb, ra);
623 			break;
624 		case LENGTH:
625 			r1 = length(ra);
626 			break;
627 		case INDEX:
628 			r1 = index(rb, ra);
629 			break;
630 		}
631 		if (noarg == 1) {
632 			return (r1);
633 		}
634 		Argi--;
635 		goto lop;
636 	}
637 	ylex = yylex();
638 	if (ylex == NOARG) {
639 		noarg = 1;
640 	}
641 	return (r1);
642 }
643 
/*
 * exprmalloc - malloc() that does not return on failure.
 *
 * Returns the new block, or prints "expr: " followed by the translated
 * "malloc error" message on standard error and exits with status 3.
 */
void *
exprmalloc(size_t size)
{
	void *mem = malloc(size);
	char *msg;

	if (mem != NULL)
		return (mem);

	msg = gettext("malloc error");
	(void) write(2, "expr: ", 6);
	(void) write(2, msg, (unsigned)strlen(msg));
	(void) write(2, "\n", 1);
	exit(3);
	/* NOTREACHED */
}
659 
660 int
661 main(int argc, char **argv)
662 {
663 	/*
664 	 * XCU4 allow "--" as argument
665 	 */
666 	if (argc > 1 && strcmp(argv[1], "--") == 0)
667 		argv++, argc--;
668 	/*
669 	 * XCU4 - print usage message when invoked without args
670 	 */
671 	if (argc < 2) {
672 #ifdef XPG6
673 	/* XPG6: stdout will always contain newline even on error */
674 		(void) write(1, "\n", 1);
675 #endif
676 		(void) fprintf(stderr, gettext("Usage: expr expression\n"));
677 		exit(3);
678 	}
679 	Ac = argc;
680 	Argi = 1;
681 	noarg = 0;
682 	paren = 0;
683 	Av = argv;
684 
685 	(void) setlocale(LC_ALL, "");
686 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
687 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
688 #endif
689 	(void) textdomain(TEXT_DOMAIN);
690 	buf = expres(0, 1);
691 	if (Ac != Argi || paren != 0) {
692 		yyerror("syntax error");
693 	}
694 	/*
695 	 * XCU4 - strip leading zeros from numeric output
696 	 */
697 	clean_buf(buf);
698 	(void) write(1, buf, (unsigned)strlen(buf));
699 	(void) write(1, "\n", 1);
700 	return ((strcmp(buf, "0") == 0 || buf[0] == 0) ? 1 : 0);
701 }
702