xref: /illumos-gate/usr/src/cmd/expr/expr.c (revision 6ed9368a130d7c9a82e574da808d34034da33748)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27  */
28 
29 #include <stdlib.h>
30 #include <regexpr.h>
31 #include <locale.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <regex.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <errno.h>
39 
/*
 * Token codes produced by yylex().  The values are consecutive; the codes
 * from OR through INDEX are also used, after subtracting OR, as indices
 * into the pri[] precedence table.
 */
enum {
	A_STRING = 258,		/* argument that is not an operator */
	NOARG = 259,		/* no arguments left to read */
	OR = 260,
	AND = 261,
	EQ = 262,
	LT = 263,
	GT = 264,
	GEQ = 265,
	LEQ = 266,
	NEQ = 267,
	ADD = 268,
	SUBT = 269,
	MULT = 270,
	DIV = 271,
	REM = 272,
	MCH = 273,		/* the ":" operator */
	MATCH = 274,
	SUBSTR = 275,
	LENGTH = 276,
	INDEX = 277
};

/* size of subexpression array */
#define	MSIZE	LINE_MAX

/* Both spellings ignore their argument and report a generic RE error. */
#define	error(c)	errxx()
#define	ERROR(c)	errxx()

/* True when the two strings compare equal with strcmp(). */
#define	EQL(x, y) (strcmp(x, y) == 0)

#define	MAX_MATCH 20
67 #define	MAX_MATCH 20
68 static int ematch(char *, char *);
69 static void yyerror(char *);
70 static void errxx();
71 static void *exprmalloc(size_t size);
72 
73 long atol();
74 char *strcpy(), *strncpy();
75 void exit();
76 
77 static char *ltoa();
78 static char *lltoa();
79 static char	**Av;
80 static char *buf;
81 static int	Ac;
82 static int	Argi;
83 static int noarg;
84 static int paren;
85 /*
86  *	Array used to store subexpressions in regular expressions
87  *	Only one subexpression allowed per regular expression currently
88  */
89 static char Mstring[1][MSIZE];
90 
91 
92 static char *operator[] = {
93 	"|", "&", "+", "-", "*", "/", "%", ":",
94 	"=", "==", "<", "<=", ">", ">=", "!=",
95 	"match",
96 	"substr", "length", "index",
97 	"\0" };
98 static	int op[] = {
99 	OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
100 	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
101 	MATCH,
102 	SUBSTR, LENGTH, INDEX
103 	};
104 static	int pri[] = {
105 	1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7,
106 	7, 7, 7
107 	};
108 
109 
/*
 * clean_buf - XCU4 mod to remove leading zeros from negative signed
 *		numeric output, e.g., -00001 becomes -1
 *
 * Only a string made of '-' followed by one or more decimal digits is
 * rewritten; every other string is left untouched.  The rewrite is purely
 * textual and done in place (the result is never longer than the input),
 * so digit strings that do not fit in a long long are preserved rather
 * than being clamped by a numeric round trip.  A value consisting only of
 * zeros ("-0", "-000") becomes "0".
 */
static void
clean_buf(char *buf)
{
	char *digits;
	char *p;

	/* Not negative, or a '-' all by itself: nothing to do. */
	if (buf[0] != '-' || buf[1] == '\0')
		return;

	/* Everything after the sign must be a digit. */
	for (p = buf + 1; *p != '\0'; p++) {
		/* cast: <ctype.h> requires an unsigned char value */
		if (!isdigit((unsigned char)*p))
			return;
	}

	digits = buf + 1;
	while (*digits == '0')
		digits++;

	if (*digits == '\0') {
		/* All zeros: the value is zero and carries no sign. */
		buf[0] = '0';
		buf[1] = '\0';
		return;
	}

	/* Slide the significant digits (and the NUL) up against the '-'. */
	if (digits != buf + 1)
		(void) memmove(buf + 1, digits, strlen(digits) + 1);
}
147 
148 /*
149  * End XCU4 mods.
150  */
151 
152 static int
153 yylex()
154 {
155 	char *p;
156 	int i;
157 
158 	if (Argi >= Ac)
159 		return (NOARG);
160 
161 	p = Av[Argi];
162 
163 	if ((*p == '(' || *p == ')') && p[1] == '\0')
164 		return ((int)*p);
165 	for (i = 0; *operator[i]; ++i)
166 		if (EQL(operator[i], p))
167 			return (op[i]);
168 
169 
170 	return (A_STRING);
171 }
172 
173 static char
174 *rel(oper, r1, r2) register char *r1, *r2;
175 {
176 	long long i, l1, l2;
177 
178 	if (ematch(r1, "-\\{0,1\\}[0-9]*$") &&
179 	    ematch(r2, "-\\{0,1\\}[0-9]*$")) {
180 		errno = 0;
181 		l1 = strtoll(r1, (char **)NULL, 10);
182 		l2 = strtoll(r2, (char **)NULL, 10);
183 		if (errno) {
184 #ifdef XPG6
185 		/* XPG6: stdout will always contain newline even on error */
186 			(void) write(1, "\n", 1);
187 #endif
188 			if (errno == ERANGE) {
189 				(void) fprintf(stderr, gettext(
190 				    "expr: Integer argument too large\n"));
191 				exit(3);
192 			} else {
193 				perror("expr");
194 				exit(3);
195 			}
196 		}
197 		switch (oper) {
198 		case EQ:
199 			i = (l1 == l2);
200 			break;
201 		case GT:
202 			i = (l1 > l2);
203 			break;
204 		case GEQ:
205 			i = (l1 >= l2);
206 			break;
207 		case LT:
208 			i = (l1 < l2);
209 			break;
210 		case LEQ:
211 			i = (l1 <= l2);
212 			break;
213 		case NEQ:
214 			i = (l1 != l2);
215 			break;
216 		}
217 	}
218 	else
219 	{
220 			i = strcoll(r1, r2);
221 		switch (oper) {
222 		case EQ:
223 			i = i == 0;
224 			break;
225 		case GT:
226 			i = i > 0;
227 			break;
228 		case GEQ:
229 			i = i >= 0;
230 			break;
231 		case LT:
232 			i = i < 0;
233 			break;
234 		case LEQ:
235 			i = i <= 0;
236 			break;
237 		case NEQ:
238 			i = i != 0;
239 			break;
240 		}
241 	}
242 	return (i ? "1": "0");
243 }
244 
245 static char
246 *arith(oper, r1, r2) char *r1, *r2;
247 {
248 	long long i1, i2;
249 	register char *rv;
250 
251 	if (!(ematch(r1, "-\\{0,1\\}[0-9]*$") &&
252 	    ematch(r2, "-\\{0,1\\}[0-9]*$")))
253 		yyerror("non-numeric argument");
254 	errno = 0;
255 	i1 = strtoll(r1, (char **)NULL, 10);
256 	i2 = strtoll(r2, (char **)NULL, 10);
257 	if (errno) {
258 #ifdef XPG6
259 	/* XPG6: stdout will always contain newline even on error */
260 		(void) write(1, "\n", 1);
261 #endif
262 		if (errno == ERANGE) {
263 			(void) fprintf(stderr, gettext(
264 			    "expr: Integer argument too large\n"));
265 			exit(3);
266 		} else {
267 			perror("expr");
268 			exit(3);
269 		}
270 	}
271 
272 	switch (oper) {
273 	case ADD:
274 		i1 = i1 + i2;
275 		break;
276 	case SUBT:
277 		i1 = i1 - i2;
278 		break;
279 	case MULT:
280 		i1 = i1 * i2;
281 		break;
282 	case DIV:
283 		if (i2 == 0)
284 			yyerror("division by zero");
285 		i1 = i1 / i2;
286 		break;
287 	case REM:
288 		if (i2 == 0)
289 			yyerror("division by zero");
290 		i1 = i1 % i2;
291 		break;
292 	}
293 	rv = exprmalloc(25);
294 	(void) strcpy(rv, lltoa(i1));
295 	return (rv);
296 }
297 
298 static char
299 *conj(oper, r1, r2)
300 	char *r1, *r2;
301 {
302 	register char *rv;
303 
304 	switch (oper) {
305 
306 	case OR:
307 		if (EQL(r1, "0") || EQL(r1, "")) {
308 			if (EQL(r2, "0") || EQL(r2, ""))
309 				rv = "0";
310 			else
311 				rv = r2;
312 		} else
313 			rv = r1;
314 		break;
315 	case AND:
316 		if (EQL(r1, "0") || EQL(r1, ""))
317 			rv = "0";
318 		else if (EQL(r2, "0") || EQL(r2, ""))
319 			rv = "0";
320 		else
321 			rv = r1;
322 		break;
323 	}
324 	return (rv);
325 }
326 
/*
 * substr - extract part of a string.
 *
 *	v	string to extract from
 *	s	decimal text giving the 1-based starting position
 *	w	decimal text giving the number of characters wanted
 *
 * Returns a pointer into v at the starting position, with v truncated in
 * place after the requested number of characters.  A start beyond the end
 * of v yields an empty string, and a count that runs past the end yields
 * the remaining characters.  A start or count that is zero, negative or
 * not a number yields "" and leaves v untouched.
 *
 * The terminator is only written when it actually shortens v, so a
 * read-only operand (for example the "0"/"1" literals returned by other
 * operators in this file) is not modified when nothing has to be cut.
 */
char *
substr(char *v, char *s, char *w)
{
	long start, count;
	char *res;

	start = strtol(s, (char **)NULL, 10);
	count = strtol(w, (char **)NULL, 10);

	if (start <= 0 || count <= 0)
		return ("");

	/* Skip start - 1 characters, stopping at the end of the string. */
	while (--start > 0 && *v != '\0')
		++v;

	res = v;

	/* Step over the characters that are kept. */
	while (count-- > 0 && *v != '\0')
		++v;

	if (*v != '\0')
		*v = '\0';
	return (res);
}
346 
/*
 * index - find the first character of s that also occurs in t.
 *
 * Returns a newly allocated decimal string holding the 1-based position
 * of that character in s, or the literal "0" when s contains none of the
 * characters of t.
 */
char *
index(char *s, char *t)
{
	/* room for any 64-bit decimal value plus the terminating NUL */
	const size_t rvsize = 24;
	size_t pos;
	char *rv;

	/* strcspn() gives the length of the prefix of s free of t's bytes */
	pos = strcspn(s, t);
	if (s[pos] == '\0')
		return ("0");

	rv = exprmalloc(rvsize);
	(void) snprintf(rv, rvsize, "%lu", (unsigned long)(pos + 1));
	return (rv);
}
361 
/*
 * length - return the length of s, in bytes, as a newly allocated
 * decimal string.
 */
char *
length(char *s)
{
	/* room for any 64-bit decimal value plus the terminating NUL */
	const size_t rvsize = 24;
	char *rv;

	rv = exprmalloc(rvsize);
	(void) snprintf(rv, rvsize, "%lu", (unsigned long)strlen(s));
	return (rv);
}
374 
375 static char *
376 match(char *s, char *p)
377 {
378 	char *rv;
379 	long val;			/* XCU4 */
380 
381 	(void) strcpy(rv = exprmalloc(8), ltoa(val = (long)ematch(s, p)));
382 	if (nbra /* && val != 0 */) {
383 		rv = exprmalloc((unsigned)strlen(Mstring[0]) + 1);
384 		(void) strcpy(rv, Mstring[0]);
385 	}
386 	return (rv);
387 }
388 
389 
390 /*
391  * ematch 	- XCU4 mods involve calling compile/advance which simulate
392  *		  the obsolete compile/advance functions using regcomp/regexec
393  */
394 static int
395 ematch(char *s, char *p)
396 {
397 	static char *expbuf;
398 	char *nexpbuf;
399 	int num;
400 #ifdef XPG4
401 	int nmatch;		/* number of matched bytes */
402 	char tempbuf[256];
403 	char *tmptr1 = 0;	/* If tempbuf is not large enough */
404 	char *tmptr;
405 	int nmbchars;		/* number characters in multibyte string */
406 #endif
407 
408 	nexpbuf = compile(p, (char *)0, (char *)0);	/* XCU4 regex mod */
409 	if (0 /* XXX nbra > 1*/)
410 		yyerror("Too many '\\('s");
411 	if (regerrno) {
412 		if (regerrno != 41 || expbuf == NULL)
413 			errxx();
414 	} else {
415 		if (expbuf)
416 			free(expbuf);
417 		expbuf = nexpbuf;
418 	}
419 	if (advance(s, expbuf)) {
420 		if (nbra > 0) {
421 			p = braslist[0];
422 			num = braelist[0] - p;
423 			if ((num > MSIZE - 1) || (num < 0))
424 				yyerror("string too long");
425 			(void) strncpy(Mstring[0], p, num);
426 			Mstring[0][num] = '\0';
427 		}
428 #ifdef XPG4
429 		/*
430 		 *  Use mbstowcs to find the number of multibyte characters
431 		 *  in the multibyte string beginning at s, and
432 		 *  ending at loc2.  Create a separate string
433 		 *  of the substring, so it can be passed to mbstowcs.
434 		 */
435 		nmatch = loc2 - s;
436 		if (nmatch > ((sizeof (tempbuf) / sizeof (char)) - 1)) {
437 			tmptr1 = exprmalloc(nmatch + 1);
438 			tmptr = tmptr1;
439 		} else {
440 			tmptr = tempbuf;
441 		}
442 		memcpy(tmptr, s, nmatch);
443 		*(tmptr + nmatch) = '\0';
444 		if ((nmbchars = mbstowcs(NULL, tmptr, NULL)) == -1) {
445 			yyerror("invalid multibyte character encountered");
446 			if (tmptr1 != NULL)
447 				free(tmptr1);
448 			return (0);
449 		}
450 		if (tmptr1 != NULL)
451 			free(tmptr1);
452 		return (nmbchars);
453 #else
454 		return (loc2-s);
455 #endif
456 	}
457 	return (0);
458 }
459 
/*
 * errxx - report a regular expression failure.  Hands the fixed message
 * "RE error" to yyerror(), which prints it and exits.
 */
static void
errxx()
{
	yyerror("RE error");
}
465 
/*
 * yyerror - print "expr: <message>" on file descriptor 2 and exit with
 * status 2.  The message s is passed through gettext() first.  When built
 * with XPG6 a newline is written to file descriptor 1 beforehand.
 * This function does not return.
 */
static void
yyerror(char *s)
{
	char *text;

#ifdef XPG6
	/* XPG6: stdout will always contain newline even on error */
	(void) write(1, "\n", 1);
#endif
	(void) write(2, "expr: ", 6);
	text = gettext(s);
	(void) write(2, text, (unsigned)strlen(text));
	(void) write(2, "\n", 1);
	exit(2);
	/* NOTREACHED */
}
479 
/*
 * ltoa - convert a long to its decimal text.
 *
 * Returns a pointer into a static buffer that is overwritten by the next
 * call, so callers must copy the result before calling ltoa() again.
 *
 * The digits are produced from the unsigned magnitude of l, which makes
 * the most negative value safe to convert and avoids any special case
 * that depends on the width of long.
 */
static char *
ltoa(long l)
{
	static char str[24];	/* sign + 20 digits + NUL covers 64 bits */
	char *sp = &str[sizeof (str) - 1];
	unsigned long mag;

	/* 0UL - x negates without overflowing, even for the minimum value */
	mag = (l < 0) ? 0UL - (unsigned long)l : (unsigned long)l;

	*sp = '\0';
	do {
		*--sp = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);
	if (l < 0)
		*--sp = '-';
	return (sp);
}
502 
/*
 * lltoa - convert a long long to its decimal text.
 *
 * Returns a pointer to a string literal for the most negative value and
 * otherwise a pointer into a static buffer that the next call overwrites.
 */
static char *
lltoa(long long l)
{
	static char str[25];
	char *cursor;
	int negative;

	/* The minimum value cannot be negated, so it is spelled out. */
	if (l == 0x8000000000000000ULL)
		return ("-9223372036854775808");

	negative = (l < 0);
	if (negative)
		l = -l;

	/* Build the digits right to left, ending at the terminating NUL. */
	cursor = &str[24];
	*cursor = '\0';
	for (;;) {
		*--cursor = (char)('0' + (int)(l % 10));
		l /= 10;
		if (l == 0)
			break;
	}
	if (negative)
		*--cursor = '-';
	return (cursor);
}
525 
526 static char *
527 expres(int prior, int par)
528 {
529 	int ylex, temp, op1;
530 	char *r1, *ra, *rb, *rc;
531 	ylex = yylex();
532 	if (ylex >= NOARG && ylex < MATCH) {
533 		yyerror("syntax error");
534 	}
535 	if (ylex == A_STRING) {
536 		r1 = Av[Argi++];
537 		temp = Argi;
538 	} else {
539 		if (ylex == '(') {
540 			paren++;
541 			Argi++;
542 			r1 = expres(0, Argi);
543 			Argi--;
544 		}
545 	}
546 lop:
547 	ylex = yylex();
548 	if (ylex > NOARG && ylex < MATCH) {
549 		op1 = ylex;
550 		Argi++;
551 		if (pri[op1-OR] <= prior)
552 			return (r1);
553 		else {
554 			switch (op1) {
555 			case OR:
556 			case AND:
557 				r1 = conj(op1, r1, expres(pri[op1-OR], 0));
558 				break;
559 			case EQ:
560 			case LT:
561 			case GT:
562 			case LEQ:
563 			case GEQ:
564 			case NEQ:
565 				r1 = rel(op1, r1, expres(pri[op1-OR], 0));
566 				break;
567 			case ADD:
568 			case SUBT:
569 			case MULT:
570 			case DIV:
571 			case REM:
572 				r1 = arith(op1, r1, expres(pri[op1-OR], 0));
573 				break;
574 			case MCH:
575 				r1 = match(r1, expres(pri[op1-OR], 0));
576 				break;
577 			}
578 			if (noarg == 1) {
579 				return (r1);
580 			}
581 			Argi--;
582 			goto lop;
583 		}
584 	}
585 	ylex = yylex();
586 	if (ylex == ')') {
587 		if (par == Argi) {
588 			yyerror("syntax error");
589 		}
590 		if (par != 0) {
591 			paren--;
592 			Argi++;
593 		}
594 		Argi++;
595 		return (r1);
596 	}
597 	ylex = yylex();
598 	if (ylex > MCH && ylex <= INDEX) {
599 		if (Argi == temp) {
600 			return (r1);
601 		}
602 		op1 = ylex;
603 		Argi++;
604 		switch (op1) {
605 		case MATCH:
606 			rb = expres(pri[op1-OR], 0);
607 			ra = expres(pri[op1-OR], 0);
608 			break;
609 		case SUBSTR:
610 			rc = expres(pri[op1-OR], 0);
611 			rb = expres(pri[op1-OR], 0);
612 			ra = expres(pri[op1-OR], 0);
613 			break;
614 		case LENGTH:
615 			ra = expres(pri[op1-OR], 0);
616 			break;
617 		case INDEX:
618 			rb = expres(pri[op1-OR], 0);
619 			ra = expres(pri[op1-OR], 0);
620 			break;
621 		}
622 		switch (op1) {
623 		case MATCH:
624 			r1 = match(rb, ra);
625 			break;
626 		case SUBSTR:
627 			r1 = substr(rc, rb, ra);
628 			break;
629 		case LENGTH:
630 			r1 = length(ra);
631 			break;
632 		case INDEX:
633 			r1 = index(rb, ra);
634 			break;
635 		}
636 		if (noarg == 1) {
637 			return (r1);
638 		}
639 		Argi--;
640 		goto lop;
641 	}
642 	ylex = yylex();
643 	if (ylex == NOARG) {
644 		noarg = 1;
645 	}
646 	return (r1);
647 }
648 
/*
 * exprmalloc - malloc() wrapper that does not return on failure.
 *
 * Returns the block obtained from malloc(size).  If malloc() returns
 * NULL, "expr: " followed by the translated "malloc error" message and a
 * newline is written to file descriptor 2 and the process exits with
 * status 3.
 */
void *
exprmalloc(size_t size)
{
	void *rv = malloc(size);
	char *s;

	if (rv != NULL)
		return (rv);

	s = gettext("malloc error");

	(void) write(2, "expr: ", 6);
	(void) write(2, s, (unsigned)strlen(s));
	(void) write(2, "\n", 1);
	exit(3);
	/* NOTREACHED */
	return (NULL);
}
664 
665 int
666 main(int argc, char **argv)
667 {
668 	/*
669 	 * XCU4 allow "--" as argument
670 	 */
671 	if (argc > 1 && strcmp(argv[1], "--") == 0)
672 		argv++, argc--;
673 	/*
674 	 * XCU4 - print usage message when invoked without args
675 	 */
676 	if (argc < 2) {
677 #ifdef XPG6
678 	/* XPG6: stdout will always contain newline even on error */
679 		(void) write(1, "\n", 1);
680 #endif
681 		(void) fprintf(stderr, gettext("Usage: expr expression\n"));
682 		exit(3);
683 	}
684 	Ac = argc;
685 	Argi = 1;
686 	noarg = 0;
687 	paren = 0;
688 	Av = argv;
689 
690 	(void) setlocale(LC_ALL, "");
691 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
692 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
693 #endif
694 	(void) textdomain(TEXT_DOMAIN);
695 	buf = expres(0, 1);
696 	if (Ac != Argi || paren != 0) {
697 		yyerror("syntax error");
698 	}
699 	/*
700 	 * XCU4 - strip leading zeros from numeric output
701 	 */
702 	clean_buf(buf);
703 	(void) write(1, buf, (unsigned)strlen(buf));
704 	(void) write(1, "\n", 1);
705 	return ((strcmp(buf, "0") == 0 || buf[0] == 0) ? 1 : 0);
706 }
707