xref: /titanic_52/usr/src/cmd/expr/expr.c (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdlib.h>
34 #include <regexpr.h>
35 #include <locale.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <regex.h>
39 #include <limits.h>
40 #include <stdio.h>
41 #include <ctype.h>
42 #include <errno.h>
43 
/*
 * Token codes produced by yylex().  A lone "(" or ")" is returned as the
 * character itself, so these codes start above the char range.  The
 * codes from OR upward are also used, after subtracting OR, as indexes
 * into pri[] in expres().
 */
#define	A_STRING 258	/* argument that is neither operator nor paren */
#define	NOARG 259	/* Argi has run past the last argument */
#define	OR 260
#define	AND 261
#define	EQ 262
#define	LT 263
#define	GT 264
#define	GEQ 265
#define	LEQ 266
#define	NEQ 267
#define	ADD 268
#define	SUBT 269
#define	MULT 270
#define	DIV 271
#define	REM 272
#define	MCH 273		/* the infix ":" operator */
#define	MATCH 274	/* the "match" keyword */
#ifdef  _iBCS2
/* NOTE(review): no token is assigned the value 275 in this file. */
#define	SUBSTR 276
#define	LENGTH 277
#define	INDEX  278
#endif  /* _iBCS2 */

/* size of subexpression array */
#define	MSIZE	LINE_MAX
/* error() ignores its argument and simply calls errxx() */
#define	error(c)	errxx()
/* EQL is true when the two strings compare equal with strcmp() */
#define	EQL(x, y) (strcmp(x, y) == 0)

/* ERROR() likewise ignores its argument and calls errxx() */
#define	ERROR(c)	errxx()
/* NOTE(review): MAX_MATCH is not referenced in the code visible here. */
#define	MAX_MATCH 20
74 static int ematch(char *, char *);
75 static void yyerror(char *);
76 static void errxx();
77 static void *exprmalloc(size_t size);
78 
79 long atol();
80 char *strcpy(), *strncpy();
81 void exit();
82 
83 static char *ltoa();
84 static char *lltoa();
85 static char	**Av;
86 static char *buf;
87 static int	Ac;
88 static int	Argi;
89 static int noarg;
90 static int paren;
91 #ifdef  _iBCS2
92 char    *sysv3_set;
93 #endif  /* _iBCS2 */
94 /*
95  *	Array used to store subexpressions in regular expressions
96  *	Only one subexpression allowed per regular expression currently
97  */
98 static char Mstring[1][MSIZE];
99 
100 
/*
 * Spellings of the operators recognised by yylex().  Entry i pairs with
 * op[i]; the list ends with an entry whose first character is NUL.
 */
static char *operator[] = {
	"|",
	"&",
	"+",
	"-",
	"*",
	"/",
	"%",
	":",
	"=",
	"==",
	"<",
	"<=",
	">",
	">=",
	"!=",
	"match",
#ifdef	_iBCS2
	"substr",
	"length",
	"index",
#endif	/* _iBCS2 */
	"\0"
};
109 static	int op[] = {
110 	OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
111 	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
112 	MATCH
113 #ifdef	_iBCS2
114 , SUBSTR, LENGTH, INDEX
115 #endif	/* _iBCS2 */
116 	};
/*
 * Priority of each token, indexed by (token - OR).  The entries therefore
 * follow the numeric order of the token #defines, not the order of
 * operator[]/op[].  expres() stops and returns to its caller when it
 * meets an operator whose priority is not greater than the one it was
 * called with, so a larger number binds more tightly.
 */
static	int pri[] = {
	1,			/* OR */
	2,			/* AND */
	3, 3, 3, 3, 3, 3,	/* EQ LT GT GEQ LEQ NEQ */
	4, 4,			/* ADD SUBT */
	5, 5, 5,		/* MULT DIV REM */
	6,			/* MCH */
	7			/* MATCH */
#ifdef	_iBCS2
	/*
	 * Token value 275 is not assigned, so a filler entry is required
	 * here; without it pri[INDEX - OR] would index one element past
	 * the end of the table.
	 */
	, 7			/* 275 (unused) */
	, 7, 7, 7		/* SUBSTR LENGTH INDEX */
#endif	/* _iBCS2 */
};
123 
124 
/*
 * clean_buf - XCU4 mod to remove leading zeros from negative signed
 *		numeric output, e.g., -00001 becomes -1
 *
 * Only a string consisting of '-' followed by one or more decimal digits
 * is modified, in place; anything else is left untouched.  A value made
 * up solely of zeros (e.g. "-000") becomes "0".
 *
 * The zeros are removed textually rather than by converting to an integer
 * and back, so digit strings too long for a long long keep their value.
 * The buffer is written only when its contents actually change.
 */
static void
clean_buf(char *buf)
{
	char *digits;
	char *p;

	if (buf[0] != '-' || buf[1] == '\0')
		return;		/* not negative, or a '-' all by itself */

	/* Everything after the sign has to be a decimal digit. */
	for (p = buf + 1; *p != '\0'; p++) {
		/* cast: isdigit() is undefined for negative char values */
		if (!isdigit((unsigned char)*p))
			return;
	}

	digits = buf + 1;
	while (*digits == '0')
		digits++;

	if (*digits == '\0') {
		/* Nothing but zeros: the result is plain 0, without a sign. */
		buf[0] = '0';
		buf[1] = '\0';
	} else if (digits != buf + 1) {
		/* Slide the significant digits (and the NUL) up to the sign. */
		(void) memmove(buf + 1, digits, strlen(digits) + 1);
	}
}
162 
163 /*
164  * End XCU4 mods.
165  */
166 
167 static int
168 yylex()
169 {
170 	char *p;
171 	int i;
172 
173 	if (Argi >= Ac)
174 		return (NOARG);
175 
176 	p = Av[Argi];
177 
178 	if ((*p == '(' || *p == ')') && p[1] == '\0')
179 		return ((int)*p);
180 	for (i = 0; *operator[i]; ++i)
181 		if (EQL(operator[i], p))
182 			return (op[i]);
183 
184 
185 	return (A_STRING);
186 }
187 
188 static char
189 *rel(oper, r1, r2) register char *r1, *r2;
190 {
191 	long long i;
192 
193 	if (ematch(r1, "-\\{0,1\\}[0-9]*$") &&
194 	    ematch(r2, "-\\{0,1\\}[0-9]*$")) {
195 		errno = 0;
196 		i = strtoll(r1, (char **)NULL, 10) -
197 		    strtoll(r2, (char **)NULL, 10);
198 		if (errno) {
199 #ifdef XPG6
200 		/* XPG6: stdout will always contain newline even on error */
201 			(void) write(1, "\n", 1);
202 #endif
203 			if (errno == ERANGE) {
204 				(void) fprintf(stderr, gettext(
205 				    "expr: Integer argument too large\n"));
206 				exit(3);
207 			} else {
208 				perror("expr");
209 				exit(3);
210 			}
211 		}
212 	}
213 	else
214 		i = strcoll(r1, r2);
215 	switch (oper) {
216 	case EQ:
217 		i = i == 0;
218 		break;
219 	case GT:
220 		i = i > 0;
221 		break;
222 	case GEQ:
223 		i = i >= 0;
224 		break;
225 	case LT:
226 		i = i < 0;
227 		break;
228 	case LEQ:
229 		i = i <= 0;
230 		break;
231 	case NEQ:
232 		i = i != 0;
233 		break;
234 	}
235 	return (i ? "1": "0");
236 }
237 
238 static char
239 *arith(oper, r1, r2) char *r1, *r2;
240 {
241 	long long i1, i2;
242 	register char *rv;
243 
244 	if (!(ematch(r1, "-\\{0,1\\}[0-9]*$") &&
245 	    ematch(r2, "-\\{0,1\\}[0-9]*$")))
246 		yyerror("non-numeric argument");
247 	errno = 0;
248 	i1 = strtoll(r1, (char **)NULL, 10);
249 	i2 = strtoll(r2, (char **)NULL, 10);
250 	if (errno) {
251 #ifdef XPG6
252 	/* XPG6: stdout will always contain newline even on error */
253 		(void) write(1, "\n", 1);
254 #endif
255 		if (errno == ERANGE) {
256 			(void) fprintf(stderr, gettext(
257 			    "expr: Integer argument too large\n"));
258 			exit(3);
259 		} else {
260 			perror("expr");
261 			exit(3);
262 		}
263 	}
264 
265 	switch (oper) {
266 	case ADD:
267 		i1 = i1 + i2;
268 		break;
269 	case SUBT:
270 		i1 = i1 - i2;
271 		break;
272 	case MULT:
273 		i1 = i1 * i2;
274 		break;
275 	case DIV:
276 		if (i2 == 0)
277 			yyerror("division by zero");
278 		i1 = i1 / i2;
279 		break;
280 	case REM:
281 		if (i2 == 0)
282 			yyerror("division by zero");
283 		i1 = i1 % i2;
284 		break;
285 	}
286 	rv = exprmalloc(25);
287 	(void) strcpy(rv, lltoa(i1));
288 	return (rv);
289 }
290 
291 static char
292 *conj(oper, r1, r2)
293 	char *r1, *r2;
294 {
295 	register char *rv;
296 
297 	switch (oper) {
298 
299 	case OR:
300 		if (EQL(r1, "0") || EQL(r1, "")) {
301 			if (EQL(r2, "0") || EQL(r2, ""))
302 				rv = "0";
303 			else
304 				rv = r2;
305 		} else
306 			rv = r1;
307 		break;
308 	case AND:
309 		if (EQL(r1, "0") || EQL(r1, ""))
310 			rv = "0";
311 		else if (EQL(r2, "0") || EQL(r2, ""))
312 			rv = "0";
313 		else
314 			rv = r1;
315 		break;
316 	}
317 	return (rv);
318 }
319 
320 #ifdef	_iBCS2
/*
 * substr - return the part of v that starts at 1-based position s and is
 * at most w characters long.  s and w are decimal strings read with
 * atol().
 *
 * The result is a newly allocated string; v itself is no longer modified,
 * since it may be a string literal handed back by another operator.
 *
 * Out-of-range values give the result the old counting loops eventually
 * arrived at, without iterating through the whole int range to get
 * there:
 *   - a start below 1 or beyond the end of v yields the empty string;
 *   - a negative length yields everything from the start position on.
 */
char *
substr(char *v, char *s, char *w)
{
	long start = atol(s);
	long width = atol(w);
	size_t len = strlen(v);
	size_t off;
	size_t n;
	char *res;

	if (start < 1 || (size_t)start > len)
		off = len;
	else
		off = (size_t)start - 1;

	n = len - off;
	if (width >= 0 && (size_t)width < n)
		n = (size_t)width;

	res = exprmalloc(n + 1);
	(void) memcpy(res, v + off, n);
	res[n] = '\0';
	return (res);
}
340 
/*
 * index - return, as a decimal string, the 1-based position of the first
 * character of s that also occurs in t, or the literal "0" when s
 * contains none of the characters of t.
 *
 * A non-zero result is newly allocated, sized to fit the digits.
 */
char *
index(char *s, char *t)
{
	char *hit = strpbrk(s, t);
	char *digits;
	char *rv;

	if (hit == NULL)
		return ("0");

	digits = ltoa((long)(hit - s) + 1);
	rv = exprmalloc(strlen(digits) + 1);
	(void) strcpy(rv, digits);
	return (rv);
}
355 
/*
 * length - return the number of bytes in s, before the terminating NUL,
 * as a newly allocated decimal string sized to fit the digits.
 */
char *
length(char *s)
{
	char *digits = ltoa((long)strlen(s));
	char *rv = exprmalloc(strlen(digits) + 1);

	(void) strcpy(rv, digits);
	return (rv);
}
368 #endif	/* _iBCS2 */
369 
370 static char *
371 match(char *s, char *p)
372 {
373 	char *rv;
374 	long val;			/* XCU4 */
375 
376 	(void) strcpy(rv = exprmalloc(8), ltoa(val = (long)ematch(s, p)));
377 	if (nbra /* && val != 0 */) {
378 		rv = exprmalloc((unsigned)strlen(Mstring[0]) + 1);
379 		(void) strcpy(rv, Mstring[0]);
380 	}
381 	return (rv);
382 }
383 
384 
385 /*
386  * ematch 	- XCU4 mods involve calling compile/advance which simulate
387  *		  the obsolete compile/advance functions using regcomp/regexec
388  */
389 static int
390 ematch(char *s, char *p)
391 {
392 	static char *expbuf;
393 	char *nexpbuf;
394 	int num;
395 #ifdef XPG4
396 	int nmatch;		/* number of matched bytes */
397 	char tempbuf[256];
398 	char *tmptr1 = 0;	/* If tempbuf is not large enough */
399 	char *tmptr;
400 	int nmbchars;		/* number characters in multibyte string */
401 #endif
402 
403 	nexpbuf = compile(p, (char *)0, (char *)0);	/* XCU4 regex mod */
404 	if (0 /* XXX nbra > 1*/)
405 		yyerror("Too many '\\('s");
406 	if (regerrno) {
407 		if (regerrno != 41 || expbuf == NULL)
408 			errxx();
409 	} else {
410 		if (expbuf)
411 			free(expbuf);
412 		expbuf = nexpbuf;
413 	}
414 	if (advance(s, expbuf)) {
415 		if (nbra > 0) {
416 			p = braslist[0];
417 			num = braelist[0] - p;
418 			if ((num > MSIZE - 1) || (num < 0))
419 				yyerror("string too long");
420 			(void) strncpy(Mstring[0], p, num);
421 			Mstring[0][num] = '\0';
422 		}
423 #ifdef XPG4
424 		/*
425 		 *  Use mbstowcs to find the number of multibyte characters
426 		 *  in the multibyte string beginning at s, and
427 		 *  ending at loc2.  Create a separate string
428 		 *  of the substring, so it can be passed to mbstowcs.
429 		 */
430 		nmatch = loc2 - s;
431 		if (nmatch > ((sizeof (tempbuf) / sizeof (char)) - 1)) {
432 			tmptr1 = exprmalloc(nmatch + 1);
433 			tmptr = tmptr1;
434 		} else {
435 			tmptr = tempbuf;
436 		}
437 		memcpy(tmptr, s, nmatch);
438 		*(tmptr + nmatch) = '\0';
439 		if ((nmbchars = mbstowcs(NULL, tmptr, NULL)) == -1) {
440 			yyerror("invalid multibyte character encountered");
441 			if (tmptr1 != NULL)
442 				free(tmptr1);
443 			return (0);
444 		}
445 		if (tmptr1 != NULL)
446 			free(tmptr1);
447 		return (nmbchars);
448 #else
449 		return (loc2-s);
450 #endif
451 	}
452 	return (0);
453 }
454 
/*
 * errxx - report a regular expression failure.  The message is handed to
 * yyerror(), which prints it and exits.  The error() and ERROR() macros
 * expand to a call of this function.
 */
static void
errxx()
{
	char *msg = "RE error";

	yyerror(msg);
}
460 
/*
 * yyerror - print "expr: <message>" followed by a newline on standard
 * error and exit with status 2.  The message s is passed through
 * gettext() first.  When built with XPG6 a newline is written to
 * standard output before anything else.
 */
static void
yyerror(char *s)
{
	char *msg;

#ifdef XPG6
	/* XPG6: stdout will always contain newline even on error */
	(void) write(STDOUT_FILENO, "\n", 1);
#endif
	(void) write(STDERR_FILENO, "expr: ", 6);
	msg = gettext(s);
	(void) write(STDERR_FILENO, msg, (unsigned)strlen(msg));
	(void) write(STDERR_FILENO, "\n", 1);
	exit(2);
	/* NOTREACHED */
}
474 
/*
 * ltoa - convert l to its decimal representation.
 *
 * Returns a pointer into a static buffer, which is overwritten by the
 * next call.
 *
 * The digits are produced from the unsigned magnitude, so the most
 * negative value needs no special case and the code works for any width
 * of long.  The buffer allows three digits per byte of long, plus the
 * sign and the terminating NUL.
 */
static char *
ltoa(long l)
{
	static char str[sizeof (long) * 3 + 2];
	char *sp = &str[sizeof (str) - 1];
	unsigned long mag;

	/* 0 - (unsigned)l is the magnitude of a negative l, even LONG_MIN */
	mag = (l < 0) ? 0UL - (unsigned long)l : (unsigned long)l;

	*sp = '\0';
	do {
		*--sp = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);
	if (l < 0)
		*--sp = '-';
	return (sp);
}
497 
/*
 * lltoa - convert l to its decimal representation.
 *
 * The string is built backwards from the end of a static buffer, which
 * is overwritten by the next call; the returned pointer addresses the
 * first character of the result.
 */
static char *
lltoa(long long l)
{
	static char str[25];
	char *cur = str + 24;
	int negative = (l < 0);
	unsigned long long mag;

	/* work on the magnitude so the most negative value needs no negation */
	if (negative)
		mag = 0ULL - (unsigned long long)l;
	else
		mag = (unsigned long long)l;

	*cur = '\0';
	do {
		cur--;
		*cur = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag != 0);

	if (negative) {
		cur--;
		*cur = '-';
	}
	return (cur);
}
520 
521 static char *
522 expres(int prior, int par)
523 {
524 	int ylex, temp, op1;
525 	char *r1, *ra, *rb, *rc;
526 	ylex = yylex();
527 	if (ylex >= NOARG && ylex < MATCH) {
528 		yyerror("syntax error");
529 	}
530 	if (ylex == A_STRING) {
531 		r1 = Av[Argi++];
532 		temp = Argi;
533 	} else {
534 		if (ylex == '(') {
535 			paren++;
536 			Argi++;
537 			r1 = expres(0, Argi);
538 			Argi--;
539 		}
540 	}
541 lop:
542 	ylex = yylex();
543 	if (ylex > NOARG && ylex < MATCH) {
544 		op1 = ylex;
545 		Argi++;
546 		if (pri[op1-OR] <= prior)
547 			return (r1);
548 		else {
549 			switch (op1) {
550 			case OR:
551 			case AND:
552 				r1 = conj(op1, r1, expres(pri[op1-OR], 0));
553 				break;
554 			case EQ:
555 			case LT:
556 			case GT:
557 			case LEQ:
558 			case GEQ:
559 			case NEQ:
560 				r1 = rel(op1, r1, expres(pri[op1-OR], 0));
561 				break;
562 			case ADD:
563 			case SUBT:
564 			case MULT:
565 			case DIV:
566 			case REM:
567 				r1 = arith(op1, r1, expres(pri[op1-OR], 0));
568 				break;
569 			case MCH:
570 				r1 = match(r1, expres(pri[op1-OR], 0));
571 				break;
572 			}
573 			if (noarg == 1) {
574 				return (r1);
575 			}
576 			Argi--;
577 			goto lop;
578 		}
579 	}
580 	ylex = yylex();
581 	if (ylex == ')') {
582 		if (par == Argi) {
583 			yyerror("syntax error");
584 		}
585 		if (par != 0) {
586 			paren--;
587 			Argi++;
588 		}
589 		Argi++;
590 		return (r1);
591 	}
592 	ylex = yylex();
593 #ifdef	_iBCS2
594 	if (ylex > MCH && ((sysv3_set && ylex <= INDEX) || ylex <= MATCH)) {
595 #else
596 	if (ylex > MCH && ylex <= MATCH) {
597 #endif	/* _iBCS2 */
598 		if (Argi == temp) {
599 			return (r1);
600 		}
601 		op1 = ylex;
602 		Argi++;
603 		switch (op1) {
604 		case MATCH:
605 			rb = expres(pri[op1-OR], 0);
606 			ra = expres(pri[op1-OR], 0);
607 			break;
608 #ifdef	_iBCS2
609 		case SUBSTR:
610 			rc = expres(pri[op1-OR], 0);
611 			rb = expres(pri[op1-OR], 0);
612 			ra = expres(pri[op1-OR], 0);
613 			break;
614 		case LENGTH:
615 			ra = expres(pri[op1-OR], 0);
616 			break;
617 		case INDEX:
618 			rb = expres(pri[op1-OR], 0);
619 			ra = expres(pri[op1-OR], 0);
620 			break;
621 #endif	/* _iBCS2 */
622 		}
623 		switch (op1) {
624 		case MATCH:
625 			r1 = match(rb, ra);
626 			break;
627 #ifdef	_iBCS2
628 		case SUBSTR:
629 			r1 = substr(rc, rb, ra);
630 			break;
631 		case LENGTH:
632 			r1 = length(ra);
633 			break;
634 		case INDEX:
635 			r1 = index(rb, ra);
636 			break;
637 #endif	/* _iBCS2 */
638 		}
639 		if (noarg == 1) {
640 			return (r1);
641 		}
642 		Argi--;
643 		goto lop;
644 	}
645 	ylex = yylex();
646 	if (ylex == NOARG) {
647 		noarg = 1;
648 	}
649 	return (r1);
650 }
651 
/*
 * exprmalloc - malloc() that does not return on failure.
 *
 * Returns size bytes obtained from malloc().  If malloc() returns NULL,
 * "expr: " followed by the gettext() translation of "malloc error" and a
 * newline is written to standard error and the program exits with
 * status 3.
 */
void *
exprmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL) {
		char *msg = gettext("malloc error");

		(void) write(STDERR_FILENO, "expr: ", 6);
		(void) write(STDERR_FILENO, msg, (unsigned)strlen(msg));
		(void) write(STDERR_FILENO, "\n", 1);
		exit(3);
	}
	return (mem);
}
667 
668 int
669 main(int argc, char **argv)
670 {
671 	/*
672 	 * XCU4 allow "--" as argument
673 	 */
674 	if (argc > 1 && strcmp(argv[1], "--") == 0)
675 		argv++, argc--;
676 	/*
677 	 * XCU4 - print usage message when invoked without args
678 	 */
679 	if (argc < 2) {
680 #ifdef XPG6
681 	/* XPG6: stdout will always contain newline even on error */
682 		(void) write(1, "\n", 1);
683 #endif
684 		(void) fprintf(stderr, gettext("Usage: expr expression\n"));
685 		exit(3);
686 	}
687 	Ac = argc;
688 	Argi = 1;
689 	noarg = 0;
690 	paren = 0;
691 	Av = argv;
692 #ifdef	_iBCS2
693 	sysv3_set = getenv("SYSV3");
694 #endif	/* _iBCS2 */
695 
696 	(void) setlocale(LC_ALL, "");
697 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
698 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
699 #endif
700 	(void) textdomain(TEXT_DOMAIN);
701 	buf = expres(0, 1);
702 	if (Ac != Argi || paren != 0) {
703 		yyerror("syntax error");
704 	}
705 	/*
706 	 * XCU4 - strip leading zeros from numeric output
707 	 */
708 	clean_buf(buf);
709 	(void) write(1, buf, (unsigned)strlen(buf));
710 	(void) write(1, "\n", 1);
711 	return ((strcmp(buf, "0") == 0 || buf[0] == 0) ? 1 : 0);
712 }
713