xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk4.c (revision bd97c7ce2344fa3252d8785c35895490916bc79b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * awk -- functions
24  *
25  * Copyright (c) 1995, 1996 by Sun Microsystems, Inc.
26  * All rights reserved.
27  *
28  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
29  *
30  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
31  */
32 
33 #include "awk.h"
34 #include "y.tab.h"
35 #include <time.h>
36 #include <sys/wait.h>
37 
38 static uint	nargs(NODE *np);
39 static NODE	*dosub(NODE *np, int glob);
40 static NODE	*docasetr(NODE *np, int upper);
41 static int	asortcmp(const void *npp1, const void *npp2);
42 
43 static char	nargerr[] = "wrong number of arguments to function \"%s\"";
44 static NODE	*asortfunc;		/* Function call for asort() */
45 static NODE	*asnp1, *asnp2;		/* index1, index2 nodes */
46 static int	asarraylen;		/* strlen(array)+1 for asort */
47 
48 /*
49  * Return the value of exp(x).
50  * Usage:	y = exp(x)
51  *		y = exp()
52  */
53 NODE *
54 f_exp(NODE *np)
55 {
56 	register uint na;
57 
58 	if ((na = nargs(np)) > 1)
59 		awkerr(nargerr, s_exp);
60 	return (realnode(exp(exprreal(na==0 ? field0 : getlist(&np)))));
61 }
62 
63 /*
64  * Return the integer part of the argument.
65  * Usage:	i = int(r)
66  *		i = int()
67  */
68 NODE *
69 f_int(NODE *np)
70 {
71 	register uint na;
72 
73 	if ((na = nargs(np)) > 1)
74 		awkerr(nargerr, s_int);
75 	return (intnode(exprint(na==0 ? field0 : getlist(&np))));
76 }
77 
78 /*
79  * Logarithm function.
80  * Usage:	y = log(x)
81  *		y = log()
82  */
83 NODE *
84 f_log(NODE *np)
85 {
86 	register uint na;
87 
88 	if ((na = nargs(np)) > 1)
89 		awkerr(nargerr, s_log);
90 	return (realnode(log(exprreal(na==0 ? field0 : getlist(&np)))));
91 }
92 
93 /*
94  * Square root function.
95  * Usage:	y = sqrt(x)
96  *		y = sqrt()
97  */
98 NODE *
99 f_sqrt(NODE *np)
100 {
101 	register uint na;
102 
103 	if ((na = nargs(np)) > 1)
104 		awkerr(nargerr, s_sqrt);
105 	return (realnode(sqrt(exprreal(na==0 ? field0 : getlist(&np)))));
106 }
107 
108 /*
109  * Trigonometric sine function.
110  * Usage:	y = sin(x)
111  */
112 NODE *
113 f_sin(NODE *np)
114 {
115 	if (nargs(np) != 1)
116 		awkerr(nargerr, s_sin);
117 	return (realnode(sin(exprreal(getlist(&np)))));
118 }
119 
120 /*
121  * Trigonometric cosine function.
122  * Usage:	y = cos(x)
123  */
124 NODE *
125 f_cos(NODE *np)
126 {
127 	if (nargs(np) != 1)
128 		awkerr(nargerr, s_cos);
129 	return (realnode(cos(exprreal(getlist(&np)))));
130 }
131 
132 /*
133  * Arctangent of y/x.
134  * Usage:	z = atan2(y, x)
135  */
136 NODE *
137 f_atan2(NODE *np)
138 {
139 	double y, x;
140 
141 	if (nargs(np) != 2)
142 		awkerr(nargerr, s_atan2);
143 	y = (double)exprreal(getlist(&np));
144 	x = (double)exprreal(getlist(&np));
145 	return (realnode(atan2(y, x)));
146 }
147 
148 /*
149  * Set the seed for the random number generator function -- rand.
150  * Usage:	srand(x)
151  *		srand()
152  */
153 NODE *
154 f_srand(NODE *np)
155 {
156 	register uint na;
157 	register uint seed;
158 	static uint oldseed = 0;
159 
160 	if ((na = nargs(np)) > 1)
161 		awkerr(nargerr, s_srand);
162 	if (na == 0)
163 		seed = (uint)time((time_t *)0); else
164 		seed = (uint)exprint(getlist(&np));
165 	srand(seed);
166 	na = oldseed;
167 	oldseed = seed;
168 	return (intnode((INT)na));
169 }
170 
171 /*
172  * Generate a random number.
173  * Usage:	x = rand()
174  */
175 NODE *
176 f_rand(NODE *np)
177 {
178 	double result;
179 	int expon;
180 	ushort rint;
181 
182 	if (nargs(np) != 0)
183 		awkerr(nargerr, s_rand);
184 	rint = rand() & SHRT_MAX;
185 	result = frexp((double)rint, &expon);
186 	return (realnode((REAL)ldexp(result, expon-15)));
187 }
188 
189 /*
190  * Substitute function.
191  * Usage:	n = sub(regex, replace, target)
192  *		n = sub(regex, replace)
193  */
194 NODE *
195 f_sub(NODE *np)
196 {
197 	return (dosub(np, 1));
198 }
199 
200 /*
201  * Global substitution function.
202  * Usage:	n = gsub(regex, replace, target)
203  *		n = gsub(regex, replace)
204  */
205 NODE *
206 f_gsub(NODE *np)
207 {
208 	return (dosub(np, 0));
209 }
210 
211 /*
212  * Do actual substitutions.
213  * `glob' is the number to substitute, 0 for all.
214  */
215 static NODE *
216 dosub(NODE *np, int glob)
217 {
218 	wchar_t *text;
219 	register wchar_t *sub;
220 	register uint n;
221 	register uint na;
222 	register REGEXP rp;
223 	NODE *left;
224 	static wchar_t *buf;
225 
226 	if ((na = nargs(np)) != 2 && na != 3)
227 		awkerr(nargerr, glob==0 ? s_gsub : s_sub);
228 	rp = getregexp(getlist(&np));
229 	sub = exprstring(getlist(&np));
230 	if (na == 3) {
231 		left = getlist(&np);
232 		text = exprstring(left);
233 	} else {
234 		left = field0;
235 		text = linebuf;
236 	}
237 	switch (REGWDOSUBA(rp, sub, text, &buf, 256, &glob)) {
238 	case REG_OK:
239 	case REG_NOMATCH:
240 		n = glob;
241 		break;
242 	case REG_ESPACE:
243 		if (buf != NULL)
244 			free(buf);
245 		awkerr(nomem);
246 	default:
247 		awkerr(gettext("regular expression error"));
248 	}
249 	(void)assign(left, stringnode(buf, FNOALLOC, wcslen(buf)));
250 	return (intnode((INT)n));
251 }
252 
253 /*
254  * Match function.  Return position (origin 1) or 0 for regular
255  * expression match in string.  Set new variables RSTART and RLENGTH
256  * as well.
257  * Usage:	pos = match(string, re)
258  */
259 NODE *
260 f_match(NODE *np)
261 {
262 	register wchar_t *text;
263 	register REGEXP rp;
264 	register int pos, length;
265 	REGWMATCH_T match[10];
266 
267 	if (nargs(np) != 2)
268 		awkerr(nargerr, s_match);
269 	text = exprstring(getlist(&np));
270 	rp = getregexp(getlist(&np));
271 	if (REGWEXEC(rp, text, 10, match, 0) == REG_OK) {
272 		pos = match[0].rm_sp-text+1;
273 		length = match[0].rm_ep - match[0].rm_sp;
274 	} else {
275 		pos = 0;
276 		length = -1;
277 	}
278 	constant->n_int = length;
279 	(void)assign(vlook(M_MB_L("RLENGTH")), constant);
280 	return (assign(vlook(M_MB_L("RSTART")), intnode((INT)pos)));
281 }
282 
283 /*
284  * Call shell or command interpreter.
285  * Usage:	status = system(command)
286  */
287 NODE *
288 f_system(NODE *np)
289 {
290 	int retcode;
291 
292 	if (nargs(np) != 1)
293 		awkerr(nargerr, s_system);
294 	(void) fflush(stdout);
295 	retcode = system(mbunconvert(exprstring(getlist(&np))));
296 	return (intnode((INT)WEXITSTATUS(retcode)));
297 }
298 
299 /*
300  * Search for string within string.
301  * Usage:	pos = index(string1, string2)
302  */
303 NODE *
304 f_index(NODE *np)
305 {
306 	register wchar_t *s1, *s2;
307 	register int l1, l2;
308 	register int result;
309 
310 	if (nargs(np) != 2)
311 		awkerr(nargerr, s_index);
312 	s1 = (wchar_t *)exprstring(getlist(&np));
313 	s2 = (wchar_t *)exprstring(getlist(&np));
314 	l1 = wcslen(s1);
315 	l2 = wcslen(s2);
316 	result = 1;
317 	while (l2 <= l1) {
318 		if (memcmp(s1, s2, l2 * sizeof(wchar_t)) == 0)
319 			break;
320 		result++;
321 		s1++;
322 		l1--;
323 	}
324 	if (l2 > l1)
325 		result = 0;
326 	return (intnode((INT)result));
327 }
328 
329 /*
330  * Return length of argument or $0
331  * Usage:	n = length(string)
332  *		n = length()
333  *		n = length
334  */
335 NODE *
336 f_length(NODE *np)
337 {
338 	register uint na;
339 
340 	if ((na = nargs(np)) > 1)
341 		awkerr(nargerr, s_length);
342 	if (na == 0)
343 		na = lbuflen; else
344 		na = wcslen((wchar_t *)exprstring(getlist(&np)));
345 	return (intnode((INT)na));
346 }
347 
348 /*
349  * Split string into fields.
350  * Usage: nfields = split(string, array [, separator]);
351  */
352 NODE *
353 f_split(NODE *np)
354 {
355 	register wchar_t *cp;
356 	wchar_t *ep, *saved = 0;
357 	register NODE *tnp, *snp, *otnp;
358 	register NODE *sep;
359 	REGEXP old_resep = 0;
360 	size_t seplen;
361 	uint n;
362 	wint_t c;
363 	wchar_t savesep[20];
364 	wchar_t  *(*old_awkfield)(wchar_t **) = 0;
365 
366 	if ((n = nargs(np))<2 || n>3)
367 		awkerr(nargerr, s_split);
368 	ep = exprstring(snp = getlist(&np));
369 	tnp = getlist(&np);
370 	if (snp->n_type == INDEX && snp->n_left == tnp)
371 		ep = saved = wsdup(ep);
372 	if (n == 3) {
373 		sep = getlist(&np);
374 	} else
375 		sep = NNULL;
376 	switch (tnp->n_type) {
377 	case ARRAY:
378 		delarray(tnp);
379 		break;
380 
381 	case PARM:
382 		break;
383 
384 	case VAR:
385 		if (isstring(tnp->n_flags) && tnp->n_string==_null)
386 			break;
387 		/* FALLTHROUGH */
388 
389 	default:
390 		awkerr(gettext(
391 			"second parameter to \"split\" must be an array"));
392 	}
393 	/*
394 	 * If an argument has been passed in to be used as the
395 	 * field separator check to see if it is a constant regular
396 	 * expression. If so, use it directly otherwise reduce the
397 	 * expression, convert the result into a string and assign it
398 	 * to "FS" (after saving the old value for FS.)
399 	 */
400 	if (sep != NNULL) {
401 		if (sep->n_type == PARM)
402 			sep = sep->n_next;
403 		if (sep->n_type == RE) {
404 			old_resep = resep;
405 			resep = sep->n_regexp;
406 			old_awkfield = awkfield;
407 			awkfield = refield;
408 		} else {
409 			sep = exprreduce(sep);
410 			seplen = wcslen(cp = (wchar_t *)exprstring(varFS));
411 			(void) memcpy(savesep, cp,
412 				(seplen+1) * sizeof(wchar_t));
413 			(void) assign(varFS, sep);
414 		}
415 	}
416 	/*
417 	 * Iterate over the record, extracting each field and assigning it to
418 	 * the corresponding element in the array.
419 	 */
420 	otnp = tnp;	/* save tnp for possible promotion */
421 	tnp = node(INDEX, tnp, constant);
422 	fcount = 0;
423 	for (;;) {
424 		if ((cp = (*awkfield)(&ep)) == NULL) {
425 			if (fcount == 0) {
426 				if (otnp->n_type == PARM)
427 					otnp = otnp->n_next;
428 				promote(otnp);
429 			}
430 			break;
431 		}
432 		c = *ep;
433 		*ep = '\0';
434 		constant->n_int = ++fcount;
435 		(void)assign(tnp, stringnode(cp,FALLOC|FSENSE,(size_t)(ep-cp)));
436 		*ep = c;
437 	}
438 	/*
439 	 * Restore the old record separator/and or regular expression.
440 	 */
441 	if (sep != NNULL) {
442 		if (old_awkfield != 0) {
443 			resep = old_resep;
444 			awkfield = old_awkfield;
445 		} else {
446 			(void)assign(varFS,
447 				stringnode(savesep, FSTATIC, seplen));
448 		}
449 	}
450 	if (saved)
451 		free(saved);
452 	return (intnode((INT)fcount));
453 }
454 
455 /*
456  * Sprintf function.
457  * Usage:	string = sprintf(format, arg, ...)
458  */
459 NODE *
460 f_sprintf(NODE *np)
461 {
462         wchar_t *cp;
463         size_t length;
464 
465         if (nargs(np) == 0)
466                 awkerr(nargerr, s_sprintf);
467         length = xprintf(np, (FILE *)NULL, &cp);
468         np = stringnode(cp, FNOALLOC, length);
469         return (np);
470 }
471 
472 /*
473  * Substring.
474  * newstring = substr(string, start, [length])
475  */
476 NODE *
477 f_substr(NODE *np)
478 {
479 	register STRING str;
480 	register size_t n;
481 	register int start;
482 	register size_t len;
483 
484 	if ((n = nargs(np))<2 || n>3)
485 		awkerr(nargerr, s_substr);
486 	str = exprstring(getlist(&np));
487 	if ((start = (int)exprint(getlist(&np))-1) < 0)
488 		start = 0;
489 	if (n == 3) {
490 		int x;
491 		x = (int)exprint(getlist(&np));
492 		if (x < 0)
493 			len = 0;
494 		else
495 			len = (size_t)x;
496 	} else
497 		len = LARGE;
498 	n = wcslen((wchar_t *)str);
499 	if (start > n)
500 		start = n;
501 	n -= start;
502 	if (len > n)
503 		len = n;
504 	str += start;
505 	n = str[len];
506 	str[len] = '\0';
507 	np = stringnode(str, FALLOC, len);
508 	str[len] = n;
509 	return (np);
510 }
511 
512 /*
513  * Close an output or input file stream.
514  */
515 NODE *
516 f_close(NODE *np)
517 {
518 	register OFILE *op;
519 	register char *name;
520 
521 	if (nargs(np) != 1)
522 		awkerr(nargerr, s_close);
523 	name = mbunconvert(exprstring(getlist(&np)));
524 	for (op = &ofiles[0]; op < &ofiles[NIOSTREAM]; op++)
525 		if (op->f_fp!=FNULL && strcmp(name, op->f_name)==0) {
526 			awkclose(op);
527 			break;
528 		}
529 	if (op >= &ofiles[NIOSTREAM])
530 		return (const1);
531 	return (const0);
532 }
533 
534 /*
535  * Return the integer value of the first character of a string.
536  * Usage:	char = ord(string)
537  */
538 NODE *
539 f_ord(NODE *np)
540 {
541 	if (nargs(np) != 1)
542 		awkerr(nargerr, s_ord);
543 	return (intnode((INT)*exprstring(getlist(&np))));
544 }
545 
546 /*
547  * Return the argument string in lower case:
548  * Usage:
549  *	lower = tolower(upper)
550  */
551 NODE *
552 f_tolower(NODE *np)
553 {
554 	return (docasetr(np, 0));
555 }
556 
557 /*
558  * Return the argument string in upper case:
559  * Usage:
560  *	upper = toupper(lower)
561  */
562 NODE *
563 f_toupper(NODE *np)
564 {
565 	return (docasetr(np, 1));
566 }
567 
568 /*
569  * Sort the array into traversal order by the next "for (i in array)" loop.
570  * Usage:
571  *	asort(array, "cmpfunc")
572  * 	cmpfunc(array, index1, index2)
573  *		returns:
574  *		<0		if 	array[index1] <  array[index2]
575  *		 0		if	array[index1] == array[index2]
576  *		>0		if	array[index1] >  array[index2]
577  */
578 NODE *
579 f_asort(NODE *np)
580 {
581 	NODE *array;
582 	STRING funcname;
583 	register size_t nel;
584 	register NODE *tnp;
585 	register NODE *funcnp;
586 	register NODE **alist, **npp;
587 
588 	if (nargs(np) != 2)
589 		awkerr(nargerr, s_asort);
590 	array = getlist(&np);
591 	if (array->n_type == PARM)
592 		array = array->n_next;
593 	if (array->n_type != ARRAY)
594 		awkerr(gettext("%s function requires an array"),
595 			s_asort);
596 	funcname = exprstring(getlist(&np));
597 	if ((funcnp = vlookup(funcname, 1)) == NNULL
598 	 || funcnp->n_type != UFUNC)
599 		awkerr(gettext("%s: %s is not a function\n"),
600 		    s_asort, funcname);
601 	/*
602 	 * Count size of array, allowing one extra for NULL at end
603 	 */
604 	nel = 1;
605 	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
606 		++nel;
607 	/*
608 	 * Create UFUNC node that points at the funcnp on left and the
609 	 * list of three variables on right (array, index1, index2)
610 	 *				UFUNC
611 	 *				/    \
612 	 *			   funcnp    COMMA
613 	 *				      /   \
614 	 *				array	  COMMA
615 	 *					  /    \
616 	 *					index1 index2
617 	 */
618 	if (asortfunc == NNULL) {
619 		running = 0;
620 		asortfunc = node(CALLUFUNC, NNULL,
621 				    node(COMMA, NNULL,
622 				    node(COMMA,
623 					asnp1=stringnode(_null, FSTATIC, 0),
624 					asnp2=stringnode(_null, FSTATIC, 0))));
625 		running = 1;
626 	}
627 	asortfunc->n_left = funcnp;
628 	asortfunc->n_right->n_left = array;
629 	asarraylen = wcslen(array->n_name)+1;
630 	alist = (NODE **) emalloc(nel*sizeof(NODE *));
631 	/*
632 	 * Copy array into alist.
633 	 */
634 	npp = alist;
635 	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
636 		*npp++ = tnp;
637 	*npp = NNULL;
638 	/*
639 	 * Re-order array to this list
640 	 */
641 	qsort((wchar_t *)alist, nel-1, sizeof (NODE *), asortcmp);
642 	tnp = array;
643 	npp = alist;
644 	do {
645 		tnp = tnp->n_alink = *npp;
646 	} while (*npp++ != NNULL);
647 	free((wchar_t *)alist);
648 	return (constundef);
649 }
650 
651 /*
652  * Return the number of arguments of a function.
653  */
654 static uint
655 nargs(NODE *np)
656 {
657 	register int n;
658 
659 	if (np == NNULL)
660 		return (0);
661 	n = 1;
662 	while (np!=NNULL && np->n_type==COMMA) {
663 		np = np->n_right;
664 		n++;
665 	}
666 	return (n);
667 }
668 
669 /*
670  * Do case translation.
671  */
672 static NODE *
673 docasetr(NODE *np, int upper)
674 {
675 	register int c;
676 	register wchar_t *cp;
677 	register wchar_t *str;
678 	register uint na;
679 
680 	if ((na = nargs(np)) > 1)
681 		awkerr(nargerr, upper ? s_toupper : s_tolower);
682 	str = strsave(na==0 ? linebuf : exprstring(getlist(&np)));
683 	cp = str;
684 	if (upper) {
685 		while ((c = *cp++) != '\0')
686 			cp[-1] = towupper(c);
687 	} else {
688 		while ((c = *cp++) != '\0')
689 			cp[-1] = towlower(c);
690 	}
691 	return (stringnode((STRING)str, FNOALLOC, (size_t)(cp-str-1)));
692 }
693 
694 /*
695  * The comparison routine used by qsort inside f_asort()
696  */
697 static int
698 asortcmp(const void *npp1, const void *npp2)
699 {
700 	asnp1->n_strlen =
701 	    wcslen(asnp1->n_string = (*(NODE **)npp1)->n_name+asarraylen);
702 	asnp2->n_strlen =
703 	    wcslen(asnp2->n_string = (*(NODE **)npp2)->n_name+asarraylen);
704 	return ((int)exprint(asortfunc));
705 }
706 
707 #if M_MATHERR
708 #if !defined(__BORLANDC__)&&defined(__TURBOC__)&&__COMPACT__&&__EMULATE__
709 /* So it won't optimize registers our FP is using */
710 #define	flushesbx()	(_BX = 0, _ES = _BX)
711 #else
712 #define	flushesbx()	(0)
713 #endif
714 
715 /*
716  * Math error for awk.
717  */
718 int
719 matherr(struct exception *ep)
720 {
721 	register uint type;
722 	static char msgs[7][256];
723 	static int first_time = 1;
724 
725 	if (first_time) {
726 		msgs[0] = gettext("Unknown FP error"),
727 		msgs[1] = gettext("Domain"),
728 		msgs[2] = gettext("Singularity"),
729 		msgs[3] = gettext("Overflow"),
730 		msgs[4] = gettext("Underflow"),
731 		msgs[5] = gettext("Total loss of precision"),
732 		msgs[6] = gettext("Partial loss of precision")
733 		first_time = 0;
734 	}
735 
736 	if ((type = ep->type) > (uint)PLOSS)
737 		type = 0;
738 	(void)fprintf(stderr, "awk: %s", strmsg(msgs[type]));
739 	(void)fprintf(stderr, gettext(
740 		" error in function %s(%g) at NR=%lld\n"),
741 		((void) flushesbx(), ep->name), ep->arg1, (INT)exprint(varNR));
742 	return (1);
743 }
744 #endif	/*M_MATHERR*/
745