xref: /illumos-gate/usr/src/cmd/oawk/lib.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <stdio.h>
31 #include "awk.def"
32 #include "awk.h"
33 #include <ctype.h>
34 #include <wctype.h>
35 #include "awktype.h"
36 #include <stdlib.h>
37 #include <stdarg.h>
38 
/* Input stream currently being read; NULL tells getrec() to open the next. */
FILE	*infile	= NULL;
/* Name of the current input operand; assigned by getrec() when it opens one. */
wchar_t *file;
/* Capacity, in wide characters, of the record[] and fields[] buffers. */
#define	RECSIZE (5 * 512)
/* Text of the current input record; also the string value of fldtab[0]. */
wchar_t record[RECSIZE];
/* Backing store into which fldbld() copies the NUL-terminated field strings. */
wchar_t fields[RECSIZE];
/* Shared empty wide string used as the value of unset fields. */
wchar_t L_NULL[] = L"";
45 
46 
/*
 * Number of entries in fldtab[].  Entry 0 is the whole record, so fldbld()
 * rejects records that would need a field index of MAXFLD or more.
 */
#define	MAXFLD	100
int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */
int	mustfld;	/* 1 = NF seen, so always break */
/* Name string stored in the fldtab[0] cell (the whole-record cell). */
static wchar_t L_record[] = L"$record";
52 
53 
/*
 * Initializer for one empty field cell: its string slot points at the
 * shared empty string L_NULL and its flags are FLD|STR.
 * NOTE(review): member order follows the CELL declaration in awk.h, which
 * is not visible here -- confirm before editing.
 */
#define	FINIT	{ OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
/*
 * Field table.  fldtab[0] is the whole record (its string slot points at
 * record[]); the remaining MAXFLD - 1 entries start out as empty fields.
 */
CELL fldtab[MAXFLD] = {		/* room for fields */
	{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
		FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
};
/* Number of fields found by the most recent fldbld() call. */
int	maxfld	= 0;	/* last used field */
/*
 * pointer to CELL for maximum field assigned to; fldbld() clears every
 * cell between this one and the new last field, then resets it.
 */
CELL	*maxmfld = &fldtab[0];
71 
/* Forward declarations for routines that getrec() calls before they appear. */
static int isclvar(wchar_t *);
static void setclvar(wchar_t *);
void fldbld(void);
75 
76 int
77 getrec(void)
78 {
79 	wchar_t *rr, *er;
80 	int c, sep;
81 	FILE *inf;
82 	extern int svargc;
83 	extern wchar_t **svargv;
84 
85 
86 	dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
87 	donefld = 0;
88 	donerec = 1;
89 	record[0] = 0;
90 	er = record + RECSIZE;
91 	while (svargc > 0) {
92 		dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
93 		if (infile == NULL) {	/* have to open a new file */
94 			/*
95 			 * If the argument contains a '=', determine if the
96 			 * argument needs to be treated as a variable assignment
97 			 * or as the pathname of a file.
98 			 */
99 			if (isclvar(*svargv)) {
100 				/* it's a var=value argument */
101 				setclvar(*svargv);
102 				if (svargc > 1) {
103 					svargv++;
104 					svargc--;
105 					continue;
106 				}
107 				*svargv = L"-";
108 			}
109 			*FILENAME = file = *svargv;
110 			dprintf("opening file %ws\n", file, NULL, NULL);
111 			if (*file == (wchar_t)L'-')
112 				infile = stdin;
113 			else if ((infile = fopen(toeuccode(file), "r")) == NULL)
114 				error(FATAL, "can't open %ws", file);
115 		}
116 		if ((sep = **RS) == 0)
117 			sep = '\n';
118 		inf = infile;
119 		for (rr = record; /* dummy */; /* dummy */) {
120 			for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
121 			    *rr++ = c)
122 				;
123 			if (rr >= er)
124 				error(FATAL, "record `%.20ws...' too long",
125 				    record);
126 			if (**RS == sep || c == EOF)
127 				break;
128 			if ((c = getwc(inf)) == '\n' || c == EOF)
129 			/* 2 in a row */
130 				break;
131 			*rr++ = '\n';
132 			*rr++ = c;
133 		}
134 		if (rr >= er)
135 			error(FATAL, "record `%.20ws...' too long", record);
136 		*rr = 0;
137 		if (mustfld)
138 			fldbld();
139 		if (c != EOF || rr > record) {	/* normal record */
140 			recloc->tval &= ~NUM;
141 			recloc->tval |= STR;
142 			++nrloc->fval;
143 			nrloc->tval &= ~STR;
144 			nrloc->tval |= NUM;
145 			return (1);
146 		}
147 		/* EOF arrived on this file; set up next */
148 		if (infile != stdin)
149 			fclose(infile);
150 		infile = NULL;
151 		svargc--;
152 		svargv++;
153 	}
154 	return (0);	/* true end of file */
155 }
156 
/*
 * isclvar()
 *
 * Decide whether a command-line operand is a variable assignment
 * (returns 1) or should be treated as a file name (returns 0).
 *
 * The operand is an assignment when it starts with an underscore or a
 * character accepted by iswalpha(), continues with any number of
 * underscores and characters accepted by iswalnum(), and the first
 * character after that run is '='.  A NULL or empty operand is never an
 * assignment.
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t	*cp;

	if (arg == NULL)
		return (0);

	/* the name must open with a letter or an underscore */
	if (!iswalpha(*arg) && *arg != '_')
		return (0);

	/* step over the rest of the identifier */
	cp = arg + 1;
	while (*cp != 0 && (iswalnum(*cp) || *cp == '_'))
		cp++;

	/* whatever stopped the scan must be the '=' */
	return (*cp == '=');
}
195 
196 static void
197 setclvar(wchar_t *s)	/* set var=value from s */
198 {
199 	wchar_t *p;
200 	CELL *q;
201 
202 
203 	for (p = s; *p != '='; p++)
204 		;
205 	*p++ = 0;
206 	q = setsymtab(s, tostring(p), 0.0, STR, symtab);
207 	setsval(q, p);
208 	dprintf("command line set %ws to |%ws|\n", s, p, NULL);
209 }
210 
211 
212 void
213 fldbld(void)
214 {
215 	wchar_t *r, *fr, sep, c;
216 	static wchar_t L_NF[] = L"NF";
217 	CELL *p, *q;
218 	int i, j;
219 
220 
221 	r = record;
222 	fr = fields;
223 	i = 0;	/* number of fields accumulated here */
224 	if ((sep = **FS) == ' ')
225 		for (i = 0; /* dummy */; /* dummy */) {
226 			c = *r;
227 			while (iswblank(c) || c == '\t' || c == '\n')
228 				c = *(++r);
229 			if (*r == 0)
230 				break;
231 			i++;
232 			if (i >= MAXFLD)
233 				error(FATAL,
234 			"record `%.20ws...' has too many fields", record);
235 			if (!(fldtab[i].tval&FLD))
236 				xfree(fldtab[i].sval);
237 			fldtab[i].sval = fr;
238 			fldtab[i].tval = FLD | STR;
239 			do {
240 				*fr++ = *r++;
241 				c = *r;
242 			} while (! iswblank(c) && c != '\t' &&
243 			    c != '\n' && c != '\0');
244 
245 
246 			*fr++ = 0;
247 
248 	} else if (*r != 0)	/* if 0, it's a null field */
249 		for (;;) {
250 			i++;
251 			if (i >= MAXFLD)
252 				error(FATAL,
253 			"record `%.20ws...' has too many fields", record);
254 			if (!(fldtab[i].tval&FLD))
255 				xfree(fldtab[i].sval);
256 			fldtab[i].sval = fr;
257 			fldtab[i].tval = FLD | STR;
258 			while ((c = *r) != sep && c != '\n' && c != '\0')
259 				/* \n always a separator */
260 				*fr++ = *r++;
261 			*fr++ = 0;
262 			if (*r++ == 0)
263 				break;
264 		}
265 	*fr = 0;
266 	/* clean out junk from previous record */
267 	for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
268 		if (!(p->tval&FLD))
269 			xfree(p->sval);
270 		p->tval = STR | FLD;
271 		p->sval = L_NULL;
272 	}
273 	maxfld = i;
274 	maxmfld = &fldtab[i];
275 	donefld = 1;
276 	for (i = 1; i <= maxfld; i++)
277 		if (isanumber(fldtab[i].sval)) {
278 			fldtab[i].fval = watof(fldtab[i].sval);
279 			fldtab[i].tval |= NUM;
280 		}
281 	setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
282 	if (dbg)
283 		for (i = 0; i <= maxfld; i++)
284 			printf("field %d: |%ws|\n", i, fldtab[i].sval);
285 }
286 
287 
288 void
289 recbld(void)
290 {
291 	int i;
292 	wchar_t *r, *p;
293 
294 
295 	if (donefld == 0 || donerec == 1)
296 		return;
297 	r = record;
298 	for (i = 1; i <= *NF; i++) {
299 		p = getsval(&fldtab[i]);
300 		while (*r++ = *p++)
301 			;
302 		*(r-1) = **OFS;
303 	}
304 	*(r-1) = '\0';
305 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
306 	recloc->tval = STR | FLD;
307 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
308 	if (r > record+RECSIZE)
309 		error(FATAL, "built giant record `%.20ws...'", record);
310 	dprintf("recbld = |%ws|\n", record, NULL, NULL);
311 }
312 
313 
314 CELL *
315 fieldadr(int n)
316 {
317 	if (n < 0 || n >= MAXFLD)
318 		error(FATAL, "trying to access field %d", n);
319 	return (&fldtab[n]);
320 }
321 
322 
323 int	errorflag	= 0;
324 
325 
326 int
327 yyerror(char *s)
328 {
329 	fprintf(stderr,
330 	    gettext("awk: %s near line %lld\n"), gettext(s), lineno);
331 	errorflag = 2;
332 	return (0);
333 }
334 
335 
336 void
337 error(int f, char *fmt, ...)
338 {
339 	va_list ap;
340 
341 	va_start(ap, fmt);
342 	fprintf(stderr, "awk: ");
343 	vfprintf(stderr, gettext(fmt), ap);
344 	va_end(ap);
345 	fprintf(stderr, "\n");
346 	if (NR && *NR > 0)
347 		fprintf(stderr, gettext(" record number %g\n"), *NR);
348 	if (f)
349 		exit(2);
350 }
351 
352 
/*
 * PUTS()
 *
 * Pass the string s, followed by a newline, to the dprintf debug-output
 * facility.
 */
void
PUTS(char *s)
{
	char *text = s;

	dprintf("%s\n", text, NULL, NULL);
}
358 
359 
#define	MAXEXPON	38	/* maximum exponent for fp number */


/*
 * isanumber()
 *
 * Return 1 if the wide string s looks like a decimal floating-point
 * number, 0 otherwise.  The accepted form is:
 *
 *	[space/tab/newline]* [+|-] digits [radixpoint digits*] [exponent]
 *	[space/tab/newline]*
 *
 * or the same with no integer digits but at least one digit after the
 * radix point.  An exponent is 'e' or 'E', an optional sign, and one or
 * two digits whose value is below MAXEXPON.  The integer part must have
 * fewer than MAXEXPON digits.  Only digits for which iswascii() is also
 * true are counted, because iswdigit() alone may accept digits from
 * other code sets.
 */
int
isanumber(wchar_t *s)
{
	int intdigits = 0;	/* digits in front of the radix point */
	int fracdigits = 0;	/* non-zero if any digit follows the point */
	int sawpoint = 0;	/* non-zero if a radix point was present */
	wchar_t *expstart;	/* first digit of the exponent */
	extern wchar_t	radixpoint;

	/* leading white space */
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return (0);	/* empty stuff isn't number */

	/* optional sign */
	if (*s == '+' || *s == '-')
		s++;

	/* the mantissa has to open with a digit or the radix point */
	if (*s != radixpoint && !(iswdigit(*s) && iswascii(*s)))
		return (0);

	/* integer part */
	while (iswdigit(*s) && iswascii(*s)) {
		intdigits++;
		s++;
	}
	if (intdigits >= MAXEXPON)
		return (0);	/* too many digits to convert */

	/* optional fraction */
	if (*s == radixpoint) {
		sawpoint = 1;
		s++;
	}
	while (iswdigit(*s) && iswascii(*s)) {
		fracdigits = 1;
		s++;
	}

	/* a bare radix point with no digits on either side is not a number */
	if (intdigits == 0 && !(sawpoint && fracdigits))
		return (0);

	/* optional exponent */
	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!(iswdigit(*s) && iswascii(*s)))
			return (0);
		expstart = s;
		while (iswdigit(*s) && iswascii(*s))
			s++;

		/* at most two exponent digits, and their value is bounded */
		if (s - expstart > 2)
			return (0);
		if (s - expstart == 2 &&
		    10 * (expstart[0] - '0') + expstart[1] - '0' >= MAXEXPON)
			return (0);
	}

	/* trailing white space, then nothing else */
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	return (*s == '\0');
}
431 char *
432 toeuccode(str)
433 wchar_t *str;
434 {
435 	static char euccode[RECSIZE];
436 
437 	(void) wcstombs(euccode, str, RECSIZE);
438 	return (euccode);
439 }
440