xref: /illumos-gate/usr/src/cmd/oawk/lib.c (revision 69a119caa6570c7077699161b7c28b6ee9f8b0f4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include "awk.def"
34 #include "awk.h"
35 #include <ctype.h>
36 #include <wctype.h>
37 #include "awktype.h"
38 #include <stdlib.h>
39 
/* Capacity, in wide characters, of the record and field buffers. */
#define	RECSIZE (5 * 512)

FILE	*infile = NULL;		/* open input stream; NULL = open next operand */
wchar_t	*file;			/* name of the operand being read */
wchar_t	record[RECSIZE];	/* text of the current input record */
wchar_t	fields[RECSIZE];	/* storage for the fields cut by fldbld() */
wchar_t	L_NULL[] = L"";		/* shared empty wide string */
46 
47 
#define	MAXFLD	100		/* number of entries in fldtab[] */

/* Flags relating the record text to its split-out fields. */
int	donefld;	/* nonzero: record has been broken into fields */
int	donerec;	/* nonzero: record is valid (no fields have changed) */
int	mustfld;	/* nonzero: NF was seen, so always break into fields */

/* Name given to the fldtab[] entry that stands for the whole record. */
static wchar_t L_record[] = L"$record";
53 
54 
55 #define	FINIT	{ OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
56 CELL fldtab[MAXFLD] = {		/* room for fields */
57 	{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
58 		FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
59 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
60 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
61 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
62 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
63 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
64 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
65 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
66 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
67 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
68 };
69 int	maxfld	= 0;	/* last used field */
70 /* pointer to CELL for maximum field assigned to */
71 CELL	*maxmfld = &fldtab[0];
72 
73 static int isclvar(wchar_t *);
74 static void setclvar(wchar_t *);
75 void fldbld(void);
76 
77 int
78 getrec(void)
79 {
80 	wchar_t *rr, *er;
81 	int c, sep;
82 	FILE *inf;
83 	extern int svargc;
84 	extern wchar_t **svargv;
85 
86 
87 	dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
88 	donefld = 0;
89 	donerec = 1;
90 	record[0] = 0;
91 	er = record + RECSIZE;
92 	while (svargc > 0) {
93 		dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
94 		if (infile == NULL) {	/* have to open a new file */
95 			/*
96 			 * If the argument contains a '=', determine if the
97 			 * argument needs to be treated as a variable assignment
98 			 * or as the pathname of a file.
99 			 */
100 			if (isclvar(*svargv)) {
101 				/* it's a var=value argument */
102 				setclvar(*svargv);
103 				if (svargc > 1) {
104 					svargv++;
105 					svargc--;
106 					continue;
107 				}
108 				*svargv = L"-";
109 			}
110 			*FILENAME = file = *svargv;
111 			dprintf("opening file %ws\n", file, NULL, NULL);
112 			if (*file == (wchar_t)L'-')
113 				infile = stdin;
114 			else if ((infile = fopen(toeuccode(file), "r")) == NULL)
115 				error(FATAL, "can't open %ws", file);
116 		}
117 		if ((sep = **RS) == 0)
118 			sep = '\n';
119 		inf = infile;
120 		for (rr = record; /* dummy */; /* dummy */) {
121 			for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
122 			    *rr++ = c)
123 				;
124 			if (rr >= er)
125 				error(FATAL, "record `%.20ws...' too long",
126 				    record);
127 			if (**RS == sep || c == EOF)
128 				break;
129 			if ((c = getwc(inf)) == '\n' || c == EOF)
130 			/* 2 in a row */
131 				break;
132 			*rr++ = '\n';
133 			*rr++ = c;
134 		}
135 		if (rr >= er)
136 			error(FATAL, "record `%.20ws...' too long", record);
137 		*rr = 0;
138 		if (mustfld)
139 			fldbld();
140 		if (c != EOF || rr > record) {	/* normal record */
141 			recloc->tval &= ~NUM;
142 			recloc->tval |= STR;
143 			++nrloc->fval;
144 			nrloc->tval &= ~STR;
145 			nrloc->tval |= NUM;
146 			return (1);
147 		}
148 		/* EOF arrived on this file; set up next */
149 		if (infile != stdin)
150 			fclose(infile);
151 		infile = NULL;
152 		svargc--;
153 		svargv++;
154 	}
155 	return (0);	/* true end of file */
156 }
157 
/*
 * isclvar()
 *
 * Decide whether the operand arg is a variable assignment.  Returns 1 if
 * it is, 0 if it is not (including when arg is NULL).
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment.  An operand that begins with an underscore or alphabetic
 * character, followed by a sequence of underscores, digits, and
 * alphabetics, followed by the '=' character, specifies a variable
 * assignment rather than a pathname.
 *
 * Characters are classified with iswalpha() and iswalnum().
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t	*cp;

	if (arg == NULL)
		return (0);

	/* the name must start with a letter or an underscore */
	if (!iswalpha(*arg) && *arg != '_')
		return (0);

	/* step over the remaining letters, digits and underscores */
	cp = arg + 1;
	while (*cp != 0 && (iswalnum(*cp) || *cp == '_'))
		cp++;

	/* an assignment only if the name is followed directly by '=' */
	return (*cp == '=');
}
196 
197 static void
198 setclvar(wchar_t *s)	/* set var=value from s */
199 {
200 	wchar_t *p;
201 	CELL *q;
202 
203 
204 	for (p = s; *p != '='; p++)
205 		;
206 	*p++ = 0;
207 	q = setsymtab(s, tostring(p), 0.0, STR, symtab);
208 	setsval(q, p);
209 	dprintf("command line set %ws to |%ws|\n", s, p, NULL);
210 }
211 
212 
213 void
214 fldbld(void)
215 {
216 	wchar_t *r, *fr, sep, c;
217 	static wchar_t L_NF[] = L"NF";
218 	CELL *p, *q;
219 	int i, j;
220 
221 
222 	r = record;
223 	fr = fields;
224 	i = 0;	/* number of fields accumulated here */
225 	if ((sep = **FS) == ' ')
226 		for (i = 0; /* dummy */; /* dummy */) {
227 			c = *r;
228 			while (iswblank(c) || c == '\t' || c == '\n')
229 				c = *(++r);
230 			if (*r == 0)
231 				break;
232 			i++;
233 			if (i >= MAXFLD)
234 				error(FATAL,
235 			"record `%.20ws...' has too many fields", record);
236 			if (!(fldtab[i].tval&FLD))
237 				xfree(fldtab[i].sval);
238 			fldtab[i].sval = fr;
239 			fldtab[i].tval = FLD | STR;
240 			do {
241 				*fr++ = *r++;
242 				c = *r;
243 			} while (! iswblank(c) && c != '\t' &&
244 			    c != '\n' && c != '\0');
245 
246 
247 			*fr++ = 0;
248 
249 	} else if (*r != 0)	/* if 0, it's a null field */
250 		for (;;) {
251 			i++;
252 			if (i >= MAXFLD)
253 				error(FATAL,
254 			"record `%.20ws...' has too many fields", record);
255 			if (!(fldtab[i].tval&FLD))
256 				xfree(fldtab[i].sval);
257 			fldtab[i].sval = fr;
258 			fldtab[i].tval = FLD | STR;
259 			while ((c = *r) != sep && c != '\n' && c != '\0')
260 				/* \n always a separator */
261 				*fr++ = *r++;
262 			*fr++ = 0;
263 			if (*r++ == 0)
264 				break;
265 		}
266 	*fr = 0;
267 	/* clean out junk from previous record */
268 	for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
269 		if (!(p->tval&FLD))
270 			xfree(p->sval);
271 		p->tval = STR | FLD;
272 		p->sval = L_NULL;
273 	}
274 	maxfld = i;
275 	maxmfld = &fldtab[i];
276 	donefld = 1;
277 	for (i = 1; i <= maxfld; i++)
278 		if (isanumber(fldtab[i].sval)) {
279 			fldtab[i].fval = watof(fldtab[i].sval);
280 			fldtab[i].tval |= NUM;
281 		}
282 	setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
283 	if (dbg)
284 		for (i = 0; i <= maxfld; i++)
285 			printf("field %d: |%ws|\n", i, fldtab[i].sval);
286 }
287 
288 
289 void
290 recbld(void)
291 {
292 	int i;
293 	wchar_t *r, *p;
294 
295 
296 	if (donefld == 0 || donerec == 1)
297 		return;
298 	r = record;
299 	for (i = 1; i <= *NF; i++) {
300 		p = getsval(&fldtab[i]);
301 		while (*r++ = *p++)
302 			;
303 		*(r-1) = **OFS;
304 	}
305 	*(r-1) = '\0';
306 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
307 	recloc->tval = STR | FLD;
308 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
309 	if (r > record+RECSIZE)
310 		error(FATAL, "built giant record `%.20ws...'", record);
311 	dprintf("recbld = |%ws|\n", record, NULL, NULL);
312 }
313 
314 
315 CELL *
316 fieldadr(n)
317 {
318 	if (n < 0 || n >= MAXFLD)
319 		error(FATAL, "trying to access field %d", n);
320 	return (&fldtab[n]);
321 }
322 
323 
324 int	errorflag	= 0;
325 
326 
327 int
328 yyerror(char *s)
329 {
330 	fprintf(stderr,
331 	    gettext("awk: %s near line %lld\n"), gettext(s), lineno);
332 	errorflag = 2;
333 	return (0);
334 }
335 
336 
337 void
338 error(f, s, a1, a2, a3, a4, a5, a6, a7)
339 {
340 	fprintf(stderr, "awk: ");
341 	fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
342 	fprintf(stderr, "\n");
343 	if (NR && *NR > 0)
344 		fprintf(stderr, gettext(" record number %g\n"), *NR);
345 	if (f)
346 		exit(2);
347 }
348 
349 
/*
 * PUTS()
 *
 * Hand the string s, followed by a newline, to dprintf().
 * NOTE(review): dprintf is not defined in this file; presumably it is
 * the project's debug-output macro -- confirm in awk.h.
 */
void
PUTS(char *s)
{
	dprintf("%s\n", s, NULL, NULL);
}
355 
356 
#define	MAXEXPON	38	/* maximum exponent for fp number */


/*
 * Since iswdigit() will accept digits from code sets other than code
 * set 0, a digit only counts when iswascii() accepts it as well.
 */
static int
isasciidigit(wchar_t c)
{
	return (iswdigit(c) && iswascii(c));
}

/*
 * isanumber()
 *
 * Returns 1 if the string s looks like a number, 0 otherwise.
 *
 * Accepted form: optional blanks, tabs and newlines; an optional sign;
 * digits with an optional radix point (at least one digit before the
 * point, or a point followed by at least one digit); an optional
 * exponent introduced by 'e' or 'E' with an optional sign; optional
 * trailing blanks, tabs and newlines.
 *
 * Rejected as too large to convert: MAXEXPON or more digits before the
 * radix point, an exponent of more than two digits, or a two digit
 * exponent whose value is MAXEXPON or more.
 */
int
isanumber(wchar_t *s)
{
	extern wchar_t	radixpoint;
	int intdigits = 0;	/* digits seen before the radix point */
	int fracdigits = 0;	/* nonzero if digits follow the point */
	int sawpoint = 0;
	long explen;
	wchar_t *es;

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return (0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;

	/* must now be looking at a digit or at the radix point */
	if (!isasciidigit(*s) && *s != radixpoint)
		return (0);
	while (isasciidigit(*s)) {
		intdigits++;
		s++;
	}
	if (intdigits >= MAXEXPON)
		return (0);	/* too many digits to convert */
	if (*s == radixpoint) {
		sawpoint = 1;
		s++;
	}
	if (isasciidigit(*s)) {
		fracdigits = 1;
		while (isasciidigit(*s))
			s++;
	}
	if (intdigits == 0 && !(sawpoint && fracdigits))
		return (0);

	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isasciidigit(*s))
			return (0);
		es = s;
		while (isasciidigit(*s))
			s++;
		explen = s - es;
		if (explen > 2)
			return (0);
		if (explen == 2 &&
		    10 * (es[0] - '0') + es[1] - '0' >= MAXEXPON)
			return (0);
	}

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	return (*s == '\0');
}
428 char *
429 toeuccode(str)
430 wchar_t *str;
431 {
432 	static char euccode[RECSIZE];
433 
434 	(void) wcstombs(euccode, str, RECSIZE);
435 	return (euccode);
436 }
437