xref: /illumos-gate/usr/src/cmd/oawk/lib.c (revision e067707b65c63939ade45e268f2b0dff6f65f75d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include "awk.def"
34 #include "awk.h"
35 #include <ctype.h>
36 #include <wctype.h>
37 #include "awktype.h"
38 #include <stdlib.h>
39 
/* Stream for the operand being read; NULL makes getrec() open the next one. */
FILE	*infile	= NULL;
/* Name of the current input operand; getrec() also stores it in *FILENAME. */
wchar_t *file;
/*
 * Capacity of record[] and fields[] in wide characters; toeuccode() also
 * uses it as the byte size of its conversion buffer.
 */
#define	RECSIZE (5 * 512)
/* Text of the current input record, filled by getrec(). */
wchar_t record[RECSIZE];
/* Storage for the NUL-terminated field strings that fldbld() splits out. */
wchar_t fields[RECSIZE];
/* Shared empty wide string; the value given to fields that hold nothing. */
wchar_t L_NULL[] = L"";
46 
47 
/* Number of entries in fldtab[]; index 0 is the cell for the whole record. */
#define	MAXFLD	100
int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */
int	mustfld;	/* 1 = NF seen, so always break */
/* String placed in the fldtab[0] initializer next to record[]. */
static wchar_t L_record[] = L"$record";
53 
54 
/*
 * Initializer for one unused field cell: its string value is the shared
 * empty string L_NULL and its type flags are FLD|STR.
 * NOTE(review): the positional layout follows the CELL declaration, which
 * is not visible in this file -- confirm before reordering anything.
 */
#define	FINIT	{ OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
/*
 * Field table.  Entry 0 describes the whole record and points at record[];
 * the remaining MAXFLD - 1 entries (99 FINITs) are the individual fields.
 */
CELL fldtab[MAXFLD] = {		/* room for fields */
	{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
		FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
};
/* Index of the last field produced by the most recent fldbld() call. */
int	maxfld	= 0;	/* last used field */
/* pointer to CELL for maximum field assigned to */
CELL	*maxmfld = &fldtab[0];

/* Forward declaration: getrec() uses isclvar() before its definition. */
static int isclvar(wchar_t *);
74 
75 getrec()
76 {
77 	wchar_t *rr, *er;
78 	int c, sep;
79 	FILE *inf;
80 	extern int svargc;
81 	extern wchar_t **svargv;
82 
83 
84 	dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
85 	donefld = 0;
86 	donerec = 1;
87 	record[0] = 0;
88 	er = record + RECSIZE;
89 	while (svargc > 0) {
90 		dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
91 		if (infile == NULL) {	/* have to open a new file */
92 			/*
93 			 * If the argument contains a '=', determine if the
94 			 * argument needs to be treated as a variable assignment
95 			 * or as the pathname of a file.
96 			 */
97 			if (isclvar(*svargv)) {
98 				/* it's a var=value argument */
99 				setclvar(*svargv);
100 				if (svargc > 1) {
101 					svargv++;
102 					svargc--;
103 					continue;
104 				}
105 				*svargv = L"-";
106 			}
107 			*FILENAME = file = *svargv;
108 			dprintf("opening file %ws\n", file, NULL, NULL);
109 			if (*file == (wchar_t)L'-')
110 				infile = stdin;
111 			else if ((infile = fopen(toeuccode(file), "r")) == NULL)
112 				error(FATAL, "can't open %ws", file);
113 		}
114 		if ((sep = **RS) == 0)
115 			sep = '\n';
116 		inf = infile;
117 		for (rr = record; /* dummy */; /* dummy */) {
118 			for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
119 			    *rr++ = c)
120 				;
121 			if (rr >= er)
122 				error(FATAL, "record `%.20ws...' too long",
123 				    record);
124 			if (**RS == sep || c == EOF)
125 				break;
126 			if ((c = getwc(inf)) == '\n' || c == EOF)
127 			/* 2 in a row */
128 				break;
129 			*rr++ = '\n';
130 			*rr++ = c;
131 		}
132 		if (rr >= er)
133 			error(FATAL, "record `%.20ws...' too long", record);
134 		*rr = 0;
135 		if (mustfld)
136 			fldbld();
137 		if (c != EOF || rr > record) {	/* normal record */
138 			recloc->tval &= ~NUM;
139 			recloc->tval |= STR;
140 			++nrloc->fval;
141 			nrloc->tval &= ~STR;
142 			nrloc->tval |= NUM;
143 			return (1);
144 		}
145 		/* EOF arrived on this file; set up next */
146 		if (infile != stdin)
147 			fclose(infile);
148 		infile = NULL;
149 		svargc--;
150 		svargv++;
151 	}
152 	return (0);	/* true end of file */
153 }
154 
/*
 * isclvar()
 *
 * Returns 1 if the input string, arg, is a variable assignment,
 * otherwise returns 0.  A NULL arg yields 0.
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment.  An operand that begins with an underscore or alphabetic
 * character from the portable character set, followed by a sequence of
 * underscores, digits, and alphabetics from the portable character set,
 * followed by the '=' character, shall specify a variable assignment
 * rather than a pathname.
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t	*tmpptr = arg;

	if (tmpptr != NULL) {

		/* Begins with an underscore or alphabetic character */
		if (iswalpha(*tmpptr) || *tmpptr == '_') {

			/*
			 * followed by a sequence of underscores, digits,
			 * and alphabetics.  The wide-character classifier
			 * is required here: the argument is a wchar_t
			 * string, and the narrow isalnum() is undefined for
			 * values that do not fit in an unsigned char.
			 */
			for (tmpptr++; *tmpptr; tmpptr++) {
				if (!(iswalnum(*tmpptr) || (*tmpptr == '_'))) {
					break;
				}
			}
			/* the name must be followed directly by '=' */
			return (*tmpptr == '=');
		}
	}

	return (0);
}
193 
194 setclvar(s)	/* set var=value from s */
195 wchar_t *s;
196 {
197 	wchar_t *p;
198 	CELL *q;
199 
200 
201 	for (p = s; *p != '='; p++)
202 		;
203 	*p++ = 0;
204 	q = setsymtab(s, tostring(p), 0.0, STR, symtab);
205 	setsval(q, p);
206 	dprintf("command line set %ws to |%ws|\n", s, p, NULL);
207 }
208 
209 
210 fldbld()
211 {
212 	wchar_t *r, *fr, sep, c;
213 	static wchar_t L_NF[] = L"NF";
214 	CELL *p, *q;
215 	int i, j;
216 
217 
218 	r = record;
219 	fr = fields;
220 	i = 0;	/* number of fields accumulated here */
221 	if ((sep = **FS) == ' ')
222 		for (i = 0; /* dummy */; /* dummy */) {
223 			c = *r;
224 			while (iswblank(c) || c == '\t' || c == '\n')
225 				c = *(++r);
226 			if (*r == 0)
227 				break;
228 			i++;
229 			if (i >= MAXFLD)
230 				error(FATAL,
231 			"record `%.20ws...' has too many fields", record);
232 			if (!(fldtab[i].tval&FLD))
233 				xfree(fldtab[i].sval);
234 			fldtab[i].sval = fr;
235 			fldtab[i].tval = FLD | STR;
236 			do {
237 				*fr++ = *r++;
238 				c = *r;
239 			} while (! iswblank(c) && c != '\t' &&
240 			    c != '\n' && c != '\0');
241 
242 
243 			*fr++ = 0;
244 
245 	} else if (*r != 0)	/* if 0, it's a null field */
246 		for (;;) {
247 			i++;
248 			if (i >= MAXFLD)
249 				error(FATAL,
250 			"record `%.20ws...' has too many fields", record);
251 			if (!(fldtab[i].tval&FLD))
252 				xfree(fldtab[i].sval);
253 			fldtab[i].sval = fr;
254 			fldtab[i].tval = FLD | STR;
255 			while ((c = *r) != sep && c != '\n' && c != '\0')
256 				/* \n always a separator */
257 				*fr++ = *r++;
258 			*fr++ = 0;
259 			if (*r++ == 0)
260 				break;
261 		}
262 	*fr = 0;
263 	/* clean out junk from previous record */
264 	for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
265 		if (!(p->tval&FLD))
266 			xfree(p->sval);
267 		p->tval = STR | FLD;
268 		p->sval = L_NULL;
269 	}
270 	maxfld = i;
271 	maxmfld = &fldtab[i];
272 	donefld = 1;
273 	for (i = 1; i <= maxfld; i++)
274 		if (isanumber(fldtab[i].sval)) {
275 			fldtab[i].fval = watof(fldtab[i].sval);
276 			fldtab[i].tval |= NUM;
277 		}
278 	setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
279 	if (dbg)
280 		for (i = 0; i <= maxfld; i++)
281 			printf("field %d: |%ws|\n", i, fldtab[i].sval);
282 }
283 
284 
285 recbld()
286 {
287 	int i;
288 	wchar_t *r, *p;
289 
290 
291 	if (donefld == 0 || donerec == 1)
292 		return;
293 	r = record;
294 	for (i = 1; i <= *NF; i++) {
295 		p = getsval(&fldtab[i]);
296 		while (*r++ = *p++)
297 			;
298 		*(r-1) = **OFS;
299 	}
300 	*(r-1) = '\0';
301 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
302 	recloc->tval = STR | FLD;
303 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
304 	if (r > record+RECSIZE)
305 		error(FATAL, "built giant record `%.20ws...'", record);
306 	dprintf("recbld = |%ws|\n", record, NULL, NULL);
307 }
308 
309 
310 CELL *
311 fieldadr(n)
312 {
313 	if (n < 0 || n >= MAXFLD)
314 		error(FATAL, "trying to access field %d", n);
315 	return (&fldtab[n]);
316 }
317 
318 
/* Error indicator; yyerror() sets it to 2 after reporting a message. */
int	errorflag	= 0;
320 
321 
322 yyerror(char *s)
323 {
324 	fprintf(stderr,
325 	    gettext("awk: %s near line %lld\n"), gettext(s), lineno);
326 	errorflag = 2;
327 }
328 
329 
330 error(f, s, a1, a2, a3, a4, a5, a6, a7)
331 {
332 	fprintf(stderr, "awk: ");
333 	fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
334 	fprintf(stderr, "\n");
335 	if (NR && *NR > 0)
336 		fprintf(stderr, gettext(" record number %g\n"), *NR);
337 	if (f)
338 		exit(2);
339 }
340 
341 
/*
 * PUTS()
 *
 * Hand the string s, followed by a newline, to dprintf().
 */
PUTS(s)
char *s;
{
	dprintf("%s\n", s, NULL, NULL);
}
345 
346 
#define	MAXEXPON	38	/* maximum exponent for fp number */
348 
349 
350 isanumber(s)
351 wchar_t *s;
352 {
353 	int d1, d2;
354 	int point;
355 	wchar_t *es;
356 	extern wchar_t	radixpoint;
357 
358 	d1 = d2 = point = 0;
359 	while (*s == ' ' || *s == '\t' || *s == '\n')
360 		s++;
361 	if (*s == '\0')
362 		return (0);	/* empty stuff isn't number */
363 	if (*s == '+' || *s == '-')
364 		s++;
365 	/*
366 	 * Since, iswdigit() will include digit from other than code set 0,
367 	 * we have to check it from code set 0 or not.
368 	 */
369 	if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint)
370 		return (0);
371 	if (iswdigit(*s) && iswascii(*s)) {
372 		do {
373 			d1++;
374 			s++;
375 		} while (iswdigit(*s) && iswascii(*s));
376 	}
377 	if (d1 >= MAXEXPON)
378 		return (0);	/* too many digits to convert */
379 	if (*s == radixpoint) {
380 		point++;
381 		s++;
382 	}
383 	if (iswdigit(*s) && iswascii(*s)) {
384 		d2++;
385 		do {
386 			s++;
387 		} while (iswdigit(*s) && iswascii(*s));
388 	}
389 
390 
391 	if (!(d1 || point && d2))
392 		return (0);
393 	if (*s == 'e' || *s == 'E') {
394 		s++;
395 		if (*s == '+' || *s == '-')
396 			s++;
397 		if (!(iswdigit(*s) && iswascii(*s)))
398 			return (0);
399 		es = s;
400 		do {
401 			s++;
402 		} while (iswdigit(*s) && iswascii(*s));
403 
404 
405 		if (s - es > 2)
406 			return (0);
407 		else if (s - es == 2 &&
408 			10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON)
409 			return (0);
410 	}
411 	while (*s == ' ' || *s == '\t' || *s == '\n')
412 		s++;
413 	if (*s == '\0')
414 		return (1);
415 	else
416 		return (0);
417 }
418 char *
419 toeuccode(str)
420 wchar_t *str;
421 {
422 	static char euccode[RECSIZE];
423 
424 	(void) wcstombs(euccode, str, RECSIZE);
425 	return (euccode);
426 }
427