xref: /illumos-gate/usr/src/cmd/oawk/lib.c (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <stdio.h>
31 #include "awk.def"
32 #include "awk.h"
33 #include <ctype.h>
34 #include <wctype.h>
35 #include "awktype.h"
36 #include <stdlib.h>
37 
38 FILE	*infile	= NULL;
39 wchar_t *file;
40 #define	RECSIZE (5 * 512)
41 wchar_t record[RECSIZE];
42 wchar_t fields[RECSIZE];
43 wchar_t L_NULL[] = L"";
44 
45 
46 #define	MAXFLD	100
47 int	donefld;	/* 1 = implies rec broken into fields */
48 int	donerec;	/* 1 = record is valid (no flds have changed) */
49 int	mustfld;	/* 1 = NF seen, so always break */
50 static wchar_t L_record[] = L"$record";
51 
52 
53 #define	FINIT	{ OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
54 CELL fldtab[MAXFLD] = {		/* room for fields */
55 	{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
56 		FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
57 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
58 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
59 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
60 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
61 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
62 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
63 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
64 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
65 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
66 };
67 int	maxfld	= 0;	/* last used field */
68 /* pointer to CELL for maximum field assigned to */
69 CELL	*maxmfld = &fldtab[0];
70 
71 static int isclvar(wchar_t *);
72 static void setclvar(wchar_t *);
73 void fldbld(void);
74 
75 int
76 getrec(void)
77 {
78 	wchar_t *rr, *er;
79 	int c, sep;
80 	FILE *inf;
81 	extern int svargc;
82 	extern wchar_t **svargv;
83 
84 
85 	dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
86 	donefld = 0;
87 	donerec = 1;
88 	record[0] = 0;
89 	er = record + RECSIZE;
90 	while (svargc > 0) {
91 		dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
92 		if (infile == NULL) {	/* have to open a new file */
93 			/*
94 			 * If the argument contains a '=', determine if the
95 			 * argument needs to be treated as a variable assignment
96 			 * or as the pathname of a file.
97 			 */
98 			if (isclvar(*svargv)) {
99 				/* it's a var=value argument */
100 				setclvar(*svargv);
101 				if (svargc > 1) {
102 					svargv++;
103 					svargc--;
104 					continue;
105 				}
106 				*svargv = L"-";
107 			}
108 			*FILENAME = file = *svargv;
109 			dprintf("opening file %ws\n", file, NULL, NULL);
110 			if (*file == (wchar_t)L'-')
111 				infile = stdin;
112 			else if ((infile = fopen(toeuccode(file), "r")) == NULL)
113 				error(FATAL, "can't open %ws", file);
114 		}
115 		if ((sep = **RS) == 0)
116 			sep = '\n';
117 		inf = infile;
118 		for (rr = record; /* dummy */; /* dummy */) {
119 			for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
120 			    *rr++ = c)
121 				;
122 			if (rr >= er)
123 				error(FATAL, "record `%.20ws...' too long",
124 				    record);
125 			if (**RS == sep || c == EOF)
126 				break;
127 			if ((c = getwc(inf)) == '\n' || c == EOF)
128 			/* 2 in a row */
129 				break;
130 			*rr++ = '\n';
131 			*rr++ = c;
132 		}
133 		if (rr >= er)
134 			error(FATAL, "record `%.20ws...' too long", record);
135 		*rr = 0;
136 		if (mustfld)
137 			fldbld();
138 		if (c != EOF || rr > record) {	/* normal record */
139 			recloc->tval &= ~NUM;
140 			recloc->tval |= STR;
141 			++nrloc->fval;
142 			nrloc->tval &= ~STR;
143 			nrloc->tval |= NUM;
144 			return (1);
145 		}
146 		/* EOF arrived on this file; set up next */
147 		if (infile != stdin)
148 			fclose(infile);
149 		infile = NULL;
150 		svargc--;
151 		svargv++;
152 	}
153 	return (0);	/* true end of file */
154 }
155 
/*
 * isclvar()
 *
 * Returns 1 if the input string, arg, is a variable assignment,
 * otherwise returns 0.  A NULL arg is not an assignment.
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment.  An operand that begins with an underscore or alphabetic
 * character, followed by a sequence of underscores, digits, and
 * alphabetics, followed by the '=' character, specifies a variable
 * assignment rather than a pathname.  Characters are classified with
 * iswalpha() and iswalnum().
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t	*cp;

	if (arg == NULL)
		return (0);

	/* The name has to start with a letter or an underscore. */
	if (!iswalpha(*arg) && *arg != '_')
		return (0);

	/* Step over the remaining letters, digits and underscores. */
	cp = arg + 1;
	while (*cp != 0 && (iswalnum(*cp) || *cp == '_'))
		cp++;

	/* Only a name that is directly followed by '=' is an assignment. */
	return (*cp == '=');
}
194 
195 static void
196 setclvar(wchar_t *s)	/* set var=value from s */
197 {
198 	wchar_t *p;
199 	CELL *q;
200 
201 
202 	for (p = s; *p != '='; p++)
203 		;
204 	*p++ = 0;
205 	q = setsymtab(s, tostring(p), 0.0, STR, symtab);
206 	setsval(q, p);
207 	dprintf("command line set %ws to |%ws|\n", s, p, NULL);
208 }
209 
210 
211 void
212 fldbld(void)
213 {
214 	wchar_t *r, *fr, sep, c;
215 	static wchar_t L_NF[] = L"NF";
216 	CELL *p, *q;
217 	int i, j;
218 
219 
220 	r = record;
221 	fr = fields;
222 	i = 0;	/* number of fields accumulated here */
223 	if ((sep = **FS) == ' ')
224 		for (i = 0; /* dummy */; /* dummy */) {
225 			c = *r;
226 			while (iswblank(c) || c == '\t' || c == '\n')
227 				c = *(++r);
228 			if (*r == 0)
229 				break;
230 			i++;
231 			if (i >= MAXFLD)
232 				error(FATAL,
233 			"record `%.20ws...' has too many fields", record);
234 			if (!(fldtab[i].tval&FLD))
235 				xfree(fldtab[i].sval);
236 			fldtab[i].sval = fr;
237 			fldtab[i].tval = FLD | STR;
238 			do {
239 				*fr++ = *r++;
240 				c = *r;
241 			} while (! iswblank(c) && c != '\t' &&
242 			    c != '\n' && c != '\0');
243 
244 
245 			*fr++ = 0;
246 
247 	} else if (*r != 0)	/* if 0, it's a null field */
248 		for (;;) {
249 			i++;
250 			if (i >= MAXFLD)
251 				error(FATAL,
252 			"record `%.20ws...' has too many fields", record);
253 			if (!(fldtab[i].tval&FLD))
254 				xfree(fldtab[i].sval);
255 			fldtab[i].sval = fr;
256 			fldtab[i].tval = FLD | STR;
257 			while ((c = *r) != sep && c != '\n' && c != '\0')
258 				/* \n always a separator */
259 				*fr++ = *r++;
260 			*fr++ = 0;
261 			if (*r++ == 0)
262 				break;
263 		}
264 	*fr = 0;
265 	/* clean out junk from previous record */
266 	for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
267 		if (!(p->tval&FLD))
268 			xfree(p->sval);
269 		p->tval = STR | FLD;
270 		p->sval = L_NULL;
271 	}
272 	maxfld = i;
273 	maxmfld = &fldtab[i];
274 	donefld = 1;
275 	for (i = 1; i <= maxfld; i++)
276 		if (isanumber(fldtab[i].sval)) {
277 			fldtab[i].fval = watof(fldtab[i].sval);
278 			fldtab[i].tval |= NUM;
279 		}
280 	setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
281 	if (dbg)
282 		for (i = 0; i <= maxfld; i++)
283 			printf("field %d: |%ws|\n", i, fldtab[i].sval);
284 }
285 
286 
287 void
288 recbld(void)
289 {
290 	int i;
291 	wchar_t *r, *p;
292 
293 
294 	if (donefld == 0 || donerec == 1)
295 		return;
296 	r = record;
297 	for (i = 1; i <= *NF; i++) {
298 		p = getsval(&fldtab[i]);
299 		while (*r++ = *p++)
300 			;
301 		*(r-1) = **OFS;
302 	}
303 	*(r-1) = '\0';
304 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
305 	recloc->tval = STR | FLD;
306 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
307 	if (r > record+RECSIZE)
308 		error(FATAL, "built giant record `%.20ws...'", record);
309 	dprintf("recbld = |%ws|\n", record, NULL, NULL);
310 }
311 
312 
313 CELL *
314 fieldadr(n)
315 {
316 	if (n < 0 || n >= MAXFLD)
317 		error(FATAL, "trying to access field %d", n);
318 	return (&fldtab[n]);
319 }
320 
321 
322 int	errorflag	= 0;
323 
324 
325 int
326 yyerror(char *s)
327 {
328 	fprintf(stderr,
329 	    gettext("awk: %s near line %lld\n"), gettext(s), lineno);
330 	errorflag = 2;
331 	return (0);
332 }
333 
334 
335 void
336 error(f, s, a1, a2, a3, a4, a5, a6, a7)
337 {
338 	fprintf(stderr, "awk: ");
339 	fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
340 	fprintf(stderr, "\n");
341 	if (NR && *NR > 0)
342 		fprintf(stderr, gettext(" record number %g\n"), *NR);
343 	if (f)
344 		exit(2);
345 }
346 
347 
/*
 * PUTS()
 *
 * Hand the string s, followed by a newline, to dprintf().
 */
void
PUTS(char *s)
{
	/* the NULLs pad the call to the same argument count used file-wide */
	dprintf("%s\n", s, NULL, NULL);
}
353 
354 
#define	MAXEXPON	38	/* maximum exponent for fp number */


/*
 * Since iswdigit() will include digits from other than code set 0,
 * a digit only counts when iswascii() accepts it as well.
 */
static int
asciidigit(wchar_t wc)
{
	return (iswdigit(wc) && iswascii(wc));
}

/*
 * isanumber()
 *
 * Returns 1 if the string s looks like a number, otherwise returns 0.
 *
 * Accepted form: optional blanks, tabs and newlines, an optional sign,
 * digits with an optional radixpoint (at least one digit on one side of
 * it, or digits alone), an optional exponent introduced by 'e' or 'E'
 * with an optional sign, then optional blanks, tabs and newlines up to
 * the end of the string.
 *
 * Strings with MAXEXPON or more digits in the integer part, with an
 * exponent of more than two digits, or with a two-digit exponent of
 * MAXEXPON or more are rejected.
 */
int
isanumber(wchar_t *s)
{
	int intdigits = 0;	/* digits seen before the radixpoint */
	int havepoint = 0;	/* a radixpoint was seen */
	int havefrac = 0;	/* a digit follows the radixpoint position */
	wchar_t *expstart;
	extern wchar_t	radixpoint;

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return (0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;

	/* the mantissa has to open with a digit or the radixpoint */
	if (!asciidigit(*s) && *s != radixpoint)
		return (0);
	while (asciidigit(*s)) {
		intdigits++;
		s++;
	}
	if (intdigits >= MAXEXPON)
		return (0);	/* too many digits to convert */
	if (*s == radixpoint) {
		havepoint = 1;
		s++;
	}
	while (asciidigit(*s)) {
		havefrac = 1;
		s++;
	}
	if (intdigits == 0 && !(havepoint && havefrac))
		return (0);

	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!asciidigit(*s))
			return (0);
		for (expstart = s; asciidigit(*s); s++)
			;
		if (s - expstart > 2)
			return (0);
		if (s - expstart == 2 &&
		    10 * (expstart[0] - '0') + expstart[1] - '0' >= MAXEXPON)
			return (0);
	}

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	return (*s == '\0');
}
426 char *
427 toeuccode(str)
428 wchar_t *str;
429 {
430 	static char euccode[RECSIZE];
431 
432 	(void) wcstombs(euccode, str, RECSIZE);
433 	return (euccode);
434 }
435