xref: /titanic_51/usr/src/cmd/oawk/lib.c (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright (c) 1996-1999 by Sun Microsystems, Inc.
28  * All rights reserved.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include "awk.def"
35 #include "awk.h"
36 #include <ctype.h>
37 #include <wctype.h>
38 #include "awktype.h"
39 #include <stdlib.h>
40 
FILE	*infile	= NULL;		/* stream being read by getrec(); NULL = none open */
wchar_t *file;			/* name of the current input operand */
#define	RECSIZE (5 * 512)	/* capacity of record[], in wide characters */
wchar_t record[RECSIZE];	/* text of the current record */
/*
 * Field text produced by fldbld().  fldbld() copies every character of
 * the record that is not a separator, adds one NUL per field and then
 * stores one more NUL after the last field.  For a record holding
 * RECSIZE - 1 characters with no separators that is RECSIZE + 1 elements,
 * so this buffer must be one element larger than record[].
 */
wchar_t fields[RECSIZE + 1];
wchar_t L_NULL[] = L"";		/* shared empty string used for unset fields */
47 
48 
#define	MAXFLD	100	/* number of entries in fldtab[], entry 0 included */
int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */
int	mustfld;	/* 1 = NF seen, so always break */
/* third initializer of fldtab[0]; presumably the cell's name -- confirm */
static wchar_t L_record[] = L"$record";
54 
55 
56 #define	FINIT	{ OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
57 CELL fldtab[MAXFLD] = {		/* room for fields */
58 	{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
59 		FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
60 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
61 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
62 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
63 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
64 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
65 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
66 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
67 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
68 	FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
69 };
70 int	maxfld	= 0;	/* last used field */
71 /* pointer to CELL for maximum field assigned to */
72 CELL	*maxmfld = &fldtab[0];
73 
74 
75 
76 
77 getrec()
78 {
79 	register wchar_t *rr, *er;
80 	register c, sep;
81 	register FILE *inf;
82 	extern int svargc;
83 	extern wchar_t **svargv;
84 
85 
86 	dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
87 	donefld = 0;
88 	donerec = 1;
89 	record[0] = 0;
90 	er = record + RECSIZE;
91 	while (svargc > 0) {
92 		dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
93 		if (infile == NULL) {	/* have to open a new file */
94 			if (member('=', *svargv)) {
95 				/* it's a var=value argument */
96 				setclvar(*svargv);
97 				if (svargc > 1) {
98 					svargv++;
99 					svargc--;
100 					continue;
101 				}
102 				*svargv = L"-";
103 			}
104 			*FILENAME = file = *svargv;
105 			dprintf("opening file %ws\n", file, NULL, NULL);
106 			if (*file == (wchar_t)L'-')
107 				infile = stdin;
108 			else if ((infile = fopen(toeuccode(file), "r")) == NULL)
109 				error(FATAL, "can't open %ws", file);
110 		}
111 		if ((sep = **RS) == 0)
112 			sep = '\n';
113 		inf = infile;
114 		for (rr = record; /* dummy */; /* dummy */) {
115 			for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
116 			    *rr++ = c)
117 				;
118 			if (rr >= er)
119 				error(FATAL, "record `%.20ws...' too long",
120 				    record);
121 			if (**RS == sep || c == EOF)
122 				break;
123 			if ((c = getwc(inf)) == '\n' || c == EOF)
124 			/* 2 in a row */
125 				break;
126 			*rr++ = '\n';
127 			*rr++ = c;
128 		}
129 		if (rr >= er)
130 			error(FATAL, "record `%.20ws...' too long", record);
131 		*rr = 0;
132 		if (mustfld)
133 			fldbld();
134 		if (c != EOF || rr > record) {	/* normal record */
135 			recloc->tval &= ~NUM;
136 			recloc->tval |= STR;
137 			++nrloc->fval;
138 			nrloc->tval &= ~STR;
139 			nrloc->tval |= NUM;
140 			return (1);
141 		}
142 		/* EOF arrived on this file; set up next */
143 		if (infile != stdin)
144 			fclose(infile);
145 		infile = NULL;
146 		svargc--;
147 		svargv++;
148 	}
149 	return (0);	/* true end of file */
150 }
151 
152 
153 setclvar(s)	/* set var=value from s */
154 wchar_t *s;
155 {
156 	wchar_t *p;
157 	CELL *q;
158 
159 
160 	for (p = s; *p != '='; p++)
161 		;
162 	*p++ = 0;
163 	q = setsymtab(s, tostring(p), 0.0, STR, symtab);
164 	setsval(q, p);
165 	dprintf("command line set %ws to |%ws|\n", s, p, NULL);
166 }
167 
168 
169 fldbld()
170 {
171 	register wchar_t *r, *fr, sep, c;
172 	static wchar_t L_NF[] = L"NF";
173 	CELL *p, *q;
174 	int i, j;
175 
176 
177 	r = record;
178 	fr = fields;
179 	i = 0;	/* number of fields accumulated here */
180 	if ((sep = **FS) == ' ')
181 		for (i = 0; /* dummy */; /* dummy */) {
182 			c = *r;
183 			while (iswblank(c) || c == '\t' || c == '\n')
184 				c = *(++r);
185 			if (*r == 0)
186 				break;
187 			i++;
188 			if (i >= MAXFLD)
189 				error(FATAL,
190 			"record `%.20ws...' has too many fields", record);
191 			if (!(fldtab[i].tval&FLD))
192 				xfree(fldtab[i].sval);
193 			fldtab[i].sval = fr;
194 			fldtab[i].tval = FLD | STR;
195 			do {
196 				*fr++ = *r++;
197 				c = *r;
198 			} while (! iswblank(c) && c != '\t' &&
199 				c != '\n' && c != '\0');
200 
201 
202 			*fr++ = 0;
203 
204 	} else if (*r != 0)	/* if 0, it's a null field */
205 		for (;;) {
206 			i++;
207 			if (i >= MAXFLD)
208 				error(FATAL,
209 			"record `%.20ws...' has too many fields", record);
210 			if (!(fldtab[i].tval&FLD))
211 				xfree(fldtab[i].sval);
212 			fldtab[i].sval = fr;
213 			fldtab[i].tval = FLD | STR;
214 			while ((c = *r) != sep && c != '\n' && c != '\0')
215 				/* \n always a separator */
216 				*fr++ = *r++;
217 			*fr++ = 0;
218 			if (*r++ == 0)
219 				break;
220 		}
221 	*fr = 0;
222 	/* clean out junk from previous record */
223 	for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
224 		if (!(p->tval&FLD))
225 			xfree(p->sval);
226 		p->tval = STR | FLD;
227 		p->sval = L_NULL;
228 	}
229 	maxfld = i;
230 	maxmfld = &fldtab[i];
231 	donefld = 1;
232 	for (i = 1; i <= maxfld; i++)
233 		if (isanumber(fldtab[i].sval)) {
234 			fldtab[i].fval = watof(fldtab[i].sval);
235 			fldtab[i].tval |= NUM;
236 		}
237 	setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
238 	if (dbg)
239 		for (i = 0; i <= maxfld; i++)
240 			printf("field %d: |%ws|\n", i, fldtab[i].sval);
241 }
242 
243 
244 recbld()
245 {
246 	int i;
247 	register wchar_t *r, *p;
248 
249 
250 	if (donefld == 0 || donerec == 1)
251 		return;
252 	r = record;
253 	for (i = 1; i <= *NF; i++) {
254 		p = getsval(&fldtab[i]);
255 		while (*r++ = *p++)
256 			;
257 		*(r-1) = **OFS;
258 	}
259 	*(r-1) = '\0';
260 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
261 	recloc->tval = STR | FLD;
262 	dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
263 	if (r > record+RECSIZE)
264 		error(FATAL, "built giant record `%.20ws...'", record);
265 	dprintf("recbld = |%ws|\n", record, NULL, NULL);
266 }
267 
268 
269 CELL *
270 fieldadr(n)
271 {
272 	if (n < 0 || n >= MAXFLD)
273 		error(FATAL, "trying to access field %d", n);
274 	return (&fldtab[n]);
275 }
276 
277 
int	errorflag	= 0;	/* set to 2 by yyerror() */
279 
280 
281 yyerror(char *s)
282 {
283 	fprintf(stderr,
284 		gettext("awk: %s near line %lld\n"), gettext(s), lineno);
285 	errorflag = 2;
286 }
287 
288 
289 error(f, s, a1, a2, a3, a4, a5, a6, a7)
290 {
291 	fprintf(stderr, "awk: ");
292 	fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
293 	fprintf(stderr, "\n");
294 	if (NR && *NR > 0)
295 		fprintf(stderr, gettext(" record number %g\n"), *NR);
296 	if (f)
297 		exit(2);
298 }
299 
300 
/*
 * PUTS - pass the string s, followed by a newline, to dprintf().
 */
PUTS(s)
char *s;
{
	dprintf("%s\n", s, NULL, NULL);
}
304 
305 
/*
 * Maximum exponent for a floating-point number.  isanumber() also uses
 * it as the limit on the number of integer-part digits.
 */
#define	MAXEXPON	38
307 
308 
309 isanumber(s)
310 register wchar_t *s;
311 {
312 	register d1, d2;
313 	int point;
314 	wchar_t *es;
315 	extern wchar_t	radixpoint;
316 
317 	d1 = d2 = point = 0;
318 	while (*s == ' ' || *s == '\t' || *s == '\n')
319 		s++;
320 	if (*s == '\0')
321 		return (0);	/* empty stuff isn't number */
322 	if (*s == '+' || *s == '-')
323 		s++;
324 	/*
325 	 * Since, iswdigit() will include digit from other than code set 0,
326 	 * we have to check it from code set 0 or not.
327 	 */
328 	if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint)
329 		return (0);
330 	if (iswdigit(*s) && iswascii(*s)) {
331 		do {
332 			d1++;
333 			s++;
334 		} while (iswdigit(*s) && iswascii(*s));
335 	}
336 	if (d1 >= MAXEXPON)
337 		return (0);	/* too many digits to convert */
338 	if (*s == radixpoint) {
339 		point++;
340 		s++;
341 	}
342 	if (iswdigit(*s) && iswascii(*s)) {
343 		d2++;
344 		do {
345 			s++;
346 		} while (iswdigit(*s) && iswascii(*s));
347 	}
348 
349 
350 	if (!(d1 || point && d2))
351 		return (0);
352 	if (*s == 'e' || *s == 'E') {
353 		s++;
354 		if (*s == '+' || *s == '-')
355 			s++;
356 		if (!(iswdigit(*s) && iswascii(*s)))
357 			return (0);
358 		es = s;
359 		do {
360 			s++;
361 		} while (iswdigit(*s) && iswascii(*s));
362 
363 
364 		if (s - es > 2)
365 			return (0);
366 		else if (s - es == 2 &&
367 			10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON)
368 			return (0);
369 	}
370 	while (*s == ' ' || *s == '\t' || *s == '\n')
371 		s++;
372 	if (*s == '\0')
373 		return (1);
374 	else
375 		return (0);
376 }
377 char *
378 toeuccode(str)
379 wchar_t *str;
380 {
381 	static char euccode[RECSIZE];
382 
383 	(void) wcstombs(euccode, str, RECSIZE);
384 	return (euccode);
385 }
386