1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include <stdio.h>
33 #include "awk.def"
34 #include "awk.h"
35 #include <ctype.h>
36 #include <wctype.h>
37 #include "awktype.h"
38 #include <stdlib.h>
39
/* Sizing limits shared by the record and field machinery in this file. */
#define	RECSIZE	(5 * 512)	/* elements in record[] and in fields[] */
#define	MAXFLD	100		/* entries in fldtab[], slot 0 included */

/* Input state maintained by getrec(). */
FILE *infile = NULL;		/* open input stream; NULL when none is open */
wchar_t *file;			/* name of the input getrec() last opened */
wchar_t record[RECSIZE];	/* text of the current record */
wchar_t fields[RECSIZE];	/* NUL-terminated field copies made by fldbld() */
wchar_t L_NULL[] = L"";		/* shared empty wide string */

/* Bookkeeping that ties the record text to the split-out fields. */
int donefld;	/* 1 = record has been broken into fields */
int donerec;	/* 1 = record is valid (no fields have changed) */
int mustfld;	/* 1 = NF seen, so always break */

static wchar_t L_record[] = L"$record";	/* name given to fldtab[0] */
53
54
55 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
56 CELL fldtab[MAXFLD] = { /* room for fields */
57 { OCELL, CFLD, L_record, record, 0.0, STR|FLD},
58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
66 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
67 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
68 };
69 int maxfld = 0; /* last used field */
70 /* pointer to CELL for maximum field assigned to */
71 CELL *maxmfld = &fldtab[0];
72
73 static int isclvar(wchar_t *);
74 static void setclvar(wchar_t *);
75 void fldbld(void);
76
77 int
getrec(void)78 getrec(void)
79 {
80 wchar_t *rr, *er;
81 int c, sep;
82 FILE *inf;
83 extern int svargc;
84 extern wchar_t **svargv;
85
86
87 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
88 donefld = 0;
89 donerec = 1;
90 record[0] = 0;
91 er = record + RECSIZE;
92 while (svargc > 0) {
93 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
94 if (infile == NULL) { /* have to open a new file */
95 /*
96 * If the argument contains a '=', determine if the
97 * argument needs to be treated as a variable assignment
98 * or as the pathname of a file.
99 */
100 if (isclvar(*svargv)) {
101 /* it's a var=value argument */
102 setclvar(*svargv);
103 if (svargc > 1) {
104 svargv++;
105 svargc--;
106 continue;
107 }
108 *svargv = L"-";
109 }
110 *FILENAME = file = *svargv;
111 dprintf("opening file %ws\n", file, NULL, NULL);
112 if (*file == (wchar_t)L'-')
113 infile = stdin;
114 else if ((infile = fopen(toeuccode(file), "r")) == NULL)
115 error(FATAL, "can't open %ws", file);
116 }
117 if ((sep = **RS) == 0)
118 sep = '\n';
119 inf = infile;
120 for (rr = record; /* dummy */; /* dummy */) {
121 for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
122 *rr++ = c)
123 ;
124 if (rr >= er)
125 error(FATAL, "record `%.20ws...' too long",
126 record);
127 if (**RS == sep || c == EOF)
128 break;
129 if ((c = getwc(inf)) == '\n' || c == EOF)
130 /* 2 in a row */
131 break;
132 *rr++ = '\n';
133 *rr++ = c;
134 }
135 if (rr >= er)
136 error(FATAL, "record `%.20ws...' too long", record);
137 *rr = 0;
138 if (mustfld)
139 fldbld();
140 if (c != EOF || rr > record) { /* normal record */
141 recloc->tval &= ~NUM;
142 recloc->tval |= STR;
143 ++nrloc->fval;
144 nrloc->tval &= ~STR;
145 nrloc->tval |= NUM;
146 return (1);
147 }
148 /* EOF arrived on this file; set up next */
149 if (infile != stdin)
150 fclose(infile);
151 infile = NULL;
152 svargc--;
153 svargv++;
154 }
155 return (0); /* true end of file */
156 }
157
/*
 * isclvar_portable()
 *
 * Returns non-zero when c lies in the 7-bit range that holds the
 * portable character set.  iswalpha() and iswalnum() are locale
 * dependent and may accept characters outside that range, so isclvar()
 * applies this filter first.
 */
static int
isclvar_portable(wchar_t c)
{
	/* the cast makes a negative value of a signed wchar_t fail the test */
	return ((unsigned long)c < 0x80UL);
}

/*
 * isclvar()
 *
 * Returns 1 if the input string, arg, is a variable assignment,
 * otherwise returns 0.  A NULL or empty arg yields 0.
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment.  An operand that begins with an underscore or alphabetic
 * character from the portable character set, followed by a sequence of
 * underscores, digits, and alphabetics from the portable character set,
 * followed by the '=' character, shall specify a variable assignment
 * rather than a pathname.
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t *p = arg;

	if (p == NULL)
		return (0);

	/* Begins with an underscore or alphabetic character */
	if (!(*p == L'_' || (isclvar_portable(*p) && iswalpha(*p))))
		return (0);

	/* followed by a sequence of underscores, digits, and alphabetics */
	for (p++; *p == L'_' || (isclvar_portable(*p) && iswalnum(*p)); p++)
		;

	/* the name must be terminated by '=' */
	return (*p == L'=');
}
196
197 static void
setclvar(wchar_t * s)198 setclvar(wchar_t *s) /* set var=value from s */
199 {
200 wchar_t *p;
201 CELL *q;
202
203
204 for (p = s; *p != '='; p++)
205 ;
206 *p++ = 0;
207 q = setsymtab(s, tostring(p), 0.0, STR, symtab);
208 setsval(q, p);
209 dprintf("command line set %ws to |%ws|\n", s, p, NULL);
210 }
211
212
213 void
fldbld(void)214 fldbld(void)
215 {
216 wchar_t *r, *fr, sep, c;
217 static wchar_t L_NF[] = L"NF";
218 CELL *p, *q;
219 int i, j;
220
221
222 r = record;
223 fr = fields;
224 i = 0; /* number of fields accumulated here */
225 if ((sep = **FS) == ' ')
226 for (i = 0; /* dummy */; /* dummy */) {
227 c = *r;
228 while (iswblank(c) || c == '\t' || c == '\n')
229 c = *(++r);
230 if (*r == 0)
231 break;
232 i++;
233 if (i >= MAXFLD)
234 error(FATAL,
235 "record `%.20ws...' has too many fields", record);
236 if (!(fldtab[i].tval&FLD))
237 xfree(fldtab[i].sval);
238 fldtab[i].sval = fr;
239 fldtab[i].tval = FLD | STR;
240 do {
241 *fr++ = *r++;
242 c = *r;
243 } while (! iswblank(c) && c != '\t' &&
244 c != '\n' && c != '\0');
245
246
247 *fr++ = 0;
248
249 } else if (*r != 0) /* if 0, it's a null field */
250 for (;;) {
251 i++;
252 if (i >= MAXFLD)
253 error(FATAL,
254 "record `%.20ws...' has too many fields", record);
255 if (!(fldtab[i].tval&FLD))
256 xfree(fldtab[i].sval);
257 fldtab[i].sval = fr;
258 fldtab[i].tval = FLD | STR;
259 while ((c = *r) != sep && c != '\n' && c != '\0')
260 /* \n always a separator */
261 *fr++ = *r++;
262 *fr++ = 0;
263 if (*r++ == 0)
264 break;
265 }
266 *fr = 0;
267 /* clean out junk from previous record */
268 for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
269 if (!(p->tval&FLD))
270 xfree(p->sval);
271 p->tval = STR | FLD;
272 p->sval = L_NULL;
273 }
274 maxfld = i;
275 maxmfld = &fldtab[i];
276 donefld = 1;
277 for (i = 1; i <= maxfld; i++)
278 if (isanumber(fldtab[i].sval)) {
279 fldtab[i].fval = watof(fldtab[i].sval);
280 fldtab[i].tval |= NUM;
281 }
282 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
283 if (dbg)
284 for (i = 0; i <= maxfld; i++)
285 printf("field %d: |%ws|\n", i, fldtab[i].sval);
286 }
287
288
289 void
recbld(void)290 recbld(void)
291 {
292 int i;
293 wchar_t *r, *p;
294
295
296 if (donefld == 0 || donerec == 1)
297 return;
298 r = record;
299 for (i = 1; i <= *NF; i++) {
300 p = getsval(&fldtab[i]);
301 while (*r++ = *p++)
302 ;
303 *(r-1) = **OFS;
304 }
305 *(r-1) = '\0';
306 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
307 recloc->tval = STR | FLD;
308 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
309 if (r > record+RECSIZE)
310 error(FATAL, "built giant record `%.20ws...'", record);
311 dprintf("recbld = |%ws|\n", record, NULL, NULL);
312 }
313
314
315 CELL *
fieldadr(n)316 fieldadr(n)
317 {
318 if (n < 0 || n >= MAXFLD)
319 error(FATAL, "trying to access field %d", n);
320 return (&fldtab[n]);
321 }
322
323
324 int errorflag = 0;
325
326
327 int
yyerror(char * s)328 yyerror(char *s)
329 {
330 fprintf(stderr,
331 gettext("awk: %s near line %lld\n"), gettext(s), lineno);
332 errorflag = 2;
333 return (0);
334 }
335
336
337 void
error(f,s,a1,a2,a3,a4,a5,a6,a7)338 error(f, s, a1, a2, a3, a4, a5, a6, a7)
339 {
340 fprintf(stderr, "awk: ");
341 fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
342 fprintf(stderr, "\n");
343 if (NR && *NR > 0)
344 fprintf(stderr, gettext(" record number %g\n"), *NR);
345 if (f)
346 exit(2);
347 }
348
349
/*
 * PUTS()
 *
 * Hand the string s, followed by a newline, to dprintf().
 */
void
PUTS(char *s)
{
	dprintf("%s\n", s, NULL, NULL);
}
355
356
#define	MAXEXPON	38	/* maximum exponent for fp number */


/*
 * isasciidigit()
 *
 * Since iswdigit() will include digits from other than code set 0,
 * a character only counts as a digit here when iswascii() accepts it
 * too.
 */
static int
isasciidigit(wchar_t c)
{
	return (iswdigit(c) && iswascii(c));
}

/*
 * isanumber()
 *
 * Returns 1 when s, apart from surrounding blanks, tabs and newlines,
 * consists of exactly one number, otherwise 0.  A number is an
 * optional sign, digits with an optional radixpoint (at least one
 * digit before it, or one after it), and an optional exponent made of
 * 'e' or 'E', an optional sign and one or two digits.
 *
 * Strings with MAXEXPON or more digits before the radix point, or with
 * a two-digit exponent of MAXEXPON or more, are rejected, as is an
 * empty or all-blank string.
 */
int
isanumber(wchar_t *s)
{
	extern wchar_t radixpoint;
	wchar_t *expstart;
	int intdigits = 0;	/* digits seen before the radix point */
	int sawpoint = 0;	/* radix point present */
	int sawfrac = 0;	/* at least one digit after the radix point */

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return (0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;

	/* the mantissa has to open with a digit or the radix point */
	if (!isasciidigit(*s) && *s != radixpoint)
		return (0);
	while (isasciidigit(*s)) {
		intdigits++;
		s++;
	}
	if (intdigits >= MAXEXPON)
		return (0);	/* too many digits to convert */

	if (*s == radixpoint) {
		sawpoint = 1;
		s++;
	}
	if (isasciidigit(*s)) {
		sawfrac = 1;
		do {
			s++;
		} while (isasciidigit(*s));
	}

	/* a lone radix point is not a number */
	if (intdigits == 0 && !(sawpoint && sawfrac))
		return (0);

	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isasciidigit(*s))
			return (0);
		expstart = s;
		while (isasciidigit(*s))
			s++;
		/* at most two exponent digits, and their value is capped */
		if (s - expstart > 2)
			return (0);
		if (s - expstart == 2 &&
		    10 * (expstart[0] - '0') + expstart[1] - '0' >= MAXEXPON)
			return (0);
	}

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	return (*s == '\0');
}
428 char *
toeuccode(str)429 toeuccode(str)
430 wchar_t *str;
431 {
432 static char euccode[RECSIZE];
433
434 (void) wcstombs(euccode, str, RECSIZE);
435 return (euccode);
436 }
437