1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <stdio.h> 33 #include "awk.def" 34 #include "awk.h" 35 #include <ctype.h> 36 #include <wctype.h> 37 #include "awktype.h" 38 #include <stdlib.h> 39 40 FILE *infile = NULL; 41 wchar_t *file; 42 #define RECSIZE (5 * 512) 43 wchar_t record[RECSIZE]; 44 wchar_t fields[RECSIZE]; 45 wchar_t L_NULL[] = L""; 46 47 48 #define MAXFLD 100 49 int donefld; /* 1 = implies rec broken into fields */ 50 int donerec; /* 1 = record is valid (no flds have changed) */ 51 int mustfld; /* 1 = NF seen, so always break */ 52 static wchar_t L_record[] = L"$record"; 53 54 55 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR } 56 CELL fldtab[MAXFLD] = { /* room for fields */ 57 { OCELL, CFLD, L_record, record, 0.0, STR|FLD}, 58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 66 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 67 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT 68 }; 69 int maxfld = 0; /* last used field */ 70 /* pointer to CELL for maximum field assigned to */ 71 CELL *maxmfld = &fldtab[0]; 72 73 static int isclvar(wchar_t *); 74 75 getrec() 76 { 77 wchar_t *rr, *er; 78 int c, sep; 79 FILE *inf; 80 extern int svargc; 81 extern wchar_t **svargv; 82 83 84 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL); 85 donefld = 0; 86 donerec = 1; 87 record[0] = 0; 88 er = record + RECSIZE; 89 while (svargc > 0) { 90 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL); 91 if (infile == NULL) { /* have to open a new file */ 92 /* 93 * If the argument contains a '=', determine if the 94 * argument needs to be treated as a variable assignment 95 * or as the pathname of a file. 96 */ 97 if (isclvar(*svargv)) { 98 /* it's a var=value argument */ 99 setclvar(*svargv); 100 if (svargc > 1) { 101 svargv++; 102 svargc--; 103 continue; 104 } 105 *svargv = L"-"; 106 } 107 *FILENAME = file = *svargv; 108 dprintf("opening file %ws\n", file, NULL, NULL); 109 if (*file == (wchar_t)L'-') 110 infile = stdin; 111 else if ((infile = fopen(toeuccode(file), "r")) == NULL) 112 error(FATAL, "can't open %ws", file); 113 } 114 if ((sep = **RS) == 0) 115 sep = '\n'; 116 inf = infile; 117 for (rr = record; /* dummy */; /* dummy */) { 118 for (; (c = getwc(inf)) != sep && c != EOF && rr < er; 119 *rr++ = c) 120 ; 121 if (rr >= er) 122 error(FATAL, "record `%.20ws...' too long", 123 record); 124 if (**RS == sep || c == EOF) 125 break; 126 if ((c = getwc(inf)) == '\n' || c == EOF) 127 /* 2 in a row */ 128 break; 129 *rr++ = '\n'; 130 *rr++ = c; 131 } 132 if (rr >= er) 133 error(FATAL, "record `%.20ws...' too long", record); 134 *rr = 0; 135 if (mustfld) 136 fldbld(); 137 if (c != EOF || rr > record) { /* normal record */ 138 recloc->tval &= ~NUM; 139 recloc->tval |= STR; 140 ++nrloc->fval; 141 nrloc->tval &= ~STR; 142 nrloc->tval |= NUM; 143 return (1); 144 } 145 /* EOF arrived on this file; set up next */ 146 if (infile != stdin) 147 fclose(infile); 148 infile = NULL; 149 svargc--; 150 svargv++; 151 } 152 return (0); /* true end of file */ 153 } 154 155 /* 156 * isclvar() 157 * 158 * Returns 1 if the input string, arg, is a variable assignment, 159 * otherwise returns 0. 160 * 161 * An argument to awk can be either a pathname of a file, or a variable 162 * assignment. An operand that begins with an undersore or alphabetic 163 * character from the portable character set, followed by a sequence of 164 * underscores, digits, and alphabetics from the portable character set, 165 * followed by the '=' character, shall specify a variable assignment 166 * rather than a pathname. 167 */ 168 static int 169 isclvar(wchar_t *arg) 170 { 171 wchar_t *tmpptr = arg; 172 173 if (tmpptr != NULL) { 174 175 /* Begins with an underscore or alphabetic character */ 176 if (iswalpha(*tmpptr) || *tmpptr == '_') { 177 178 /* 179 * followed by a sequence of underscores, digits, 180 * and alphabetics 181 */ 182 for (tmpptr++; *tmpptr; tmpptr++) { 183 if (!(isalnum(*tmpptr) || (*tmpptr == '_'))) { 184 break; 185 } 186 } 187 return (*tmpptr == '='); 188 } 189 } 190 191 return (0); 192 } 193 194 setclvar(s) /* set var=value from s */ 195 wchar_t *s; 196 { 197 wchar_t *p; 198 CELL *q; 199 200 201 for (p = s; *p != '='; p++) 202 ; 203 *p++ = 0; 204 q = setsymtab(s, tostring(p), 0.0, STR, symtab); 205 setsval(q, p); 206 dprintf("command line set %ws to |%ws|\n", s, p, NULL); 207 } 208 209 210 fldbld() 211 { 212 wchar_t *r, *fr, sep, c; 213 static wchar_t L_NF[] = L"NF"; 214 CELL *p, *q; 215 int i, j; 216 217 218 r = record; 219 fr = fields; 220 i = 0; /* number of fields accumulated here */ 221 if ((sep = **FS) == ' ') 222 for (i = 0; /* dummy */; /* dummy */) { 223 c = *r; 224 while (iswblank(c) || c == '\t' || c == '\n') 225 c = *(++r); 226 if (*r == 0) 227 break; 228 i++; 229 if (i >= MAXFLD) 230 error(FATAL, 231 "record `%.20ws...' has too many fields", record); 232 if (!(fldtab[i].tval&FLD)) 233 xfree(fldtab[i].sval); 234 fldtab[i].sval = fr; 235 fldtab[i].tval = FLD | STR; 236 do { 237 *fr++ = *r++; 238 c = *r; 239 } while (! iswblank(c) && c != '\t' && 240 c != '\n' && c != '\0'); 241 242 243 *fr++ = 0; 244 245 } else if (*r != 0) /* if 0, it's a null field */ 246 for (;;) { 247 i++; 248 if (i >= MAXFLD) 249 error(FATAL, 250 "record `%.20ws...' has too many fields", record); 251 if (!(fldtab[i].tval&FLD)) 252 xfree(fldtab[i].sval); 253 fldtab[i].sval = fr; 254 fldtab[i].tval = FLD | STR; 255 while ((c = *r) != sep && c != '\n' && c != '\0') 256 /* \n always a separator */ 257 *fr++ = *r++; 258 *fr++ = 0; 259 if (*r++ == 0) 260 break; 261 } 262 *fr = 0; 263 /* clean out junk from previous record */ 264 for (p = maxmfld, q = &fldtab[i]; p > q; p--) { 265 if (!(p->tval&FLD)) 266 xfree(p->sval); 267 p->tval = STR | FLD; 268 p->sval = L_NULL; 269 } 270 maxfld = i; 271 maxmfld = &fldtab[i]; 272 donefld = 1; 273 for (i = 1; i <= maxfld; i++) 274 if (isanumber(fldtab[i].sval)) { 275 fldtab[i].fval = watof(fldtab[i].sval); 276 fldtab[i].tval |= NUM; 277 } 278 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld); 279 if (dbg) 280 for (i = 0; i <= maxfld; i++) 281 printf("field %d: |%ws|\n", i, fldtab[i].sval); 282 } 283 284 285 recbld() 286 { 287 int i; 288 wchar_t *r, *p; 289 290 291 if (donefld == 0 || donerec == 1) 292 return; 293 r = record; 294 for (i = 1; i <= *NF; i++) { 295 p = getsval(&fldtab[i]); 296 while (*r++ = *p++) 297 ; 298 *(r-1) = **OFS; 299 } 300 *(r-1) = '\0'; 301 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 302 recloc->tval = STR | FLD; 303 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 304 if (r > record+RECSIZE) 305 error(FATAL, "built giant record `%.20ws...'", record); 306 dprintf("recbld = |%ws|\n", record, NULL, NULL); 307 } 308 309 310 CELL * 311 fieldadr(n) 312 { 313 if (n < 0 || n >= MAXFLD) 314 error(FATAL, "trying to access field %d", n); 315 return (&fldtab[n]); 316 } 317 318 319 int errorflag = 0; 320 321 322 yyerror(char *s) 323 { 324 fprintf(stderr, 325 gettext("awk: %s near line %lld\n"), gettext(s), lineno); 326 errorflag = 2; 327 } 328 329 330 error(f, s, a1, a2, a3, a4, a5, a6, a7) 331 { 332 fprintf(stderr, "awk: "); 333 fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7); 334 fprintf(stderr, "\n"); 335 if (NR && *NR > 0) 336 fprintf(stderr, gettext(" record number %g\n"), *NR); 337 if (f) 338 exit(2); 339 } 340 341 342 PUTS(s) char *s; { 343 dprintf("%s\n", s, NULL, NULL); 344 } 345 346 347 #define MAXEXPON 38 /* maximum exponenet for fp number */ 348 349 350 isanumber(s) 351 wchar_t *s; 352 { 353 int d1, d2; 354 int point; 355 wchar_t *es; 356 extern wchar_t radixpoint; 357 358 d1 = d2 = point = 0; 359 while (*s == ' ' || *s == '\t' || *s == '\n') 360 s++; 361 if (*s == '\0') 362 return (0); /* empty stuff isn't number */ 363 if (*s == '+' || *s == '-') 364 s++; 365 /* 366 * Since, iswdigit() will include digit from other than code set 0, 367 * we have to check it from code set 0 or not. 368 */ 369 if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint) 370 return (0); 371 if (iswdigit(*s) && iswascii(*s)) { 372 do { 373 d1++; 374 s++; 375 } while (iswdigit(*s) && iswascii(*s)); 376 } 377 if (d1 >= MAXEXPON) 378 return (0); /* too many digits to convert */ 379 if (*s == radixpoint) { 380 point++; 381 s++; 382 } 383 if (iswdigit(*s) && iswascii(*s)) { 384 d2++; 385 do { 386 s++; 387 } while (iswdigit(*s) && iswascii(*s)); 388 } 389 390 391 if (!(d1 || point && d2)) 392 return (0); 393 if (*s == 'e' || *s == 'E') { 394 s++; 395 if (*s == '+' || *s == '-') 396 s++; 397 if (!(iswdigit(*s) && iswascii(*s))) 398 return (0); 399 es = s; 400 do { 401 s++; 402 } while (iswdigit(*s) && iswascii(*s)); 403 404 405 if (s - es > 2) 406 return (0); 407 else if (s - es == 2 && 408 10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON) 409 return (0); 410 } 411 while (*s == ' ' || *s == '\t' || *s == '\n') 412 s++; 413 if (*s == '\0') 414 return (1); 415 else 416 return (0); 417 } 418 char * 419 toeuccode(str) 420 wchar_t *str; 421 { 422 static char euccode[RECSIZE]; 423 424 (void) wcstombs(euccode, str, RECSIZE); 425 return (euccode); 426 } 427