1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <stdio.h> 33 #include "awk.def" 34 #include "awk.h" 35 #include <ctype.h> 36 #include <wctype.h> 37 #include "awktype.h" 38 #include <stdlib.h> 39 40 FILE *infile = NULL; 41 wchar_t *file; 42 #define RECSIZE (5 * 512) 43 wchar_t record[RECSIZE]; 44 wchar_t fields[RECSIZE]; 45 wchar_t L_NULL[] = L""; 46 47 48 #define MAXFLD 100 49 int donefld; /* 1 = implies rec broken into fields */ 50 int donerec; /* 1 = record is valid (no flds have changed) */ 51 int mustfld; /* 1 = NF seen, so always break */ 52 static wchar_t L_record[] = L"$record"; 53 54 55 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR } 56 CELL fldtab[MAXFLD] = { /* room for fields */ 57 { OCELL, CFLD, L_record, record, 0.0, STR|FLD}, 58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 66 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 67 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT 68 }; 69 int maxfld = 0; /* last used field */ 70 /* pointer to CELL for maximum field assigned to */ 71 CELL *maxmfld = &fldtab[0]; 72 73 static int isclvar(wchar_t *); 74 static void setclvar(wchar_t *); 75 void fldbld(void); 76 77 int 78 getrec(void) 79 { 80 wchar_t *rr, *er; 81 int c, sep; 82 FILE *inf; 83 extern int svargc; 84 extern wchar_t **svargv; 85 86 87 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL); 88 donefld = 0; 89 donerec = 1; 90 record[0] = 0; 91 er = record + RECSIZE; 92 while (svargc > 0) { 93 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL); 94 if (infile == NULL) { /* have to open a new file */ 95 /* 96 * If the argument contains a '=', determine if the 97 * argument needs to be treated as a variable assignment 98 * or as the pathname of a file. 99 */ 100 if (isclvar(*svargv)) { 101 /* it's a var=value argument */ 102 setclvar(*svargv); 103 if (svargc > 1) { 104 svargv++; 105 svargc--; 106 continue; 107 } 108 *svargv = L"-"; 109 } 110 *FILENAME = file = *svargv; 111 dprintf("opening file %ws\n", file, NULL, NULL); 112 if (*file == (wchar_t)L'-') 113 infile = stdin; 114 else if ((infile = fopen(toeuccode(file), "r")) == NULL) 115 error(FATAL, "can't open %ws", file); 116 } 117 if ((sep = **RS) == 0) 118 sep = '\n'; 119 inf = infile; 120 for (rr = record; /* dummy */; /* dummy */) { 121 for (; (c = getwc(inf)) != sep && c != EOF && rr < er; 122 *rr++ = c) 123 ; 124 if (rr >= er) 125 error(FATAL, "record `%.20ws...' too long", 126 record); 127 if (**RS == sep || c == EOF) 128 break; 129 if ((c = getwc(inf)) == '\n' || c == EOF) 130 /* 2 in a row */ 131 break; 132 *rr++ = '\n'; 133 *rr++ = c; 134 } 135 if (rr >= er) 136 error(FATAL, "record `%.20ws...' too long", record); 137 *rr = 0; 138 if (mustfld) 139 fldbld(); 140 if (c != EOF || rr > record) { /* normal record */ 141 recloc->tval &= ~NUM; 142 recloc->tval |= STR; 143 ++nrloc->fval; 144 nrloc->tval &= ~STR; 145 nrloc->tval |= NUM; 146 return (1); 147 } 148 /* EOF arrived on this file; set up next */ 149 if (infile != stdin) 150 fclose(infile); 151 infile = NULL; 152 svargc--; 153 svargv++; 154 } 155 return (0); /* true end of file */ 156 } 157 158 /* 159 * isclvar() 160 * 161 * Returns 1 if the input string, arg, is a variable assignment, 162 * otherwise returns 0. 163 * 164 * An argument to awk can be either a pathname of a file, or a variable 165 * assignment. An operand that begins with an undersore or alphabetic 166 * character from the portable character set, followed by a sequence of 167 * underscores, digits, and alphabetics from the portable character set, 168 * followed by the '=' character, shall specify a variable assignment 169 * rather than a pathname. 170 */ 171 static int 172 isclvar(wchar_t *arg) 173 { 174 wchar_t *tmpptr = arg; 175 176 if (tmpptr != NULL) { 177 178 /* Begins with an underscore or alphabetic character */ 179 if (iswalpha(*tmpptr) || *tmpptr == '_') { 180 181 /* 182 * followed by a sequence of underscores, digits, 183 * and alphabetics 184 */ 185 for (tmpptr++; *tmpptr; tmpptr++) { 186 if (!(isalnum(*tmpptr) || (*tmpptr == '_'))) { 187 break; 188 } 189 } 190 return (*tmpptr == '='); 191 } 192 } 193 194 return (0); 195 } 196 197 static void 198 setclvar(wchar_t *s) /* set var=value from s */ 199 { 200 wchar_t *p; 201 CELL *q; 202 203 204 for (p = s; *p != '='; p++) 205 ; 206 *p++ = 0; 207 q = setsymtab(s, tostring(p), 0.0, STR, symtab); 208 setsval(q, p); 209 dprintf("command line set %ws to |%ws|\n", s, p, NULL); 210 } 211 212 213 void 214 fldbld(void) 215 { 216 wchar_t *r, *fr, sep, c; 217 static wchar_t L_NF[] = L"NF"; 218 CELL *p, *q; 219 int i, j; 220 221 222 r = record; 223 fr = fields; 224 i = 0; /* number of fields accumulated here */ 225 if ((sep = **FS) == ' ') 226 for (i = 0; /* dummy */; /* dummy */) { 227 c = *r; 228 while (iswblank(c) || c == '\t' || c == '\n') 229 c = *(++r); 230 if (*r == 0) 231 break; 232 i++; 233 if (i >= MAXFLD) 234 error(FATAL, 235 "record `%.20ws...' has too many fields", record); 236 if (!(fldtab[i].tval&FLD)) 237 xfree(fldtab[i].sval); 238 fldtab[i].sval = fr; 239 fldtab[i].tval = FLD | STR; 240 do { 241 *fr++ = *r++; 242 c = *r; 243 } while (! iswblank(c) && c != '\t' && 244 c != '\n' && c != '\0'); 245 246 247 *fr++ = 0; 248 249 } else if (*r != 0) /* if 0, it's a null field */ 250 for (;;) { 251 i++; 252 if (i >= MAXFLD) 253 error(FATAL, 254 "record `%.20ws...' has too many fields", record); 255 if (!(fldtab[i].tval&FLD)) 256 xfree(fldtab[i].sval); 257 fldtab[i].sval = fr; 258 fldtab[i].tval = FLD | STR; 259 while ((c = *r) != sep && c != '\n' && c != '\0') 260 /* \n always a separator */ 261 *fr++ = *r++; 262 *fr++ = 0; 263 if (*r++ == 0) 264 break; 265 } 266 *fr = 0; 267 /* clean out junk from previous record */ 268 for (p = maxmfld, q = &fldtab[i]; p > q; p--) { 269 if (!(p->tval&FLD)) 270 xfree(p->sval); 271 p->tval = STR | FLD; 272 p->sval = L_NULL; 273 } 274 maxfld = i; 275 maxmfld = &fldtab[i]; 276 donefld = 1; 277 for (i = 1; i <= maxfld; i++) 278 if (isanumber(fldtab[i].sval)) { 279 fldtab[i].fval = watof(fldtab[i].sval); 280 fldtab[i].tval |= NUM; 281 } 282 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld); 283 if (dbg) 284 for (i = 0; i <= maxfld; i++) 285 printf("field %d: |%ws|\n", i, fldtab[i].sval); 286 } 287 288 289 void 290 recbld(void) 291 { 292 int i; 293 wchar_t *r, *p; 294 295 296 if (donefld == 0 || donerec == 1) 297 return; 298 r = record; 299 for (i = 1; i <= *NF; i++) { 300 p = getsval(&fldtab[i]); 301 while (*r++ = *p++) 302 ; 303 *(r-1) = **OFS; 304 } 305 *(r-1) = '\0'; 306 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 307 recloc->tval = STR | FLD; 308 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 309 if (r > record+RECSIZE) 310 error(FATAL, "built giant record `%.20ws...'", record); 311 dprintf("recbld = |%ws|\n", record, NULL, NULL); 312 } 313 314 315 CELL * 316 fieldadr(n) 317 { 318 if (n < 0 || n >= MAXFLD) 319 error(FATAL, "trying to access field %d", n); 320 return (&fldtab[n]); 321 } 322 323 324 int errorflag = 0; 325 326 327 int 328 yyerror(char *s) 329 { 330 fprintf(stderr, 331 gettext("awk: %s near line %lld\n"), gettext(s), lineno); 332 errorflag = 2; 333 return (0); 334 } 335 336 337 void 338 error(f, s, a1, a2, a3, a4, a5, a6, a7) 339 { 340 fprintf(stderr, "awk: "); 341 fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7); 342 fprintf(stderr, "\n"); 343 if (NR && *NR > 0) 344 fprintf(stderr, gettext(" record number %g\n"), *NR); 345 if (f) 346 exit(2); 347 } 348 349 350 void 351 PUTS(char *s) 352 { 353 dprintf("%s\n", s, NULL, NULL); 354 } 355 356 357 #define MAXEXPON 38 /* maximum exponenet for fp number */ 358 359 360 int 361 isanumber(wchar_t *s) 362 { 363 int d1, d2; 364 int point; 365 wchar_t *es; 366 extern wchar_t radixpoint; 367 368 d1 = d2 = point = 0; 369 while (*s == ' ' || *s == '\t' || *s == '\n') 370 s++; 371 if (*s == '\0') 372 return (0); /* empty stuff isn't number */ 373 if (*s == '+' || *s == '-') 374 s++; 375 /* 376 * Since, iswdigit() will include digit from other than code set 0, 377 * we have to check it from code set 0 or not. 378 */ 379 if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint) 380 return (0); 381 if (iswdigit(*s) && iswascii(*s)) { 382 do { 383 d1++; 384 s++; 385 } while (iswdigit(*s) && iswascii(*s)); 386 } 387 if (d1 >= MAXEXPON) 388 return (0); /* too many digits to convert */ 389 if (*s == radixpoint) { 390 point++; 391 s++; 392 } 393 if (iswdigit(*s) && iswascii(*s)) { 394 d2++; 395 do { 396 s++; 397 } while (iswdigit(*s) && iswascii(*s)); 398 } 399 400 401 if (!(d1 || point && d2)) 402 return (0); 403 if (*s == 'e' || *s == 'E') { 404 s++; 405 if (*s == '+' || *s == '-') 406 s++; 407 if (!(iswdigit(*s) && iswascii(*s))) 408 return (0); 409 es = s; 410 do { 411 s++; 412 } while (iswdigit(*s) && iswascii(*s)); 413 414 415 if (s - es > 2) 416 return (0); 417 else if (s - es == 2 && 418 10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON) 419 return (0); 420 } 421 while (*s == ' ' || *s == '\t' || *s == '\n') 422 s++; 423 if (*s == '\0') 424 return (1); 425 else 426 return (0); 427 } 428 char * 429 toeuccode(str) 430 wchar_t *str; 431 { 432 static char euccode[RECSIZE]; 433 434 (void) wcstombs(euccode, str, RECSIZE); 435 return (euccode); 436 } 437