1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <stdio.h> 31 #include "awk.def" 32 #include "awk.h" 33 #include <ctype.h> 34 #include <wctype.h> 35 #include "awktype.h" 36 #include <stdlib.h> 37 #include <stdarg.h> 38 39 FILE *infile = NULL; 40 wchar_t *file; 41 #define RECSIZE (5 * 512) 42 wchar_t record[RECSIZE]; 43 wchar_t fields[RECSIZE]; 44 wchar_t L_NULL[] = L""; 45 46 47 #define MAXFLD 100 48 int donefld; /* 1 = implies rec broken into fields */ 49 int donerec; /* 1 = record is valid (no flds have changed) */ 50 int mustfld; /* 1 = NF seen, so always break */ 51 static wchar_t L_record[] = L"$record"; 52 53 54 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR } 55 CELL fldtab[MAXFLD] = { /* room for fields */ 56 { OCELL, CFLD, L_record, record, 0.0, STR|FLD}, 57 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 66 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT 67 }; 68 int maxfld = 0; /* last used field */ 69 /* pointer to CELL for maximum field assigned to */ 70 CELL *maxmfld = &fldtab[0]; 71 72 static int isclvar(wchar_t *); 73 static void setclvar(wchar_t *); 74 void fldbld(void); 75 76 int 77 getrec(void) 78 { 79 wchar_t *rr, *er; 80 int c, sep; 81 FILE *inf; 82 extern int svargc; 83 extern wchar_t **svargv; 84 85 86 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL); 87 donefld = 0; 88 donerec = 1; 89 record[0] = 0; 90 er = record + RECSIZE; 91 while (svargc > 0) { 92 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL); 93 if (infile == NULL) { /* have to open a new file */ 94 /* 95 * If the argument contains a '=', determine if the 96 * argument needs to be treated as a variable assignment 97 * or as the pathname of a file. 98 */ 99 if (isclvar(*svargv)) { 100 /* it's a var=value argument */ 101 setclvar(*svargv); 102 if (svargc > 1) { 103 svargv++; 104 svargc--; 105 continue; 106 } 107 *svargv = L"-"; 108 } 109 *FILENAME = file = *svargv; 110 dprintf("opening file %ws\n", file, NULL, NULL); 111 if (*file == (wchar_t)L'-') 112 infile = stdin; 113 else if ((infile = fopen(toeuccode(file), "r")) == NULL) 114 error(FATAL, "can't open %ws", file); 115 } 116 if ((sep = **RS) == 0) 117 sep = '\n'; 118 inf = infile; 119 for (rr = record; /* dummy */; /* dummy */) { 120 for (; (c = getwc(inf)) != sep && c != EOF && rr < er; 121 *rr++ = c) 122 ; 123 if (rr >= er) 124 error(FATAL, "record `%.20ws...' too long", 125 record); 126 if (**RS == sep || c == EOF) 127 break; 128 if ((c = getwc(inf)) == '\n' || c == EOF) 129 /* 2 in a row */ 130 break; 131 *rr++ = '\n'; 132 *rr++ = c; 133 } 134 if (rr >= er) 135 error(FATAL, "record `%.20ws...' too long", record); 136 *rr = 0; 137 if (mustfld) 138 fldbld(); 139 if (c != EOF || rr > record) { /* normal record */ 140 recloc->tval &= ~NUM; 141 recloc->tval |= STR; 142 ++nrloc->fval; 143 nrloc->tval &= ~STR; 144 nrloc->tval |= NUM; 145 return (1); 146 } 147 /* EOF arrived on this file; set up next */ 148 if (infile != stdin) 149 fclose(infile); 150 infile = NULL; 151 svargc--; 152 svargv++; 153 } 154 return (0); /* true end of file */ 155 } 156 157 /* 158 * isclvar() 159 * 160 * Returns 1 if the input string, arg, is a variable assignment, 161 * otherwise returns 0. 162 * 163 * An argument to awk can be either a pathname of a file, or a variable 164 * assignment. An operand that begins with an undersore or alphabetic 165 * character from the portable character set, followed by a sequence of 166 * underscores, digits, and alphabetics from the portable character set, 167 * followed by the '=' character, shall specify a variable assignment 168 * rather than a pathname. 169 */ 170 static int 171 isclvar(wchar_t *arg) 172 { 173 wchar_t *tmpptr = arg; 174 175 if (tmpptr != NULL) { 176 177 /* Begins with an underscore or alphabetic character */ 178 if (iswalpha(*tmpptr) || *tmpptr == '_') { 179 180 /* 181 * followed by a sequence of underscores, digits, 182 * and alphabetics 183 */ 184 for (tmpptr++; *tmpptr; tmpptr++) { 185 if (!(iswalnum(*tmpptr) || (*tmpptr == '_'))) { 186 break; 187 } 188 } 189 return (*tmpptr == '='); 190 } 191 } 192 193 return (0); 194 } 195 196 static void 197 setclvar(wchar_t *s) /* set var=value from s */ 198 { 199 wchar_t *p; 200 CELL *q; 201 202 203 for (p = s; *p != '='; p++) 204 ; 205 *p++ = 0; 206 q = setsymtab(s, tostring(p), 0.0, STR, symtab); 207 setsval(q, p); 208 dprintf("command line set %ws to |%ws|\n", s, p, NULL); 209 } 210 211 212 void 213 fldbld(void) 214 { 215 wchar_t *r, *fr, sep, c; 216 static wchar_t L_NF[] = L"NF"; 217 CELL *p, *q; 218 int i, j; 219 220 221 r = record; 222 fr = fields; 223 i = 0; /* number of fields accumulated here */ 224 if ((sep = **FS) == ' ') 225 for (i = 0; /* dummy */; /* dummy */) { 226 c = *r; 227 while (iswblank(c) || c == '\t' || c == '\n') 228 c = *(++r); 229 if (*r == 0) 230 break; 231 i++; 232 if (i >= MAXFLD) 233 error(FATAL, 234 "record `%.20ws...' has too many fields", record); 235 if (!(fldtab[i].tval&FLD)) 236 xfree(fldtab[i].sval); 237 fldtab[i].sval = fr; 238 fldtab[i].tval = FLD | STR; 239 do { 240 *fr++ = *r++; 241 c = *r; 242 } while (! iswblank(c) && c != '\t' && 243 c != '\n' && c != '\0'); 244 245 246 *fr++ = 0; 247 248 } else if (*r != 0) /* if 0, it's a null field */ 249 for (;;) { 250 i++; 251 if (i >= MAXFLD) 252 error(FATAL, 253 "record `%.20ws...' has too many fields", record); 254 if (!(fldtab[i].tval&FLD)) 255 xfree(fldtab[i].sval); 256 fldtab[i].sval = fr; 257 fldtab[i].tval = FLD | STR; 258 while ((c = *r) != sep && c != '\n' && c != '\0') 259 /* \n always a separator */ 260 *fr++ = *r++; 261 *fr++ = 0; 262 if (*r++ == 0) 263 break; 264 } 265 *fr = 0; 266 /* clean out junk from previous record */ 267 for (p = maxmfld, q = &fldtab[i]; p > q; p--) { 268 if (!(p->tval&FLD)) 269 xfree(p->sval); 270 p->tval = STR | FLD; 271 p->sval = L_NULL; 272 } 273 maxfld = i; 274 maxmfld = &fldtab[i]; 275 donefld = 1; 276 for (i = 1; i <= maxfld; i++) 277 if (isanumber(fldtab[i].sval)) { 278 fldtab[i].fval = watof(fldtab[i].sval); 279 fldtab[i].tval |= NUM; 280 } 281 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld); 282 if (dbg) 283 for (i = 0; i <= maxfld; i++) 284 printf("field %d: |%ws|\n", i, fldtab[i].sval); 285 } 286 287 288 void 289 recbld(void) 290 { 291 int i; 292 wchar_t *r, *p; 293 294 295 if (donefld == 0 || donerec == 1) 296 return; 297 r = record; 298 for (i = 1; i <= *NF; i++) { 299 p = getsval(&fldtab[i]); 300 while (*r++ = *p++) 301 ; 302 *(r-1) = **OFS; 303 } 304 *(r-1) = '\0'; 305 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 306 recloc->tval = STR | FLD; 307 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 308 if (r > record+RECSIZE) 309 error(FATAL, "built giant record `%.20ws...'", record); 310 dprintf("recbld = |%ws|\n", record, NULL, NULL); 311 } 312 313 314 CELL * 315 fieldadr(int n) 316 { 317 if (n < 0 || n >= MAXFLD) 318 error(FATAL, "trying to access field %d", n); 319 return (&fldtab[n]); 320 } 321 322 323 int errorflag = 0; 324 325 326 int 327 yyerror(char *s) 328 { 329 fprintf(stderr, 330 gettext("awk: %s near line %lld\n"), gettext(s), lineno); 331 errorflag = 2; 332 return (0); 333 } 334 335 336 void 337 error(int f, char *fmt, ...) 338 { 339 va_list ap; 340 341 va_start(ap, fmt); 342 fprintf(stderr, "awk: "); 343 vfprintf(stderr, gettext(fmt), ap); 344 va_end(ap); 345 fprintf(stderr, "\n"); 346 if (NR && *NR > 0) 347 fprintf(stderr, gettext(" record number %g\n"), *NR); 348 if (f) 349 exit(2); 350 } 351 352 353 void 354 PUTS(char *s) 355 { 356 dprintf("%s\n", s, NULL, NULL); 357 } 358 359 360 #define MAXEXPON 38 /* maximum exponenet for fp number */ 361 362 363 int 364 isanumber(wchar_t *s) 365 { 366 int d1, d2; 367 int point; 368 wchar_t *es; 369 extern wchar_t radixpoint; 370 371 d1 = d2 = point = 0; 372 while (*s == ' ' || *s == '\t' || *s == '\n') 373 s++; 374 if (*s == '\0') 375 return (0); /* empty stuff isn't number */ 376 if (*s == '+' || *s == '-') 377 s++; 378 /* 379 * Since, iswdigit() will include digit from other than code set 0, 380 * we have to check it from code set 0 or not. 381 */ 382 if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint) 383 return (0); 384 if (iswdigit(*s) && iswascii(*s)) { 385 do { 386 d1++; 387 s++; 388 } while (iswdigit(*s) && iswascii(*s)); 389 } 390 if (d1 >= MAXEXPON) 391 return (0); /* too many digits to convert */ 392 if (*s == radixpoint) { 393 point++; 394 s++; 395 } 396 if (iswdigit(*s) && iswascii(*s)) { 397 d2++; 398 do { 399 s++; 400 } while (iswdigit(*s) && iswascii(*s)); 401 } 402 403 404 if (!(d1 || point && d2)) 405 return (0); 406 if (*s == 'e' || *s == 'E') { 407 s++; 408 if (*s == '+' || *s == '-') 409 s++; 410 if (!(iswdigit(*s) && iswascii(*s))) 411 return (0); 412 es = s; 413 do { 414 s++; 415 } while (iswdigit(*s) && iswascii(*s)); 416 417 418 if (s - es > 2) 419 return (0); 420 else if (s - es == 2 && 421 10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON) 422 return (0); 423 } 424 while (*s == ' ' || *s == '\t' || *s == '\n') 425 s++; 426 if (*s == '\0') 427 return (1); 428 else 429 return (0); 430 } 431 char * 432 toeuccode(str) 433 wchar_t *str; 434 { 435 static char euccode[RECSIZE]; 436 437 (void) wcstombs(euccode, str, RECSIZE); 438 return (euccode); 439 } 440