1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <stdio.h> 31 #include "awk.def" 32 #include "awk.h" 33 #include <ctype.h> 34 #include <wctype.h> 35 #include "awktype.h" 36 #include <stdlib.h> 37 38 FILE *infile = NULL; 39 wchar_t *file; 40 #define RECSIZE (5 * 512) 41 wchar_t record[RECSIZE]; 42 wchar_t fields[RECSIZE]; 43 wchar_t L_NULL[] = L""; 44 45 46 #define MAXFLD 100 47 int donefld; /* 1 = implies rec broken into fields */ 48 int donerec; /* 1 = record is valid (no flds have changed) */ 49 int mustfld; /* 1 = NF seen, so always break */ 50 static wchar_t L_record[] = L"$record"; 51 52 53 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR } 54 CELL fldtab[MAXFLD] = { /* room for fields */ 55 { OCELL, CFLD, L_record, record, 0.0, STR|FLD}, 56 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 57 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, 65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT 66 }; 67 int maxfld = 0; /* last used field */ 68 /* pointer to CELL for maximum field assigned to */ 69 CELL *maxmfld = &fldtab[0]; 70 71 static int isclvar(wchar_t *); 72 static void setclvar(wchar_t *); 73 void fldbld(void); 74 75 int 76 getrec(void) 77 { 78 wchar_t *rr, *er; 79 int c, sep; 80 FILE *inf; 81 extern int svargc; 82 extern wchar_t **svargv; 83 84 85 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL); 86 donefld = 0; 87 donerec = 1; 88 record[0] = 0; 89 er = record + RECSIZE; 90 while (svargc > 0) { 91 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL); 92 if (infile == NULL) { /* have to open a new file */ 93 /* 94 * If the argument contains a '=', determine if the 95 * argument needs to be treated as a variable assignment 96 * or as the pathname of a file. 97 */ 98 if (isclvar(*svargv)) { 99 /* it's a var=value argument */ 100 setclvar(*svargv); 101 if (svargc > 1) { 102 svargv++; 103 svargc--; 104 continue; 105 } 106 *svargv = L"-"; 107 } 108 *FILENAME = file = *svargv; 109 dprintf("opening file %ws\n", file, NULL, NULL); 110 if (*file == (wchar_t)L'-') 111 infile = stdin; 112 else if ((infile = fopen(toeuccode(file), "r")) == NULL) 113 error(FATAL, "can't open %ws", file); 114 } 115 if ((sep = **RS) == 0) 116 sep = '\n'; 117 inf = infile; 118 for (rr = record; /* dummy */; /* dummy */) { 119 for (; (c = getwc(inf)) != sep && c != EOF && rr < er; 120 *rr++ = c) 121 ; 122 if (rr >= er) 123 error(FATAL, "record `%.20ws...' too long", 124 record); 125 if (**RS == sep || c == EOF) 126 break; 127 if ((c = getwc(inf)) == '\n' || c == EOF) 128 /* 2 in a row */ 129 break; 130 *rr++ = '\n'; 131 *rr++ = c; 132 } 133 if (rr >= er) 134 error(FATAL, "record `%.20ws...' too long", record); 135 *rr = 0; 136 if (mustfld) 137 fldbld(); 138 if (c != EOF || rr > record) { /* normal record */ 139 recloc->tval &= ~NUM; 140 recloc->tval |= STR; 141 ++nrloc->fval; 142 nrloc->tval &= ~STR; 143 nrloc->tval |= NUM; 144 return (1); 145 } 146 /* EOF arrived on this file; set up next */ 147 if (infile != stdin) 148 fclose(infile); 149 infile = NULL; 150 svargc--; 151 svargv++; 152 } 153 return (0); /* true end of file */ 154 } 155 156 /* 157 * isclvar() 158 * 159 * Returns 1 if the input string, arg, is a variable assignment, 160 * otherwise returns 0. 161 * 162 * An argument to awk can be either a pathname of a file, or a variable 163 * assignment. An operand that begins with an undersore or alphabetic 164 * character from the portable character set, followed by a sequence of 165 * underscores, digits, and alphabetics from the portable character set, 166 * followed by the '=' character, shall specify a variable assignment 167 * rather than a pathname. 168 */ 169 static int 170 isclvar(wchar_t *arg) 171 { 172 wchar_t *tmpptr = arg; 173 174 if (tmpptr != NULL) { 175 176 /* Begins with an underscore or alphabetic character */ 177 if (iswalpha(*tmpptr) || *tmpptr == '_') { 178 179 /* 180 * followed by a sequence of underscores, digits, 181 * and alphabetics 182 */ 183 for (tmpptr++; *tmpptr; tmpptr++) { 184 if (!(iswalnum(*tmpptr) || (*tmpptr == '_'))) { 185 break; 186 } 187 } 188 return (*tmpptr == '='); 189 } 190 } 191 192 return (0); 193 } 194 195 static void 196 setclvar(wchar_t *s) /* set var=value from s */ 197 { 198 wchar_t *p; 199 CELL *q; 200 201 202 for (p = s; *p != '='; p++) 203 ; 204 *p++ = 0; 205 q = setsymtab(s, tostring(p), 0.0, STR, symtab); 206 setsval(q, p); 207 dprintf("command line set %ws to |%ws|\n", s, p, NULL); 208 } 209 210 211 void 212 fldbld(void) 213 { 214 wchar_t *r, *fr, sep, c; 215 static wchar_t L_NF[] = L"NF"; 216 CELL *p, *q; 217 int i, j; 218 219 220 r = record; 221 fr = fields; 222 i = 0; /* number of fields accumulated here */ 223 if ((sep = **FS) == ' ') 224 for (i = 0; /* dummy */; /* dummy */) { 225 c = *r; 226 while (iswblank(c) || c == '\t' || c == '\n') 227 c = *(++r); 228 if (*r == 0) 229 break; 230 i++; 231 if (i >= MAXFLD) 232 error(FATAL, 233 "record `%.20ws...' has too many fields", record); 234 if (!(fldtab[i].tval&FLD)) 235 xfree(fldtab[i].sval); 236 fldtab[i].sval = fr; 237 fldtab[i].tval = FLD | STR; 238 do { 239 *fr++ = *r++; 240 c = *r; 241 } while (! iswblank(c) && c != '\t' && 242 c != '\n' && c != '\0'); 243 244 245 *fr++ = 0; 246 247 } else if (*r != 0) /* if 0, it's a null field */ 248 for (;;) { 249 i++; 250 if (i >= MAXFLD) 251 error(FATAL, 252 "record `%.20ws...' has too many fields", record); 253 if (!(fldtab[i].tval&FLD)) 254 xfree(fldtab[i].sval); 255 fldtab[i].sval = fr; 256 fldtab[i].tval = FLD | STR; 257 while ((c = *r) != sep && c != '\n' && c != '\0') 258 /* \n always a separator */ 259 *fr++ = *r++; 260 *fr++ = 0; 261 if (*r++ == 0) 262 break; 263 } 264 *fr = 0; 265 /* clean out junk from previous record */ 266 for (p = maxmfld, q = &fldtab[i]; p > q; p--) { 267 if (!(p->tval&FLD)) 268 xfree(p->sval); 269 p->tval = STR | FLD; 270 p->sval = L_NULL; 271 } 272 maxfld = i; 273 maxmfld = &fldtab[i]; 274 donefld = 1; 275 for (i = 1; i <= maxfld; i++) 276 if (isanumber(fldtab[i].sval)) { 277 fldtab[i].fval = watof(fldtab[i].sval); 278 fldtab[i].tval |= NUM; 279 } 280 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld); 281 if (dbg) 282 for (i = 0; i <= maxfld; i++) 283 printf("field %d: |%ws|\n", i, fldtab[i].sval); 284 } 285 286 287 void 288 recbld(void) 289 { 290 int i; 291 wchar_t *r, *p; 292 293 294 if (donefld == 0 || donerec == 1) 295 return; 296 r = record; 297 for (i = 1; i <= *NF; i++) { 298 p = getsval(&fldtab[i]); 299 while (*r++ = *p++) 300 ; 301 *(r-1) = **OFS; 302 } 303 *(r-1) = '\0'; 304 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 305 recloc->tval = STR | FLD; 306 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL); 307 if (r > record+RECSIZE) 308 error(FATAL, "built giant record `%.20ws...'", record); 309 dprintf("recbld = |%ws|\n", record, NULL, NULL); 310 } 311 312 313 CELL * 314 fieldadr(n) 315 { 316 if (n < 0 || n >= MAXFLD) 317 error(FATAL, "trying to access field %d", n); 318 return (&fldtab[n]); 319 } 320 321 322 int errorflag = 0; 323 324 325 int 326 yyerror(char *s) 327 { 328 fprintf(stderr, 329 gettext("awk: %s near line %lld\n"), gettext(s), lineno); 330 errorflag = 2; 331 return (0); 332 } 333 334 335 void 336 error(f, s, a1, a2, a3, a4, a5, a6, a7) 337 { 338 fprintf(stderr, "awk: "); 339 fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7); 340 fprintf(stderr, "\n"); 341 if (NR && *NR > 0) 342 fprintf(stderr, gettext(" record number %g\n"), *NR); 343 if (f) 344 exit(2); 345 } 346 347 348 void 349 PUTS(char *s) 350 { 351 dprintf("%s\n", s, NULL, NULL); 352 } 353 354 355 #define MAXEXPON 38 /* maximum exponenet for fp number */ 356 357 358 int 359 isanumber(wchar_t *s) 360 { 361 int d1, d2; 362 int point; 363 wchar_t *es; 364 extern wchar_t radixpoint; 365 366 d1 = d2 = point = 0; 367 while (*s == ' ' || *s == '\t' || *s == '\n') 368 s++; 369 if (*s == '\0') 370 return (0); /* empty stuff isn't number */ 371 if (*s == '+' || *s == '-') 372 s++; 373 /* 374 * Since, iswdigit() will include digit from other than code set 0, 375 * we have to check it from code set 0 or not. 376 */ 377 if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint) 378 return (0); 379 if (iswdigit(*s) && iswascii(*s)) { 380 do { 381 d1++; 382 s++; 383 } while (iswdigit(*s) && iswascii(*s)); 384 } 385 if (d1 >= MAXEXPON) 386 return (0); /* too many digits to convert */ 387 if (*s == radixpoint) { 388 point++; 389 s++; 390 } 391 if (iswdigit(*s) && iswascii(*s)) { 392 d2++; 393 do { 394 s++; 395 } while (iswdigit(*s) && iswascii(*s)); 396 } 397 398 399 if (!(d1 || point && d2)) 400 return (0); 401 if (*s == 'e' || *s == 'E') { 402 s++; 403 if (*s == '+' || *s == '-') 404 s++; 405 if (!(iswdigit(*s) && iswascii(*s))) 406 return (0); 407 es = s; 408 do { 409 s++; 410 } while (iswdigit(*s) && iswascii(*s)); 411 412 413 if (s - es > 2) 414 return (0); 415 else if (s - es == 2 && 416 10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON) 417 return (0); 418 } 419 while (*s == ' ' || *s == '\t' || *s == '\n') 420 s++; 421 if (*s == '\0') 422 return (1); 423 else 424 return (0); 425 } 426 char * 427 toeuccode(str) 428 wchar_t *str; 429 { 430 static char euccode[RECSIZE]; 431 432 (void) wcstombs(euccode, str, RECSIZE); 433 return (euccode); 434 } 435