1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * awk -- common header file. 29 * 30 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 31 * 32 * This version uses the POSIX.2 compatible <regex.h> routines. 33 * 34 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 35 * 36 */ 37 38 #pragma ident "%Z%%M% %I% %E% SMI" 39 40 #include <stdio.h> 41 #include <ctype.h> 42 #include <string.h> 43 #include <math.h> 44 #include <limits.h> 45 #include <stdlib.h> 46 #include <regex.h> 47 #include <errno.h> 48 #include <sys/types.h> 49 #include <locale.h> 50 #include <wchar.h> 51 #include <widec.h> 52 53 #define YYMAXDEPTH 300 /* Max # of productions (used by yacc) */ 54 #define YYSSIZE 300 /* Size of State/Value stacks (MKS YACC) */ 55 #define MAXDIGINT 19 /* Number of digits in an INT */ 56 #define FNULL ((FILE *)0) 57 #define NNULL ((NODE *)0) 58 #define SNULL ((STRING)0) 59 #define LARGE INT_MAX /* Large integer */ 60 #define NPFILE 32 /* Number of -[fl] options allowed */ 61 #define NRECUR 3000 /* Maximum recursion depth */ 62 63 #define M_LDATA 1 64 #ifdef M_LDATA 65 # define NLINE 20000 /* Longest input record */ 66 # define NFIELD 4000 /* Number of fields allowed */ 67 # define NBUCKET 1024 /* # of symtab buckets (power of 2) */ 68 #else 69 # define NLINE 2048 /* Longest input record */ 70 # define NFIELD 1024 /* Number of fields allowed */ 71 # define NBUCKET 256 /* # of symtab buckets (power of 2) */ 72 #endif 73 74 #define NSNODE 40 /* Number of cached nodes */ 75 #define NCONTEXT 50 /* Amount of context for error msgs */ 76 #define hashbuck(n) ((n)&(NBUCKET-1)) 77 #if BSD 78 /* 79 * A speedup for BSD. Use their routines which are 80 * already optimised. Note that BSD bcopy does not 81 * return a value. 82 */ 83 int bcmp(); 84 #define memcmp(b1,b2,n) bcmp(b1,b2,n) 85 void bcopy(); 86 #define memcpy(b1,b2,n) bcopy(b2,b1,(int)n) 87 #endif /*BSD*/ 88 #define vlook(n) vlookup(n, 0) 89 90 /* 91 * Basic AWK internal types. 92 */ 93 typedef double REAL; 94 typedef long long INT; 95 typedef wchar_t *STRING; 96 typedef struct NODE *(*FUNCTION)(struct NODE * np); 97 typedef regex_t *REGEXP; 98 99 /* 100 * Node in the AWK interpreter expression tree. 101 */ 102 typedef struct NODE { 103 ushort n_type; 104 struct NODE *n_next; /* Symbol table/PARM link */ 105 ushort n_flags; /* Node flags, type */ 106 107 108 109 110 union { 111 struct { 112 ushort N_hash; /* Full hash value */ 113 struct NODE *N_alink; /* Array link */ 114 union { 115 struct { 116 STRING N_string; 117 size_t N_strlen; 118 } n_str; 119 INT N_int; 120 REAL N_real; 121 FUNCTION N_function; 122 struct NODE *N_ufunc; 123 } n_tun; 124 wchar_t N_name[1]; 125 } n_term; 126 struct { 127 struct NODE *N_left; 128 struct NODE *N_right; 129 ushort N_lineno; 130 } n_op; 131 struct { 132 struct NODE *N_left; /* Used for fliplist */ 133 struct NODE *N_right; 134 REGEXP N_regexp; /* Regular expression */ 135 } n_re; 136 } n_un; 137 } NODE; 138 139 /* 140 * Definitions to make the node access much easier. 141 */ 142 #define n_hash n_un.n_term.N_hash /* full hash value is sym tbl */ 143 #define n_scope n_un.n_term.N_hash /* local variable scope level */ 144 #define n_alink n_un.n_term.N_alink /* link to array list */ 145 #define n_string n_un.n_term.n_tun.n_str.N_string 146 #define n_strlen n_un.n_term.n_tun.n_str.N_strlen 147 #define n_int n_un.n_term.n_tun.N_int 148 #define n_real n_un.n_term.n_tun.N_real 149 #define n_function n_un.n_term.n_tun.N_function 150 #define n_ufunc n_un.n_term.n_tun.N_ufunc 151 #define n_name n_un.n_term.N_name 152 #define n_left n_un.n_op.N_left 153 #define n_right n_un.n_op.N_right 154 #define n_lineno n_un.n_op.N_lineno 155 #define n_keywtype n_un.n_op.N_lineno 156 #define n_regexp n_un.n_re.N_regexp 157 /* 158 * Compress the types that are actually used in the final tree 159 * to save space in the intermediate file. Allows 1 byte to 160 * represent all types 161 */ 162 163 164 165 166 167 168 169 /* 170 * n_flags bit assignments. 171 */ 172 #define FALLOC 0x01 /* Allocated node */ 173 #define FSTATIC 0x00 /* Not allocated */ 174 #define FMATCH 0x02 /* pattern,pattern (first part matches) */ 175 #define FSPECIAL 0x04 /* Special pre-computed variable */ 176 #define FINARRAY 0x08 /* NODE installed in N_alink array list */ 177 #define FNOALLOC 0x10 /* mark node FALLOC, but don't malloc */ 178 #define FSENSE 0x20 /* Sense if string looks like INT/REAL */ 179 #define FSAVE (FSPECIAL|FINARRAY) /* assign leaves on */ 180 181 #define FINT 0x40 /* Node has integer type */ 182 #define FREAL 0x80 /* Node has real type */ 183 #define FSTRING 0x100 /* Node has string type */ 184 #define FNONTOK 0x200 /* Node has non-token type */ 185 #define FVINT 0x400 /* Node looks like an integer */ 186 #define FVREAL 0x800 /* Node looks like a real number */ 187 #define FLARRAY 0x1000 /* Local array node */ 188 189 /* 190 * n_flags macros 191 * These work when given an argument of np->n_flags 192 */ 193 #define isleaf(f) (!((f)&FNONTOK)) 194 #define isstring(f) ((f)&FSTRING) 195 #define isastring(f) (((f)&(FSTRING|FALLOC))==(FSTRING|FALLOC)) 196 #define isnumber(f) ((f)&(FINT|FVINT|FREAL|FVREAL)) 197 #define isreal(f) ((f)&(FREAL|FVREAL)) 198 #define isint(f) ((f)&(FINT|FVINT)) 199 200 /* 201 * Prototype file size is defined in awksize.h 202 */ 203 204 205 206 207 208 /* 209 * Awkrun prototype default name 210 */ 211 #if defined(DOS) 212 # if defined(__386__) 213 # define AWK_PROTOTYPE M_ETCDIR(awkrunf.dos) 214 # define AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos) 215 # else 216 # define AWK_PROTOTYPE M_ETCDIR(awkrun.dos) 217 # define AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos) 218 # endif 219 #elif defined(OS2) 220 # define AWK_PROTOTYPE M_ETCDIR(awkrun.os2) 221 #elif defined(NT) 222 # define AWK_PROTOTYPE M_ETCDIR(awkrun.nt) 223 #else 224 # define AWK_PROTOTYPE M_ETCDIR(awkrun.mod) 225 #endif 226 227 /* 228 * This is a kludge that gets around a bug in compact & large 229 * models under DOS. It also makes the generated 230 * code faster even if there wasn't a bug. UNIX people: try 231 * to ignore these noisy "near" declarations. 232 */ 233 #ifndef DOS 234 #define near 235 #endif 236 237 typedef wchar_t near *LOCCHARP; /* pointer to local strings */ 238 /* 239 * Form of builtin symbols 240 * This should be a union because only one of r_ivalue 241 * and r_svalue is needed, but (alas) unions cannot be 242 * initialised. 243 */ 244 typedef struct RESERVED { 245 LOCCHARP r_name; 246 int r_type; /* Type of node */ 247 INT r_ivalue; /* Integer value or wcslen(r_svalue) */ 248 STRING r_svalue; /* String value */ 249 } RESERVED; 250 251 /* 252 * Table of builtin functions. 253 */ 254 typedef struct RESFUNC { 255 LOCCHARP rf_name; 256 int rf_type; /* FUNC || GETLINE */ 257 FUNCTION rf_func; /* Function pointer */ 258 } RESFUNC; 259 260 /* 261 * Structure holding list of open files. 262 */ 263 typedef struct OFILE { 264 ushort f_mode; /* Open mode: WRITE, APPEND, PIPE */ 265 FILE *f_fp; /* File pointer if open */ 266 char *f_name; /* Remembered file name */ 267 } OFILE; 268 269 /* Global functions -- awk.y */ 270 int yyparse(void); 271 272 /* Global functions -- awk1.c */ 273 #ifdef __WATCOMC__ 274 # pragma aux yyerror aborts; 275 # pragma aux awkerr aborts; 276 # pragma aux awkperr aborts; 277 #endif 278 void yyerror(char *msg, ...); 279 void awkerr(char *fmt, ...); 280 void awkperr(char *fmt, ...); 281 void uexit(NODE *); 282 int yylex(void); 283 NODE *renode(wchar_t *restr); 284 wchar_t *emalloc(unsigned); 285 wchar_t *erealloc(wchar_t *, unsigned); 286 287 /* Global functions -- awk2.c */ 288 void awk(void); 289 void dobegin(void); 290 void doend(int status); 291 int nextrecord(wchar_t *buf, FILE *fp); 292 wchar_t *defrecord(wchar_t *bp, int lim, FILE *fp); 293 wchar_t *charrecord(wchar_t *bp, int lim, FILE *fp); 294 wchar_t *multirecord(wchar_t *bp, int lim, FILE *fp); 295 wchar_t *whitefield(wchar_t **endp); 296 wchar_t *blackfield(wchar_t **endp); 297 wchar_t *refield(wchar_t **endp); 298 void s_print(NODE *np); 299 void s_prf(NODE *np); 300 size_t xprintf(NODE *np, FILE *fp, wchar_t **cp); 301 void awkclose(OFILE *op); 302 303 /* Global functions -- awk3.c */ 304 void strassign(NODE *np, STRING string, int flags, size_t length); 305 NODE *nassign(NODE *np, NODE *value); 306 NODE *assign(NODE *np, NODE *value); 307 void delarray(NODE *np); 308 NODE *node(int type, NODE *left, NODE *right); 309 NODE *intnode(INT i); 310 NODE *realnode(REAL r); 311 NODE *stringnode(STRING str, int aflag, size_t wcslen); 312 NODE *vlookup(wchar_t *name, int nocreate); 313 NODE *emptynode(int type, size_t nlength); 314 void freenode(NODE *np); 315 void execute(NODE *np); 316 INT exprint(NODE *np); 317 REAL exprreal(NODE *np); 318 STRING exprstring(NODE *np); 319 STRING strsave(wchar_t *string); 320 NODE *exprreduce(NODE *np); 321 NODE *getlist(NODE **npp); 322 NODE *symwalk(int *buckp, NODE **npp); 323 REGEXP getregexp(NODE *np); 324 void addsymtab(NODE *np); 325 void delsymtab(NODE *np, int fflag); 326 NODE * finstall(LOCCHARP name, FUNCTION f, int type); 327 void kinstall(LOCCHARP name, int type); 328 void fieldsplit(void); 329 void promote(NODE *); 330 331 332 333 334 335 336 337 /* Global functions -- awk4.c */ 338 NODE *f_exp(NODE *np); 339 NODE *f_int(NODE *np); 340 NODE *f_log(NODE *np); 341 NODE *f_sqrt(NODE *np); 342 NODE *f_getline(NODE *np); 343 NODE *f_index(NODE *np); 344 NODE *f_length(NODE *np); 345 NODE *f_split(NODE *np); 346 NODE *f_sprintf(NODE *np); 347 NODE *f_substr(NODE *np); 348 NODE *f_rand(NODE *np); 349 NODE *f_srand(NODE *np); 350 NODE *f_sin(NODE *np); 351 NODE *f_cos(NODE *np); 352 NODE *f_atan2(NODE *np); 353 NODE *f_sub(NODE *np); 354 NODE *f_gsub(NODE *np); 355 NODE *f_match(NODE *np); 356 NODE *f_system(NODE *np); 357 NODE *f_ord(NODE *np); 358 NODE *f_tolower(NODE *np); 359 NODE *f_toupper(NODE *np); 360 NODE *f_close(NODE *np); 361 NODE *f_asort(NODE *np); 362 363 /* In awk0.c */ 364 365 366 367 extern wchar_t _null[]; 368 extern char r[]; 369 extern char w[]; 370 extern wchar_t s_OFMT[]; 371 extern wchar_t s_CONVFMT[]; 372 extern wchar_t s_NR[]; 373 extern wchar_t s_NF[]; 374 extern wchar_t s_OFS[]; 375 extern wchar_t s_ORS[]; 376 extern wchar_t s_RS[]; 377 extern wchar_t s_FS[]; 378 extern wchar_t s_FNR[]; 379 extern wchar_t s_SUBSEP[]; 380 extern wchar_t s_ARGC[], s_ARGV[], s_ENVIRON[]; 381 extern wchar_t s_FILENAME[], s_SYMTAB[]; 382 extern wchar_t s_BEGIN[], s_END[], s_next[]; 383 extern wchar_t _begin[], _end[]; 384 extern wchar_t s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[]; 385 extern wchar_t s_split[], s_sprintf[], s_sqrt[], s_substr[]; 386 extern wchar_t s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[]; 387 extern wchar_t s_sub[], s_gsub[], s_match[], s_system[], s_ord[]; 388 extern wchar_t s_toupper[], s_tolower[], s_asort[]; 389 extern wchar_t s_close[]; 390 extern wchar_t redelim; 391 extern unsigned char inprint; 392 extern unsigned char funparm; 393 extern unsigned char splitdone; 394 extern uint_t npattern; 395 extern uint_t nfield; 396 extern uint_t fcount; 397 extern uint_t phase; 398 extern uint_t running; 399 extern uchar_t catterm; 400 extern uint_t lexlast; 401 extern uint_t lineno; 402 extern uchar_t needsplit, needenviron, doing_begin, begin_getline; 403 extern ushort slevel; 404 extern ushort loopexit; 405 extern wchar_t radixpoint; 406 extern REGEXP resep; 407 extern RESERVED reserved[]; 408 extern RESFUNC resfuncs[]; 409 extern long NIOSTREAM; /* Maximum open I/O streams */ 410 extern OFILE *ofiles; 411 extern wchar_t *linebuf; 412 extern size_t lbuflen; 413 extern char interr[]; 414 extern char nomem[]; 415 extern NODE *symtab[NBUCKET]; 416 extern NODE *yytree; 417 extern NODE *freelist; 418 extern wchar_t *(*awkrecord)(wchar_t *, int, FILE *); 419 extern wchar_t *(*awkfield)(wchar_t **); 420 421 extern NODE *constant; 422 extern NODE *const0; 423 extern NODE *const1; 424 extern NODE *constundef; 425 extern NODE *field0; 426 extern NODE *incNR; 427 extern NODE *incFNR; 428 extern NODE *clrFNR; 429 extern NODE *ARGVsubi; 430 extern NODE *varNR; 431 extern NODE *varFNR; 432 extern NODE *varNF; 433 extern NODE *varOFMT; 434 extern NODE *varCONVFMT; 435 extern NODE *varOFS; 436 extern NODE *varORS; 437 extern NODE *varFS; 438 extern NODE *varRS; 439 extern NODE *varARGC; 440 extern NODE *varSUBSEP; 441 extern NODE *varENVIRON; 442 extern NODE *varSYMTAB; 443 extern NODE *varFILENAME; 444 extern NODE *curnode; 445 extern NODE *inc_oper; 446 extern NODE *asn_oper; 447 448 extern char *mbunconvert(wchar_t *); 449 extern wchar_t *mbstowcsdup(char *); 450 extern char *wcstombsdup(wchar_t *); 451 extern void awkerr(char *, ...); 452 /* 453 * The following defines the expected max length in chars of a printed number. 454 * This should be the longest expected size for any type of number 455 * ie. float, long etc. This number is used to calculate the approximate 456 * number of chars needed to hold the number. 457 */ 458 #ifdef M_NUMSIZE 459 #define NUMSIZE M_NUMSIZE 460 #else 461 #define NUMSIZE 30 462 #endif 463 464 #define M_MB_L(s) L##s 465 #ifdef __STDC__ 466 #define ANSI(x) x 467 #define _VOID void /* Used in VOID *malloc() */ 468 #else 469 #define const 470 #define signed 471 #define volatile 472 #define ANSI(x) () 473 #define _VOID char /* Used in _VOID *malloc() */ 474 #endif 475 476 #define isWblank(x) (((x) == ' ' || (x) == '\t') ? 1 : 0 ) 477 478 479 /* 480 * Wide character version of regular expression functions. 481 */ 482 #define REGWMATCH_T int_regwmatch_t 483 #define REGWCOMP int_regwcomp 484 #define REGWEXEC int_regwexec 485 #define REGWDOSUBA int_regwdosuba 486 487 typedef struct { 488 const wchar_t *rm_sp, *rm_ep; 489 regoff_t rm_so, rm_eo; 490 } int_regwmatch_t; 491 492 extern int int_regwcomp(regex_t *, const wchar_t *, int); 493 extern int int_regwexec(const regex_t *, const wchar_t *, size_t, 494 int_regwmatch_t *, int); 495 extern int int_regwdosuba(regex_t *, const wchar_t *, 496 const wchar_t *, wchar_t **, int, int *); 497