1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * awk -- common header file. 28 * 29 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 30 * 31 * This version uses the POSIX.2 compatible <regex.h> routines. 32 * 33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 34 * 35 */ 36 37 #pragma ident "%Z%%M% %I% %E% SMI" 38 39 #include <stdio.h> 40 #include <ctype.h> 41 #include <string.h> 42 #include <math.h> 43 #include <limits.h> 44 #include <stdlib.h> 45 #include <regex.h> 46 #include <errno.h> 47 #include <sys/types.h> 48 #include <locale.h> 49 #include <wchar.h> 50 #include <widec.h> 51 52 #define YYMAXDEPTH 300 /* Max # of productions (used by yacc) */ 53 #define YYSSIZE 300 /* Size of State/Value stacks (MKS YACC) */ 54 #define MAXDIGINT 19 /* Number of digits in an INT */ 55 #define FNULL ((FILE *)0) 56 #define NNULL ((NODE *)0) 57 #define SNULL ((STRING)0) 58 #define LARGE INT_MAX /* Large integer */ 59 #define NPFILE 32 /* Number of -[fl] options allowed */ 60 #define NRECUR 3000 /* Maximum recursion depth */ 61 62 #define M_LDATA 1 63 #ifdef M_LDATA 64 #define NLINE 20000 /* Longest input record */ 65 #define NFIELD 4000 /* Number of fields allowed */ 66 #define NBUCKET 1024 /* # of symtab buckets (power of 2) */ 67 #else 68 #define NLINE 2048 /* Longest input record */ 69 #define NFIELD 1024 /* Number of fields allowed */ 70 #define NBUCKET 256 /* # of symtab buckets (power of 2) */ 71 #endif 72 73 #define NSNODE 40 /* Number of cached nodes */ 74 #define NCONTEXT 50 /* Amount of context for error msgs */ 75 #define hashbuck(n) ((n)&(NBUCKET-1)) 76 #if BSD 77 /* 78 * A speedup for BSD. Use their routines which are 79 * already optimised. Note that BSD bcopy does not 80 * return a value. 81 */ 82 int bcmp(); 83 #define memcmp(b1, b2, n) bcmp(b1, b2, n) 84 void bcopy(); 85 #define memcpy(b1, b2, n) bcopy(b2, b1, (int)n) 86 #endif /* BSD */ 87 #define vlook(n) vlookup(n, 0) 88 89 /* 90 * Basic AWK internal types. 91 */ 92 typedef double REAL; 93 typedef long long INT; 94 typedef wchar_t *STRING; 95 typedef struct NODE *(*FUNCTION)(struct NODE *np); 96 typedef void *REGEXP; 97 98 /* 99 * Node in the AWK interpreter expression tree. 100 */ 101 typedef struct NODE { 102 ushort_t n_type; 103 struct NODE *n_next; /* Symbol table/PARM link */ 104 ushort_t n_flags; /* Node flags, type */ 105 106 107 108 109 union { 110 struct { 111 ushort_t N_hash; /* Full hash value */ 112 struct NODE *N_alink; /* Array link */ 113 union { 114 struct { 115 STRING N_string; 116 size_t N_strlen; 117 } n_str; 118 INT N_int; 119 REAL N_real; 120 FUNCTION N_function; 121 struct NODE *N_ufunc; 122 } n_tun; 123 wchar_t N_name[1]; 124 } n_term; 125 struct { 126 struct NODE *N_left; 127 struct NODE *N_right; 128 ushort_t N_lineno; 129 } n_op; 130 struct { 131 struct NODE *N_left; /* Used for fliplist */ 132 struct NODE *N_right; 133 REGEXP N_regexp; /* Regular expression */ 134 } n_re; 135 } n_un; 136 } NODE; 137 138 /* 139 * Definitions to make the node access much easier. 140 */ 141 #define n_hash n_un.n_term.N_hash /* full hash value is sym tbl */ 142 #define n_scope n_un.n_term.N_hash /* local variable scope level */ 143 #define n_alink n_un.n_term.N_alink /* link to array list */ 144 #define n_string n_un.n_term.n_tun.n_str.N_string 145 #define n_strlen n_un.n_term.n_tun.n_str.N_strlen 146 #define n_int n_un.n_term.n_tun.N_int 147 #define n_real n_un.n_term.n_tun.N_real 148 #define n_function n_un.n_term.n_tun.N_function 149 #define n_ufunc n_un.n_term.n_tun.N_ufunc 150 #define n_name n_un.n_term.N_name 151 #define n_left n_un.n_op.N_left 152 #define n_right n_un.n_op.N_right 153 #define n_lineno n_un.n_op.N_lineno 154 #define n_keywtype n_un.n_op.N_lineno 155 #define n_regexp n_un.n_re.N_regexp 156 /* 157 * Compress the types that are actually used in the final tree 158 * to save space in the intermediate file. Allows 1 byte to 159 * represent all types 160 */ 161 162 163 164 165 166 167 168 /* 169 * n_flags bit assignments. 170 */ 171 #define FALLOC 0x01 /* Allocated node */ 172 #define FSTATIC 0x00 /* Not allocated */ 173 #define FMATCH 0x02 /* pattern,pattern (first part matches) */ 174 #define FSPECIAL 0x04 /* Special pre-computed variable */ 175 #define FINARRAY 0x08 /* NODE installed in N_alink array list */ 176 #define FNOALLOC 0x10 /* mark node FALLOC, but don't malloc */ 177 #define FSENSE 0x20 /* Sense if string looks like INT/REAL */ 178 #define FSAVE (FSPECIAL|FINARRAY) /* assign leaves on */ 179 180 #define FINT 0x40 /* Node has integer type */ 181 #define FREAL 0x80 /* Node has real type */ 182 #define FSTRING 0x100 /* Node has string type */ 183 #define FNONTOK 0x200 /* Node has non-token type */ 184 #define FVINT 0x400 /* Node looks like an integer */ 185 #define FVREAL 0x800 /* Node looks like a real number */ 186 #define FLARRAY 0x1000 /* Local array node */ 187 188 /* 189 * n_flags macros 190 * These work when given an argument of np->n_flags 191 */ 192 #define isleaf(f) (!((f)&FNONTOK)) 193 #define isstring(f) ((f)&FSTRING) 194 #define isastring(f) (((f)&(FSTRING|FALLOC)) == (FSTRING|FALLOC)) 195 #define isnumber(f) ((f)&(FINT|FVINT|FREAL|FVREAL)) 196 #define isreal(f) ((f)&(FREAL|FVREAL)) 197 #define isint(f) ((f)&(FINT|FVINT)) 198 199 /* 200 * Prototype file size is defined in awksize.h 201 */ 202 203 204 205 206 207 /* 208 * Awkrun prototype default name 209 */ 210 #if defined(DOS) 211 #if defined(__386__) 212 #define AWK_PROTOTYPE M_ETCDIR(awkrunf.dos) 213 #define AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos) 214 #else 215 #define AWK_PROTOTYPE M_ETCDIR(awkrun.dos) 216 #define AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos) 217 #endif 218 #elif defined(OS2) 219 #define AWK_PROTOTYPE M_ETCDIR(awkrun.os2) 220 #elif defined(NT) 221 #define AWK_PROTOTYPE M_ETCDIR(awkrun.nt) 222 #else 223 #define AWK_PROTOTYPE M_ETCDIR(awkrun.mod) 224 #endif 225 226 /* 227 * This is a kludge that gets around a bug in compact & large 228 * models under DOS. It also makes the generated 229 * code faster even if there wasn't a bug. UNIX people: try 230 * to ignore these noisy "near" declarations. 231 */ 232 #ifndef DOS 233 #define near 234 #endif 235 236 typedef wchar_t near *LOCCHARP; /* pointer to local strings */ 237 /* 238 * Form of builtin symbols 239 * This should be a union because only one of r_ivalue 240 * and r_svalue is needed, but (alas) unions cannot be 241 * initialised. 242 */ 243 typedef struct RESERVED { 244 LOCCHARP r_name; 245 int r_type; /* Type of node */ 246 INT r_ivalue; /* Integer value or wcslen(r_svalue) */ 247 STRING r_svalue; /* String value */ 248 } RESERVED; 249 250 /* 251 * Table of builtin functions. 252 */ 253 typedef struct RESFUNC { 254 LOCCHARP rf_name; 255 int rf_type; /* FUNC || GETLINE */ 256 FUNCTION rf_func; /* Function pointer */ 257 } RESFUNC; 258 259 /* 260 * Structure holding list of open files. 261 */ 262 typedef struct OFILE { 263 ushort_t f_mode; /* Open mode: WRITE, APPEND, PIPE */ 264 FILE *f_fp; /* File pointer if open */ 265 char *f_name; /* Remembered file name */ 266 } OFILE; 267 268 /* Global functions -- awk.y */ 269 int yyparse(void); 270 271 /* Global functions -- awk1.c */ 272 #ifdef __WATCOMC__ 273 #pragma aux yyerror aborts; 274 #pragma aux awkerr aborts; 275 #pragma aux awkperr aborts; 276 #endif 277 void yyerror(char *msg, ...); 278 void awkerr(char *fmt, ...); 279 void awkperr(char *fmt, ...); 280 void uexit(NODE *); 281 int yylex(void); 282 NODE *renode(wchar_t *restr); 283 wchar_t *emalloc(unsigned); 284 wchar_t *erealloc(wchar_t *, unsigned); 285 286 /* Global functions -- awk2.c */ 287 void awk(void); 288 void dobegin(void); 289 void doend(int status); 290 int nextrecord(wchar_t *buf, FILE *fp); 291 wchar_t *defrecord(wchar_t *bp, int lim, FILE *fp); 292 wchar_t *charrecord(wchar_t *bp, int lim, FILE *fp); 293 wchar_t *multirecord(wchar_t *bp, int lim, FILE *fp); 294 wchar_t *whitefield(wchar_t **endp); 295 wchar_t *blackfield(wchar_t **endp); 296 wchar_t *refield(wchar_t **endp); 297 void s_print(NODE *np); 298 void s_prf(NODE *np); 299 size_t xprintf(NODE *np, FILE *fp, wchar_t **cp); 300 void awkclose(OFILE *op); 301 302 /* Global functions -- awk3.c */ 303 void strassign(NODE *np, STRING string, int flags, size_t length); 304 NODE *nassign(NODE *np, NODE *value); 305 NODE *assign(NODE *np, NODE *value); 306 void delarray(NODE *np); 307 NODE *node(int type, NODE *left, NODE *right); 308 NODE *intnode(INT i); 309 NODE *realnode(REAL r); 310 NODE *stringnode(STRING str, int aflag, size_t wcslen); 311 NODE *vlookup(wchar_t *name, int nocreate); 312 NODE *emptynode(int type, size_t nlength); 313 void freenode(NODE *np); 314 void execute(NODE *np); 315 INT exprint(NODE *np); 316 REAL exprreal(NODE *np); 317 STRING exprstring(NODE *np); 318 STRING strsave(wchar_t *string); 319 NODE *exprreduce(NODE *np); 320 NODE *getlist(NODE **npp); 321 NODE *symwalk(int *buckp, NODE **npp); 322 REGEXP getregexp(NODE *np); 323 void addsymtab(NODE *np); 324 void delsymtab(NODE *np, int fflag); 325 NODE * finstall(LOCCHARP name, FUNCTION f, int type); 326 void kinstall(LOCCHARP name, int type); 327 void fieldsplit(void); 328 void promote(NODE *); 329 330 331 332 333 334 335 336 /* Global functions -- awk4.c */ 337 NODE *f_exp(NODE *np); 338 NODE *f_int(NODE *np); 339 NODE *f_log(NODE *np); 340 NODE *f_sqrt(NODE *np); 341 NODE *f_getline(NODE *np); 342 NODE *f_index(NODE *np); 343 NODE *f_length(NODE *np); 344 NODE *f_split(NODE *np); 345 NODE *f_sprintf(NODE *np); 346 NODE *f_substr(NODE *np); 347 NODE *f_rand(NODE *np); 348 NODE *f_srand(NODE *np); 349 NODE *f_sin(NODE *np); 350 NODE *f_cos(NODE *np); 351 NODE *f_atan2(NODE *np); 352 NODE *f_sub(NODE *np); 353 NODE *f_gsub(NODE *np); 354 NODE *f_match(NODE *np); 355 NODE *f_system(NODE *np); 356 NODE *f_ord(NODE *np); 357 NODE *f_tolower(NODE *np); 358 NODE *f_toupper(NODE *np); 359 NODE *f_close(NODE *np); 360 NODE *f_asort(NODE *np); 361 362 /* In awk0.c */ 363 364 365 366 extern wchar_t _null[]; 367 extern char r[]; 368 extern char w[]; 369 extern wchar_t s_OFMT[]; 370 extern wchar_t s_CONVFMT[]; 371 extern wchar_t s_NR[]; 372 extern wchar_t s_NF[]; 373 extern wchar_t s_OFS[]; 374 extern wchar_t s_ORS[]; 375 extern wchar_t s_RS[]; 376 extern wchar_t s_FS[]; 377 extern wchar_t s_FNR[]; 378 extern wchar_t s_SUBSEP[]; 379 extern wchar_t s_ARGC[], s_ARGV[], s_ENVIRON[]; 380 extern wchar_t s_FILENAME[], s_SYMTAB[]; 381 extern wchar_t s_BEGIN[], s_END[], s_next[]; 382 extern wchar_t _begin[], _end[]; 383 extern wchar_t s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[]; 384 extern wchar_t s_split[], s_sprintf[], s_sqrt[], s_substr[]; 385 extern wchar_t s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[]; 386 extern wchar_t s_sub[], s_gsub[], s_match[], s_system[], s_ord[]; 387 extern wchar_t s_toupper[], s_tolower[], s_asort[]; 388 extern wchar_t s_close[]; 389 extern wchar_t redelim; 390 extern unsigned char inprint; 391 extern unsigned char funparm; 392 extern unsigned char splitdone; 393 extern uint_t npattern; 394 extern uint_t nfield; 395 extern uint_t fcount; 396 extern uint_t phase; 397 extern uint_t running; 398 extern uchar_t catterm; 399 extern uint_t lexlast; 400 extern uint_t lineno; 401 extern uchar_t needsplit, needenviron, doing_begin, begin_getline; 402 extern ushort_t slevel; 403 extern ushort_t loopexit; 404 extern wchar_t radixpoint; 405 extern REGEXP resep; 406 extern RESERVED reserved[]; 407 extern RESFUNC resfuncs[]; 408 extern long NIOSTREAM; /* Maximum open I/O streams */ 409 extern OFILE *ofiles; 410 extern wchar_t *linebuf; 411 extern size_t lbuflen; 412 extern char interr[]; 413 extern char nomem[]; 414 extern NODE *symtab[NBUCKET]; 415 extern NODE *yytree; 416 extern NODE *freelist; 417 extern wchar_t *(*awkrecord)(wchar_t *, int, FILE *); 418 extern wchar_t *(*awkfield)(wchar_t **); 419 420 extern NODE *constant; 421 extern NODE *const0; 422 extern NODE *const1; 423 extern NODE *constundef; 424 extern NODE *field0; 425 extern NODE *incNR; 426 extern NODE *incFNR; 427 extern NODE *clrFNR; 428 extern NODE *ARGVsubi; 429 extern NODE *varNR; 430 extern NODE *varFNR; 431 extern NODE *varNF; 432 extern NODE *varOFMT; 433 extern NODE *varCONVFMT; 434 extern NODE *varOFS; 435 extern NODE *varORS; 436 extern NODE *varFS; 437 extern NODE *varRS; 438 extern NODE *varARGC; 439 extern NODE *varSUBSEP; 440 extern NODE *varENVIRON; 441 extern NODE *varSYMTAB; 442 extern NODE *varFILENAME; 443 extern NODE *curnode; 444 extern NODE *inc_oper; 445 extern NODE *asn_oper; 446 447 extern char *mbunconvert(wchar_t *); 448 extern wchar_t *mbstowcsdup(char *); 449 extern char *wcstombsdup(wchar_t *); 450 extern void awkerr(char *, ...); 451 /* 452 * The following defines the expected max length in chars of a printed number. 453 * This should be the longest expected size for any type of number 454 * ie. float, long etc. This number is used to calculate the approximate 455 * number of chars needed to hold the number. 456 */ 457 #ifdef M_NUMSIZE 458 #define NUMSIZE M_NUMSIZE 459 #else 460 #define NUMSIZE 30 461 #endif 462 463 #define M_MB_L(s) L##s 464 #ifdef __STDC__ 465 #define ANSI(x) x 466 #else 467 #define const 468 #define signed 469 #define volatile 470 #define ANSI(x) () 471 #endif 472 473 #define isWblank(x) (((x) == ' ' || (x) == '\t') ? 1 : 0) 474 475 476 /* 477 * Wide character version of regular expression functions. 478 */ 479 #define REGWMATCH_T int_regwmatch_t 480 #define REGWCOMP int_regwcomp 481 #define REGWEXEC int_regwexec 482 #define REGWFREE int_regwfree 483 #define REGWERROR int_regwerror 484 #define REGWDOSUBA int_regwdosuba 485 486 typedef struct { 487 const wchar_t *rm_sp, *rm_ep; 488 regoff_t rm_so, rm_eo; 489 } int_regwmatch_t; 490 491 extern int int_regwcomp(REGEXP *, const wchar_t *); 492 extern int int_regwexec(REGEXP, const wchar_t *, size_t, 493 int_regwmatch_t *, int); 494 extern void int_regwfree(REGEXP); 495 extern size_t int_regwerror(int, REGEXP, char *, size_t); 496 extern int int_regwdosuba(REGEXP, const wchar_t *, 497 const wchar_t *, wchar_t **, int, int *); 498