1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * awk -- common header file. 28 * 29 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 30 * 31 * This version uses the POSIX.2 compatible <regex.h> routines. 32 * 33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 34 * 35 */ 36 37 #include <stdio.h> 38 #include <ctype.h> 39 #include <string.h> 40 #include <math.h> 41 #include <limits.h> 42 #include <stdlib.h> 43 #include <regex.h> 44 #include <errno.h> 45 #include <sys/types.h> 46 #include <locale.h> 47 #include <wchar.h> 48 #include <widec.h> 49 50 #define YYMAXDEPTH 300 /* Max # of productions (used by yacc) */ 51 #define YYSSIZE 300 /* Size of State/Value stacks (MKS YACC) */ 52 #define MAXDIGINT 19 /* Number of digits in an INT */ 53 #define FNULL ((FILE *)0) 54 #define NNULL ((NODE *)0) 55 #define SNULL ((STRING)0) 56 #define LARGE INT_MAX /* Large integer */ 57 #define NPFILE 32 /* Number of -[fl] options allowed */ 58 #define NRECUR 3000 /* Maximum recursion depth */ 59 60 #define M_LDATA 1 61 #ifdef M_LDATA 62 #define NLINE 20000 /* Longest input record */ 63 #define NFIELD 4000 /* Number of fields allowed */ 64 #define NBUCKET 1024 /* # of symtab buckets (power of 2) */ 65 #else 66 #define NLINE 2048 /* Longest input record */ 67 #define NFIELD 1024 /* Number of fields allowed */ 68 #define NBUCKET 256 /* # of symtab buckets (power of 2) */ 69 #endif 70 71 #define NSNODE 40 /* Number of cached nodes */ 72 #define NCONTEXT 50 /* Amount of context for error msgs */ 73 #define hashbuck(n) ((n)&(NBUCKET-1)) 74 #if BSD 75 /* 76 * A speedup for BSD. Use their routines which are 77 * already optimised. Note that BSD bcopy does not 78 * return a value. 79 */ 80 int bcmp(); 81 #define memcmp(b1, b2, n) bcmp(b1, b2, n) 82 void bcopy(); 83 #define memcpy(b1, b2, n) bcopy(b2, b1, (int)n) 84 #endif /* BSD */ 85 #define vlook(n) vlookup(n, 0) 86 87 /* 88 * Basic AWK internal types. 89 */ 90 typedef double REAL; 91 typedef long long INT; 92 typedef wchar_t *STRING; 93 typedef struct NODE *(*FUNCTION)(struct NODE *np); 94 typedef void *REGEXP; 95 96 /* 97 * Node in the AWK interpreter expression tree. 98 */ 99 typedef struct NODE { 100 ushort_t n_type; 101 struct NODE *n_next; /* Symbol table/PARM link */ 102 ushort_t n_flags; /* Node flags, type */ 103 104 105 106 107 union { 108 struct { 109 ushort_t N_hash; /* Full hash value */ 110 struct NODE *N_alink; /* Array link */ 111 union { 112 struct { 113 STRING N_string; 114 size_t N_strlen; 115 } n_str; 116 INT N_int; 117 REAL N_real; 118 FUNCTION N_function; 119 struct NODE *N_ufunc; 120 } n_tun; 121 wchar_t N_name[1]; 122 } n_term; 123 struct { 124 struct NODE *N_left; 125 struct NODE *N_right; 126 ushort_t N_lineno; 127 } n_op; 128 struct { 129 struct NODE *N_left; /* Used for fliplist */ 130 struct NODE *N_right; 131 REGEXP N_regexp; /* Regular expression */ 132 } n_re; 133 } n_un; 134 } NODE; 135 136 /* 137 * Definitions to make the node access much easier. 138 */ 139 #define n_hash n_un.n_term.N_hash /* full hash value is sym tbl */ 140 #define n_scope n_un.n_term.N_hash /* local variable scope level */ 141 #define n_alink n_un.n_term.N_alink /* link to array list */ 142 #define n_string n_un.n_term.n_tun.n_str.N_string 143 #define n_strlen n_un.n_term.n_tun.n_str.N_strlen 144 #define n_int n_un.n_term.n_tun.N_int 145 #define n_real n_un.n_term.n_tun.N_real 146 #define n_function n_un.n_term.n_tun.N_function 147 #define n_ufunc n_un.n_term.n_tun.N_ufunc 148 #define n_name n_un.n_term.N_name 149 #define n_left n_un.n_op.N_left 150 #define n_right n_un.n_op.N_right 151 #define n_lineno n_un.n_op.N_lineno 152 #define n_keywtype n_un.n_op.N_lineno 153 #define n_regexp n_un.n_re.N_regexp 154 /* 155 * Compress the types that are actually used in the final tree 156 * to save space in the intermediate file. Allows 1 byte to 157 * represent all types 158 */ 159 160 161 162 163 164 165 166 /* 167 * n_flags bit assignments. 168 */ 169 #define FALLOC 0x01 /* Allocated node */ 170 #define FSTATIC 0x00 /* Not allocated */ 171 #define FMATCH 0x02 /* pattern,pattern (first part matches) */ 172 #define FSPECIAL 0x04 /* Special pre-computed variable */ 173 #define FINARRAY 0x08 /* NODE installed in N_alink array list */ 174 #define FNOALLOC 0x10 /* mark node FALLOC, but don't malloc */ 175 #define FSENSE 0x20 /* Sense if string looks like INT/REAL */ 176 #define FSAVE (FSPECIAL|FINARRAY) /* assign leaves on */ 177 178 #define FINT 0x40 /* Node has integer type */ 179 #define FREAL 0x80 /* Node has real type */ 180 #define FSTRING 0x100 /* Node has string type */ 181 #define FNONTOK 0x200 /* Node has non-token type */ 182 #define FVINT 0x400 /* Node looks like an integer */ 183 #define FVREAL 0x800 /* Node looks like a real number */ 184 #define FLARRAY 0x1000 /* Local array node */ 185 186 /* 187 * n_flags macros 188 * These work when given an argument of np->n_flags 189 */ 190 #define isleaf(f) (!((f)&FNONTOK)) 191 #define isstring(f) ((f)&FSTRING) 192 #define isastring(f) (((f)&(FSTRING|FALLOC)) == (FSTRING|FALLOC)) 193 #define isnumber(f) ((f)&(FINT|FVINT|FREAL|FVREAL)) 194 #define isreal(f) ((f)&(FREAL|FVREAL)) 195 #define isint(f) ((f)&(FINT|FVINT)) 196 197 /* 198 * Prototype file size is defined in awksize.h 199 */ 200 201 202 203 204 205 /* 206 * Awkrun prototype default name 207 */ 208 #if defined(DOS) 209 #if defined(__386__) 210 #define AWK_PROTOTYPE M_ETCDIR(awkrunf.dos) 211 #define AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos) 212 #else 213 #define AWK_PROTOTYPE M_ETCDIR(awkrun.dos) 214 #define AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos) 215 #endif 216 #elif defined(OS2) 217 #define AWK_PROTOTYPE M_ETCDIR(awkrun.os2) 218 #elif defined(NT) 219 #define AWK_PROTOTYPE M_ETCDIR(awkrun.nt) 220 #else 221 #define AWK_PROTOTYPE M_ETCDIR(awkrun.mod) 222 #endif 223 224 /* 225 * This is a kludge that gets around a bug in compact & large 226 * models under DOS. It also makes the generated 227 * code faster even if there wasn't a bug. UNIX people: try 228 * to ignore these noisy "near" declarations. 229 */ 230 #ifndef DOS 231 #define near 232 #endif 233 234 typedef wchar_t near *LOCCHARP; /* pointer to local strings */ 235 /* 236 * Form of builtin symbols 237 * This should be a union because only one of r_ivalue 238 * and r_svalue is needed, but (alas) unions cannot be 239 * initialised. 240 */ 241 typedef struct RESERVED { 242 LOCCHARP r_name; 243 int r_type; /* Type of node */ 244 INT r_ivalue; /* Integer value or wcslen(r_svalue) */ 245 STRING r_svalue; /* String value */ 246 } RESERVED; 247 248 /* 249 * Table of builtin functions. 250 */ 251 typedef struct RESFUNC { 252 LOCCHARP rf_name; 253 int rf_type; /* FUNC || GETLINE */ 254 FUNCTION rf_func; /* Function pointer */ 255 } RESFUNC; 256 257 /* 258 * Structure holding list of open files. 259 */ 260 typedef struct OFILE { 261 ushort_t f_mode; /* Open mode: WRITE, APPEND, PIPE */ 262 FILE *f_fp; /* File pointer if open */ 263 char *f_name; /* Remembered file name */ 264 } OFILE; 265 266 /* Global functions -- awk.y */ 267 int yyparse(void); 268 269 /* Global functions -- awk1.c */ 270 #ifdef __WATCOMC__ 271 #pragma aux yyerror aborts; 272 #pragma aux awkerr aborts; 273 #pragma aux awkperr aborts; 274 #endif 275 void yyerror(char *msg, ...); 276 void awkerr(char *fmt, ...) __NORETURN; 277 void awkperr(char *fmt, ...); 278 void uexit(NODE *); 279 int yylex(void); 280 NODE *renode(wchar_t *restr); 281 wchar_t *emalloc(unsigned); 282 wchar_t *erealloc(wchar_t *, unsigned); 283 284 /* Global functions -- awk2.c */ 285 void awk(void); 286 void dobegin(void); 287 void doend(int status) __NORETURN; 288 int nextrecord(wchar_t *buf, FILE *fp); 289 wchar_t *defrecord(wchar_t *bp, int lim, FILE *fp); 290 wchar_t *charrecord(wchar_t *bp, int lim, FILE *fp); 291 wchar_t *multirecord(wchar_t *bp, int lim, FILE *fp); 292 wchar_t *whitefield(wchar_t **endp); 293 wchar_t *blackfield(wchar_t **endp); 294 wchar_t *refield(wchar_t **endp); 295 void s_print(NODE *np); 296 void s_prf(NODE *np); 297 size_t xprintf(NODE *np, FILE *fp, wchar_t **cp); 298 void awkclose(OFILE *op); 299 300 /* Global functions -- awk3.c */ 301 void strassign(NODE *np, STRING string, int flags, size_t length); 302 NODE *nassign(NODE *np, NODE *value); 303 NODE *assign(NODE *np, NODE *value); 304 void delarray(NODE *np); 305 NODE *node(int type, NODE *left, NODE *right); 306 NODE *intnode(INT i); 307 NODE *realnode(REAL r); 308 NODE *stringnode(STRING str, int aflag, size_t wcslen); 309 NODE *vlookup(wchar_t *name, int nocreate); 310 NODE *emptynode(int type, size_t nlength); 311 void freenode(NODE *np); 312 void execute(NODE *np); 313 INT exprint(NODE *np); 314 REAL exprreal(NODE *np); 315 STRING exprstring(NODE *np); 316 STRING strsave(wchar_t *string); 317 NODE *exprreduce(NODE *np); 318 NODE *getlist(NODE **npp); 319 NODE *symwalk(int *buckp, NODE **npp); 320 REGEXP getregexp(NODE *np); 321 void addsymtab(NODE *np); 322 void delsymtab(NODE *np, int fflag); 323 NODE * finstall(LOCCHARP name, FUNCTION f, int type); 324 void kinstall(LOCCHARP name, int type); 325 void fieldsplit(void); 326 void promote(NODE *); 327 328 329 330 331 332 333 334 /* Global functions -- awk4.c */ 335 NODE *f_exp(NODE *np); 336 NODE *f_int(NODE *np); 337 NODE *f_log(NODE *np); 338 NODE *f_sqrt(NODE *np); 339 NODE *f_getline(NODE *np); 340 NODE *f_index(NODE *np); 341 NODE *f_length(NODE *np); 342 NODE *f_split(NODE *np); 343 NODE *f_sprintf(NODE *np); 344 NODE *f_substr(NODE *np); 345 NODE *f_rand(NODE *np); 346 NODE *f_srand(NODE *np); 347 NODE *f_sin(NODE *np); 348 NODE *f_cos(NODE *np); 349 NODE *f_atan2(NODE *np); 350 NODE *f_sub(NODE *np); 351 NODE *f_gsub(NODE *np); 352 NODE *f_match(NODE *np); 353 NODE *f_system(NODE *np); 354 NODE *f_ord(NODE *np); 355 NODE *f_tolower(NODE *np); 356 NODE *f_toupper(NODE *np); 357 NODE *f_close(NODE *np); 358 NODE *f_asort(NODE *np); 359 360 /* In awk0.c */ 361 362 363 364 extern wchar_t _null[]; 365 extern char r[]; 366 extern char w[]; 367 extern wchar_t s_OFMT[]; 368 extern wchar_t s_CONVFMT[]; 369 extern wchar_t s_NR[]; 370 extern wchar_t s_NF[]; 371 extern wchar_t s_OFS[]; 372 extern wchar_t s_ORS[]; 373 extern wchar_t s_RS[]; 374 extern wchar_t s_FS[]; 375 extern wchar_t s_FNR[]; 376 extern wchar_t s_SUBSEP[]; 377 extern wchar_t s_ARGC[], s_ARGV[], s_ENVIRON[]; 378 extern wchar_t s_FILENAME[], s_SYMTAB[]; 379 extern wchar_t s_BEGIN[], s_END[], s_next[]; 380 extern wchar_t _begin[], _end[]; 381 extern wchar_t s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[]; 382 extern wchar_t s_split[], s_sprintf[], s_sqrt[], s_substr[]; 383 extern wchar_t s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[]; 384 extern wchar_t s_sub[], s_gsub[], s_match[], s_system[], s_ord[]; 385 extern wchar_t s_toupper[], s_tolower[], s_asort[]; 386 extern wchar_t s_close[]; 387 extern wchar_t redelim; 388 extern unsigned char inprint; 389 extern unsigned char funparm; 390 extern unsigned char splitdone; 391 extern uint_t npattern; 392 extern uint_t nfield; 393 extern uint_t fcount; 394 extern uint_t phase; 395 extern uint_t running; 396 extern uchar_t catterm; 397 extern uint_t lexlast; 398 extern uint_t lineno; 399 extern uchar_t needsplit, needenviron, doing_begin, begin_getline; 400 extern ushort_t slevel; 401 extern ushort_t loopexit; 402 extern wchar_t radixpoint; 403 extern REGEXP resep; 404 extern RESERVED reserved[]; 405 extern RESFUNC resfuncs[]; 406 extern long NIOSTREAM; /* Maximum open I/O streams */ 407 extern OFILE *ofiles; 408 extern wchar_t *linebuf; 409 extern size_t lbuflen; 410 extern char interr[]; 411 extern char nomem[]; 412 extern NODE *symtab[NBUCKET]; 413 extern NODE *yytree; 414 extern NODE *freelist; 415 extern wchar_t *(*awkrecord)(wchar_t *, int, FILE *); 416 extern wchar_t *(*awkfield)(wchar_t **); 417 418 extern NODE *constant; 419 extern NODE *const0; 420 extern NODE *const1; 421 extern NODE *constundef; 422 extern NODE *field0; 423 extern NODE *incNR; 424 extern NODE *incFNR; 425 extern NODE *clrFNR; 426 extern NODE *ARGVsubi; 427 extern NODE *varNR; 428 extern NODE *varFNR; 429 extern NODE *varNF; 430 extern NODE *varOFMT; 431 extern NODE *varCONVFMT; 432 extern NODE *varOFS; 433 extern NODE *varORS; 434 extern NODE *varFS; 435 extern NODE *varRS; 436 extern NODE *varARGC; 437 extern NODE *varSUBSEP; 438 extern NODE *varENVIRON; 439 extern NODE *varSYMTAB; 440 extern NODE *varFILENAME; 441 extern NODE *curnode; 442 extern NODE *inc_oper; 443 extern NODE *asn_oper; 444 445 extern char *mbunconvert(wchar_t *); 446 extern wchar_t *mbstowcsdup(char *); 447 extern char *wcstombsdup(wchar_t *); 448 extern void awkerr(char *, ...); 449 /* 450 * The following defines the expected max length in chars of a printed number. 451 * This should be the longest expected size for any type of number 452 * ie. float, long etc. This number is used to calculate the approximate 453 * number of chars needed to hold the number. 454 */ 455 #ifdef M_NUMSIZE 456 #define NUMSIZE M_NUMSIZE 457 #else 458 #define NUMSIZE 30 459 #endif 460 461 #define M_MB_L(s) L##s 462 #ifdef __STDC__ 463 #define ANSI(x) x 464 #else 465 #define const 466 #define signed 467 #define volatile 468 #define ANSI(x) () 469 #endif 470 471 #define isWblank(x) (((x) == ' ' || (x) == '\t') ? 1 : 0) 472 473 474 /* 475 * Wide character version of regular expression functions. 476 */ 477 #define REGWMATCH_T int_regwmatch_t 478 #define REGWCOMP int_regwcomp 479 #define REGWEXEC int_regwexec 480 #define REGWFREE int_regwfree 481 #define REGWERROR int_regwerror 482 #define REGWDOSUBA int_regwdosuba 483 484 typedef struct { 485 const wchar_t *rm_sp, *rm_ep; 486 regoff_t rm_so, rm_eo; 487 } int_regwmatch_t; 488 489 extern int int_regwcomp(REGEXP *, const wchar_t *); 490 extern int int_regwexec(REGEXP, const wchar_t *, size_t, 491 int_regwmatch_t *, int); 492 extern void int_regwfree(REGEXP); 493 extern size_t int_regwerror(int, REGEXP, char *, size_t); 494 extern int int_regwdosuba(REGEXP, const wchar_t *, 495 const wchar_t *, wchar_t **, int, int *); 496