1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * awk -- common header file. 28 * 29 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 30 * 31 * This version uses the POSIX.2 compatible <regex.h> routines. 32 * 33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 34 * 35 */ 36 37 #ifndef AWK_H 38 #define AWK_H 39 40 #include <stdio.h> 41 #include <ctype.h> 42 #include <string.h> 43 #include <math.h> 44 #include <limits.h> 45 #include <stdlib.h> 46 #include <regex.h> 47 #include <errno.h> 48 #include <sys/types.h> 49 #include <locale.h> 50 #include <wchar.h> 51 #include <widec.h> 52 53 #define YYMAXDEPTH 300 /* Max # of productions (used by yacc) */ 54 #define YYSSIZE 300 /* Size of State/Value stacks (MKS YACC) */ 55 #define MAXDIGINT 19 /* Number of digits in an INT */ 56 #define FNULL ((FILE *)0) 57 #define NNULL ((NODE *)0) 58 #define SNULL ((STRING)0) 59 #define LARGE INT_MAX /* Large integer */ 60 #define NPFILE 32 /* Number of -[fl] options allowed */ 61 #define NRECUR 3000 /* Maximum recursion depth */ 62 63 #define M_LDATA 1 64 #ifdef M_LDATA 65 #define NLINE 20000 /* Longest input record */ 66 #define NFIELD 4000 /* Number of fields allowed */ 67 #define NBUCKET 1024 /* # of symtab buckets (power of 2) */ 68 #else 69 #define NLINE 2048 /* Longest input record */ 70 #define NFIELD 1024 /* Number of fields allowed */ 71 #define NBUCKET 256 /* # of symtab buckets (power of 2) */ 72 #endif 73 74 #define NSNODE 40 /* Number of cached nodes */ 75 #define NCONTEXT 50 /* Amount of context for error msgs */ 76 #define hashbuck(n) ((n)&(NBUCKET-1)) 77 #if BSD 78 /* 79 * A speedup for BSD. Use their routines which are 80 * already optimised. Note that BSD bcopy does not 81 * return a value. 82 */ 83 int bcmp(); 84 #define memcmp(b1, b2, n) bcmp(b1, b2, n) 85 void bcopy(); 86 #define memcpy(b1, b2, n) bcopy(b2, b1, (int)n) 87 #endif /* BSD */ 88 #define vlook(n) vlookup(n, 0) 89 90 /* 91 * Basic AWK internal types. 92 */ 93 typedef double REAL; 94 typedef long long INT; 95 typedef wchar_t *STRING; 96 typedef struct NODE *(*FUNCTION)(struct NODE *np); 97 typedef void *REGEXP; 98 99 /* 100 * Node in the AWK interpreter expression tree. 101 */ 102 typedef struct NODE { 103 ushort_t n_type; 104 struct NODE *n_next; /* Symbol table/PARM link */ 105 ushort_t n_flags; /* Node flags, type */ 106 107 108 109 110 union { 111 struct { 112 ushort_t N_hash; /* Full hash value */ 113 struct NODE *N_alink; /* Array link */ 114 union { 115 struct { 116 STRING N_string; 117 size_t N_strlen; 118 } n_str; 119 INT N_int; 120 REAL N_real; 121 FUNCTION N_function; 122 struct NODE *N_ufunc; 123 } n_tun; 124 wchar_t N_name[1]; 125 } n_term; 126 struct { 127 struct NODE *N_left; 128 struct NODE *N_right; 129 ushort_t N_lineno; 130 } n_op; 131 struct { 132 struct NODE *N_left; /* Used for fliplist */ 133 struct NODE *N_right; 134 REGEXP N_regexp; /* Regular expression */ 135 } n_re; 136 } n_un; 137 } NODE; 138 139 /* 140 * Definitions to make the node access much easier. 141 */ 142 #define n_hash n_un.n_term.N_hash /* full hash value is sym tbl */ 143 #define n_scope n_un.n_term.N_hash /* local variable scope level */ 144 #define n_alink n_un.n_term.N_alink /* link to array list */ 145 #define n_string n_un.n_term.n_tun.n_str.N_string 146 #define n_strlen n_un.n_term.n_tun.n_str.N_strlen 147 #define n_int n_un.n_term.n_tun.N_int 148 #define n_real n_un.n_term.n_tun.N_real 149 #define n_function n_un.n_term.n_tun.N_function 150 #define n_ufunc n_un.n_term.n_tun.N_ufunc 151 #define n_name n_un.n_term.N_name 152 #define n_left n_un.n_op.N_left 153 #define n_right n_un.n_op.N_right 154 #define n_lineno n_un.n_op.N_lineno 155 #define n_keywtype n_un.n_op.N_lineno 156 #define n_regexp n_un.n_re.N_regexp 157 /* 158 * Compress the types that are actually used in the final tree 159 * to save space in the intermediate file. Allows 1 byte to 160 * represent all types 161 */ 162 163 164 165 166 167 168 169 /* 170 * n_flags bit assignments. 171 */ 172 #define FALLOC 0x01 /* Allocated node */ 173 #define FSTATIC 0x00 /* Not allocated */ 174 #define FMATCH 0x02 /* pattern,pattern (first part matches) */ 175 #define FSPECIAL 0x04 /* Special pre-computed variable */ 176 #define FINARRAY 0x08 /* NODE installed in N_alink array list */ 177 #define FNOALLOC 0x10 /* mark node FALLOC, but don't malloc */ 178 #define FSENSE 0x20 /* Sense if string looks like INT/REAL */ 179 #define FSAVE (FSPECIAL|FINARRAY) /* assign leaves on */ 180 181 #define FINT 0x40 /* Node has integer type */ 182 #define FREAL 0x80 /* Node has real type */ 183 #define FSTRING 0x100 /* Node has string type */ 184 #define FNONTOK 0x200 /* Node has non-token type */ 185 #define FVINT 0x400 /* Node looks like an integer */ 186 #define FVREAL 0x800 /* Node looks like a real number */ 187 #define FLARRAY 0x1000 /* Local array node */ 188 189 /* 190 * n_flags macros 191 * These work when given an argument of np->n_flags 192 */ 193 #define isleaf(f) (!((f)&FNONTOK)) 194 #define isstring(f) ((f)&FSTRING) 195 #define isastring(f) (((f)&(FSTRING|FALLOC)) == (FSTRING|FALLOC)) 196 #define isnumber(f) ((f)&(FINT|FVINT|FREAL|FVREAL)) 197 #define isreal(f) ((f)&(FREAL|FVREAL)) 198 #define isint(f) ((f)&(FINT|FVINT)) 199 200 /* 201 * Prototype file size is defined in awksize.h 202 */ 203 204 205 206 207 208 /* 209 * Awkrun prototype default name 210 */ 211 #if defined(DOS) 212 #if defined(__386__) 213 #define AWK_PROTOTYPE M_ETCDIR(awkrunf.dos) 214 #define AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos) 215 #else 216 #define AWK_PROTOTYPE M_ETCDIR(awkrun.dos) 217 #define AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos) 218 #endif 219 #elif defined(OS2) 220 #define AWK_PROTOTYPE M_ETCDIR(awkrun.os2) 221 #elif defined(NT) 222 #define AWK_PROTOTYPE M_ETCDIR(awkrun.nt) 223 #else 224 #define AWK_PROTOTYPE M_ETCDIR(awkrun.mod) 225 #endif 226 227 /* 228 * This is a kludge that gets around a bug in compact & large 229 * models under DOS. It also makes the generated 230 * code faster even if there wasn't a bug. UNIX people: try 231 * to ignore these noisy "near" declarations. 232 */ 233 #ifndef DOS 234 #define near 235 #endif 236 237 typedef wchar_t near *LOCCHARP; /* pointer to local strings */ 238 /* 239 * Form of builtin symbols 240 * This should be a union because only one of r_ivalue 241 * and r_svalue is needed, but (alas) unions cannot be 242 * initialised. 243 */ 244 typedef struct RESERVED { 245 LOCCHARP r_name; 246 int r_type; /* Type of node */ 247 INT r_ivalue; /* Integer value or wcslen(r_svalue) */ 248 STRING r_svalue; /* String value */ 249 } RESERVED; 250 251 /* 252 * Table of builtin functions. 253 */ 254 typedef struct RESFUNC { 255 LOCCHARP rf_name; 256 int rf_type; /* FUNC || GETLINE */ 257 FUNCTION rf_func; /* Function pointer */ 258 } RESFUNC; 259 260 /* 261 * Structure holding list of open files. 262 */ 263 typedef struct OFILE { 264 ushort_t f_mode; /* Open mode: WRITE, APPEND, PIPE */ 265 FILE *f_fp; /* File pointer if open */ 266 char *f_name; /* Remembered file name */ 267 } OFILE; 268 269 /* Global functions -- awk.y */ 270 int yyparse(void); 271 272 /* Global functions -- awk1.c */ 273 #ifdef __WATCOMC__ 274 #pragma aux yyerror aborts; 275 #pragma aux awkerr aborts; 276 #pragma aux awkperr aborts; 277 #endif 278 int yyerror(const char *msg, ...); 279 void awkerr(const char *fmt, ...) __NORETURN; 280 void awkperr(const char *fmt, ...); 281 void uexit(NODE *); 282 int yylex(void); 283 NODE *renode(wchar_t *restr); 284 wchar_t *emalloc(unsigned); 285 wchar_t *erealloc(wchar_t *, unsigned); 286 287 /* Global functions -- awk2.c */ 288 void awk(void); 289 void dobegin(void); 290 void doend(int status) __NORETURN; 291 int nextrecord(wchar_t *buf, FILE *fp); 292 wchar_t *defrecord(wchar_t *bp, int lim, FILE *fp); 293 wchar_t *charrecord(wchar_t *bp, int lim, FILE *fp); 294 wchar_t *multirecord(wchar_t *bp, int lim, FILE *fp); 295 wchar_t *whitefield(wchar_t **endp); 296 wchar_t *blackfield(wchar_t **endp); 297 wchar_t *refield(wchar_t **endp); 298 void s_print(NODE *np); 299 void s_prf(NODE *np); 300 size_t xprintf(NODE *np, FILE *fp, wchar_t **cp); 301 void awkclose(OFILE *op); 302 303 /* Global functions -- awk3.c */ 304 void strassign(NODE *np, STRING string, int flags, size_t length); 305 NODE *nassign(NODE *np, NODE *value); 306 NODE *assign(NODE *np, NODE *value); 307 void delarray(NODE *np); 308 NODE *node(int type, NODE *left, NODE *right); 309 NODE *intnode(INT i); 310 NODE *realnode(REAL r); 311 NODE *stringnode(STRING str, int aflag, size_t wcslen); 312 NODE *vlookup(wchar_t *name, int nocreate); 313 NODE *emptynode(int type, size_t nlength); 314 void freenode(NODE *np); 315 void execute(NODE *np); 316 INT exprint(NODE *np); 317 REAL exprreal(NODE *np); 318 STRING exprstring(NODE *np); 319 STRING strsave(wchar_t *string); 320 NODE *exprreduce(NODE *np); 321 NODE *getlist(NODE **npp); 322 NODE *symwalk(int *buckp, NODE **npp); 323 REGEXP getregexp(NODE *np); 324 void addsymtab(NODE *np); 325 void delsymtab(NODE *np, int fflag); 326 NODE * finstall(LOCCHARP name, FUNCTION f, int type); 327 void kinstall(LOCCHARP name, int type); 328 void fieldsplit(void); 329 void promote(NODE *); 330 331 332 333 334 335 336 337 /* Global functions -- awk4.c */ 338 NODE *f_exp(NODE *np); 339 NODE *f_int(NODE *np); 340 NODE *f_log(NODE *np); 341 NODE *f_sqrt(NODE *np); 342 NODE *f_getline(NODE *np); 343 NODE *f_index(NODE *np); 344 NODE *f_length(NODE *np); 345 NODE *f_split(NODE *np); 346 NODE *f_sprintf(NODE *np); 347 NODE *f_substr(NODE *np); 348 NODE *f_rand(NODE *np); 349 NODE *f_srand(NODE *np); 350 NODE *f_sin(NODE *np); 351 NODE *f_cos(NODE *np); 352 NODE *f_atan2(NODE *np); 353 NODE *f_sub(NODE *np); 354 NODE *f_gsub(NODE *np); 355 NODE *f_match(NODE *np); 356 NODE *f_system(NODE *np); 357 NODE *f_ord(NODE *np); 358 NODE *f_tolower(NODE *np); 359 NODE *f_toupper(NODE *np); 360 NODE *f_close(NODE *np); 361 NODE *f_asort(NODE *np); 362 363 /* In awk0.c */ 364 365 366 367 extern wchar_t _null[]; 368 extern char r[]; 369 extern char w[]; 370 extern wchar_t s_OFMT[]; 371 extern wchar_t s_CONVFMT[]; 372 extern wchar_t s_NR[]; 373 extern wchar_t s_NF[]; 374 extern wchar_t s_OFS[]; 375 extern wchar_t s_ORS[]; 376 extern wchar_t s_RS[]; 377 extern wchar_t s_FS[]; 378 extern wchar_t s_FNR[]; 379 extern wchar_t s_SUBSEP[]; 380 extern wchar_t s_ARGC[], s_ARGV[], s_ENVIRON[]; 381 extern wchar_t s_FILENAME[], s_SYMTAB[]; 382 extern wchar_t s_BEGIN[], s_END[], s_next[]; 383 extern wchar_t _begin[], _end[]; 384 extern wchar_t s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[]; 385 extern wchar_t s_split[], s_sprintf[], s_sqrt[], s_substr[]; 386 extern wchar_t s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[]; 387 extern wchar_t s_sub[], s_gsub[], s_match[], s_system[], s_ord[]; 388 extern wchar_t s_toupper[], s_tolower[], s_asort[]; 389 extern wchar_t s_close[]; 390 extern wchar_t redelim; 391 extern unsigned char inprint; 392 extern unsigned char funparm; 393 extern unsigned char splitdone; 394 extern uint_t npattern; 395 extern uint_t nfield; 396 extern uint_t fcount; 397 extern uint_t phase; 398 extern uint_t running; 399 extern uchar_t catterm; 400 extern uint_t lexlast; 401 extern uint_t lineno; 402 extern uchar_t needsplit, needenviron, doing_begin, begin_getline; 403 extern ushort_t slevel; 404 extern ushort_t loopexit; 405 extern wchar_t radixpoint; 406 extern REGEXP resep; 407 extern RESERVED reserved[]; 408 extern RESFUNC resfuncs[]; 409 extern long NIOSTREAM; /* Maximum open I/O streams */ 410 extern OFILE *ofiles; 411 extern wchar_t *linebuf; 412 extern size_t lbuflen; 413 extern char interr[]; 414 extern char nomem[]; 415 extern NODE *symtab[NBUCKET]; 416 extern NODE *yytree; 417 extern NODE *freelist; 418 extern wchar_t *(*awkrecord)(wchar_t *, int, FILE *); 419 extern wchar_t *(*awkfield)(wchar_t **); 420 421 extern NODE *constant; 422 extern NODE *const0; 423 extern NODE *const1; 424 extern NODE *constundef; 425 extern NODE *field0; 426 extern NODE *incNR; 427 extern NODE *incFNR; 428 extern NODE *clrFNR; 429 extern NODE *ARGVsubi; 430 extern NODE *varNR; 431 extern NODE *varFNR; 432 extern NODE *varNF; 433 extern NODE *varOFMT; 434 extern NODE *varCONVFMT; 435 extern NODE *varOFS; 436 extern NODE *varORS; 437 extern NODE *varFS; 438 extern NODE *varRS; 439 extern NODE *varARGC; 440 extern NODE *varSUBSEP; 441 extern NODE *varENVIRON; 442 extern NODE *varSYMTAB; 443 extern NODE *varFILENAME; 444 extern NODE *curnode; 445 extern NODE *inc_oper; 446 extern NODE *asn_oper; 447 448 extern char *mbunconvert(wchar_t *); 449 extern wchar_t *mbstowcsdup(char *); 450 extern char *wcstombsdup(wchar_t *); 451 /* 452 * The following defines the expected max length in chars of a printed number. 453 * This should be the longest expected size for any type of number 454 * ie. float, long etc. This number is used to calculate the approximate 455 * number of chars needed to hold the number. 456 */ 457 #ifdef M_NUMSIZE 458 #define NUMSIZE M_NUMSIZE 459 #else 460 #define NUMSIZE 30 461 #endif 462 463 #define M_MB_L(s) L##s 464 #ifdef __STDC__ 465 #define ANSI(x) x 466 #else 467 #define const 468 #define signed 469 #define volatile 470 #define ANSI(x) () 471 #endif 472 473 #define isWblank(x) (((x) == ' ' || (x) == '\t') ? 1 : 0) 474 475 476 /* 477 * Wide character version of regular expression functions. 478 */ 479 #define REGWMATCH_T int_regwmatch_t 480 #define REGWCOMP int_regwcomp 481 #define REGWEXEC int_regwexec 482 #define REGWFREE int_regwfree 483 #define REGWERROR int_regwerror 484 #define REGWDOSUBA int_regwdosuba 485 486 typedef struct { 487 const wchar_t *rm_sp, *rm_ep; 488 regoff_t rm_so, rm_eo; 489 } int_regwmatch_t; 490 491 extern int int_regwcomp(REGEXP *, const wchar_t *); 492 extern int int_regwexec(REGEXP, const wchar_t *, size_t, 493 int_regwmatch_t *, int); 494 extern void int_regwfree(REGEXP); 495 extern size_t int_regwerror(int, REGEXP, char *, size_t); 496 extern int int_regwdosuba(REGEXP, const wchar_t *, 497 const wchar_t *, wchar_t **, int, int *); 498 499 #endif /* AWK_H */ 500