xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk0.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Awk -- data definitions
 *
 * Copyright (c) 1995 by Sun Microsystems, Inc.
 *
 * Copyright 1986, 1992 by Mortice Kern Systems Inc.  All rights reserved.
 *
 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
 */
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate #include "awk.h"
33*7c478bd9Sstevel@tonic-gate #include "y.tab.h"
34*7c478bd9Sstevel@tonic-gate 
/*
 * This file contains data definitions for awk.
 */
38*7c478bd9Sstevel@tonic-gate 
39*7c478bd9Sstevel@tonic-gate RESERVED	reserved[] = {
40*7c478bd9Sstevel@tonic-gate 	s_BEGIN, KEYWORD, BEGIN, NULL,
41*7c478bd9Sstevel@tonic-gate 	s_END, KEYWORD, END, NULL,
42*7c478bd9Sstevel@tonic-gate 	M_MB_L("break"), KEYWORD, BREAK, NULL,
43*7c478bd9Sstevel@tonic-gate 	M_MB_L("continue"), KEYWORD, CONTINUE, NULL,
44*7c478bd9Sstevel@tonic-gate 	M_MB_L("for"), KEYWORD, FOR, NULL,
45*7c478bd9Sstevel@tonic-gate 	M_MB_L("if"), KEYWORD, IF, NULL,
46*7c478bd9Sstevel@tonic-gate 	M_MB_L("else"), KEYWORD, ELSE, NULL,
47*7c478bd9Sstevel@tonic-gate 	M_MB_L("in"), KEYWORD, IN, NULL,
48*7c478bd9Sstevel@tonic-gate 	s_next, KEYWORD, NEXT, NULL,
49*7c478bd9Sstevel@tonic-gate 	M_MB_L("while"), KEYWORD, WHILE, NULL,
50*7c478bd9Sstevel@tonic-gate 	M_MB_L("do"), KEYWORD, DO, NULL,
51*7c478bd9Sstevel@tonic-gate 	M_MB_L("print"), KEYWORD, PRINT, NULL,
52*7c478bd9Sstevel@tonic-gate 	M_MB_L("printf"), KEYWORD, PRINTF, NULL,
53*7c478bd9Sstevel@tonic-gate 	M_MB_L("return"), KEYWORD, RETURN, NULL,
54*7c478bd9Sstevel@tonic-gate 	M_MB_L("func"), KEYWORD, DEFFUNC, NULL,
55*7c478bd9Sstevel@tonic-gate 	M_MB_L("function"), KEYWORD, DEFFUNC, NULL,
56*7c478bd9Sstevel@tonic-gate 	M_MB_L("delete"), KEYWORD, DELETE, NULL,
57*7c478bd9Sstevel@tonic-gate 	M_MB_L("exit"), KEYWORD, EXIT, NULL,
58*7c478bd9Sstevel@tonic-gate 	s_FILENAME, VAR, 0, _null,
59*7c478bd9Sstevel@tonic-gate 	s_NF, SVAR, 0, NULL,
60*7c478bd9Sstevel@tonic-gate 	s_NR, VAR, 0, NULL,
61*7c478bd9Sstevel@tonic-gate 	s_FS, SVAR, 1, M_MB_L(" "),
62*7c478bd9Sstevel@tonic-gate 	s_OFS, VAR, 1, M_MB_L(" "),
63*7c478bd9Sstevel@tonic-gate 	s_ORS, VAR, 1, M_MB_L("\n"),
64*7c478bd9Sstevel@tonic-gate 	s_OFMT, VAR, 4, M_MB_L("%.6g"),
65*7c478bd9Sstevel@tonic-gate 	s_CONVFMT, VAR, 4, M_MB_L("%.6g"),
66*7c478bd9Sstevel@tonic-gate 	s_RS, SVAR, 1, M_MB_L("\n"),
67*7c478bd9Sstevel@tonic-gate 	s_FNR, VAR, 0, NULL,
68*7c478bd9Sstevel@tonic-gate 	s_SUBSEP, VAR, 1,
69*7c478bd9Sstevel@tonic-gate #ifdef	M_AWK_SUBSEP
70*7c478bd9Sstevel@tonic-gate 	M_AWK_SUBSEP,
71*7c478bd9Sstevel@tonic-gate #else
72*7c478bd9Sstevel@tonic-gate 	M_MB_L("\34"),
73*7c478bd9Sstevel@tonic-gate #endif
74*7c478bd9Sstevel@tonic-gate 	s_ARGC, SVAR, 0, NULL,
75*7c478bd9Sstevel@tonic-gate 	(LOCCHARP)NULL
76*7c478bd9Sstevel@tonic-gate };
77*7c478bd9Sstevel@tonic-gate 
78*7c478bd9Sstevel@tonic-gate RESFUNC	resfuncs[] = {
79*7c478bd9Sstevel@tonic-gate 	s_exp, FUNC, f_exp,
80*7c478bd9Sstevel@tonic-gate 	s_getline, GETLINE, f_getline,
81*7c478bd9Sstevel@tonic-gate 	s_index, FUNC, f_index,
82*7c478bd9Sstevel@tonic-gate 	s_int, FUNC, f_int,
83*7c478bd9Sstevel@tonic-gate 	s_length, FUNC, f_length,
84*7c478bd9Sstevel@tonic-gate 	s_log, FUNC, f_log,
85*7c478bd9Sstevel@tonic-gate 	s_split, FUNC, f_split,
86*7c478bd9Sstevel@tonic-gate 	s_sprintf, FUNC, f_sprintf,
87*7c478bd9Sstevel@tonic-gate 	s_sqrt, FUNC, f_sqrt,
88*7c478bd9Sstevel@tonic-gate 	s_substr, FUNC, f_substr,
89*7c478bd9Sstevel@tonic-gate 	s_rand, FUNC, f_rand,
90*7c478bd9Sstevel@tonic-gate 	s_srand, FUNC, f_srand,
91*7c478bd9Sstevel@tonic-gate 	s_sin, FUNC, f_sin,
92*7c478bd9Sstevel@tonic-gate 	s_cos, FUNC, f_cos,
93*7c478bd9Sstevel@tonic-gate 	s_atan2, FUNC, f_atan2,
94*7c478bd9Sstevel@tonic-gate 	s_sub, FUNC, f_sub,
95*7c478bd9Sstevel@tonic-gate 	s_gsub, FUNC, f_gsub,
96*7c478bd9Sstevel@tonic-gate 	s_match, FUNC, f_match,
97*7c478bd9Sstevel@tonic-gate 	s_system, FUNC, f_system,
98*7c478bd9Sstevel@tonic-gate 	s_ord, FUNC, f_ord,
99*7c478bd9Sstevel@tonic-gate 	s_toupper, FUNC, f_toupper,
100*7c478bd9Sstevel@tonic-gate 	s_tolower, FUNC, f_tolower,
101*7c478bd9Sstevel@tonic-gate 	s_asort, FUNC, f_asort,
102*7c478bd9Sstevel@tonic-gate 	s_close, FUNC, f_close,
103*7c478bd9Sstevel@tonic-gate 	(LOCCHARP)NULL
104*7c478bd9Sstevel@tonic-gate };
105*7c478bd9Sstevel@tonic-gate 
106*7c478bd9Sstevel@tonic-gate 
107*7c478bd9Sstevel@tonic-gate OFILE	*ofiles;			/* Remembered open files (print) */
108*7c478bd9Sstevel@tonic-gate long	NIOSTREAM = 512;		/* max num of open file descriptors */
109*7c478bd9Sstevel@tonic-gate 
110*7c478bd9Sstevel@tonic-gate 
111*7c478bd9Sstevel@tonic-gate 
112*7c478bd9Sstevel@tonic-gate 
113*7c478bd9Sstevel@tonic-gate wchar_t	_null[] = M_MB_L("");		/* Empty string */
/* File mode strings, kept as writable char arrays rather than literals. */
char	r[] = "r";			/* Read file mode */
char	w[] = "w";			/* Write file mode */
116*7c478bd9Sstevel@tonic-gate wchar_t	s_OFMT[] = M_MB_L("OFMT");	/* Name of "OFMT" variable */
117*7c478bd9Sstevel@tonic-gate wchar_t	s_CONVFMT[] = M_MB_L("CONVFMT"); /* Name of "CONVFMT" variable */
118*7c478bd9Sstevel@tonic-gate wchar_t	s_NR[] = M_MB_L("NR");		/* Name of "NR" variable */
119*7c478bd9Sstevel@tonic-gate wchar_t	s_NF[] = M_MB_L("NF");		/* Name of "NF" variable */
120*7c478bd9Sstevel@tonic-gate wchar_t	s_OFS[] = M_MB_L("OFS");	/* Name of "OFS" variable */
121*7c478bd9Sstevel@tonic-gate wchar_t	s_ORS[] = M_MB_L("ORS");	/* Name of "ORS" variable */
122*7c478bd9Sstevel@tonic-gate wchar_t	s_RS[] = M_MB_L("RS");		/* Name of "RS" variable */
123*7c478bd9Sstevel@tonic-gate wchar_t	s_FS[] = M_MB_L("FS");		/* Name of "FS" variable */
124*7c478bd9Sstevel@tonic-gate wchar_t	s_FNR[] = M_MB_L("FNR");	/* Name of "FNR" variable */
125*7c478bd9Sstevel@tonic-gate wchar_t	s_SUBSEP[] = M_MB_L("SUBSEP");	/* Name of "SUBSEP" variable */
126*7c478bd9Sstevel@tonic-gate wchar_t	s_ARGC[] = M_MB_L("ARGC");	/* Name of "ARGC" variable */
127*7c478bd9Sstevel@tonic-gate wchar_t	s_ARGV[] = M_MB_L("ARGV");	/* Name of "ARGV" array variable */
128*7c478bd9Sstevel@tonic-gate wchar_t	s_ENVIRON[] = M_MB_L("ENVIRON"); /* Name of "ENVIRON" array variable */
129*7c478bd9Sstevel@tonic-gate wchar_t	s_FILENAME[] = M_MB_L("FILENAME"); /* Name of "FILENAME" variable */
130*7c478bd9Sstevel@tonic-gate wchar_t	s_SYMTAB[] = M_MB_L("SYMTAB");	/* Name of "SYMTAB" variable */
131*7c478bd9Sstevel@tonic-gate wchar_t	s_BEGIN[] = M_MB_L("BEGIN");	/* Name of "BEGIN" action */
132*7c478bd9Sstevel@tonic-gate wchar_t	s_END[] = M_MB_L("END");	/* Name of "END" action */
133*7c478bd9Sstevel@tonic-gate wchar_t	s_next[] = M_MB_L("next");	/* Name of "next" keyword */
134*7c478bd9Sstevel@tonic-gate wchar_t	s_exp[] = M_MB_L("exp");	/* Name of "exp" function */
135*7c478bd9Sstevel@tonic-gate wchar_t	s_getline[] = M_MB_L("getline"); /* Name of "getline" function */
136*7c478bd9Sstevel@tonic-gate wchar_t	s_index[] = M_MB_L("index");	/* Name of "index" function */
137*7c478bd9Sstevel@tonic-gate wchar_t	s_int[] = M_MB_L("int");	/* Name of "int" function */
138*7c478bd9Sstevel@tonic-gate wchar_t	s_length[] = M_MB_L("length");	/* Name of "length" function */
139*7c478bd9Sstevel@tonic-gate wchar_t	s_log[] = M_MB_L("log");	/* Name of "log" function */
140*7c478bd9Sstevel@tonic-gate wchar_t	s_split[] = M_MB_L("split");	/* Name of "split" function */
141*7c478bd9Sstevel@tonic-gate wchar_t	s_sprintf[] = M_MB_L("sprintf"); /* Name of "sprintf" function */
142*7c478bd9Sstevel@tonic-gate wchar_t	s_sqrt[] = M_MB_L("sqrt");	/* Name of "sqrt" function */
143*7c478bd9Sstevel@tonic-gate wchar_t	s_substr[] = M_MB_L("substr");	/* Name of "substr" function */
144*7c478bd9Sstevel@tonic-gate wchar_t	s_rand[] = M_MB_L("rand");	/* Name of "rand" function */
145*7c478bd9Sstevel@tonic-gate wchar_t	s_srand[] = M_MB_L("srand");	/* Name of "srand" function */
146*7c478bd9Sstevel@tonic-gate wchar_t	s_sin[] = M_MB_L("sin");	/* Name of "sin" function */
147*7c478bd9Sstevel@tonic-gate wchar_t	s_cos[] = M_MB_L("cos");	/* Name of "cos" function */
148*7c478bd9Sstevel@tonic-gate wchar_t	s_atan2[] = M_MB_L("atan2");	/* Name of "atan" function */
149*7c478bd9Sstevel@tonic-gate wchar_t	s_sub[] = M_MB_L("sub");	/* Name of "sub" function */
150*7c478bd9Sstevel@tonic-gate wchar_t	s_gsub[] = M_MB_L("gsub");	/* Name of "gsub" function */
151*7c478bd9Sstevel@tonic-gate wchar_t	s_match[] = M_MB_L("match");	/* Name of "match" function */
152*7c478bd9Sstevel@tonic-gate wchar_t	s_system[] = M_MB_L("system");	/* Name of "system" function */
153*7c478bd9Sstevel@tonic-gate wchar_t	s_ord[] = M_MB_L("ord");	/* Name of "ord" function */
154*7c478bd9Sstevel@tonic-gate wchar_t	s_toupper[] = M_MB_L("toupper"); /* Name of "toupper" function */
155*7c478bd9Sstevel@tonic-gate wchar_t	s_tolower[] = M_MB_L("tolower"); /* Name of "tolower" function */
156*7c478bd9Sstevel@tonic-gate wchar_t	s_asort[] = M_MB_L("asort");	/* Name of "asort" function */
157*7c478bd9Sstevel@tonic-gate wchar_t	s_close[] = M_MB_L("close");	/* Name of "close" function */
158*7c478bd9Sstevel@tonic-gate 
159*7c478bd9Sstevel@tonic-gate wchar_t redelim;			/* Delimiter for regexp (yylex) */
160*7c478bd9Sstevel@tonic-gate uchar_t	inprint;			/* Special meaning for '>' & '|' */
161*7c478bd9Sstevel@tonic-gate uchar_t	funparm;			/* Defining function parameters */
162*7c478bd9Sstevel@tonic-gate uchar_t	splitdone;			/* Line split into fields (fieldbuf) */
163*7c478bd9Sstevel@tonic-gate uint	npattern;			/* Number of non-BEGIN patterns */
164*7c478bd9Sstevel@tonic-gate uint	nfield;				/* Number of fields (if splitdone) */
165*7c478bd9Sstevel@tonic-gate uint	fcount;				/* Field counter (used by blackfield)*/
166*7c478bd9Sstevel@tonic-gate uint	phase;				/* BEGIN, END, or 0 */
167*7c478bd9Sstevel@tonic-gate uint	running = 0;			/* Set if not in compile phase */
168*7c478bd9Sstevel@tonic-gate uchar_t	catterm;			/* Can inject concat or ';' */
169*7c478bd9Sstevel@tonic-gate uint	lexlast = '\n';			/* Last lexical token */
170*7c478bd9Sstevel@tonic-gate uint	lineno = 0;			/* Current programme line number */
171*7c478bd9Sstevel@tonic-gate uchar_t	doing_begin;			/* set if compiling BEGIN block */
172*7c478bd9Sstevel@tonic-gate uchar_t	begin_getline;			/* flags a getline was done in BEGIN */
173*7c478bd9Sstevel@tonic-gate uchar_t	needsplit;			/* Set if $0 must be split when read */
174*7c478bd9Sstevel@tonic-gate uchar_t	needenviron;			/* Set if ENVIRON variable referenced */
175*7c478bd9Sstevel@tonic-gate ushort	slevel;				/* Scope level (0 == root) */
176*7c478bd9Sstevel@tonic-gate ushort	loopexit;			/* Short circuit loop with keyword */
177*7c478bd9Sstevel@tonic-gate wchar_t	radixpoint;			/* soft radix point for I18N */
178*7c478bd9Sstevel@tonic-gate REGEXP	resep;				/* Field separator as regexp */
179*7c478bd9Sstevel@tonic-gate wchar_t	*linebuf = NULL;		/* $0 buffer - malloc'd in awk1.c */
180*7c478bd9Sstevel@tonic-gate size_t	lbuflen;			/* Length of linebuf */
181*7c478bd9Sstevel@tonic-gate 
/*
 * Shared diagnostic message texts.
 *
 * XXX - Make sure to check where this error message is printed
 */
char	interr[] = "internal execution tree error at E string";
char	nomem[] =  "insufficient memory for string storage";
187*7c478bd9Sstevel@tonic-gate NODE	*symtab[NBUCKET];		/* Heads of symbol table buckets */
188*7c478bd9Sstevel@tonic-gate NODE	*yytree;			/* Code tree */
189*7c478bd9Sstevel@tonic-gate NODE	*freelist;			/* Free every pattern {action} line */
190*7c478bd9Sstevel@tonic-gate wchar_t	*(*awkrecord) ANSI((wchar_t *, int, FILE*)) = defrecord;
191*7c478bd9Sstevel@tonic-gate 					/* Function to read a record */
192*7c478bd9Sstevel@tonic-gate wchar_t	*(*awkfield) ANSI((wchar_t **)) = whitefield;
193*7c478bd9Sstevel@tonic-gate 					/* Function to extract a field */
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate /*
196*7c478bd9Sstevel@tonic-gate  * Nodes used to speed up the execution of the
197*7c478bd9Sstevel@tonic-gate  * interpreter.
198*7c478bd9Sstevel@tonic-gate  */
199*7c478bd9Sstevel@tonic-gate NODE	*constant;			/* Node to hold a constant INT */
200*7c478bd9Sstevel@tonic-gate NODE	*const0;			/* Constant INT 0 node */
201*7c478bd9Sstevel@tonic-gate NODE	*const1;			/* Constant INT 1 node */
202*7c478bd9Sstevel@tonic-gate NODE	*constundef;			/* Undefined variable */
203*7c478bd9Sstevel@tonic-gate NODE	*field0;			/* $0 */
204*7c478bd9Sstevel@tonic-gate NODE	*incNR;				/* Code to increment NR variable */
205*7c478bd9Sstevel@tonic-gate NODE	*incFNR;			/* Code to increment FNR variable */
206*7c478bd9Sstevel@tonic-gate NODE	*clrFNR;			/* Zero FNR variable (each file) */
207*7c478bd9Sstevel@tonic-gate NODE	*ARGVsubi;			/* Compute ARGV[i] */
208*7c478bd9Sstevel@tonic-gate NODE	*varNR;				/* Remove search for NR variable */
209*7c478bd9Sstevel@tonic-gate NODE	*varFNR;			/* Don't search for FNR variable */
210*7c478bd9Sstevel@tonic-gate NODE	*varNF;				/* Pointer to NF variable */
211*7c478bd9Sstevel@tonic-gate NODE	*varOFMT;			/* For s_prf */
212*7c478bd9Sstevel@tonic-gate NODE	*varCONVFMT;			/* For internal conv of float to str */
213*7c478bd9Sstevel@tonic-gate NODE	*varOFS;			/* For s_print */
214*7c478bd9Sstevel@tonic-gate NODE	*varORS;			/* For s_print */
215*7c478bd9Sstevel@tonic-gate NODE	*varFS;				/* Field separtor */
216*7c478bd9Sstevel@tonic-gate NODE	*varRS;				/* Record separator */
217*7c478bd9Sstevel@tonic-gate NODE	*varARGC;			/* Quick access to ARGC */
218*7c478bd9Sstevel@tonic-gate NODE	*varSUBSEP;			/* Quick access to SUBSEP */
219*7c478bd9Sstevel@tonic-gate NODE	*varENVIRON;			/* Pointer to ENVIRON variable */
220*7c478bd9Sstevel@tonic-gate NODE	*varSYMTAB;			/* Symbol table special variable */
221*7c478bd9Sstevel@tonic-gate NODE	*varFILENAME;			/* Node for FILENAME variable */
222*7c478bd9Sstevel@tonic-gate NODE	*curnode;			/* Pointer to current line */
223*7c478bd9Sstevel@tonic-gate NODE	*inc_oper;			/* used by INC/DEC in awk3.c */
224*7c478bd9Sstevel@tonic-gate NODE	*asn_oper;			/* used by AADD, etc in awk3.c */
225