xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk0.c (revision ddb365bfc9e868ad24ccdcb0dc91af18b10df082)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Awk -- data definitions
24  *
25  * Copyright (c) 1995 by Sun Microsystems, Inc.
26  *
27  * Copyright 1986, 1992 by Mortice Kern Systems Inc.  All rights reserved.
28  *
29  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
30  */
31 
32 #include "awk.h"
33 #include "y.tab.h"
34 
35 /*
36  * This file contains data definitions for awk.
37  */
38 
/*
 * Table of reserved words and predefined variables.
 *
 * The initializer is written flat, four values per entry: the name,
 * a class (KEYWORD, VAR or SVAR), an integer and a pointer.
 * NOTE(review): in KEYWORD rows the integer is a parser token name
 * (y.tab.h is included above); in VAR/SVAR rows it equals the length of
 * the initial string given in the last column (0 where that is NULL or
 * the empty string) -- confirm the field meanings against the
 * declaration of RESERVED in awk.h.
 *
 * The table is terminated by an entry whose name is a null pointer.
 */
39 RESERVED	reserved[] = {
40 	s_BEGIN, KEYWORD, BEGIN, NULL,
41 	s_END, KEYWORD, END, NULL,
42 	M_MB_L("break"), KEYWORD, BREAK, NULL,
43 	M_MB_L("continue"), KEYWORD, CONTINUE, NULL,
44 	M_MB_L("for"), KEYWORD, FOR, NULL,
45 	M_MB_L("if"), KEYWORD, IF, NULL,
46 	M_MB_L("else"), KEYWORD, ELSE, NULL,
47 	M_MB_L("in"), KEYWORD, IN, NULL,
48 	s_next, KEYWORD, NEXT, NULL,
49 	M_MB_L("while"), KEYWORD, WHILE, NULL,
50 	M_MB_L("do"), KEYWORD, DO, NULL,
51 	M_MB_L("print"), KEYWORD, PRINT, NULL,
52 	M_MB_L("printf"), KEYWORD, PRINTF, NULL,
53 	M_MB_L("return"), KEYWORD, RETURN, NULL,
54 	M_MB_L("func"), KEYWORD, DEFFUNC, NULL,	/* same token as "function" */
55 	M_MB_L("function"), KEYWORD, DEFFUNC, NULL,
56 	M_MB_L("delete"), KEYWORD, DELETE, NULL,
57 	M_MB_L("exit"), KEYWORD, EXIT, NULL,
58 	s_FILENAME, VAR, 0, _null,
59 	s_NF, SVAR, 0, NULL,
60 	s_NR, VAR, 0, NULL,
61 	s_FS, SVAR, 1, M_MB_L(" "),
62 	s_OFS, VAR, 1, M_MB_L(" "),
63 	s_ORS, VAR, 1, M_MB_L("\n"),
64 	s_OFMT, VAR, 4, M_MB_L("%.6g"),
65 	s_CONVFMT, VAR, 4, M_MB_L("%.6g"),
66 	s_RS, SVAR, 1, M_MB_L("\n"),
67 	s_FNR, VAR, 0, NULL,
68 	s_SUBSEP, VAR, 1,	/* 4th value follows: M_AWK_SUBSEP if defined, else "\34" */
69 #ifdef	M_AWK_SUBSEP
70 	M_AWK_SUBSEP,
71 #else
72 	M_MB_L("\34"),
73 #endif
74 	s_ARGC, SVAR, 0, NULL,
75 	(LOCCHARP)NULL		/* end-of-table marker */
76 };
77 
/*
 * Table of built-in functions.
 *
 * The initializer is written flat, three values per entry: the function
 * name, a class (FUNC for every entry except getline, which uses
 * GETLINE) and the matching f_* symbol.
 * NOTE(review): the f_* symbols are presumably the implementing
 * routines -- confirm against the declaration of RESFUNC in awk.h.
 *
 * The table is terminated by an entry whose name is a null pointer.
 */
78 RESFUNC	resfuncs[] = {
79 	s_exp, FUNC, f_exp,
80 	s_getline, GETLINE, f_getline,	/* only entry not classed as FUNC */
81 	s_index, FUNC, f_index,
82 	s_int, FUNC, f_int,
83 	s_length, FUNC, f_length,
84 	s_log, FUNC, f_log,
85 	s_split, FUNC, f_split,
86 	s_sprintf, FUNC, f_sprintf,
87 	s_sqrt, FUNC, f_sqrt,
88 	s_substr, FUNC, f_substr,
89 	s_rand, FUNC, f_rand,
90 	s_srand, FUNC, f_srand,
91 	s_sin, FUNC, f_sin,
92 	s_cos, FUNC, f_cos,
93 	s_atan2, FUNC, f_atan2,
94 	s_sub, FUNC, f_sub,
95 	s_gsub, FUNC, f_gsub,
96 	s_match, FUNC, f_match,
97 	s_system, FUNC, f_system,
98 	s_ord, FUNC, f_ord,
99 	s_toupper, FUNC, f_toupper,
100 	s_tolower, FUNC, f_tolower,
101 	s_asort, FUNC, f_asort,
102 	s_close, FUNC, f_close,
103 	(LOCCHARP)NULL		/* end-of-table marker */
104 };
105 
106 
107 OFILE	*ofiles;			/* Remembered open files (print); not allocated here */
108 long	NIOSTREAM = 512;		/* Max number of open file descriptors (initial value) */
109 
110 
111 
112 
/*
 * Shared string constants.
 *
 * The s_* objects hold the names of the keywords, special variables and
 * built-in functions as wchar_t arrays; several of them are used as the
 * name column of the reserved[] and resfuncs[] tables above.
 * r and w are plain char arrays holding file mode strings.
 */
113 wchar_t	_null[] = M_MB_L("");		/* Empty string */
114 char	r[] = "r";			/* Read file mode */
115 char	w[] = "w";			/* Write file mode */
116 wchar_t	s_OFMT[] = M_MB_L("OFMT");	/* Name of "OFMT" variable */
117 wchar_t	s_CONVFMT[] = M_MB_L("CONVFMT"); /* Name of "CONVFMT" variable */
118 wchar_t	s_NR[] = M_MB_L("NR");		/* Name of "NR" variable */
119 wchar_t	s_NF[] = M_MB_L("NF");		/* Name of "NF" variable */
120 wchar_t	s_OFS[] = M_MB_L("OFS");	/* Name of "OFS" variable */
121 wchar_t	s_ORS[] = M_MB_L("ORS");	/* Name of "ORS" variable */
122 wchar_t	s_RS[] = M_MB_L("RS");		/* Name of "RS" variable */
123 wchar_t	s_FS[] = M_MB_L("FS");		/* Name of "FS" variable */
124 wchar_t	s_FNR[] = M_MB_L("FNR");	/* Name of "FNR" variable */
125 wchar_t	s_SUBSEP[] = M_MB_L("SUBSEP");	/* Name of "SUBSEP" variable */
126 wchar_t	s_ARGC[] = M_MB_L("ARGC");	/* Name of "ARGC" variable */
127 wchar_t	s_ARGV[] = M_MB_L("ARGV");	/* Name of "ARGV" array variable */
128 wchar_t	s_ENVIRON[] = M_MB_L("ENVIRON"); /* Name of "ENVIRON" array variable */
129 wchar_t	s_FILENAME[] = M_MB_L("FILENAME"); /* Name of "FILENAME" variable */
130 wchar_t	s_SYMTAB[] = M_MB_L("SYMTAB");	/* Name of "SYMTAB" variable */
131 wchar_t	s_BEGIN[] = M_MB_L("BEGIN");	/* Name of "BEGIN" action */
132 wchar_t	s_END[] = M_MB_L("END");	/* Name of "END" action */
133 wchar_t	s_next[] = M_MB_L("next");	/* Name of "next" keyword */
134 wchar_t	s_exp[] = M_MB_L("exp");	/* Name of "exp" function */
135 wchar_t	s_getline[] = M_MB_L("getline"); /* Name of "getline" function */
136 wchar_t	s_index[] = M_MB_L("index");	/* Name of "index" function */
137 wchar_t	s_int[] = M_MB_L("int");	/* Name of "int" function */
138 wchar_t	s_length[] = M_MB_L("length");	/* Name of "length" function */
139 wchar_t	s_log[] = M_MB_L("log");	/* Name of "log" function */
140 wchar_t	s_split[] = M_MB_L("split");	/* Name of "split" function */
141 wchar_t	s_sprintf[] = M_MB_L("sprintf"); /* Name of "sprintf" function */
142 wchar_t	s_sqrt[] = M_MB_L("sqrt");	/* Name of "sqrt" function */
143 wchar_t	s_substr[] = M_MB_L("substr");	/* Name of "substr" function */
144 wchar_t	s_rand[] = M_MB_L("rand");	/* Name of "rand" function */
145 wchar_t	s_srand[] = M_MB_L("srand");	/* Name of "srand" function */
146 wchar_t	s_sin[] = M_MB_L("sin");	/* Name of "sin" function */
147 wchar_t	s_cos[] = M_MB_L("cos");	/* Name of "cos" function */
148 wchar_t	s_atan2[] = M_MB_L("atan2");	/* Name of "atan2" function */
149 wchar_t	s_sub[] = M_MB_L("sub");	/* Name of "sub" function */
150 wchar_t	s_gsub[] = M_MB_L("gsub");	/* Name of "gsub" function */
151 wchar_t	s_match[] = M_MB_L("match");	/* Name of "match" function */
152 wchar_t	s_system[] = M_MB_L("system");	/* Name of "system" function */
153 wchar_t	s_ord[] = M_MB_L("ord");	/* Name of "ord" function */
154 wchar_t	s_toupper[] = M_MB_L("toupper"); /* Name of "toupper" function */
155 wchar_t	s_tolower[] = M_MB_L("tolower"); /* Name of "tolower" function */
156 wchar_t	s_asort[] = M_MB_L("asort");	/* Name of "asort" function */
157 wchar_t	s_close[] = M_MB_L("close");	/* Name of "close" function */
158 
/*
 * Lexer, parser and interpreter state.
 *
 * Only running, lexlast, lineno and linebuf carry explicit initializers;
 * the remaining objects have static storage duration and therefore
 * start out as zero.
 */
159 wchar_t redelim;			/* Delimiter for regexp (yylex) */
160 uchar_t	inprint;			/* Special meaning for '>' & '|' */
161 uchar_t	funparm;			/* Defining function parameters */
162 uchar_t	splitdone;			/* Line split into fields (fieldbuf) */
163 uint	npattern;			/* Number of non-BEGIN patterns */
164 uint	nfield;				/* Number of fields (if splitdone) */
165 uint	fcount;				/* Field counter (used by blackfield) */
166 uint	phase;				/* BEGIN, END, or 0 */
167 uint	running = 0;			/* Set if not in compile phase */
168 uchar_t	catterm;			/* Can inject concat or ';' */
169 uint	lexlast = '\n';			/* Last lexical token; starts as newline */
170 uint	lineno = 0;			/* Current programme line number */
171 uchar_t	doing_begin;			/* Set if compiling BEGIN block */
172 uchar_t	begin_getline;			/* Flags a getline was done in BEGIN */
173 uchar_t	needsplit;			/* Set if $0 must be split when read */
174 uchar_t	needenviron;			/* Set if ENVIRON variable referenced */
175 ushort	slevel;				/* Scope level (0 == root) */
176 ushort	loopexit;			/* Short circuit loop with keyword */
177 wchar_t	radixpoint;			/* Soft radix point for I18N */
178 REGEXP	resep;				/* Field separator as regexp */
179 wchar_t	*linebuf = NULL;		/* $0 buffer - malloc'd in awk1.c */
180 size_t	lbuflen;			/* Length of linebuf */
181 
182 /*
183  * XXX - Make sure to check where this error message is printed
184  */
185 char	interr[] = "internal execution tree error at E string";
186 char	nomem[] =  "insufficient memory for string storage";
187 NODE	*symtab[NBUCKET];		/* Heads of symbol table buckets */
188 NODE	*yytree;			/* Code tree */
189 NODE	*freelist;			/* Free every pattern {action} line */
190 wchar_t	*(*awkrecord) ANSI((wchar_t *, int, FILE*)) = defrecord;
191 					/* Function to read a record; initially defrecord */
192 wchar_t	*(*awkfield) ANSI((wchar_t **)) = whitefield;
193 					/* Function to extract a field; initially whitefield */
194 
195 /*
196  * Nodes used to speed up the execution of the
197  * interpreter.  All of these pointers are left without an
198  * initializer here; they are set up elsewhere.
199  */
199 NODE	*constant;			/* Node to hold a constant INT */
200 NODE	*const0;			/* Constant INT 0 node */
201 NODE	*const1;			/* Constant INT 1 node */
202 NODE	*constundef;			/* Undefined variable */
203 NODE	*field0;			/* $0 */
204 NODE	*incNR;				/* Code to increment NR variable */
205 NODE	*incFNR;			/* Code to increment FNR variable */
206 NODE	*clrFNR;			/* Zero FNR variable (each file) */
207 NODE	*ARGVsubi;			/* Compute ARGV[i] */
208 NODE	*varNR;				/* Remove search for NR variable */
209 NODE	*varFNR;			/* Don't search for FNR variable */
210 NODE	*varNF;				/* Pointer to NF variable */
211 NODE	*varOFMT;			/* For s_prf */
212 NODE	*varCONVFMT;			/* For internal conv of float to str */
213 NODE	*varOFS;			/* For s_print */
214 NODE	*varORS;			/* For s_print */
215 NODE	*varFS;				/* Field separator */
216 NODE	*varRS;				/* Record separator */
217 NODE	*varARGC;			/* Quick access to ARGC */
218 NODE	*varSUBSEP;			/* Quick access to SUBSEP */
219 NODE	*varENVIRON;			/* Pointer to ENVIRON variable */
220 NODE	*varSYMTAB;			/* Symbol table special variable */
221 NODE	*varFILENAME;			/* Node for FILENAME variable */
222 NODE	*curnode;			/* Pointer to current line */
223 NODE	*inc_oper;			/* Used by INC/DEC in awk3.c */
224 NODE	*asn_oper;			/* Used by AADD, etc in awk3.c */
224 NODE	*asn_oper;			/* used by AADD, etc in awk3.c */
225