xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk.h (revision 2a8d6eba033e4713ab12b61178f0513f1f075482)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * awk -- common header file.
28  *
29  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
30  *
31  * This version uses the POSIX.2 compatible <regex.h> routines.
32  *
33  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
34  *
35  */
36 
37 #pragma ident	"%Z%%M%	%I%	%E% SMI"
38 
39 #include <stdio.h>
40 #include <ctype.h>
41 #include <string.h>
42 #include <math.h>
43 #include <limits.h>
44 #include <stdlib.h>
45 #include <regex.h>
46 #include <errno.h>
47 #include <sys/types.h>
48 #include <locale.h>
49 #include <wchar.h>
50 #include <widec.h>
51 
/*
 * Parser limits, null-pointer shorthands and other general constants.
 */
#define	YYMAXDEPTH	300		/* yacc: max # of productions */
#define	YYSSIZE		300		/* MKS YACC: state/value stack size */
#define	MAXDIGINT	19		/* digits in an INT */
#define	FNULL		((FILE *)0)	/* null FILE pointer */
#define	NNULL		((NODE *)0)	/* null NODE pointer */
#define	SNULL		((STRING)0)	/* null STRING */
#define	LARGE		INT_MAX		/* a large integer */
#define	NPFILE		32		/* # of -[fl] options allowed */
#define	NRECUR		3000		/* maximum recursion depth */
61 
/*
 * Table sizes.  M_LDATA is defined unconditionally just below, so the
 * large-data limits are the ones in effect; the small limits are kept
 * as the alternative configuration.
 */
#define	M_LDATA	1
#ifndef M_LDATA
#define	NLINE	2048	/* Longest input record */
#define	NFIELD	1024	/* Number of fields allowed */
#define	NBUCKET	256	/* # of symtab buckets (power of 2) */
#else
#define	NLINE	20000	/* Longest input record */
#define	NFIELD	4000	/* Number of fields allowed */
#define	NBUCKET	1024	/* # of symtab buckets (power of 2) */
#endif
72 
#define	NSNODE		40	/* Number of cached nodes */
#define	NCONTEXT	50	/* Amount of context for error msgs */

/*
 * Reduce a hash value to a symbol table bucket index.
 * The mask only works because NBUCKET is a power of 2.
 */
#define	hashbuck(n)	((n)&(NBUCKET-1))
#if	BSD
/*
 * A speedup for BSD.  Use their routines which are
 * already optimised.  Note that BSD bcopy does not
 * return a value, so memcpy() cannot be used as an
 * expression under this mapping.  bcopy takes
 * (source, destination), hence the swapped arguments.
 */
int	bcmp();
#define	memcmp(b1, b2, n)	bcmp(b1, b2, n)
void	bcopy();
/* The cast covers the whole length expression, not just its first operand. */
#define	memcpy(b1, b2, n)	bcopy(b2, b1, (int)(n))
#endif	/* BSD */

/* Call vlookup() on name n with its nocreate argument set to 0. */
#define	vlook(n)	vlookup(n, 0)
88 
89 /*
90  * Basic AWK internal types.
91  */
struct NODE;					/* defined below */

typedef	double		REAL;			/* AWK real number */
typedef	long long	INT;			/* AWK integer */
typedef	wchar_t		*STRING;		/* AWK string (wide chars) */
typedef	void		*REGEXP;		/* opaque regular expression */

/* A function that takes a NODE pointer and returns a NODE pointer. */
typedef	struct NODE	*(*FUNCTION)(struct NODE *np);
97 
98 /*
99  * Node in the AWK interpreter expression tree.
100  */
101 typedef	struct	NODE	{
102 	ushort_t	n_type;
103 	struct NODE	*n_next;		/* Symbol table/PARM link */
104 	ushort_t	n_flags;		/* Node flags, type */
105 
106 
107 
108 
109 	union	{
110 		struct	{
111 			ushort_t	N_hash;		/* Full hash value */
112 			struct NODE	*N_alink;	/* Array link */
113 			union	{
114 				struct	{
115 					STRING	N_string;
116 					size_t	N_strlen;
117 				}	n_str;
118 				INT	N_int;
119 				REAL	N_real;
120 				FUNCTION	N_function;
121 				struct	NODE	*N_ufunc;
122 			}	n_tun;
123 			wchar_t	N_name[1];
124 		}	n_term;
125 		struct	{
126 			struct	NODE	*N_left;
127 			struct	NODE	*N_right;
128 			ushort_t	N_lineno;
129 		}	n_op;
130 		struct {
131 			struct	NODE	*N_left;	/* Used for fliplist */
132 			struct	NODE	*N_right;
133 			REGEXP		N_regexp;	/* Regular expression */
134 		}	n_re;
135 	}	n_un;
136 }	NODE;
137 
138 /*
139  * Definitions to make the node access much easier.
140  */
/* Members of n_un.n_term */
#define	n_hash		n_un.n_term.N_hash	/* full hash value in sym tbl */
#define	n_scope		n_un.n_term.N_hash	/* local variable scope level */
#define	n_alink		n_un.n_term.N_alink	/* link to array list */
#define	n_name		n_un.n_term.N_name

/* Value members of n_un.n_term.n_tun */
#define	n_string	n_un.n_term.n_tun.n_str.N_string
#define	n_strlen	n_un.n_term.n_tun.n_str.N_strlen
#define	n_int		n_un.n_term.n_tun.N_int
#define	n_real		n_un.n_term.n_tun.N_real
#define	n_function	n_un.n_term.n_tun.N_function
#define	n_ufunc		n_un.n_term.n_tun.N_ufunc

/* Members of n_un.n_op; n_keywtype shares storage with n_lineno */
#define	n_left		n_un.n_op.N_left
#define	n_right		n_un.n_op.N_right
#define	n_lineno	n_un.n_op.N_lineno
#define	n_keywtype	n_un.n_op.N_lineno

/* Member of n_un.n_re */
#define	n_regexp	n_un.n_re.N_regexp
156 /*
157  * Compress the types that are actually used in the final tree
158  * to save space in the intermediate file. Allows 1 byte to
159  * represent all types
160  */
161 
162 
163 
164 
165 
166 
167 
168 /*
169  * n_flags bit assignments.
170  */
#define	FSTATIC		0		/* Not allocated */
#define	FALLOC		(1 << 0)	/* Allocated node */
#define	FMATCH		(1 << 1)	/* pattern,pattern (first part matches) */
#define	FSPECIAL	(1 << 2)	/* Special pre-computed variable */
#define	FINARRAY	(1 << 3)	/* NODE installed in N_alink array list */
#define	FNOALLOC	(1 << 4)	/* mark node FALLOC, but don't malloc */
#define	FSENSE		(1 << 5)	/* Sense if string looks like INT/REAL */
#define	FSAVE		(FSPECIAL|FINARRAY)	/* assign leaves on */

#define	FINT		(1 << 6)	/* Node has integer type */
#define	FREAL		(1 << 7)	/* Node has real type */
#define	FSTRING		(1 << 8)	/* Node has string type */
#define	FNONTOK		(1 << 9)	/* Node has non-token type */
#define	FVINT		(1 << 10)	/* Node looks like an integer */
#define	FVREAL		(1 << 11)	/* Node looks like a real number */
#define	FLARRAY		(1 << 12)	/* Local array node */
187 
188 /*
189  * n_flags macros
190  * These work when given an argument of np->n_flags
191  */
/* isleaf() and isastring() yield 0 or 1; the others yield the masked bits. */
#define	isleaf(f)	(((f) & FNONTOK) == 0)
#define	isstring(f)	((f) & FSTRING)
#define	isastring(f)	(((f) & (FSTRING | FALLOC)) == (FSTRING | FALLOC))
#define	isint(f)	((f) & (FINT | FVINT))
#define	isreal(f)	((f) & (FREAL | FVREAL))
#define	isnumber(f)	((f) & (FINT | FVINT | FREAL | FVREAL))
198 
199 /*
200  * Prototype file size is defined in awksize.h
201  */
202 
203 
204 
205 
206 
207 /*
208  * Awkrun prototype default name
209  */
/*
 * One prototype name per target platform; M_ETCDIR() is supplied
 * from outside this header.  AWK_LPROTOTYPE exists only for DOS.
 */
#ifdef DOS
#ifdef __386__
#define	AWK_PROTOTYPE  M_ETCDIR(awkrunf.dos)
#define	AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos)
#else	/* !__386__ */
#define	AWK_PROTOTYPE  M_ETCDIR(awkrun.dos)
#define	AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos)
#endif	/* __386__ */
#elif defined(OS2)
#define	AWK_PROTOTYPE M_ETCDIR(awkrun.os2)
#elif defined(NT)
#define	AWK_PROTOTYPE M_ETCDIR(awkrun.nt)
#else	/* everything else */
#define	AWK_PROTOTYPE M_ETCDIR(awkrun.mod)
#endif
225 
226 /*
227  * This is a kludge that gets around a bug in compact & large
228  * models under DOS.  It also makes the generated
229  * code faster even if there wasn't a bug.  UNIX people: try
230  * to ignore these noisy "near" declarations.
231  */
#if !defined(DOS)
#define	near		/* expands to nothing outside DOS */
#endif

/* Pointer to local strings; "near"-qualified on DOS only. */
typedef	wchar_t	near	*LOCCHARP;
237 /*
238  * Form of builtin symbols
239  * This should be a union because only one of r_ivalue
240  * and r_svalue is needed, but (alas) unions cannot be
241  * initialised.
242  */
243 typedef	struct	RESERVED {
244 	LOCCHARP	r_name;
245 	int		r_type;		/* Type of node */
246 	INT		r_ivalue;	/* Integer value or wcslen(r_svalue) */
247 	STRING		r_svalue;	/* String value */
248 }	RESERVED;
249 
250 /*
251  * Table of builtin functions.
252  */
253 typedef	struct	RESFUNC {
254 	LOCCHARP	rf_name;
255 	int		rf_type;	/* FUNC || GETLINE */
256 	FUNCTION	rf_func;	/* Function pointer */
257 }	RESFUNC;
258 
259 /*
260  * Structure holding list of open files.
261  */
262 typedef	struct	OFILE	{
263 	ushort_t f_mode;		/* Open mode: WRITE, APPEND, PIPE */
264 	FILE	*f_fp;			/* File pointer if open */
265 	char	*f_name;		/* Remembered file name */
266 }	OFILE;
267 
268 /* Global functions -- awk.y */
269 int	yyparse(void);
270 
271 /* Global functions -- awk1.c */
272 #ifdef __WATCOMC__
273 #pragma aux yyerror aborts;
274 #pragma aux awkerr aborts;
275 #pragma aux awkperr aborts;
276 #endif
277 void	yyerror(char *msg, ...);
278 void	awkerr(char *fmt, ...);
279 void	awkperr(char *fmt, ...);
280 void	uexit(NODE *);
281 int	yylex(void);
282 NODE	*renode(wchar_t *restr);
283 wchar_t	*emalloc(unsigned);
284 wchar_t	*erealloc(wchar_t *, unsigned);
285 
286 /* Global functions -- awk2.c */
287 void	awk(void);
288 void	dobegin(void);
289 void	doend(int status);
290 int	nextrecord(wchar_t *buf, FILE *fp);
291 wchar_t	*defrecord(wchar_t *bp, int lim, FILE *fp);
292 wchar_t	*charrecord(wchar_t *bp, int lim, FILE *fp);
293 wchar_t	*multirecord(wchar_t *bp, int lim, FILE *fp);
294 wchar_t	*whitefield(wchar_t **endp);
295 wchar_t	*blackfield(wchar_t **endp);
296 wchar_t	*refield(wchar_t **endp);
297 void	s_print(NODE *np);
298 void	s_prf(NODE *np);
299 size_t	xprintf(NODE *np, FILE *fp, wchar_t **cp);
300 void	awkclose(OFILE *op);
301 
302 /* Global functions -- awk3.c */
303 void	strassign(NODE *np, STRING string, int flags, size_t length);
304 NODE	*nassign(NODE *np, NODE *value);
305 NODE	*assign(NODE *np, NODE *value);
306 void	delarray(NODE *np);
307 NODE	*node(int type, NODE *left, NODE *right);
308 NODE	*intnode(INT i);
309 NODE	*realnode(REAL r);
310 NODE	*stringnode(STRING str, int aflag, size_t wcslen);
311 NODE	*vlookup(wchar_t *name, int nocreate);
312 NODE	*emptynode(int type, size_t nlength);
313 void	freenode(NODE *np);
314 void	execute(NODE *np);
315 INT	exprint(NODE *np);
316 REAL	exprreal(NODE *np);
317 STRING	exprstring(NODE *np);
318 STRING	strsave(wchar_t *string);
319 NODE	*exprreduce(NODE *np);
320 NODE	*getlist(NODE **npp);
321 NODE	*symwalk(int *buckp, NODE **npp);
322 REGEXP	getregexp(NODE *np);
323 void	addsymtab(NODE *np);
324 void	delsymtab(NODE *np, int fflag);
325 NODE	* finstall(LOCCHARP name, FUNCTION f, int type);
326 void	kinstall(LOCCHARP name, int type);
327 void	fieldsplit(void);
328 void	promote(NODE *);
329 
330 
331 
332 
333 
334 
335 
336 /* Global functions -- awk4.c */
337 NODE	*f_exp(NODE *np);
338 NODE	*f_int(NODE *np);
339 NODE	*f_log(NODE *np);
340 NODE	*f_sqrt(NODE *np);
341 NODE	*f_getline(NODE *np);
342 NODE	*f_index(NODE *np);
343 NODE	*f_length(NODE *np);
344 NODE	*f_split(NODE *np);
345 NODE	*f_sprintf(NODE *np);
346 NODE	*f_substr(NODE *np);
347 NODE	*f_rand(NODE *np);
348 NODE	*f_srand(NODE *np);
349 NODE	*f_sin(NODE *np);
350 NODE	*f_cos(NODE *np);
351 NODE	*f_atan2(NODE *np);
352 NODE	*f_sub(NODE *np);
353 NODE	*f_gsub(NODE *np);
354 NODE	*f_match(NODE *np);
355 NODE	*f_system(NODE *np);
356 NODE	*f_ord(NODE *np);
357 NODE	*f_tolower(NODE *np);
358 NODE	*f_toupper(NODE *np);
359 NODE	*f_close(NODE *np);
360 NODE	*f_asort(NODE *np);
361 
362 /* In awk0.c */
363 
364 
365 
366 extern	wchar_t	_null[];
367 extern	char	r[];
368 extern	char	w[];
369 extern	wchar_t	s_OFMT[];
370 extern	wchar_t	s_CONVFMT[];
371 extern	wchar_t	s_NR[];
372 extern	wchar_t	s_NF[];
373 extern	wchar_t	s_OFS[];
374 extern	wchar_t	s_ORS[];
375 extern	wchar_t	s_RS[];
376 extern	wchar_t	s_FS[];
377 extern	wchar_t	s_FNR[];
378 extern	wchar_t	s_SUBSEP[];
379 extern	wchar_t	s_ARGC[], s_ARGV[], s_ENVIRON[];
380 extern	wchar_t	s_FILENAME[], s_SYMTAB[];
381 extern	wchar_t	s_BEGIN[], s_END[], s_next[];
382 extern	wchar_t	_begin[], _end[];
383 extern	wchar_t	s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[];
384 extern	wchar_t	s_split[], s_sprintf[], s_sqrt[], s_substr[];
385 extern	wchar_t	s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[];
386 extern	wchar_t	s_sub[], s_gsub[], s_match[], s_system[], s_ord[];
387 extern	wchar_t	s_toupper[], s_tolower[], s_asort[];
388 extern	wchar_t	s_close[];
389 extern	wchar_t	redelim;
390 extern	unsigned char	inprint;
391 extern	unsigned char	funparm;
392 extern	unsigned char	splitdone;
393 extern	uint_t	npattern;
394 extern	uint_t	nfield;
395 extern	uint_t	fcount;
396 extern	uint_t	phase;
397 extern	uint_t	running;
398 extern	uchar_t	catterm;
399 extern	uint_t	lexlast;
400 extern	uint_t	lineno;
401 extern	uchar_t	needsplit, needenviron, doing_begin, begin_getline;
402 extern	ushort_t	slevel;
403 extern	ushort_t	loopexit;
404 extern	wchar_t	radixpoint;
405 extern	REGEXP	resep;
406 extern	RESERVED	reserved[];
407 extern	RESFUNC		resfuncs[];
408 extern	long	NIOSTREAM;	/* Maximum open I/O streams */
409 extern	OFILE	*ofiles;
410 extern	wchar_t	*linebuf;
411 extern	size_t	lbuflen;
412 extern	char	interr[];
413 extern	char	nomem[];
414 extern	NODE	*symtab[NBUCKET];
415 extern	NODE	*yytree;
416 extern	NODE	*freelist;
417 extern	wchar_t	*(*awkrecord)(wchar_t *, int, FILE *);
418 extern	wchar_t	*(*awkfield)(wchar_t **);
419 
420 extern	NODE	*constant;
421 extern	NODE	*const0;
422 extern	NODE	*const1;
423 extern	NODE	*constundef;
424 extern	NODE	*field0;
425 extern	NODE	*incNR;
426 extern	NODE	*incFNR;
427 extern	NODE	*clrFNR;
428 extern	NODE	*ARGVsubi;
429 extern	NODE	*varNR;
430 extern	NODE	*varFNR;
431 extern	NODE	*varNF;
432 extern	NODE	*varOFMT;
433 extern	NODE	*varCONVFMT;
434 extern	NODE	*varOFS;
435 extern	NODE	*varORS;
436 extern	NODE	*varFS;
437 extern	NODE	*varRS;
438 extern	NODE	*varARGC;
439 extern	NODE	*varSUBSEP;
440 extern	NODE	*varENVIRON;
441 extern	NODE	*varSYMTAB;
442 extern	NODE	*varFILENAME;
443 extern	NODE	*curnode;
444 extern	NODE    *inc_oper;
445 extern	NODE	*asn_oper;
446 
/* Conversions between multibyte (char) and wide-character strings. */
extern	char		*mbunconvert(wchar_t *);
extern	wchar_t		*mbstowcsdup(char *);
extern	char		*wcstombsdup(wchar_t *);

/* Repeats the awkerr() prototype given earlier for awk1.c. */
extern	void		awkerr(char *, ...);
/*
 * The following defines the expected maximum length, in characters, of a
 * printed number.  It should be the longest expected size for any type of
 * number (i.e. float, long, etc.).  It is used to calculate the approximate
 * number of characters needed to hold the number.
 */
/* Default to 30 unless M_NUMSIZE supplies a value. */
#ifndef M_NUMSIZE
#define	NUMSIZE 30
#else
#define	NUMSIZE M_NUMSIZE
#endif
462 
/* Paste an L prefix onto a character or string literal. */
#define	M_MB_L(s)	L##s

/*
 * ANSI(x) keeps a parenthesised prototype argument list on ISO C
 * compilers and collapses it to () otherwise; the qualifier keywords
 * are also defined away for non-ISO compilers.
 */
#ifndef	__STDC__
#define	const
#define	signed
#define	volatile
#define	ANSI(x) ()
#else
#define	ANSI(x) x
#endif
472 
/* 1 when x is a space or a tab, 0 otherwise; x is evaluated up to twice. */
#define	isWblank(x)	((x) == ' ' || (x) == '\t')
474 
475 
476 /*
477  * Wide character version of regular expression functions.
478  */
479 #define	REGWMATCH_T	int_regwmatch_t
480 #define	REGWCOMP	int_regwcomp
481 #define	REGWEXEC	int_regwexec
482 #define	REGWFREE	int_regwfree
483 #define	REGWERROR	int_regwerror
484 #define	REGWDOSUBA	int_regwdosuba
485 
486 typedef struct {
487 	const wchar_t	*rm_sp, *rm_ep;
488 	regoff_t	rm_so, rm_eo;
489 } int_regwmatch_t;
490 
491 extern int int_regwcomp(REGEXP *, const wchar_t *);
492 extern int int_regwexec(REGEXP, const wchar_t *, size_t,
493 			int_regwmatch_t *, int);
494 extern void int_regwfree(REGEXP);
495 extern size_t int_regwerror(int, REGEXP, char *, size_t);
496 extern int int_regwdosuba(REGEXP, const wchar_t *,
497 			const wchar_t *, wchar_t **, int, int *);
498