/* xref: /freebsd/contrib/one-true-awk/awk.h (revision 8d457988a72487b35ee3922671775d73169339e3) */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Make the `noreturn' function specifier usable everywhere: C11 and
 * later get it from <stdnoreturn.h>; for C99 and older (or when the
 * compiler does not define __STDC_VERSION__ at all) it expands to
 * nothing.
 */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ <= 199901L
#define noreturn
#else
#include <stdnoreturn.h>
#endif

typedef double	Awkfloat;	/* numeric type used for Cell.fval and the numeric built-ins */

/* unsigned char is more trouble than it's worth */

typedef	unsigned char uschar;	/* byte type used by Cell.ctype/csub and the regex buffers */

/*
 * xfree(a): free the storage `a' points to, then reset `a' to NULL.
 * `a' must be a modifiable lvalue and is evaluated twice, so do not
 * pass an expression with side effects.  The (intptr_t) round-trip
 * presumably exists so const-qualified pointers can be passed without
 * a cast warning.  Wrapped in do/while (0) so the macro behaves as a
 * single statement, e.g. in "if (c) xfree(p); else ...".
 */
#define	xfree(a)	do { free((void *)(intptr_t)(a)); (a) = NULL; } while (0)
/*
 * We sometimes cheat writing read-only pointers to NUL-terminate them
 * and then put back the original value
 */
#define setptr(ptr, a)	(*(char *)(intptr_t)(ptr) = (a))

/* NN(p): p itself, or the literal "(null)" when p is NULL; guaranteed non-null for DPRINTF */
#define	NN(p)	((p) ? (p) : "(null)")

/*
 * DPRINTF(...): printf-style trace output, emitted only when the
 * run-time flag `dbg' is non-zero.  DEBUG is defined unconditionally
 * right here, so the tracing variant is the one that gets compiled.
 * Both variants are wrapped in do/while (0) so that a following
 * `else' cannot bind to the macro's internal `if'.
 */
#define	DEBUG
#ifdef	DEBUG
#	define	DPRINTF(...)	do { if (dbg) printf(__VA_ARGS__); } while (0)
#else
#	define	DPRINTF(...)	do { } while (0)
#endif

/*
 * Global phase indicator.  NOTE(review): going by the enumerator
 * names, this records whether awk is running the program, compiling
 * it, or printing an error -- confirm against the code that sets it.
 */
extern enum compile_states {
	RUNNING,
	COMPILING,
	ERROR_PRINTING
} compile_time;

extern bool	safe;		/* false => unsafe, true => safe */

#define	RECSIZE	(8 * 1024)	/* sets limit on records, fields, etc., etc. */
extern int	recsize;	/* size of current record, orig RECSIZE */

extern size_t	awk_mb_cur_max;	/* max size of a multi-byte character */

69f39dd6a9SWarner Losh extern char	EMPTY[];	/* this avoid -Wwritable-strings issues */
702a55deb1SDavid E. O'Brien extern char	**FS;
712a55deb1SDavid E. O'Brien extern char	**RS;
722a55deb1SDavid E. O'Brien extern char	**ORS;
732a55deb1SDavid E. O'Brien extern char	**OFS;
742a55deb1SDavid E. O'Brien extern char	**OFMT;
752a55deb1SDavid E. O'Brien extern Awkfloat *NR;
762a55deb1SDavid E. O'Brien extern Awkfloat *FNR;
772a55deb1SDavid E. O'Brien extern Awkfloat *NF;
782a55deb1SDavid E. O'Brien extern char	**FILENAME;
792a55deb1SDavid E. O'Brien extern char	**SUBSEP;
802a55deb1SDavid E. O'Brien extern Awkfloat *RSTART;
812a55deb1SDavid E. O'Brien extern Awkfloat *RLENGTH;
822a55deb1SDavid E. O'Brien 
extern bool	CSV;		/* true for csv input */

extern char	*record;	/* points to $0 */
extern int	lineno;		/* line number in awk program */
extern int	errorflag;	/* 1 if error has occurred */
extern bool	donefld;	/* true if record broken into fields */
extern bool	donerec;	/* true if record is valid (no fld has changed) */
extern int	dbg;		/* non-zero enables DPRINTF output */

extern const char *patbeg;	/* beginning of pattern matched */
extern	int	patlen;		/* length of pattern matched.  set in b.c */

952a55deb1SDavid E. O'Brien /* Cell:  all information about a variable or constant */
962a55deb1SDavid E. O'Brien 
972a55deb1SDavid E. O'Brien typedef struct Cell {
982a55deb1SDavid E. O'Brien 	uschar	ctype;		/* OCELL, OBOOL, OJUMP, etc. */
992a55deb1SDavid E. O'Brien 	uschar	csub;		/* CCON, CTEMP, CFLD, etc. */
1002a55deb1SDavid E. O'Brien 	char	*nval;		/* name, for variables only */
1012a55deb1SDavid E. O'Brien 	char	*sval;		/* string value */
1022a55deb1SDavid E. O'Brien 	Awkfloat fval;		/* value as number */
103b5253557SWarner Losh 	int	 tval;		/* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
104b5253557SWarner Losh 	char	*fmt;		/* CONVFMT/OFMT value used to convert from number */
1052a55deb1SDavid E. O'Brien 	struct Cell *cnext;	/* ptr to next if chained */
1062a55deb1SDavid E. O'Brien } Cell;
1072a55deb1SDavid E. O'Brien 
1082a55deb1SDavid E. O'Brien typedef struct Array {		/* symbol table array */
1092a55deb1SDavid E. O'Brien 	int	nelem;		/* elements in table right now */
1102a55deb1SDavid E. O'Brien 	int	size;		/* size of tab */
1112a55deb1SDavid E. O'Brien 	Cell	**tab;		/* hash table pointers */
1122a55deb1SDavid E. O'Brien } Array;
1132a55deb1SDavid E. O'Brien 
1142a55deb1SDavid E. O'Brien #define	NSYMTAB	50	/* initial size of a symbol table */
1152a55deb1SDavid E. O'Brien extern Array	*symtab;
1162a55deb1SDavid E. O'Brien 
1172a55deb1SDavid E. O'Brien extern Cell	*nrloc;		/* NR */
1182a55deb1SDavid E. O'Brien extern Cell	*fnrloc;	/* FNR */
119b5253557SWarner Losh extern Cell	*fsloc;		/* FS */
1202a55deb1SDavid E. O'Brien extern Cell	*nfloc;		/* NF */
121b5253557SWarner Losh extern Cell	*ofsloc;	/* OFS */
122b5253557SWarner Losh extern Cell	*orsloc;	/* ORS */
123b5253557SWarner Losh extern Cell	*rsloc;		/* RS */
1242a55deb1SDavid E. O'Brien extern Cell	*rstartloc;	/* RSTART */
1252a55deb1SDavid E. O'Brien extern Cell	*rlengthloc;	/* RLENGTH */
126b5253557SWarner Losh extern Cell	*subseploc;	/* SUBSEP */
127f39dd6a9SWarner Losh extern Cell	*symtabloc;	/* SYMTAB */
1282a55deb1SDavid E. O'Brien 
/* Cell.tval values: single-bit flags (written in octal) that are OR-ed together */
#define	NUM	01	/* number value is valid */
#define	STR	02	/* string value is valid */
#define DONTFREE 04	/* string space is not freeable */
#define	CON	010	/* this is a constant */
#define	ARR	020	/* this is an array */
#define	FCN	040	/* this is a function name */
#define FLD	0100	/* this is a field $1, $2, ... */
#define	REC	0200	/* this is $0 */
#define CONVC	0400	/* string was converted from number via CONVFMT */
#define CONVO	01000	/* string was converted from number via OFMT */


/*
 * function types
 * NOTE(review): these look like selector codes for awk's built-in
 * functions (length, sqrt, ...); confirm against the code that
 * switches on them.
 */
#define	FLENGTH	1
#define	FSQRT	2
#define	FEXP	3
#define	FLOG	4
#define	FINT	5
#define	FSYSTEM	6
#define	FRAND	7
#define	FSRAND	8
#define	FSIN	9
#define	FCOS	10
#define	FATAN	11
#define	FTOUPPER 12
#define	FTOLOWER 13
#define	FFLUSH	14
#define FAND	15
#define FFOR	16
#define FXOR	17
#define FCOMPL	18
#define FLSHIFT	19
#define FRSHIFT	20
#define FSYSTIME	21
#define FSTRFTIME	22
#define FMKTIME	23

/* Node:  parse tree is made of nodes, with Cell's at bottom */

typedef struct Node {
	int	ntype;		/* node type: NVALUE, NSTAT or NEXPR */
	struct	Node *nnext;	/* link to another Node */
	int	lineno;
	int	nobj;		/* compared against ARG by isargument() */
	struct	Node *narg[1];	/* variable: actual size set by calling malloc */
} Node;

#define	NIL	((Node *) 0)	/* null Node pointer */

extern Node	*winner;
extern Node	*nullnode;

/* ctypes: values stored in Cell.ctype */
#define OCELL	1
#define OBOOL	2
#define OJUMP	3

/* Cell subtypes: csub */
#define	CFREE	7
#define CCOPY	6
#define CCON	5
#define CTEMP	4
#define CNAME	3
#define CVAR	2
#define CFLD	1
#define	CUNK	0

/* bool subtypes (csub values tested by istrue()) */
#define BTRUE	11
#define BFALSE	12

/* jump subtypes (csub values tested by isexit(), isnext(), ...) */
#define JEXIT	21
#define JNEXT	22
#define	JBREAK	23
#define	JCONT	24
#define	JRET	25
#define	JNEXTFILE	26

/* node types: values stored in Node.ntype */
#define NVALUE	1
#define NSTAT	2
#define NEXPR	3


extern	int	pairstack[], paircnt;

/*
 * Predicate macros.  isvalue/isexpr/isargument read Node fields
 * (ntype, nobj); the others read Cell fields (ctype, csub, tval).
 * The tval tests return the masked bit, i.e. non-zero rather than 1.
 */

/*
 * notlegal(n): true when n lies outside the open interval
 * (FIRSTTOKEN, LASTTOKEN) or its proctab slot is nullproc.  The
 * argument is parenthesized so that expressions such as `x & 0xff'
 * are handled correctly; it may be evaluated up to three times.
 */
#define notlegal(n)	((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || proctab[(n)-FIRSTTOKEN] == nullproc)
#define isvalue(n)	((n)->ntype == NVALUE)
#define isexpr(n)	((n)->ntype == NEXPR)
#define isjump(n)	((n)->ctype == OJUMP)
#define isexit(n)	((n)->csub == JEXIT)
#define	isbreak(n)	((n)->csub == JBREAK)
#define	iscont(n)	((n)->csub == JCONT)
#define	isnext(n)	((n)->csub == JNEXT || (n)->csub == JNEXTFILE)
#define	isret(n)	((n)->csub == JRET)
#define isrec(n)	((n)->tval & REC)
#define isfld(n)	((n)->tval & FLD)
#define isstr(n)	((n)->tval & STR)
#define isnum(n)	((n)->tval & NUM)
#define isarr(n)	((n)->tval & ARR)
#define isfcn(n)	((n)->tval & FCN)
#define istrue(n)	((n)->csub == BTRUE)
#define istemp(n)	((n)->csub == CTEMP)
#define	isargument(n)	((n)->nobj == ARG)
/* freeable(p): the string value is valid (STR set) and DONTFREE is clear */
/* #define freeable(p)	(!((p)->tval & DONTFREE)) */
#define freeable(p)	( ((p)->tval & (STR|DONTFREE)) == STR )

/* structures used by regular expression matching machinery, mostly b.c: */

/*
 * NOTE(review): the historical remarks below mention 256/128, but the
 * value actually defined is 1256+3; the comment appears to predate the
 * current value -- confirm the intended meaning in b.c.
 */
#define NCHARS	(1256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
				/* BUG: some overflows (caught) if we use 256 */
				/* watch out in match(), etc. */
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
#define NSTATES	32

2462a55deb1SDavid E. O'Brien typedef struct rrow {
2472a55deb1SDavid E. O'Brien 	long	ltype;	/* long avoids pointer warnings on 64-bit */
2482a55deb1SDavid E. O'Brien 	union {
2492a55deb1SDavid E. O'Brien 		int i;
2502a55deb1SDavid E. O'Brien 		Node *np;
2512a55deb1SDavid E. O'Brien 		uschar *up;
252f32a6403SWarner Losh 		int *rp; /* rune representation of char class */
2532a55deb1SDavid E. O'Brien 	} lval;		/* because Al stores a pointer in it! */
2542a55deb1SDavid E. O'Brien 	int	*lfollow;
2552a55deb1SDavid E. O'Brien } rrow;
2562a55deb1SDavid E. O'Brien 
typedef struct gtte { /* gototab entry */
	unsigned int ch;
	unsigned int state;
} gtte;

typedef struct gtt {	/* gototab */
	size_t	allocated;	/* NOTE(review): presumably capacity of entries[] -- confirm in b.c */
	size_t	inuse;		/* NOTE(review): presumably number of entries in use -- confirm in b.c */
	gtte	*entries;
} gtt;

2682a55deb1SDavid E. O'Brien typedef struct fa {
269f32a6403SWarner Losh 	gtt	*gototab;
270f39dd6a9SWarner Losh 	uschar	*out;
2712a55deb1SDavid E. O'Brien 	uschar	*restr;
272f39dd6a9SWarner Losh 	int	**posns;
273f39dd6a9SWarner Losh 	int	state_count;
274f39dd6a9SWarner Losh 	bool	anchor;
2752a55deb1SDavid E. O'Brien 	int	use;
2762a55deb1SDavid E. O'Brien 	int	initstat;
2772a55deb1SDavid E. O'Brien 	int	curstat;
2782a55deb1SDavid E. O'Brien 	int	accept;
2792a55deb1SDavid E. O'Brien 	struct	rrow re[1];	/* variable: actual size set by calling malloc */
2802a55deb1SDavid E. O'Brien } fa;
2812a55deb1SDavid E. O'Brien 
2822a55deb1SDavid E. O'Brien 
2832a55deb1SDavid E. O'Brien #include "proto.h"
284