/* xref: /freebsd/contrib/one-true-awk/lex.c (revision dd78d987cb38ef162d40aad86229f1dc19884f78) */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "awk.h"
#include "awkgram.tab.h"

322a55deb1SDavid E. O'Brien extern YYSTYPE	yylval;
33f39dd6a9SWarner Losh extern bool	infunc;
342a55deb1SDavid E. O'Brien 
352a55deb1SDavid E. O'Brien int	lineno	= 1;
362a55deb1SDavid E. O'Brien int	bracecnt = 0;
372a55deb1SDavid E. O'Brien int	brackcnt  = 0;
382a55deb1SDavid E. O'Brien int	parencnt = 0;
392a55deb1SDavid E. O'Brien 
/* One entry of the reserved-word table that word() looks names up in. */
typedef struct Keyword {
	const char *word;	/* spelling as written in the awk program */
	int	sub;		/* value word() stores in yylval.i on a match */
	int	type;		/* token code word() returns on a match */
} Keyword;

46f39dd6a9SWarner Losh const Keyword keywords[] = {	/* keep sorted: binary searched */
472a55deb1SDavid E. O'Brien 	{ "BEGIN",	XBEGIN,		XBEGIN },
482a55deb1SDavid E. O'Brien 	{ "END",	XEND,		XEND },
492a55deb1SDavid E. O'Brien 	{ "NF",		VARNF,		VARNF },
50eb690a05SWarner Losh 	{ "and",	FAND,		BLTIN },
512a55deb1SDavid E. O'Brien 	{ "atan2",	FATAN,		BLTIN },
522a55deb1SDavid E. O'Brien 	{ "break",	BREAK,		BREAK },
532a55deb1SDavid E. O'Brien 	{ "close",	CLOSE,		CLOSE },
54eb690a05SWarner Losh 	{ "compl",	FCOMPL,		BLTIN },
552a55deb1SDavid E. O'Brien 	{ "continue",	CONTINUE,	CONTINUE },
562a55deb1SDavid E. O'Brien 	{ "cos",	FCOS,		BLTIN },
572a55deb1SDavid E. O'Brien 	{ "delete",	DELETE,		DELETE },
582a55deb1SDavid E. O'Brien 	{ "do",		DO,		DO },
592a55deb1SDavid E. O'Brien 	{ "else",	ELSE,		ELSE },
602a55deb1SDavid E. O'Brien 	{ "exit",	EXIT,		EXIT },
612a55deb1SDavid E. O'Brien 	{ "exp",	FEXP,		BLTIN },
622a55deb1SDavid E. O'Brien 	{ "fflush",	FFLUSH,		BLTIN },
632a55deb1SDavid E. O'Brien 	{ "for",	FOR,		FOR },
642a55deb1SDavid E. O'Brien 	{ "func",	FUNC,		FUNC },
652a55deb1SDavid E. O'Brien 	{ "function",	FUNC,		FUNC },
66eb690a05SWarner Losh 	{ "gensub",	GENSUB,		GENSUB },
672a55deb1SDavid E. O'Brien 	{ "getline",	GETLINE,	GETLINE },
682a55deb1SDavid E. O'Brien 	{ "gsub",	GSUB,		GSUB },
692a55deb1SDavid E. O'Brien 	{ "if",		IF,		IF },
702a55deb1SDavid E. O'Brien 	{ "in",		IN,		IN },
712a55deb1SDavid E. O'Brien 	{ "index",	INDEX,		INDEX },
722a55deb1SDavid E. O'Brien 	{ "int",	FINT,		BLTIN },
732a55deb1SDavid E. O'Brien 	{ "length",	FLENGTH,	BLTIN },
742a55deb1SDavid E. O'Brien 	{ "log",	FLOG,		BLTIN },
75eb690a05SWarner Losh 	{ "lshift",	FLSHIFT,	BLTIN },
762a55deb1SDavid E. O'Brien 	{ "match",	MATCHFCN,	MATCHFCN },
77*8d457988SWarner Losh 	{ "mktime",	FMKTIME,	BLTIN },
782a55deb1SDavid E. O'Brien 	{ "next",	NEXT,		NEXT },
792a55deb1SDavid E. O'Brien 	{ "nextfile",	NEXTFILE,	NEXTFILE },
80eb690a05SWarner Losh 	{ "or",		FFOR,		BLTIN },
812a55deb1SDavid E. O'Brien 	{ "print",	PRINT,		PRINT },
822a55deb1SDavid E. O'Brien 	{ "printf",	PRINTF,		PRINTF },
832a55deb1SDavid E. O'Brien 	{ "rand",	FRAND,		BLTIN },
842a55deb1SDavid E. O'Brien 	{ "return",	RETURN,		RETURN },
85eb690a05SWarner Losh 	{ "rshift",	FRSHIFT,	BLTIN },
862a55deb1SDavid E. O'Brien 	{ "sin",	FSIN,		BLTIN },
872a55deb1SDavid E. O'Brien 	{ "split",	SPLIT,		SPLIT },
882a55deb1SDavid E. O'Brien 	{ "sprintf",	SPRINTF,	SPRINTF },
892a55deb1SDavid E. O'Brien 	{ "sqrt",	FSQRT,		BLTIN },
902a55deb1SDavid E. O'Brien 	{ "srand",	FSRAND,		BLTIN },
91eb690a05SWarner Losh 	{ "strftime",	FSTRFTIME,	BLTIN },
922a55deb1SDavid E. O'Brien 	{ "sub",	SUB,		SUB },
932a55deb1SDavid E. O'Brien 	{ "substr",	SUBSTR,		SUBSTR },
942a55deb1SDavid E. O'Brien 	{ "system",	FSYSTEM,	BLTIN },
95eb690a05SWarner Losh 	{ "systime",	FSYSTIME,	BLTIN },
962a55deb1SDavid E. O'Brien 	{ "tolower",	FTOLOWER,	BLTIN },
972a55deb1SDavid E. O'Brien 	{ "toupper",	FTOUPPER,	BLTIN },
982a55deb1SDavid E. O'Brien 	{ "while",	WHILE,		WHILE },
99eb690a05SWarner Losh 	{ "xor",	FXOR,		BLTIN },
1002a55deb1SDavid E. O'Brien };
1012a55deb1SDavid E. O'Brien 
/*
 * RET(x): return token x from the enclosing (int-returning) function,
 * printing its name first when dbg is set.
 * Wrapped in do { } while (0) so that "if (...) RET(a); else ..." parses
 * as intended, and x is evaluated exactly once.
 */
#define	RET(x)	do { int ret_ = (x); if (dbg) printf("lex %s\n", tokname(ret_)); return ret_; } while (0)

/*
 * peek - return the next input character without consuming it.
 * Reads one character with input() and immediately pushes it back with
 * unput(); the value is 0 at end of input, as for input().
 */
static int peek(void)
{
	int c = input();
	unput(c);
	return c;
}

gettok(char ** pbuf,int * psz)111f39dd6a9SWarner Losh static int gettok(char **pbuf, int *psz)	/* get next input token */
1122a55deb1SDavid E. O'Brien {
113007c6572SDag-Erling Smørgrav 	int c, retc;
1142a55deb1SDavid E. O'Brien 	char *buf = *pbuf;
1152a55deb1SDavid E. O'Brien 	int sz = *psz;
1162a55deb1SDavid E. O'Brien 	char *bp = buf;
1172a55deb1SDavid E. O'Brien 
1182a55deb1SDavid E. O'Brien 	c = input();
1192a55deb1SDavid E. O'Brien 	if (c == 0)
1202a55deb1SDavid E. O'Brien 		return 0;
1212a55deb1SDavid E. O'Brien 	buf[0] = c;
1222a55deb1SDavid E. O'Brien 	buf[1] = 0;
1232a55deb1SDavid E. O'Brien 	if (!isalnum(c) && c != '.' && c != '_')
1242a55deb1SDavid E. O'Brien 		return c;
1252a55deb1SDavid E. O'Brien 
1262a55deb1SDavid E. O'Brien 	*bp++ = c;
1272a55deb1SDavid E. O'Brien 	if (isalpha(c) || c == '_') {	/* it's a varname */
1282a55deb1SDavid E. O'Brien 		for ( ; (c = input()) != 0; ) {
1292a55deb1SDavid E. O'Brien 			if (bp-buf >= sz)
130addad6afSRong-En Fan 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1312a55deb1SDavid E. O'Brien 					FATAL( "out of space for name %.10s...", buf );
1322a55deb1SDavid E. O'Brien 			if (isalnum(c) || c == '_')
1332a55deb1SDavid E. O'Brien 				*bp++ = c;
1342a55deb1SDavid E. O'Brien 			else {
1352a55deb1SDavid E. O'Brien 				*bp = 0;
1362a55deb1SDavid E. O'Brien 				unput(c);
1372a55deb1SDavid E. O'Brien 				break;
1382a55deb1SDavid E. O'Brien 			}
1392a55deb1SDavid E. O'Brien 		}
1402a55deb1SDavid E. O'Brien 		*bp = 0;
141007c6572SDag-Erling Smørgrav 		retc = 'a';	/* alphanumeric */
142c263f9bfSRuslan Ermilov 	} else {	/* maybe it's a number, but could be . */
1432a55deb1SDavid E. O'Brien 		char *rem;
1442a55deb1SDavid E. O'Brien 		/* read input until can't be a number */
1452a55deb1SDavid E. O'Brien 		for ( ; (c = input()) != 0; ) {
1462a55deb1SDavid E. O'Brien 			if (bp-buf >= sz)
147addad6afSRong-En Fan 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1482a55deb1SDavid E. O'Brien 					FATAL( "out of space for number %.10s...", buf );
1492a55deb1SDavid E. O'Brien 			if (isdigit(c) || c == 'e' || c == 'E'
1502a55deb1SDavid E. O'Brien 			  || c == '.' || c == '+' || c == '-')
1512a55deb1SDavid E. O'Brien 				*bp++ = c;
1522a55deb1SDavid E. O'Brien 			else {
1532a55deb1SDavid E. O'Brien 				unput(c);
1542a55deb1SDavid E. O'Brien 				break;
1552a55deb1SDavid E. O'Brien 			}
1562a55deb1SDavid E. O'Brien 		}
1572a55deb1SDavid E. O'Brien 		*bp = 0;
1582a55deb1SDavid E. O'Brien 		strtod(buf, &rem);	/* parse the number */
159007c6572SDag-Erling Smørgrav 		if (rem == buf) {	/* it wasn't a valid number at all */
160c263f9bfSRuslan Ermilov 			buf[1] = 0;	/* return one character as token */
161f39dd6a9SWarner Losh 			retc = (uschar)buf[0];	/* character is its own type */
162c263f9bfSRuslan Ermilov 			unputstr(rem+1); /* put rest back for later */
163007c6572SDag-Erling Smørgrav 		} else {	/* some prefix was a number */
164c263f9bfSRuslan Ermilov 			unputstr(rem);	/* put rest back for later */
165c263f9bfSRuslan Ermilov 			rem[0] = 0;	/* truncate buf after number part */
166c263f9bfSRuslan Ermilov 			retc = '0';	/* type is number */
167007c6572SDag-Erling Smørgrav 		}
1682a55deb1SDavid E. O'Brien 	}
1692a55deb1SDavid E. O'Brien 	*pbuf = buf;
1702a55deb1SDavid E. O'Brien 	*psz = sz;
171007c6572SDag-Erling Smørgrav 	return retc;
1722a55deb1SDavid E. O'Brien }
1732a55deb1SDavid E. O'Brien 
/* Scanners called from yylex(); defined further down in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags consulted (and cleared) at the top of yylex(). */
bool	sc	= false;	/* true => return a } right now */
bool	reg	= false;	/* true => return a REGEXPR now */

yylex(void)1802a55deb1SDavid E. O'Brien int yylex(void)
1812a55deb1SDavid E. O'Brien {
1822a55deb1SDavid E. O'Brien 	int c;
18310ce5b99SWarner Losh 	static char *buf = NULL;
184addad6afSRong-En Fan 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
1852a55deb1SDavid E. O'Brien 
18610ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
1872a55deb1SDavid E. O'Brien 		FATAL( "out of space in yylex" );
1882a55deb1SDavid E. O'Brien 	if (sc) {
189f39dd6a9SWarner Losh 		sc = false;
1902a55deb1SDavid E. O'Brien 		RET('}');
1912a55deb1SDavid E. O'Brien 	}
1922a55deb1SDavid E. O'Brien 	if (reg) {
193f39dd6a9SWarner Losh 		reg = false;
1942a55deb1SDavid E. O'Brien 		return regexpr();
1952a55deb1SDavid E. O'Brien 	}
1962a55deb1SDavid E. O'Brien 	for (;;) {
1972a55deb1SDavid E. O'Brien 		c = gettok(&buf, &bufsize);
1982a55deb1SDavid E. O'Brien 		if (c == 0)
1992a55deb1SDavid E. O'Brien 			return 0;
2002a55deb1SDavid E. O'Brien 		if (isalpha(c) || c == '_')
2012a55deb1SDavid E. O'Brien 			return word(buf);
202007c6572SDag-Erling Smørgrav 		if (isdigit(c)) {
203f39dd6a9SWarner Losh 			char *cp = tostring(buf);
204f39dd6a9SWarner Losh 			double result;
205f39dd6a9SWarner Losh 
206f39dd6a9SWarner Losh 			if (is_number(cp, & result))
207f39dd6a9SWarner Losh 				yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
208f39dd6a9SWarner Losh 			else
209f39dd6a9SWarner Losh 				yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
210f39dd6a9SWarner Losh 			free(cp);
2112a55deb1SDavid E. O'Brien 			/* should this also have STR set? */
2122a55deb1SDavid E. O'Brien 			RET(NUMBER);
2132a55deb1SDavid E. O'Brien 		}
2142a55deb1SDavid E. O'Brien 
2152a55deb1SDavid E. O'Brien 		yylval.i = c;
2162a55deb1SDavid E. O'Brien 		switch (c) {
2172a55deb1SDavid E. O'Brien 		case '\n':	/* {EOL} */
218b5253557SWarner Losh 			lineno++;
2192a55deb1SDavid E. O'Brien 			RET(NL);
2202a55deb1SDavid E. O'Brien 		case '\r':	/* assume \n is coming */
2212a55deb1SDavid E. O'Brien 		case ' ':	/* {WS}+ */
2222a55deb1SDavid E. O'Brien 		case '\t':
2232a55deb1SDavid E. O'Brien 			break;
2242a55deb1SDavid E. O'Brien 		case '#':	/* #.* strip comments */
2252a55deb1SDavid E. O'Brien 			while ((c = input()) != '\n' && c != 0)
2262a55deb1SDavid E. O'Brien 				;
2272a55deb1SDavid E. O'Brien 			unput(c);
2282a55deb1SDavid E. O'Brien 			break;
2292a55deb1SDavid E. O'Brien 		case ';':
2302a55deb1SDavid E. O'Brien 			RET(';');
2312a55deb1SDavid E. O'Brien 		case '\\':
2322a55deb1SDavid E. O'Brien 			if (peek() == '\n') {
2332a55deb1SDavid E. O'Brien 				input();
234b5253557SWarner Losh 				lineno++;
2352a55deb1SDavid E. O'Brien 			} else if (peek() == '\r') {
2362a55deb1SDavid E. O'Brien 				input(); input();	/* \n */
2372a55deb1SDavid E. O'Brien 				lineno++;
2382a55deb1SDavid E. O'Brien 			} else {
2392a55deb1SDavid E. O'Brien 				RET(c);
2402a55deb1SDavid E. O'Brien 			}
2412a55deb1SDavid E. O'Brien 			break;
2422a55deb1SDavid E. O'Brien 		case '&':
2432a55deb1SDavid E. O'Brien 			if (peek() == '&') {
2442a55deb1SDavid E. O'Brien 				input(); RET(AND);
2452a55deb1SDavid E. O'Brien 			} else
2462a55deb1SDavid E. O'Brien 				RET('&');
2472a55deb1SDavid E. O'Brien 		case '|':
2482a55deb1SDavid E. O'Brien 			if (peek() == '|') {
2492a55deb1SDavid E. O'Brien 				input(); RET(BOR);
2502a55deb1SDavid E. O'Brien 			} else
2512a55deb1SDavid E. O'Brien 				RET('|');
2522a55deb1SDavid E. O'Brien 		case '!':
2532a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2542a55deb1SDavid E. O'Brien 				input(); yylval.i = NE; RET(NE);
2552a55deb1SDavid E. O'Brien 			} else if (peek() == '~') {
2562a55deb1SDavid E. O'Brien 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
2572a55deb1SDavid E. O'Brien 			} else
2582a55deb1SDavid E. O'Brien 				RET(NOT);
2592a55deb1SDavid E. O'Brien 		case '~':
2602a55deb1SDavid E. O'Brien 			yylval.i = MATCH;
2612a55deb1SDavid E. O'Brien 			RET(MATCHOP);
2622a55deb1SDavid E. O'Brien 		case '<':
2632a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2642a55deb1SDavid E. O'Brien 				input(); yylval.i = LE; RET(LE);
2652a55deb1SDavid E. O'Brien 			} else {
2662a55deb1SDavid E. O'Brien 				yylval.i = LT; RET(LT);
2672a55deb1SDavid E. O'Brien 			}
2682a55deb1SDavid E. O'Brien 		case '=':
2692a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2702a55deb1SDavid E. O'Brien 				input(); yylval.i = EQ; RET(EQ);
2712a55deb1SDavid E. O'Brien 			} else {
2722a55deb1SDavid E. O'Brien 				yylval.i = ASSIGN; RET(ASGNOP);
2732a55deb1SDavid E. O'Brien 			}
2742a55deb1SDavid E. O'Brien 		case '>':
2752a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2762a55deb1SDavid E. O'Brien 				input(); yylval.i = GE; RET(GE);
2772a55deb1SDavid E. O'Brien 			} else if (peek() == '>') {
2782a55deb1SDavid E. O'Brien 				input(); yylval.i = APPEND; RET(APPEND);
2792a55deb1SDavid E. O'Brien 			} else {
2802a55deb1SDavid E. O'Brien 				yylval.i = GT; RET(GT);
2812a55deb1SDavid E. O'Brien 			}
2822a55deb1SDavid E. O'Brien 		case '+':
2832a55deb1SDavid E. O'Brien 			if (peek() == '+') {
2842a55deb1SDavid E. O'Brien 				input(); yylval.i = INCR; RET(INCR);
2852a55deb1SDavid E. O'Brien 			} else if (peek() == '=') {
2862a55deb1SDavid E. O'Brien 				input(); yylval.i = ADDEQ; RET(ASGNOP);
2872a55deb1SDavid E. O'Brien 			} else
2882a55deb1SDavid E. O'Brien 				RET('+');
2892a55deb1SDavid E. O'Brien 		case '-':
2902a55deb1SDavid E. O'Brien 			if (peek() == '-') {
2912a55deb1SDavid E. O'Brien 				input(); yylval.i = DECR; RET(DECR);
2922a55deb1SDavid E. O'Brien 			} else if (peek() == '=') {
2932a55deb1SDavid E. O'Brien 				input(); yylval.i = SUBEQ; RET(ASGNOP);
2942a55deb1SDavid E. O'Brien 			} else
2952a55deb1SDavid E. O'Brien 				RET('-');
2962a55deb1SDavid E. O'Brien 		case '*':
2972a55deb1SDavid E. O'Brien 			if (peek() == '=') {	/* *= */
2982a55deb1SDavid E. O'Brien 				input(); yylval.i = MULTEQ; RET(ASGNOP);
2992a55deb1SDavid E. O'Brien 			} else if (peek() == '*') {	/* ** or **= */
3002a55deb1SDavid E. O'Brien 				input();	/* eat 2nd * */
3012a55deb1SDavid E. O'Brien 				if (peek() == '=') {
3022a55deb1SDavid E. O'Brien 					input(); yylval.i = POWEQ; RET(ASGNOP);
3032a55deb1SDavid E. O'Brien 				} else {
3042a55deb1SDavid E. O'Brien 					RET(POWER);
3052a55deb1SDavid E. O'Brien 				}
3062a55deb1SDavid E. O'Brien 			} else
3072a55deb1SDavid E. O'Brien 				RET('*');
3082a55deb1SDavid E. O'Brien 		case '/':
3092a55deb1SDavid E. O'Brien 			RET('/');
3102a55deb1SDavid E. O'Brien 		case '%':
3112a55deb1SDavid E. O'Brien 			if (peek() == '=') {
3122a55deb1SDavid E. O'Brien 				input(); yylval.i = MODEQ; RET(ASGNOP);
3132a55deb1SDavid E. O'Brien 			} else
3142a55deb1SDavid E. O'Brien 				RET('%');
3152a55deb1SDavid E. O'Brien 		case '^':
3162a55deb1SDavid E. O'Brien 			if (peek() == '=') {
3172a55deb1SDavid E. O'Brien 				input(); yylval.i = POWEQ; RET(ASGNOP);
3182a55deb1SDavid E. O'Brien 			} else
3192a55deb1SDavid E. O'Brien 				RET(POWER);
3202a55deb1SDavid E. O'Brien 
3212a55deb1SDavid E. O'Brien 		case '$':
3222a55deb1SDavid E. O'Brien 			/* BUG: awkward, if not wrong */
3232a55deb1SDavid E. O'Brien 			c = gettok(&buf, &bufsize);
3242a55deb1SDavid E. O'Brien 			if (isalpha(c)) {
3252a55deb1SDavid E. O'Brien 				if (strcmp(buf, "NF") == 0) {	/* very special */
3262a55deb1SDavid E. O'Brien 					unputstr("(NF)");
3272a55deb1SDavid E. O'Brien 					RET(INDIRECT);
3282a55deb1SDavid E. O'Brien 				}
3292a55deb1SDavid E. O'Brien 				c = peek();
3302a55deb1SDavid E. O'Brien 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
3312a55deb1SDavid E. O'Brien 					unputstr(buf);
3322a55deb1SDavid E. O'Brien 					RET(INDIRECT);
3332a55deb1SDavid E. O'Brien 				}
3342a55deb1SDavid E. O'Brien 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
3352a55deb1SDavid E. O'Brien 				RET(IVAR);
336007c6572SDag-Erling Smørgrav 			} else if (c == 0) {	/*  */
337007c6572SDag-Erling Smørgrav 				SYNTAX( "unexpected end of input after $" );
338007c6572SDag-Erling Smørgrav 				RET(';');
3392a55deb1SDavid E. O'Brien 			} else {
3402a55deb1SDavid E. O'Brien 				unputstr(buf);
3412a55deb1SDavid E. O'Brien 				RET(INDIRECT);
3422a55deb1SDavid E. O'Brien 			}
3432a55deb1SDavid E. O'Brien 
3442a55deb1SDavid E. O'Brien 		case '}':
3452a55deb1SDavid E. O'Brien 			if (--bracecnt < 0)
3462a55deb1SDavid E. O'Brien 				SYNTAX( "extra }" );
347f39dd6a9SWarner Losh 			sc = true;
3482a55deb1SDavid E. O'Brien 			RET(';');
3492a55deb1SDavid E. O'Brien 		case ']':
3502a55deb1SDavid E. O'Brien 			if (--brackcnt < 0)
3512a55deb1SDavid E. O'Brien 				SYNTAX( "extra ]" );
3522a55deb1SDavid E. O'Brien 			RET(']');
3532a55deb1SDavid E. O'Brien 		case ')':
3542a55deb1SDavid E. O'Brien 			if (--parencnt < 0)
3552a55deb1SDavid E. O'Brien 				SYNTAX( "extra )" );
3562a55deb1SDavid E. O'Brien 			RET(')');
3572a55deb1SDavid E. O'Brien 		case '{':
3582a55deb1SDavid E. O'Brien 			bracecnt++;
3592a55deb1SDavid E. O'Brien 			RET('{');
3602a55deb1SDavid E. O'Brien 		case '[':
3612a55deb1SDavid E. O'Brien 			brackcnt++;
3622a55deb1SDavid E. O'Brien 			RET('[');
3632a55deb1SDavid E. O'Brien 		case '(':
3642a55deb1SDavid E. O'Brien 			parencnt++;
3652a55deb1SDavid E. O'Brien 			RET('(');
3662a55deb1SDavid E. O'Brien 
3672a55deb1SDavid E. O'Brien 		case '"':
3682a55deb1SDavid E. O'Brien 			return string();	/* BUG: should be like tran.c ? */
3692a55deb1SDavid E. O'Brien 
3702a55deb1SDavid E. O'Brien 		default:
3712a55deb1SDavid E. O'Brien 			RET(c);
3722a55deb1SDavid E. O'Brien 		}
3732a55deb1SDavid E. O'Brien 	}
3742a55deb1SDavid E. O'Brien }
3752a55deb1SDavid E. O'Brien 
376f32a6403SWarner Losh extern int runetochar(char *str, int c);
377f32a6403SWarner Losh 
string(void)3782a55deb1SDavid E. O'Brien int string(void)
3792a55deb1SDavid E. O'Brien {
3802a55deb1SDavid E. O'Brien 	int c, n;
3812a55deb1SDavid E. O'Brien 	char *s, *bp;
38210ce5b99SWarner Losh 	static char *buf = NULL;
3832a55deb1SDavid E. O'Brien 	static int bufsz = 500;
3842a55deb1SDavid E. O'Brien 
38510ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
3862a55deb1SDavid E. O'Brien 		FATAL("out of space for strings");
3872a55deb1SDavid E. O'Brien 	for (bp = buf; (c = input()) != '"'; ) {
388addad6afSRong-En Fan 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
3892a55deb1SDavid E. O'Brien 			FATAL("out of space for string %.10s...", buf);
3902a55deb1SDavid E. O'Brien 		switch (c) {
3912a55deb1SDavid E. O'Brien 		case '\n':
3922a55deb1SDavid E. O'Brien 		case '\r':
3932a55deb1SDavid E. O'Brien 		case 0:
394b5253557SWarner Losh 			*bp = '\0';
3952a55deb1SDavid E. O'Brien 			SYNTAX( "non-terminated string %.10s...", buf );
396007c6572SDag-Erling Smørgrav 			if (c == 0)	/* hopeless */
397007c6572SDag-Erling Smørgrav 				FATAL( "giving up" );
398b5253557SWarner Losh 			lineno++;
3992a55deb1SDavid E. O'Brien 			break;
4002a55deb1SDavid E. O'Brien 		case '\\':
4012a55deb1SDavid E. O'Brien 			c = input();
4022a55deb1SDavid E. O'Brien 			switch (c) {
403f39dd6a9SWarner Losh 			case '\n': break;
4042a55deb1SDavid E. O'Brien 			case '"': *bp++ = '"'; break;
4052a55deb1SDavid E. O'Brien 			case 'n': *bp++ = '\n'; break;
4062a55deb1SDavid E. O'Brien 			case 't': *bp++ = '\t'; break;
4072a55deb1SDavid E. O'Brien 			case 'f': *bp++ = '\f'; break;
4082a55deb1SDavid E. O'Brien 			case 'r': *bp++ = '\r'; break;
4092a55deb1SDavid E. O'Brien 			case 'b': *bp++ = '\b'; break;
4102a55deb1SDavid E. O'Brien 			case 'v': *bp++ = '\v'; break;
411f39dd6a9SWarner Losh 			case 'a': *bp++ = '\a'; break;
4122a55deb1SDavid E. O'Brien 			case '\\': *bp++ = '\\'; break;
4132a55deb1SDavid E. O'Brien 
4142a55deb1SDavid E. O'Brien 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
4152a55deb1SDavid E. O'Brien 			case '3': case '4': case '5': case '6': case '7':
4162a55deb1SDavid E. O'Brien 				n = c - '0';
4172a55deb1SDavid E. O'Brien 				if ((c = peek()) >= '0' && c < '8') {
4182a55deb1SDavid E. O'Brien 					n = 8 * n + input() - '0';
4192a55deb1SDavid E. O'Brien 					if ((c = peek()) >= '0' && c < '8')
4202a55deb1SDavid E. O'Brien 						n = 8 * n + input() - '0';
4212a55deb1SDavid E. O'Brien 				}
4222a55deb1SDavid E. O'Brien 				*bp++ = n;
4232a55deb1SDavid E. O'Brien 				break;
4242a55deb1SDavid E. O'Brien 
425f32a6403SWarner Losh 			case 'x':	/* hex  \x0-9a-fA-F (exactly two) */
426f32a6403SWarner Losh 			    {
427f32a6403SWarner Losh 				int i;
428f32a6403SWarner Losh 
429f32a6403SWarner Losh 				if (!isxdigit(peek())) {
430f32a6403SWarner Losh 					unput(c);
4312a55deb1SDavid E. O'Brien 					break;
4322a55deb1SDavid E. O'Brien 				}
433f32a6403SWarner Losh 				n = 0;
434f32a6403SWarner Losh 				for (i = 0; i < 2; i++) {
435f32a6403SWarner Losh 					c = input();
436f32a6403SWarner Losh 					if (c == 0)
437f32a6403SWarner Losh 						break;
438f32a6403SWarner Losh 					if (isxdigit(c)) {
439f32a6403SWarner Losh 						c = tolower(c);
440f32a6403SWarner Losh 						n *= 16;
441f32a6403SWarner Losh 						if (isdigit(c))
442f32a6403SWarner Losh 							n += (c - '0');
443f32a6403SWarner Losh 						else
444f32a6403SWarner Losh 							n += 10 + (c - 'a');
445f32a6403SWarner Losh 					} else {
446b2376a5fSWarner Losh 						unput(c);
447f32a6403SWarner Losh 						break;
448f32a6403SWarner Losh 					}
449f32a6403SWarner Losh 				}
450f32a6403SWarner Losh 				if (i)
4512a55deb1SDavid E. O'Brien 					*bp++ = n;
4522a55deb1SDavid E. O'Brien 				break;
4532a55deb1SDavid E. O'Brien 			    }
4542a55deb1SDavid E. O'Brien 
455f32a6403SWarner Losh 			case 'u':	/* utf  \u0-9a-fA-F (1..8) */
456f32a6403SWarner Losh 			    {
457f32a6403SWarner Losh 				int i;
458f32a6403SWarner Losh 
459f32a6403SWarner Losh 				n = 0;
460f32a6403SWarner Losh 				for (i = 0; i < 8; i++) {
461f32a6403SWarner Losh 					c = input();
462f32a6403SWarner Losh 					if (!isxdigit(c) || c == 0)
463f32a6403SWarner Losh 						break;
464f32a6403SWarner Losh 					c = tolower(c);
465f32a6403SWarner Losh 					n *= 16;
466f32a6403SWarner Losh 					if (isdigit(c))
467f32a6403SWarner Losh 						n += (c - '0');
468f32a6403SWarner Losh 					else
469f32a6403SWarner Losh 						n += 10 + (c - 'a');
470f32a6403SWarner Losh 				}
471f32a6403SWarner Losh 				unput(c);
472f32a6403SWarner Losh 				bp += runetochar(bp, n);
473f32a6403SWarner Losh 				break;
474f32a6403SWarner Losh 			    }
475f32a6403SWarner Losh 
4762a55deb1SDavid E. O'Brien 			default:
4772a55deb1SDavid E. O'Brien 				*bp++ = c;
4782a55deb1SDavid E. O'Brien 				break;
4792a55deb1SDavid E. O'Brien 			}
4802a55deb1SDavid E. O'Brien 			break;
4812a55deb1SDavid E. O'Brien 		default:
4822a55deb1SDavid E. O'Brien 			*bp++ = c;
4832a55deb1SDavid E. O'Brien 			break;
4842a55deb1SDavid E. O'Brien 		}
4852a55deb1SDavid E. O'Brien 	}
4862a55deb1SDavid E. O'Brien 	*bp = 0;
4872a55deb1SDavid E. O'Brien 	s = tostring(buf);
488f39dd6a9SWarner Losh 	*bp++ = ' '; *bp++ = '\0';
4892a55deb1SDavid E. O'Brien 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
490f39dd6a9SWarner Losh 	free(s);
4912a55deb1SDavid E. O'Brien 	RET(STRING);
4922a55deb1SDavid E. O'Brien }
4932a55deb1SDavid E. O'Brien 
4942a55deb1SDavid E. O'Brien 
binsearch(char * w,const Keyword * kp,int n)495f39dd6a9SWarner Losh static int binsearch(char *w, const Keyword *kp, int n)
4962a55deb1SDavid E. O'Brien {
4972a55deb1SDavid E. O'Brien 	int cond, low, mid, high;
4982a55deb1SDavid E. O'Brien 
4992a55deb1SDavid E. O'Brien 	low = 0;
5002a55deb1SDavid E. O'Brien 	high = n - 1;
5012a55deb1SDavid E. O'Brien 	while (low <= high) {
5022a55deb1SDavid E. O'Brien 		mid = (low + high) / 2;
5032a55deb1SDavid E. O'Brien 		if ((cond = strcmp(w, kp[mid].word)) < 0)
5042a55deb1SDavid E. O'Brien 			high = mid - 1;
5052a55deb1SDavid E. O'Brien 		else if (cond > 0)
5062a55deb1SDavid E. O'Brien 			low = mid + 1;
5072a55deb1SDavid E. O'Brien 		else
5082a55deb1SDavid E. O'Brien 			return mid;
5092a55deb1SDavid E. O'Brien 	}
5102a55deb1SDavid E. O'Brien 	return -1;
5112a55deb1SDavid E. O'Brien }
5122a55deb1SDavid E. O'Brien 
word(char * w)5132a55deb1SDavid E. O'Brien int word(char *w)
5142a55deb1SDavid E. O'Brien {
515f39dd6a9SWarner Losh 	const Keyword *kp;
5162a55deb1SDavid E. O'Brien 	int c, n;
5172a55deb1SDavid E. O'Brien 
5182a55deb1SDavid E. O'Brien 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
5192a55deb1SDavid E. O'Brien 	if (n != -1) {	/* found in table */
52011169460SAlex Richardson 		kp = keywords + n;
5212a55deb1SDavid E. O'Brien 		yylval.i = kp->sub;
5222a55deb1SDavid E. O'Brien 		switch (kp->type) {	/* special handling */
523addad6afSRong-En Fan 		case BLTIN:
524addad6afSRong-En Fan 			if (kp->sub == FSYSTEM && safe)
5252a55deb1SDavid E. O'Brien 				SYNTAX( "system is unsafe" );
5262a55deb1SDavid E. O'Brien 			RET(kp->type);
5272a55deb1SDavid E. O'Brien 		case FUNC:
5282a55deb1SDavid E. O'Brien 			if (infunc)
5292a55deb1SDavid E. O'Brien 				SYNTAX( "illegal nested function" );
5302a55deb1SDavid E. O'Brien 			RET(kp->type);
5312a55deb1SDavid E. O'Brien 		case RETURN:
5322a55deb1SDavid E. O'Brien 			if (!infunc)
5332a55deb1SDavid E. O'Brien 				SYNTAX( "return not in function" );
5342a55deb1SDavid E. O'Brien 			RET(kp->type);
5352a55deb1SDavid E. O'Brien 		case VARNF:
5362a55deb1SDavid E. O'Brien 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
5372a55deb1SDavid E. O'Brien 			RET(VARNF);
5382a55deb1SDavid E. O'Brien 		default:
5392a55deb1SDavid E. O'Brien 			RET(kp->type);
5402a55deb1SDavid E. O'Brien 		}
5412a55deb1SDavid E. O'Brien 	}
5422a55deb1SDavid E. O'Brien 	c = peek();	/* look for '(' */
5432a55deb1SDavid E. O'Brien 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
5442a55deb1SDavid E. O'Brien 		yylval.i = n;
5452a55deb1SDavid E. O'Brien 		RET(ARG);
5462a55deb1SDavid E. O'Brien 	} else {
5472a55deb1SDavid E. O'Brien 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
5482a55deb1SDavid E. O'Brien 		if (c == '(') {
5492a55deb1SDavid E. O'Brien 			RET(CALL);
5502a55deb1SDavid E. O'Brien 		} else {
5512a55deb1SDavid E. O'Brien 			RET(VAR);
5522a55deb1SDavid E. O'Brien 		}
5532a55deb1SDavid E. O'Brien 	}
5542a55deb1SDavid E. O'Brien }
5552a55deb1SDavid E. O'Brien 
startreg(void)556813da98dSDavid E. O'Brien void startreg(void)	/* next call to yylex will return a regular expression */
5572a55deb1SDavid E. O'Brien {
558f39dd6a9SWarner Losh 	reg = true;
5592a55deb1SDavid E. O'Brien }
5602a55deb1SDavid E. O'Brien 
regexpr(void)5612a55deb1SDavid E. O'Brien int regexpr(void)
5622a55deb1SDavid E. O'Brien {
5632a55deb1SDavid E. O'Brien 	int c;
56410ce5b99SWarner Losh 	static char *buf = NULL;
5652a55deb1SDavid E. O'Brien 	static int bufsz = 500;
5662a55deb1SDavid E. O'Brien 	char *bp;
5672a55deb1SDavid E. O'Brien 
56810ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
569f32a6403SWarner Losh 		FATAL("out of space for reg expr");
5702a55deb1SDavid E. O'Brien 	bp = buf;
5712a55deb1SDavid E. O'Brien 	for ( ; (c = input()) != '/' && c != 0; ) {
572addad6afSRong-En Fan 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
5732a55deb1SDavid E. O'Brien 			FATAL("out of space for reg expr %.10s...", buf);
5742a55deb1SDavid E. O'Brien 		if (c == '\n') {
575b5253557SWarner Losh 			*bp = '\0';
5762a55deb1SDavid E. O'Brien 			SYNTAX( "newline in regular expression %.10s...", buf );
5772a55deb1SDavid E. O'Brien 			unput('\n');
5782a55deb1SDavid E. O'Brien 			break;
5792a55deb1SDavid E. O'Brien 		} else if (c == '\\') {
5802a55deb1SDavid E. O'Brien 			*bp++ = '\\';
5812a55deb1SDavid E. O'Brien 			*bp++ = input();
5822a55deb1SDavid E. O'Brien 		} else {
5832a55deb1SDavid E. O'Brien 			*bp++ = c;
5842a55deb1SDavid E. O'Brien 		}
5852a55deb1SDavid E. O'Brien 	}
5862a55deb1SDavid E. O'Brien 	*bp = 0;
58788b8d487SRuslan Ermilov 	if (c == 0)
58888b8d487SRuslan Ermilov 		SYNTAX("non-terminated regular expression %.10s...", buf);
5892a55deb1SDavid E. O'Brien 	yylval.s = tostring(buf);
5902a55deb1SDavid E. O'Brien 	unput('/');
5912a55deb1SDavid E. O'Brien 	RET(REGEXPR);
5922a55deb1SDavid E. O'Brien }
5932a55deb1SDavid E. O'Brien 
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* ring of the most recently read characters (written by input()) */
char	*ep = ebuf;		/* next slot in ebuf; input() advances it, unput() steps it back */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the newest pushed-back character */
FILE	*yyin = NULL;

input(void)6022a55deb1SDavid E. O'Brien int input(void)	/* get next lexical input character */
6032a55deb1SDavid E. O'Brien {
6042a55deb1SDavid E. O'Brien 	int c;
6052a55deb1SDavid E. O'Brien 	extern char *lexprog;
6062a55deb1SDavid E. O'Brien 
6072a55deb1SDavid E. O'Brien 	if (yysptr > yysbuf)
6082e454f23SRuslan Ermilov 		c = (uschar)*--yysptr;
6092a55deb1SDavid E. O'Brien 	else if (lexprog != NULL) {	/* awk '...' */
6102e454f23SRuslan Ermilov 		if ((c = (uschar)*lexprog) != 0)
6112a55deb1SDavid E. O'Brien 			lexprog++;
6122a55deb1SDavid E. O'Brien 	} else				/* awk -f ... */
6132a55deb1SDavid E. O'Brien 		c = pgetc();
614b5253557SWarner Losh 	if (c == EOF)
6152a55deb1SDavid E. O'Brien 		c = 0;
6162a55deb1SDavid E. O'Brien 	if (ep >= ebuf + sizeof ebuf)
6172a55deb1SDavid E. O'Brien 		ep = ebuf;
618b5253557SWarner Losh 	*ep = c;
619b5253557SWarner Losh 	if (c != 0) {
620b5253557SWarner Losh 		ep++;
621b5253557SWarner Losh 	}
622b5253557SWarner Losh 	return (c);
6232a55deb1SDavid E. O'Brien }
6242a55deb1SDavid E. O'Brien 
unput(int c)6252a55deb1SDavid E. O'Brien void unput(int c)	/* put lexical character back on input */
6262a55deb1SDavid E. O'Brien {
6272a55deb1SDavid E. O'Brien 	if (yysptr >= yysbuf + sizeof(yysbuf))
6282a55deb1SDavid E. O'Brien 		FATAL("pushed back too much: %.20s...", yysbuf);
6292a55deb1SDavid E. O'Brien 	*yysptr++ = c;
6302a55deb1SDavid E. O'Brien 	if (--ep < ebuf)
6312a55deb1SDavid E. O'Brien 		ep = ebuf + sizeof(ebuf) - 1;
6322a55deb1SDavid E. O'Brien }
6332a55deb1SDavid E. O'Brien 
/*
 * unputstr - push the whole string s back onto the input.
 * Characters are pushed last-first so that input() returns them in their
 * original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t: no narrowing to int, no reliance on wraparound */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}