/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
242a55deb1SDavid E. O'Brien
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "awk.h"
#include "awkgram.tab.h"
312a55deb1SDavid E. O'Brien
322a55deb1SDavid E. O'Brien extern YYSTYPE yylval;
33f39dd6a9SWarner Losh extern bool infunc;
342a55deb1SDavid E. O'Brien
int	lineno	= 1;		/* current input line; bumped on each newline seen */
int	bracecnt = 0;		/* '{' seen minus '}' seen */
int	brackcnt  = 0;		/* '[' seen minus ']' seen */
int	parencnt = 0;		/* '(' seen minus ')' seen */
392a55deb1SDavid E. O'Brien
/* One entry of the reserved-word table that word() looks names up in. */
typedef struct Keyword {
	const char *word;	/* spelling of the keyword or built-in */
	int	sub;		/* stored in yylval.i when the word is recognized */
	int	type;		/* token type returned to the parser */
} Keyword;
452a55deb1SDavid E. O'Brien
46f39dd6a9SWarner Losh const Keyword keywords[] = { /* keep sorted: binary searched */
472a55deb1SDavid E. O'Brien { "BEGIN", XBEGIN, XBEGIN },
482a55deb1SDavid E. O'Brien { "END", XEND, XEND },
492a55deb1SDavid E. O'Brien { "NF", VARNF, VARNF },
50eb690a05SWarner Losh { "and", FAND, BLTIN },
512a55deb1SDavid E. O'Brien { "atan2", FATAN, BLTIN },
522a55deb1SDavid E. O'Brien { "break", BREAK, BREAK },
532a55deb1SDavid E. O'Brien { "close", CLOSE, CLOSE },
54eb690a05SWarner Losh { "compl", FCOMPL, BLTIN },
552a55deb1SDavid E. O'Brien { "continue", CONTINUE, CONTINUE },
562a55deb1SDavid E. O'Brien { "cos", FCOS, BLTIN },
572a55deb1SDavid E. O'Brien { "delete", DELETE, DELETE },
582a55deb1SDavid E. O'Brien { "do", DO, DO },
592a55deb1SDavid E. O'Brien { "else", ELSE, ELSE },
602a55deb1SDavid E. O'Brien { "exit", EXIT, EXIT },
612a55deb1SDavid E. O'Brien { "exp", FEXP, BLTIN },
622a55deb1SDavid E. O'Brien { "fflush", FFLUSH, BLTIN },
632a55deb1SDavid E. O'Brien { "for", FOR, FOR },
642a55deb1SDavid E. O'Brien { "func", FUNC, FUNC },
652a55deb1SDavid E. O'Brien { "function", FUNC, FUNC },
66eb690a05SWarner Losh { "gensub", GENSUB, GENSUB },
672a55deb1SDavid E. O'Brien { "getline", GETLINE, GETLINE },
682a55deb1SDavid E. O'Brien { "gsub", GSUB, GSUB },
692a55deb1SDavid E. O'Brien { "if", IF, IF },
702a55deb1SDavid E. O'Brien { "in", IN, IN },
712a55deb1SDavid E. O'Brien { "index", INDEX, INDEX },
722a55deb1SDavid E. O'Brien { "int", FINT, BLTIN },
732a55deb1SDavid E. O'Brien { "length", FLENGTH, BLTIN },
742a55deb1SDavid E. O'Brien { "log", FLOG, BLTIN },
75eb690a05SWarner Losh { "lshift", FLSHIFT, BLTIN },
762a55deb1SDavid E. O'Brien { "match", MATCHFCN, MATCHFCN },
77*8d457988SWarner Losh { "mktime", FMKTIME, BLTIN },
782a55deb1SDavid E. O'Brien { "next", NEXT, NEXT },
792a55deb1SDavid E. O'Brien { "nextfile", NEXTFILE, NEXTFILE },
80eb690a05SWarner Losh { "or", FFOR, BLTIN },
812a55deb1SDavid E. O'Brien { "print", PRINT, PRINT },
822a55deb1SDavid E. O'Brien { "printf", PRINTF, PRINTF },
832a55deb1SDavid E. O'Brien { "rand", FRAND, BLTIN },
842a55deb1SDavid E. O'Brien { "return", RETURN, RETURN },
85eb690a05SWarner Losh { "rshift", FRSHIFT, BLTIN },
862a55deb1SDavid E. O'Brien { "sin", FSIN, BLTIN },
872a55deb1SDavid E. O'Brien { "split", SPLIT, SPLIT },
882a55deb1SDavid E. O'Brien { "sprintf", SPRINTF, SPRINTF },
892a55deb1SDavid E. O'Brien { "sqrt", FSQRT, BLTIN },
902a55deb1SDavid E. O'Brien { "srand", FSRAND, BLTIN },
91eb690a05SWarner Losh { "strftime", FSTRFTIME, BLTIN },
922a55deb1SDavid E. O'Brien { "sub", SUB, SUB },
932a55deb1SDavid E. O'Brien { "substr", SUBSTR, SUBSTR },
942a55deb1SDavid E. O'Brien { "system", FSYSTEM, BLTIN },
95eb690a05SWarner Losh { "systime", FSYSTIME, BLTIN },
962a55deb1SDavid E. O'Brien { "tolower", FTOLOWER, BLTIN },
972a55deb1SDavid E. O'Brien { "toupper", FTOUPPER, BLTIN },
982a55deb1SDavid E. O'Brien { "while", WHILE, WHILE },
99eb690a05SWarner Losh { "xor", FXOR, BLTIN },
1002a55deb1SDavid E. O'Brien };
1012a55deb1SDavid E. O'Brien
/*
 * RET(x) - return token x from the enclosing function, first printing its
 * name when the dbg flag is set.  x may be evaluated twice, so it must not
 * have side effects.  Wrapped in do/while(0) so it acts as one statement.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
1032a55deb1SDavid E. O'Brien
/* peek - return the next input character without consuming it */
static int peek(void)
{
	int c = input();

	unput(c);
	return c;
}
1102a55deb1SDavid E. O'Brien
gettok(char ** pbuf,int * psz)111f39dd6a9SWarner Losh static int gettok(char **pbuf, int *psz) /* get next input token */
1122a55deb1SDavid E. O'Brien {
113007c6572SDag-Erling Smørgrav int c, retc;
1142a55deb1SDavid E. O'Brien char *buf = *pbuf;
1152a55deb1SDavid E. O'Brien int sz = *psz;
1162a55deb1SDavid E. O'Brien char *bp = buf;
1172a55deb1SDavid E. O'Brien
1182a55deb1SDavid E. O'Brien c = input();
1192a55deb1SDavid E. O'Brien if (c == 0)
1202a55deb1SDavid E. O'Brien return 0;
1212a55deb1SDavid E. O'Brien buf[0] = c;
1222a55deb1SDavid E. O'Brien buf[1] = 0;
1232a55deb1SDavid E. O'Brien if (!isalnum(c) && c != '.' && c != '_')
1242a55deb1SDavid E. O'Brien return c;
1252a55deb1SDavid E. O'Brien
1262a55deb1SDavid E. O'Brien *bp++ = c;
1272a55deb1SDavid E. O'Brien if (isalpha(c) || c == '_') { /* it's a varname */
1282a55deb1SDavid E. O'Brien for ( ; (c = input()) != 0; ) {
1292a55deb1SDavid E. O'Brien if (bp-buf >= sz)
130addad6afSRong-En Fan if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1312a55deb1SDavid E. O'Brien FATAL( "out of space for name %.10s...", buf );
1322a55deb1SDavid E. O'Brien if (isalnum(c) || c == '_')
1332a55deb1SDavid E. O'Brien *bp++ = c;
1342a55deb1SDavid E. O'Brien else {
1352a55deb1SDavid E. O'Brien *bp = 0;
1362a55deb1SDavid E. O'Brien unput(c);
1372a55deb1SDavid E. O'Brien break;
1382a55deb1SDavid E. O'Brien }
1392a55deb1SDavid E. O'Brien }
1402a55deb1SDavid E. O'Brien *bp = 0;
141007c6572SDag-Erling Smørgrav retc = 'a'; /* alphanumeric */
142c263f9bfSRuslan Ermilov } else { /* maybe it's a number, but could be . */
1432a55deb1SDavid E. O'Brien char *rem;
1442a55deb1SDavid E. O'Brien /* read input until can't be a number */
1452a55deb1SDavid E. O'Brien for ( ; (c = input()) != 0; ) {
1462a55deb1SDavid E. O'Brien if (bp-buf >= sz)
147addad6afSRong-En Fan if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1482a55deb1SDavid E. O'Brien FATAL( "out of space for number %.10s...", buf );
1492a55deb1SDavid E. O'Brien if (isdigit(c) || c == 'e' || c == 'E'
1502a55deb1SDavid E. O'Brien || c == '.' || c == '+' || c == '-')
1512a55deb1SDavid E. O'Brien *bp++ = c;
1522a55deb1SDavid E. O'Brien else {
1532a55deb1SDavid E. O'Brien unput(c);
1542a55deb1SDavid E. O'Brien break;
1552a55deb1SDavid E. O'Brien }
1562a55deb1SDavid E. O'Brien }
1572a55deb1SDavid E. O'Brien *bp = 0;
1582a55deb1SDavid E. O'Brien strtod(buf, &rem); /* parse the number */
159007c6572SDag-Erling Smørgrav if (rem == buf) { /* it wasn't a valid number at all */
160c263f9bfSRuslan Ermilov buf[1] = 0; /* return one character as token */
161f39dd6a9SWarner Losh retc = (uschar)buf[0]; /* character is its own type */
162c263f9bfSRuslan Ermilov unputstr(rem+1); /* put rest back for later */
163007c6572SDag-Erling Smørgrav } else { /* some prefix was a number */
164c263f9bfSRuslan Ermilov unputstr(rem); /* put rest back for later */
165c263f9bfSRuslan Ermilov rem[0] = 0; /* truncate buf after number part */
166c263f9bfSRuslan Ermilov retc = '0'; /* type is number */
167007c6572SDag-Erling Smørgrav }
1682a55deb1SDavid E. O'Brien }
1692a55deb1SDavid E. O'Brien *pbuf = buf;
1702a55deb1SDavid E. O'Brien *psz = sz;
171007c6572SDag-Erling Smørgrav return retc;
1722a55deb1SDavid E. O'Brien }
1732a55deb1SDavid E. O'Brien
/* Forward declarations of the token scanners defined later in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined (and cleared) at the top of yylex(). */
bool	sc	= false;	/* true => return a } right now */
bool	reg	= false;	/* true => return a REGEXPR now */
1792a55deb1SDavid E. O'Brien
yylex(void)1802a55deb1SDavid E. O'Brien int yylex(void)
1812a55deb1SDavid E. O'Brien {
1822a55deb1SDavid E. O'Brien int c;
18310ce5b99SWarner Losh static char *buf = NULL;
184addad6afSRong-En Fan static int bufsize = 5; /* BUG: setting this small causes core dump! */
1852a55deb1SDavid E. O'Brien
18610ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
1872a55deb1SDavid E. O'Brien FATAL( "out of space in yylex" );
1882a55deb1SDavid E. O'Brien if (sc) {
189f39dd6a9SWarner Losh sc = false;
1902a55deb1SDavid E. O'Brien RET('}');
1912a55deb1SDavid E. O'Brien }
1922a55deb1SDavid E. O'Brien if (reg) {
193f39dd6a9SWarner Losh reg = false;
1942a55deb1SDavid E. O'Brien return regexpr();
1952a55deb1SDavid E. O'Brien }
1962a55deb1SDavid E. O'Brien for (;;) {
1972a55deb1SDavid E. O'Brien c = gettok(&buf, &bufsize);
1982a55deb1SDavid E. O'Brien if (c == 0)
1992a55deb1SDavid E. O'Brien return 0;
2002a55deb1SDavid E. O'Brien if (isalpha(c) || c == '_')
2012a55deb1SDavid E. O'Brien return word(buf);
202007c6572SDag-Erling Smørgrav if (isdigit(c)) {
203f39dd6a9SWarner Losh char *cp = tostring(buf);
204f39dd6a9SWarner Losh double result;
205f39dd6a9SWarner Losh
206f39dd6a9SWarner Losh if (is_number(cp, & result))
207f39dd6a9SWarner Losh yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
208f39dd6a9SWarner Losh else
209f39dd6a9SWarner Losh yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
210f39dd6a9SWarner Losh free(cp);
2112a55deb1SDavid E. O'Brien /* should this also have STR set? */
2122a55deb1SDavid E. O'Brien RET(NUMBER);
2132a55deb1SDavid E. O'Brien }
2142a55deb1SDavid E. O'Brien
2152a55deb1SDavid E. O'Brien yylval.i = c;
2162a55deb1SDavid E. O'Brien switch (c) {
2172a55deb1SDavid E. O'Brien case '\n': /* {EOL} */
218b5253557SWarner Losh lineno++;
2192a55deb1SDavid E. O'Brien RET(NL);
2202a55deb1SDavid E. O'Brien case '\r': /* assume \n is coming */
2212a55deb1SDavid E. O'Brien case ' ': /* {WS}+ */
2222a55deb1SDavid E. O'Brien case '\t':
2232a55deb1SDavid E. O'Brien break;
2242a55deb1SDavid E. O'Brien case '#': /* #.* strip comments */
2252a55deb1SDavid E. O'Brien while ((c = input()) != '\n' && c != 0)
2262a55deb1SDavid E. O'Brien ;
2272a55deb1SDavid E. O'Brien unput(c);
2282a55deb1SDavid E. O'Brien break;
2292a55deb1SDavid E. O'Brien case ';':
2302a55deb1SDavid E. O'Brien RET(';');
2312a55deb1SDavid E. O'Brien case '\\':
2322a55deb1SDavid E. O'Brien if (peek() == '\n') {
2332a55deb1SDavid E. O'Brien input();
234b5253557SWarner Losh lineno++;
2352a55deb1SDavid E. O'Brien } else if (peek() == '\r') {
2362a55deb1SDavid E. O'Brien input(); input(); /* \n */
2372a55deb1SDavid E. O'Brien lineno++;
2382a55deb1SDavid E. O'Brien } else {
2392a55deb1SDavid E. O'Brien RET(c);
2402a55deb1SDavid E. O'Brien }
2412a55deb1SDavid E. O'Brien break;
2422a55deb1SDavid E. O'Brien case '&':
2432a55deb1SDavid E. O'Brien if (peek() == '&') {
2442a55deb1SDavid E. O'Brien input(); RET(AND);
2452a55deb1SDavid E. O'Brien } else
2462a55deb1SDavid E. O'Brien RET('&');
2472a55deb1SDavid E. O'Brien case '|':
2482a55deb1SDavid E. O'Brien if (peek() == '|') {
2492a55deb1SDavid E. O'Brien input(); RET(BOR);
2502a55deb1SDavid E. O'Brien } else
2512a55deb1SDavid E. O'Brien RET('|');
2522a55deb1SDavid E. O'Brien case '!':
2532a55deb1SDavid E. O'Brien if (peek() == '=') {
2542a55deb1SDavid E. O'Brien input(); yylval.i = NE; RET(NE);
2552a55deb1SDavid E. O'Brien } else if (peek() == '~') {
2562a55deb1SDavid E. O'Brien input(); yylval.i = NOTMATCH; RET(MATCHOP);
2572a55deb1SDavid E. O'Brien } else
2582a55deb1SDavid E. O'Brien RET(NOT);
2592a55deb1SDavid E. O'Brien case '~':
2602a55deb1SDavid E. O'Brien yylval.i = MATCH;
2612a55deb1SDavid E. O'Brien RET(MATCHOP);
2622a55deb1SDavid E. O'Brien case '<':
2632a55deb1SDavid E. O'Brien if (peek() == '=') {
2642a55deb1SDavid E. O'Brien input(); yylval.i = LE; RET(LE);
2652a55deb1SDavid E. O'Brien } else {
2662a55deb1SDavid E. O'Brien yylval.i = LT; RET(LT);
2672a55deb1SDavid E. O'Brien }
2682a55deb1SDavid E. O'Brien case '=':
2692a55deb1SDavid E. O'Brien if (peek() == '=') {
2702a55deb1SDavid E. O'Brien input(); yylval.i = EQ; RET(EQ);
2712a55deb1SDavid E. O'Brien } else {
2722a55deb1SDavid E. O'Brien yylval.i = ASSIGN; RET(ASGNOP);
2732a55deb1SDavid E. O'Brien }
2742a55deb1SDavid E. O'Brien case '>':
2752a55deb1SDavid E. O'Brien if (peek() == '=') {
2762a55deb1SDavid E. O'Brien input(); yylval.i = GE; RET(GE);
2772a55deb1SDavid E. O'Brien } else if (peek() == '>') {
2782a55deb1SDavid E. O'Brien input(); yylval.i = APPEND; RET(APPEND);
2792a55deb1SDavid E. O'Brien } else {
2802a55deb1SDavid E. O'Brien yylval.i = GT; RET(GT);
2812a55deb1SDavid E. O'Brien }
2822a55deb1SDavid E. O'Brien case '+':
2832a55deb1SDavid E. O'Brien if (peek() == '+') {
2842a55deb1SDavid E. O'Brien input(); yylval.i = INCR; RET(INCR);
2852a55deb1SDavid E. O'Brien } else if (peek() == '=') {
2862a55deb1SDavid E. O'Brien input(); yylval.i = ADDEQ; RET(ASGNOP);
2872a55deb1SDavid E. O'Brien } else
2882a55deb1SDavid E. O'Brien RET('+');
2892a55deb1SDavid E. O'Brien case '-':
2902a55deb1SDavid E. O'Brien if (peek() == '-') {
2912a55deb1SDavid E. O'Brien input(); yylval.i = DECR; RET(DECR);
2922a55deb1SDavid E. O'Brien } else if (peek() == '=') {
2932a55deb1SDavid E. O'Brien input(); yylval.i = SUBEQ; RET(ASGNOP);
2942a55deb1SDavid E. O'Brien } else
2952a55deb1SDavid E. O'Brien RET('-');
2962a55deb1SDavid E. O'Brien case '*':
2972a55deb1SDavid E. O'Brien if (peek() == '=') { /* *= */
2982a55deb1SDavid E. O'Brien input(); yylval.i = MULTEQ; RET(ASGNOP);
2992a55deb1SDavid E. O'Brien } else if (peek() == '*') { /* ** or **= */
3002a55deb1SDavid E. O'Brien input(); /* eat 2nd * */
3012a55deb1SDavid E. O'Brien if (peek() == '=') {
3022a55deb1SDavid E. O'Brien input(); yylval.i = POWEQ; RET(ASGNOP);
3032a55deb1SDavid E. O'Brien } else {
3042a55deb1SDavid E. O'Brien RET(POWER);
3052a55deb1SDavid E. O'Brien }
3062a55deb1SDavid E. O'Brien } else
3072a55deb1SDavid E. O'Brien RET('*');
3082a55deb1SDavid E. O'Brien case '/':
3092a55deb1SDavid E. O'Brien RET('/');
3102a55deb1SDavid E. O'Brien case '%':
3112a55deb1SDavid E. O'Brien if (peek() == '=') {
3122a55deb1SDavid E. O'Brien input(); yylval.i = MODEQ; RET(ASGNOP);
3132a55deb1SDavid E. O'Brien } else
3142a55deb1SDavid E. O'Brien RET('%');
3152a55deb1SDavid E. O'Brien case '^':
3162a55deb1SDavid E. O'Brien if (peek() == '=') {
3172a55deb1SDavid E. O'Brien input(); yylval.i = POWEQ; RET(ASGNOP);
3182a55deb1SDavid E. O'Brien } else
3192a55deb1SDavid E. O'Brien RET(POWER);
3202a55deb1SDavid E. O'Brien
3212a55deb1SDavid E. O'Brien case '$':
3222a55deb1SDavid E. O'Brien /* BUG: awkward, if not wrong */
3232a55deb1SDavid E. O'Brien c = gettok(&buf, &bufsize);
3242a55deb1SDavid E. O'Brien if (isalpha(c)) {
3252a55deb1SDavid E. O'Brien if (strcmp(buf, "NF") == 0) { /* very special */
3262a55deb1SDavid E. O'Brien unputstr("(NF)");
3272a55deb1SDavid E. O'Brien RET(INDIRECT);
3282a55deb1SDavid E. O'Brien }
3292a55deb1SDavid E. O'Brien c = peek();
3302a55deb1SDavid E. O'Brien if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
3312a55deb1SDavid E. O'Brien unputstr(buf);
3322a55deb1SDavid E. O'Brien RET(INDIRECT);
3332a55deb1SDavid E. O'Brien }
3342a55deb1SDavid E. O'Brien yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
3352a55deb1SDavid E. O'Brien RET(IVAR);
336007c6572SDag-Erling Smørgrav } else if (c == 0) { /* */
337007c6572SDag-Erling Smørgrav SYNTAX( "unexpected end of input after $" );
338007c6572SDag-Erling Smørgrav RET(';');
3392a55deb1SDavid E. O'Brien } else {
3402a55deb1SDavid E. O'Brien unputstr(buf);
3412a55deb1SDavid E. O'Brien RET(INDIRECT);
3422a55deb1SDavid E. O'Brien }
3432a55deb1SDavid E. O'Brien
3442a55deb1SDavid E. O'Brien case '}':
3452a55deb1SDavid E. O'Brien if (--bracecnt < 0)
3462a55deb1SDavid E. O'Brien SYNTAX( "extra }" );
347f39dd6a9SWarner Losh sc = true;
3482a55deb1SDavid E. O'Brien RET(';');
3492a55deb1SDavid E. O'Brien case ']':
3502a55deb1SDavid E. O'Brien if (--brackcnt < 0)
3512a55deb1SDavid E. O'Brien SYNTAX( "extra ]" );
3522a55deb1SDavid E. O'Brien RET(']');
3532a55deb1SDavid E. O'Brien case ')':
3542a55deb1SDavid E. O'Brien if (--parencnt < 0)
3552a55deb1SDavid E. O'Brien SYNTAX( "extra )" );
3562a55deb1SDavid E. O'Brien RET(')');
3572a55deb1SDavid E. O'Brien case '{':
3582a55deb1SDavid E. O'Brien bracecnt++;
3592a55deb1SDavid E. O'Brien RET('{');
3602a55deb1SDavid E. O'Brien case '[':
3612a55deb1SDavid E. O'Brien brackcnt++;
3622a55deb1SDavid E. O'Brien RET('[');
3632a55deb1SDavid E. O'Brien case '(':
3642a55deb1SDavid E. O'Brien parencnt++;
3652a55deb1SDavid E. O'Brien RET('(');
3662a55deb1SDavid E. O'Brien
3672a55deb1SDavid E. O'Brien case '"':
3682a55deb1SDavid E. O'Brien return string(); /* BUG: should be like tran.c ? */
3692a55deb1SDavid E. O'Brien
3702a55deb1SDavid E. O'Brien default:
3712a55deb1SDavid E. O'Brien RET(c);
3722a55deb1SDavid E. O'Brien }
3732a55deb1SDavid E. O'Brien }
3742a55deb1SDavid E. O'Brien }
3752a55deb1SDavid E. O'Brien
376f32a6403SWarner Losh extern int runetochar(char *str, int c);
377f32a6403SWarner Losh
string(void)3782a55deb1SDavid E. O'Brien int string(void)
3792a55deb1SDavid E. O'Brien {
3802a55deb1SDavid E. O'Brien int c, n;
3812a55deb1SDavid E. O'Brien char *s, *bp;
38210ce5b99SWarner Losh static char *buf = NULL;
3832a55deb1SDavid E. O'Brien static int bufsz = 500;
3842a55deb1SDavid E. O'Brien
38510ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
3862a55deb1SDavid E. O'Brien FATAL("out of space for strings");
3872a55deb1SDavid E. O'Brien for (bp = buf; (c = input()) != '"'; ) {
388addad6afSRong-En Fan if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
3892a55deb1SDavid E. O'Brien FATAL("out of space for string %.10s...", buf);
3902a55deb1SDavid E. O'Brien switch (c) {
3912a55deb1SDavid E. O'Brien case '\n':
3922a55deb1SDavid E. O'Brien case '\r':
3932a55deb1SDavid E. O'Brien case 0:
394b5253557SWarner Losh *bp = '\0';
3952a55deb1SDavid E. O'Brien SYNTAX( "non-terminated string %.10s...", buf );
396007c6572SDag-Erling Smørgrav if (c == 0) /* hopeless */
397007c6572SDag-Erling Smørgrav FATAL( "giving up" );
398b5253557SWarner Losh lineno++;
3992a55deb1SDavid E. O'Brien break;
4002a55deb1SDavid E. O'Brien case '\\':
4012a55deb1SDavid E. O'Brien c = input();
4022a55deb1SDavid E. O'Brien switch (c) {
403f39dd6a9SWarner Losh case '\n': break;
4042a55deb1SDavid E. O'Brien case '"': *bp++ = '"'; break;
4052a55deb1SDavid E. O'Brien case 'n': *bp++ = '\n'; break;
4062a55deb1SDavid E. O'Brien case 't': *bp++ = '\t'; break;
4072a55deb1SDavid E. O'Brien case 'f': *bp++ = '\f'; break;
4082a55deb1SDavid E. O'Brien case 'r': *bp++ = '\r'; break;
4092a55deb1SDavid E. O'Brien case 'b': *bp++ = '\b'; break;
4102a55deb1SDavid E. O'Brien case 'v': *bp++ = '\v'; break;
411f39dd6a9SWarner Losh case 'a': *bp++ = '\a'; break;
4122a55deb1SDavid E. O'Brien case '\\': *bp++ = '\\'; break;
4132a55deb1SDavid E. O'Brien
4142a55deb1SDavid E. O'Brien case '0': case '1': case '2': /* octal: \d \dd \ddd */
4152a55deb1SDavid E. O'Brien case '3': case '4': case '5': case '6': case '7':
4162a55deb1SDavid E. O'Brien n = c - '0';
4172a55deb1SDavid E. O'Brien if ((c = peek()) >= '0' && c < '8') {
4182a55deb1SDavid E. O'Brien n = 8 * n + input() - '0';
4192a55deb1SDavid E. O'Brien if ((c = peek()) >= '0' && c < '8')
4202a55deb1SDavid E. O'Brien n = 8 * n + input() - '0';
4212a55deb1SDavid E. O'Brien }
4222a55deb1SDavid E. O'Brien *bp++ = n;
4232a55deb1SDavid E. O'Brien break;
4242a55deb1SDavid E. O'Brien
425f32a6403SWarner Losh case 'x': /* hex \x0-9a-fA-F (exactly two) */
426f32a6403SWarner Losh {
427f32a6403SWarner Losh int i;
428f32a6403SWarner Losh
429f32a6403SWarner Losh if (!isxdigit(peek())) {
430f32a6403SWarner Losh unput(c);
4312a55deb1SDavid E. O'Brien break;
4322a55deb1SDavid E. O'Brien }
433f32a6403SWarner Losh n = 0;
434f32a6403SWarner Losh for (i = 0; i < 2; i++) {
435f32a6403SWarner Losh c = input();
436f32a6403SWarner Losh if (c == 0)
437f32a6403SWarner Losh break;
438f32a6403SWarner Losh if (isxdigit(c)) {
439f32a6403SWarner Losh c = tolower(c);
440f32a6403SWarner Losh n *= 16;
441f32a6403SWarner Losh if (isdigit(c))
442f32a6403SWarner Losh n += (c - '0');
443f32a6403SWarner Losh else
444f32a6403SWarner Losh n += 10 + (c - 'a');
445f32a6403SWarner Losh } else {
446b2376a5fSWarner Losh unput(c);
447f32a6403SWarner Losh break;
448f32a6403SWarner Losh }
449f32a6403SWarner Losh }
450f32a6403SWarner Losh if (i)
4512a55deb1SDavid E. O'Brien *bp++ = n;
4522a55deb1SDavid E. O'Brien break;
4532a55deb1SDavid E. O'Brien }
4542a55deb1SDavid E. O'Brien
455f32a6403SWarner Losh case 'u': /* utf \u0-9a-fA-F (1..8) */
456f32a6403SWarner Losh {
457f32a6403SWarner Losh int i;
458f32a6403SWarner Losh
459f32a6403SWarner Losh n = 0;
460f32a6403SWarner Losh for (i = 0; i < 8; i++) {
461f32a6403SWarner Losh c = input();
462f32a6403SWarner Losh if (!isxdigit(c) || c == 0)
463f32a6403SWarner Losh break;
464f32a6403SWarner Losh c = tolower(c);
465f32a6403SWarner Losh n *= 16;
466f32a6403SWarner Losh if (isdigit(c))
467f32a6403SWarner Losh n += (c - '0');
468f32a6403SWarner Losh else
469f32a6403SWarner Losh n += 10 + (c - 'a');
470f32a6403SWarner Losh }
471f32a6403SWarner Losh unput(c);
472f32a6403SWarner Losh bp += runetochar(bp, n);
473f32a6403SWarner Losh break;
474f32a6403SWarner Losh }
475f32a6403SWarner Losh
4762a55deb1SDavid E. O'Brien default:
4772a55deb1SDavid E. O'Brien *bp++ = c;
4782a55deb1SDavid E. O'Brien break;
4792a55deb1SDavid E. O'Brien }
4802a55deb1SDavid E. O'Brien break;
4812a55deb1SDavid E. O'Brien default:
4822a55deb1SDavid E. O'Brien *bp++ = c;
4832a55deb1SDavid E. O'Brien break;
4842a55deb1SDavid E. O'Brien }
4852a55deb1SDavid E. O'Brien }
4862a55deb1SDavid E. O'Brien *bp = 0;
4872a55deb1SDavid E. O'Brien s = tostring(buf);
488f39dd6a9SWarner Losh *bp++ = ' '; *bp++ = '\0';
4892a55deb1SDavid E. O'Brien yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
490f39dd6a9SWarner Losh free(s);
4912a55deb1SDavid E. O'Brien RET(STRING);
4922a55deb1SDavid E. O'Brien }
4932a55deb1SDavid E. O'Brien
4942a55deb1SDavid E. O'Brien
binsearch(char * w,const Keyword * kp,int n)495f39dd6a9SWarner Losh static int binsearch(char *w, const Keyword *kp, int n)
4962a55deb1SDavid E. O'Brien {
4972a55deb1SDavid E. O'Brien int cond, low, mid, high;
4982a55deb1SDavid E. O'Brien
4992a55deb1SDavid E. O'Brien low = 0;
5002a55deb1SDavid E. O'Brien high = n - 1;
5012a55deb1SDavid E. O'Brien while (low <= high) {
5022a55deb1SDavid E. O'Brien mid = (low + high) / 2;
5032a55deb1SDavid E. O'Brien if ((cond = strcmp(w, kp[mid].word)) < 0)
5042a55deb1SDavid E. O'Brien high = mid - 1;
5052a55deb1SDavid E. O'Brien else if (cond > 0)
5062a55deb1SDavid E. O'Brien low = mid + 1;
5072a55deb1SDavid E. O'Brien else
5082a55deb1SDavid E. O'Brien return mid;
5092a55deb1SDavid E. O'Brien }
5102a55deb1SDavid E. O'Brien return -1;
5112a55deb1SDavid E. O'Brien }
5122a55deb1SDavid E. O'Brien
word(char * w)5132a55deb1SDavid E. O'Brien int word(char *w)
5142a55deb1SDavid E. O'Brien {
515f39dd6a9SWarner Losh const Keyword *kp;
5162a55deb1SDavid E. O'Brien int c, n;
5172a55deb1SDavid E. O'Brien
5182a55deb1SDavid E. O'Brien n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
5192a55deb1SDavid E. O'Brien if (n != -1) { /* found in table */
52011169460SAlex Richardson kp = keywords + n;
5212a55deb1SDavid E. O'Brien yylval.i = kp->sub;
5222a55deb1SDavid E. O'Brien switch (kp->type) { /* special handling */
523addad6afSRong-En Fan case BLTIN:
524addad6afSRong-En Fan if (kp->sub == FSYSTEM && safe)
5252a55deb1SDavid E. O'Brien SYNTAX( "system is unsafe" );
5262a55deb1SDavid E. O'Brien RET(kp->type);
5272a55deb1SDavid E. O'Brien case FUNC:
5282a55deb1SDavid E. O'Brien if (infunc)
5292a55deb1SDavid E. O'Brien SYNTAX( "illegal nested function" );
5302a55deb1SDavid E. O'Brien RET(kp->type);
5312a55deb1SDavid E. O'Brien case RETURN:
5322a55deb1SDavid E. O'Brien if (!infunc)
5332a55deb1SDavid E. O'Brien SYNTAX( "return not in function" );
5342a55deb1SDavid E. O'Brien RET(kp->type);
5352a55deb1SDavid E. O'Brien case VARNF:
5362a55deb1SDavid E. O'Brien yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
5372a55deb1SDavid E. O'Brien RET(VARNF);
5382a55deb1SDavid E. O'Brien default:
5392a55deb1SDavid E. O'Brien RET(kp->type);
5402a55deb1SDavid E. O'Brien }
5412a55deb1SDavid E. O'Brien }
5422a55deb1SDavid E. O'Brien c = peek(); /* look for '(' */
5432a55deb1SDavid E. O'Brien if (c != '(' && infunc && (n=isarg(w)) >= 0) {
5442a55deb1SDavid E. O'Brien yylval.i = n;
5452a55deb1SDavid E. O'Brien RET(ARG);
5462a55deb1SDavid E. O'Brien } else {
5472a55deb1SDavid E. O'Brien yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
5482a55deb1SDavid E. O'Brien if (c == '(') {
5492a55deb1SDavid E. O'Brien RET(CALL);
5502a55deb1SDavid E. O'Brien } else {
5512a55deb1SDavid E. O'Brien RET(VAR);
5522a55deb1SDavid E. O'Brien }
5532a55deb1SDavid E. O'Brien }
5542a55deb1SDavid E. O'Brien }
5552a55deb1SDavid E. O'Brien
startreg(void)556813da98dSDavid E. O'Brien void startreg(void) /* next call to yylex will return a regular expression */
5572a55deb1SDavid E. O'Brien {
558f39dd6a9SWarner Losh reg = true;
5592a55deb1SDavid E. O'Brien }
5602a55deb1SDavid E. O'Brien
regexpr(void)5612a55deb1SDavid E. O'Brien int regexpr(void)
5622a55deb1SDavid E. O'Brien {
5632a55deb1SDavid E. O'Brien int c;
56410ce5b99SWarner Losh static char *buf = NULL;
5652a55deb1SDavid E. O'Brien static int bufsz = 500;
5662a55deb1SDavid E. O'Brien char *bp;
5672a55deb1SDavid E. O'Brien
56810ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
569f32a6403SWarner Losh FATAL("out of space for reg expr");
5702a55deb1SDavid E. O'Brien bp = buf;
5712a55deb1SDavid E. O'Brien for ( ; (c = input()) != '/' && c != 0; ) {
572addad6afSRong-En Fan if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
5732a55deb1SDavid E. O'Brien FATAL("out of space for reg expr %.10s...", buf);
5742a55deb1SDavid E. O'Brien if (c == '\n') {
575b5253557SWarner Losh *bp = '\0';
5762a55deb1SDavid E. O'Brien SYNTAX( "newline in regular expression %.10s...", buf );
5772a55deb1SDavid E. O'Brien unput('\n');
5782a55deb1SDavid E. O'Brien break;
5792a55deb1SDavid E. O'Brien } else if (c == '\\') {
5802a55deb1SDavid E. O'Brien *bp++ = '\\';
5812a55deb1SDavid E. O'Brien *bp++ = input();
5822a55deb1SDavid E. O'Brien } else {
5832a55deb1SDavid E. O'Brien *bp++ = c;
5842a55deb1SDavid E. O'Brien }
5852a55deb1SDavid E. O'Brien }
5862a55deb1SDavid E. O'Brien *bp = 0;
58788b8d487SRuslan Ermilov if (c == 0)
58888b8d487SRuslan Ermilov SYNTAX("non-terminated regular expression %.10s...", buf);
5892a55deb1SDavid E. O'Brien yylval.s = tostring(buf);
5902a55deb1SDavid E. O'Brien unput('/');
5912a55deb1SDavid E. O'Brien RET(REGEXPR);
5922a55deb1SDavid E. O'Brien }
5932a55deb1SDavid E. O'Brien
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* circular record of recently read characters */
char	*ep = ebuf;		/* next slot in ebuf; moved by input() and unput() */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character */
FILE	*yyin = NULL;
6012a55deb1SDavid E. O'Brien
input(void)6022a55deb1SDavid E. O'Brien int input(void) /* get next lexical input character */
6032a55deb1SDavid E. O'Brien {
6042a55deb1SDavid E. O'Brien int c;
6052a55deb1SDavid E. O'Brien extern char *lexprog;
6062a55deb1SDavid E. O'Brien
6072a55deb1SDavid E. O'Brien if (yysptr > yysbuf)
6082e454f23SRuslan Ermilov c = (uschar)*--yysptr;
6092a55deb1SDavid E. O'Brien else if (lexprog != NULL) { /* awk '...' */
6102e454f23SRuslan Ermilov if ((c = (uschar)*lexprog) != 0)
6112a55deb1SDavid E. O'Brien lexprog++;
6122a55deb1SDavid E. O'Brien } else /* awk -f ... */
6132a55deb1SDavid E. O'Brien c = pgetc();
614b5253557SWarner Losh if (c == EOF)
6152a55deb1SDavid E. O'Brien c = 0;
6162a55deb1SDavid E. O'Brien if (ep >= ebuf + sizeof ebuf)
6172a55deb1SDavid E. O'Brien ep = ebuf;
618b5253557SWarner Losh *ep = c;
619b5253557SWarner Losh if (c != 0) {
620b5253557SWarner Losh ep++;
621b5253557SWarner Losh }
622b5253557SWarner Losh return (c);
6232a55deb1SDavid E. O'Brien }
6242a55deb1SDavid E. O'Brien
unput(int c)6252a55deb1SDavid E. O'Brien void unput(int c) /* put lexical character back on input */
6262a55deb1SDavid E. O'Brien {
6272a55deb1SDavid E. O'Brien if (yysptr >= yysbuf + sizeof(yysbuf))
6282a55deb1SDavid E. O'Brien FATAL("pushed back too much: %.20s...", yysbuf);
6292a55deb1SDavid E. O'Brien *yysptr++ = c;
6302a55deb1SDavid E. O'Brien if (--ep < ebuf)
6312a55deb1SDavid E. O'Brien ep = ebuf + sizeof(ebuf) - 1;
6322a55deb1SDavid E. O'Brien }
6332a55deb1SDavid E. O'Brien
/*
 * unputstr - put the string s back on the input.
 * Characters are pushed last-to-first so that input() returns them in
 * their original order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i = strlen(s);

	while (i > 0)
		unput(s[--i]);
}
641