/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate
/*
 * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
 */
297c478bd9Sstevel@tonic-gate
307c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
317c478bd9Sstevel@tonic-gate
/*
 * awk -- mainline, yylex, etc.
 *
 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
 */
37cb4658fbSceastha
387c478bd9Sstevel@tonic-gate #include "awk.h"
397c478bd9Sstevel@tonic-gate #include "y.tab.h"
407c478bd9Sstevel@tonic-gate #include <stdarg.h>
417c478bd9Sstevel@tonic-gate #include <unistd.h>
427c478bd9Sstevel@tonic-gate #include <locale.h>
43*79777a7dSnakanon #include <search.h>
447c478bd9Sstevel@tonic-gate
457c478bd9Sstevel@tonic-gate static char *progfiles[NPFILE]; /* Programmes files for yylex */
467c478bd9Sstevel@tonic-gate static char **progfilep = &progfiles[0]; /* Pointer to last file */
477c478bd9Sstevel@tonic-gate static wchar_t *progptr; /* In-memory programme */
487c478bd9Sstevel@tonic-gate static int proglen; /* Length of progptr */
497c478bd9Sstevel@tonic-gate static wchar_t context[NCONTEXT]; /* Circular buffer of context */
507c478bd9Sstevel@tonic-gate static wchar_t *conptr = &context[0]; /* context ptr */
517c478bd9Sstevel@tonic-gate static FILE *progfp; /* Stdio stream for programme */
527c478bd9Sstevel@tonic-gate static char *filename;
537c478bd9Sstevel@tonic-gate #ifdef DEBUG
547c478bd9Sstevel@tonic-gate static int dflag;
557c478bd9Sstevel@tonic-gate #endif
567c478bd9Sstevel@tonic-gate
577c478bd9Sstevel@tonic-gate #define AWK_EXEC_MAGIC "<MKS AWKC>"
587c478bd9Sstevel@tonic-gate #define LEN_EXEC_MAGIC 10
597c478bd9Sstevel@tonic-gate
607c478bd9Sstevel@tonic-gate static char unbal[] = "unbalanced E char";
617c478bd9Sstevel@tonic-gate
627c478bd9Sstevel@tonic-gate static void awkarginit(int c, char **av);
637c478bd9Sstevel@tonic-gate static int lexid(wint_t c);
647c478bd9Sstevel@tonic-gate static int lexnumber(wint_t c);
657c478bd9Sstevel@tonic-gate static int lexstring(wint_t endc);
66cb4658fbSceastha static int lexregexp(wint_t endc);
677c478bd9Sstevel@tonic-gate
687c478bd9Sstevel@tonic-gate static void awkvarinit(void);
697c478bd9Sstevel@tonic-gate static wint_t lexgetc(void);
707c478bd9Sstevel@tonic-gate static void lexungetc(wint_t c);
717c478bd9Sstevel@tonic-gate static size_t lexescape(wint_t endc, int regx, int cmd_line_operand);
727c478bd9Sstevel@tonic-gate static void awkierr(int perr, char *fmt, va_list ap);
737c478bd9Sstevel@tonic-gate static int usage(void);
747c478bd9Sstevel@tonic-gate void strescape(wchar_t *str);
757c478bd9Sstevel@tonic-gate static const char *toprint(wint_t);
767c478bd9Sstevel@tonic-gate char *_cmdname;
777c478bd9Sstevel@tonic-gate static wchar_t *mbconvert(char *str);
787c478bd9Sstevel@tonic-gate
79cb4658fbSceastha extern int isclvar(wchar_t *arg);
807c478bd9Sstevel@tonic-gate
817c478bd9Sstevel@tonic-gate /*
827c478bd9Sstevel@tonic-gate * mainline for awk
837c478bd9Sstevel@tonic-gate */
847c478bd9Sstevel@tonic-gate int
main(int argc,char * argv[])857c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
867c478bd9Sstevel@tonic-gate {
87cb4658fbSceastha wchar_t *ap;
88cb4658fbSceastha char *cmd;
897c478bd9Sstevel@tonic-gate
907c478bd9Sstevel@tonic-gate cmd = argv[0];
917c478bd9Sstevel@tonic-gate _cmdname = cmd;
927c478bd9Sstevel@tonic-gate
937c478bd9Sstevel@tonic-gate linebuf = emalloc(NLINE * sizeof (wchar_t));
947c478bd9Sstevel@tonic-gate
95cb4658fbSceastha /*
967c478bd9Sstevel@tonic-gate * At this point only messaging should be internationalized.
977c478bd9Sstevel@tonic-gate * numbers are still scanned as in the Posix locale.
987c478bd9Sstevel@tonic-gate */
997c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL, "");
1007c478bd9Sstevel@tonic-gate (void) setlocale(LC_NUMERIC, "C");
1017c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
1027c478bd9Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST"
1037c478bd9Sstevel@tonic-gate #endif
1047c478bd9Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN);
1057c478bd9Sstevel@tonic-gate
1067c478bd9Sstevel@tonic-gate awkvarinit();
1077c478bd9Sstevel@tonic-gate /* running = 1; */
1087c478bd9Sstevel@tonic-gate while (argc > 1 && *argv[1] == '-') {
1097c478bd9Sstevel@tonic-gate void *save_ptr = NULL;
1107c478bd9Sstevel@tonic-gate ap = mbstowcsdup(&argv[1][1]);
1117c478bd9Sstevel@tonic-gate if (ap == NULL)
1127c478bd9Sstevel@tonic-gate break;
1137c478bd9Sstevel@tonic-gate if (*ap == '\0') {
1147c478bd9Sstevel@tonic-gate free(ap);
1157c478bd9Sstevel@tonic-gate break;
1167c478bd9Sstevel@tonic-gate }
1177c478bd9Sstevel@tonic-gate save_ptr = (void *) ap;
1187c478bd9Sstevel@tonic-gate ++argv;
1197c478bd9Sstevel@tonic-gate --argc;
1207c478bd9Sstevel@tonic-gate if (*ap == '-' && ap[1] == '\0')
1217c478bd9Sstevel@tonic-gate break;
1227c478bd9Sstevel@tonic-gate for (; *ap != '\0'; ++ap) {
1237c478bd9Sstevel@tonic-gate switch (*ap) {
1247c478bd9Sstevel@tonic-gate #ifdef DEBUG
1257c478bd9Sstevel@tonic-gate case 'd':
1267c478bd9Sstevel@tonic-gate dflag = 1;
1277c478bd9Sstevel@tonic-gate continue;
1287c478bd9Sstevel@tonic-gate
1297c478bd9Sstevel@tonic-gate #endif
1307c478bd9Sstevel@tonic-gate case 'f':
1317c478bd9Sstevel@tonic-gate if (argc < 2) {
1327c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
1337c478bd9Sstevel@tonic-gate gettext("Missing script file\n"));
1347c478bd9Sstevel@tonic-gate return (1);
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate *progfilep++ = argv[1];
1377c478bd9Sstevel@tonic-gate --argc;
1387c478bd9Sstevel@tonic-gate ++argv;
1397c478bd9Sstevel@tonic-gate continue;
1407c478bd9Sstevel@tonic-gate
1417c478bd9Sstevel@tonic-gate case 'F':
1427c478bd9Sstevel@tonic-gate if (ap[1] == '\0') {
1437c478bd9Sstevel@tonic-gate if (argc < 2) {
1447c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
1457c478bd9Sstevel@tonic-gate gettext("Missing field separator\n"));
1467c478bd9Sstevel@tonic-gate return (1);
1477c478bd9Sstevel@tonic-gate }
1487c478bd9Sstevel@tonic-gate ap = mbstowcsdup(argv[1]);
1497c478bd9Sstevel@tonic-gate --argc;
1507c478bd9Sstevel@tonic-gate ++argv;
1517c478bd9Sstevel@tonic-gate } else
1527c478bd9Sstevel@tonic-gate ++ap;
1537c478bd9Sstevel@tonic-gate strescape(ap);
1547c478bd9Sstevel@tonic-gate strassign(varFS, linebuf, FALLOC,
1557c478bd9Sstevel@tonic-gate wcslen(linebuf));
1567c478bd9Sstevel@tonic-gate break;
1577c478bd9Sstevel@tonic-gate
1587c478bd9Sstevel@tonic-gate case 'v': {
159cb4658fbSceastha wchar_t *vp;
160cb4658fbSceastha wchar_t *arg;
1617c478bd9Sstevel@tonic-gate
1627c478bd9Sstevel@tonic-gate if (argc < 2) {
1637c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
1647c478bd9Sstevel@tonic-gate gettext("Missing variable assignment\n"));
1657c478bd9Sstevel@tonic-gate return (1);
1667c478bd9Sstevel@tonic-gate }
1677c478bd9Sstevel@tonic-gate arg = mbconvert(argv[1]);
168cb4658fbSceastha /*
169cb4658fbSceastha * Ensure the variable expression
170cb4658fbSceastha * is valid (correct form).
171cb4658fbSceastha */
172cb4658fbSceastha if (((vp = wcschr(arg, '=')) != NULL) &&
173cb4658fbSceastha isclvar(arg)) {
1747c478bd9Sstevel@tonic-gate *vp = '\0';
1757c478bd9Sstevel@tonic-gate strescape(vp+1);
1767c478bd9Sstevel@tonic-gate strassign(vlook(arg), linebuf,
177cb4658fbSceastha FALLOC|FSENSE,
178cb4658fbSceastha wcslen(linebuf));
1797c478bd9Sstevel@tonic-gate *vp = '=';
180cb4658fbSceastha } else {
181cb4658fbSceastha (void) fprintf(stderr, gettext(
182cb4658fbSceastha "Invalid form for variable "
183cb4658fbSceastha "assignment: %S\n"), arg);
184cb4658fbSceastha return (1);
1857c478bd9Sstevel@tonic-gate }
1867c478bd9Sstevel@tonic-gate --argc;
1877c478bd9Sstevel@tonic-gate ++argv;
1887c478bd9Sstevel@tonic-gate continue;
1897c478bd9Sstevel@tonic-gate }
1907c478bd9Sstevel@tonic-gate
1917c478bd9Sstevel@tonic-gate default:
1927c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
1937c478bd9Sstevel@tonic-gate gettext("Unknown option \"-%S\"\n"), ap);
1947c478bd9Sstevel@tonic-gate return (usage());
1957c478bd9Sstevel@tonic-gate }
1967c478bd9Sstevel@tonic-gate break;
1977c478bd9Sstevel@tonic-gate }
1987c478bd9Sstevel@tonic-gate if (save_ptr)
1997c478bd9Sstevel@tonic-gate free(save_ptr);
2007c478bd9Sstevel@tonic-gate }
2017c478bd9Sstevel@tonic-gate if (progfilep == &progfiles[0]) {
2027c478bd9Sstevel@tonic-gate if (argc < 2)
2037c478bd9Sstevel@tonic-gate return (usage());
2047c478bd9Sstevel@tonic-gate filename = "[command line]"; /* BUG: NEEDS TRANSLATION */
2057c478bd9Sstevel@tonic-gate progptr = mbstowcsdup(argv[1]);
2067c478bd9Sstevel@tonic-gate proglen = wcslen(progptr);
2077c478bd9Sstevel@tonic-gate --argc;
2087c478bd9Sstevel@tonic-gate ++argv;
2097c478bd9Sstevel@tonic-gate }
2107c478bd9Sstevel@tonic-gate
2117c478bd9Sstevel@tonic-gate argv[0] = cmd;
2127c478bd9Sstevel@tonic-gate
2137c478bd9Sstevel@tonic-gate awkarginit(argc, argv);
2147c478bd9Sstevel@tonic-gate
2157c478bd9Sstevel@tonic-gate /* running = 0; */
2167c478bd9Sstevel@tonic-gate (void) yyparse();
2177c478bd9Sstevel@tonic-gate
2187c478bd9Sstevel@tonic-gate lineno = 0;
2197c478bd9Sstevel@tonic-gate /*
2207c478bd9Sstevel@tonic-gate * Ok, done parsing, so now activate the rest of the nls stuff, set
2217c478bd9Sstevel@tonic-gate * the radix character.
2227c478bd9Sstevel@tonic-gate */
2237c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL, "");
2247c478bd9Sstevel@tonic-gate radixpoint = *localeconv()->decimal_point;
2257c478bd9Sstevel@tonic-gate awk();
2267c478bd9Sstevel@tonic-gate /* NOTREACHED */
2277c478bd9Sstevel@tonic-gate return (0);
2287c478bd9Sstevel@tonic-gate }
2297c478bd9Sstevel@tonic-gate
2307c478bd9Sstevel@tonic-gate /*
2317c478bd9Sstevel@tonic-gate * Do initial setup of buffers, etc.
2327c478bd9Sstevel@tonic-gate * This must be called before most processing
2337c478bd9Sstevel@tonic-gate * and especially before lexical analysis.
2347c478bd9Sstevel@tonic-gate * Variables initialised here will be overruled by command
2357c478bd9Sstevel@tonic-gate * line parameter initialisation.
2367c478bd9Sstevel@tonic-gate */
2377c478bd9Sstevel@tonic-gate static void
awkvarinit()2387c478bd9Sstevel@tonic-gate awkvarinit()
2397c478bd9Sstevel@tonic-gate {
240cb4658fbSceastha NODE *np;
2417c478bd9Sstevel@tonic-gate
2427c478bd9Sstevel@tonic-gate (void) setvbuf(stderr, NULL, _IONBF, 0);
2437c478bd9Sstevel@tonic-gate
2447c478bd9Sstevel@tonic-gate if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
2457c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
2467c478bd9Sstevel@tonic-gate gettext("not enough available file descriptors"));
2477c478bd9Sstevel@tonic-gate exit(1);
2487c478bd9Sstevel@tonic-gate }
2497c478bd9Sstevel@tonic-gate ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM);
2507c478bd9Sstevel@tonic-gate #ifdef A_ZERO_POINTERS
2517c478bd9Sstevel@tonic-gate (void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM);
2527c478bd9Sstevel@tonic-gate #else
2537c478bd9Sstevel@tonic-gate {
2547c478bd9Sstevel@tonic-gate /* initialize file descriptor table */
2557c478bd9Sstevel@tonic-gate OFILE *fp;
2567c478bd9Sstevel@tonic-gate for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
2577c478bd9Sstevel@tonic-gate fp->f_fp = FNULL;
2587c478bd9Sstevel@tonic-gate fp->f_mode = 0;
2597c478bd9Sstevel@tonic-gate fp->f_name = (char *)0;
2607c478bd9Sstevel@tonic-gate }
2617c478bd9Sstevel@tonic-gate }
2627c478bd9Sstevel@tonic-gate #endif
2637c478bd9Sstevel@tonic-gate constant = intnode((INT)0);
2647c478bd9Sstevel@tonic-gate
2657c478bd9Sstevel@tonic-gate const0 = intnode((INT)0);
2667c478bd9Sstevel@tonic-gate const1 = intnode((INT)1);
2677c478bd9Sstevel@tonic-gate constundef = emptynode(CONSTANT, 0);
2687c478bd9Sstevel@tonic-gate constundef->n_flags = FSTRING|FVINT;
2697c478bd9Sstevel@tonic-gate constundef->n_string = _null;
2707c478bd9Sstevel@tonic-gate constundef->n_strlen = 0;
2717c478bd9Sstevel@tonic-gate inc_oper = emptynode(ADD, 0);
2727c478bd9Sstevel@tonic-gate inc_oper->n_right = const1;
2737c478bd9Sstevel@tonic-gate asn_oper = emptynode(ADD, 0);
2747c478bd9Sstevel@tonic-gate field0 = node(FIELD, const0, NNULL);
2757c478bd9Sstevel@tonic-gate
2767c478bd9Sstevel@tonic-gate {
277cb4658fbSceastha RESFUNC near*rp;
2787c478bd9Sstevel@tonic-gate
2797c478bd9Sstevel@tonic-gate for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
2807c478bd9Sstevel@tonic-gate np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
2817c478bd9Sstevel@tonic-gate }
2827c478bd9Sstevel@tonic-gate }
2837c478bd9Sstevel@tonic-gate {
284cb4658fbSceastha RESERVED near*rp;
2857c478bd9Sstevel@tonic-gate
2867c478bd9Sstevel@tonic-gate for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
2877c478bd9Sstevel@tonic-gate switch (rp->r_type) {
2887c478bd9Sstevel@tonic-gate case SVAR:
2897c478bd9Sstevel@tonic-gate case VAR:
2907c478bd9Sstevel@tonic-gate running = 1;
2917c478bd9Sstevel@tonic-gate np = vlook(rp->r_name);
2927c478bd9Sstevel@tonic-gate if (rp->r_type == SVAR)
2937c478bd9Sstevel@tonic-gate np->n_flags |= FSPECIAL;
2947c478bd9Sstevel@tonic-gate if (rp->r_svalue != NULL)
2957c478bd9Sstevel@tonic-gate strassign(np, rp->r_svalue, FSTATIC,
2967c478bd9Sstevel@tonic-gate (size_t)rp->r_ivalue);
2977c478bd9Sstevel@tonic-gate else {
2987c478bd9Sstevel@tonic-gate constant->n_int = rp->r_ivalue;
2997c478bd9Sstevel@tonic-gate (void) assign(np, constant);
3007c478bd9Sstevel@tonic-gate }
3017c478bd9Sstevel@tonic-gate running = 0;
3027c478bd9Sstevel@tonic-gate break;
3037c478bd9Sstevel@tonic-gate
3047c478bd9Sstevel@tonic-gate case KEYWORD:
3057c478bd9Sstevel@tonic-gate kinstall(rp->r_name, (int)rp->r_ivalue);
3067c478bd9Sstevel@tonic-gate break;
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate }
3097c478bd9Sstevel@tonic-gate }
3107c478bd9Sstevel@tonic-gate
3117c478bd9Sstevel@tonic-gate varNR = vlook(s_NR);
3127c478bd9Sstevel@tonic-gate varFNR = vlook(s_FNR);
3137c478bd9Sstevel@tonic-gate varNF = vlook(s_NF);
3147c478bd9Sstevel@tonic-gate varOFMT = vlook(s_OFMT);
3157c478bd9Sstevel@tonic-gate varCONVFMT = vlook(s_CONVFMT);
3167c478bd9Sstevel@tonic-gate varOFS = vlook(s_OFS);
3177c478bd9Sstevel@tonic-gate varORS = vlook(s_ORS);
3187c478bd9Sstevel@tonic-gate varRS = vlook(s_RS);
3197c478bd9Sstevel@tonic-gate varFS = vlook(s_FS);
3207c478bd9Sstevel@tonic-gate varARGC = vlook(s_ARGC);
3217c478bd9Sstevel@tonic-gate varSUBSEP = vlook(s_SUBSEP);
3227c478bd9Sstevel@tonic-gate varENVIRON = vlook(s_ENVIRON);
3237c478bd9Sstevel@tonic-gate varFILENAME = vlook(s_FILENAME);
3247c478bd9Sstevel@tonic-gate varSYMTAB = vlook(s_SYMTAB);
3257c478bd9Sstevel@tonic-gate incNR = node(ASG, varNR, node(ADD, varNR, const1));
3267c478bd9Sstevel@tonic-gate incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
3277c478bd9Sstevel@tonic-gate clrFNR = node(ASG, varFNR, const0);
3287c478bd9Sstevel@tonic-gate }
3297c478bd9Sstevel@tonic-gate
3307c478bd9Sstevel@tonic-gate /*
3317c478bd9Sstevel@tonic-gate * Initialise awk ARGC, ARGV variables.
3327c478bd9Sstevel@tonic-gate */
3337c478bd9Sstevel@tonic-gate static void
awkarginit(int ac,char ** av)3347c478bd9Sstevel@tonic-gate awkarginit(int ac, char **av)
3357c478bd9Sstevel@tonic-gate {
336cb4658fbSceastha int i;
337cb4658fbSceastha wchar_t *cp;
3387c478bd9Sstevel@tonic-gate
3397c478bd9Sstevel@tonic-gate ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
3407c478bd9Sstevel@tonic-gate running = 1;
3417c478bd9Sstevel@tonic-gate constant->n_int = ac;
3427c478bd9Sstevel@tonic-gate (void) assign(varARGC, constant);
3437c478bd9Sstevel@tonic-gate for (i = 0; i < ac; ++i) {
3447c478bd9Sstevel@tonic-gate cp = mbstowcsdup(av[i]);
3457c478bd9Sstevel@tonic-gate constant->n_int = i;
3467c478bd9Sstevel@tonic-gate strassign(exprreduce(ARGVsubi), cp,
3477c478bd9Sstevel@tonic-gate FSTATIC|FSENSE, wcslen(cp));
3487c478bd9Sstevel@tonic-gate }
3497c478bd9Sstevel@tonic-gate running = 0;
3507c478bd9Sstevel@tonic-gate }
3517c478bd9Sstevel@tonic-gate
3527c478bd9Sstevel@tonic-gate /*
3537c478bd9Sstevel@tonic-gate * Clean up when done parsing a function.
3547c478bd9Sstevel@tonic-gate * All formal parameters, because of a deal (funparm) in
3557c478bd9Sstevel@tonic-gate * yylex, get put into the symbol table in front of any
3567c478bd9Sstevel@tonic-gate * global variable of the same name. When the entire
3577c478bd9Sstevel@tonic-gate * function is parsed, remove these formal dummy nodes
3587c478bd9Sstevel@tonic-gate * from the symbol table but retain the nodes because
3597c478bd9Sstevel@tonic-gate * the generated tree points at them.
3607c478bd9Sstevel@tonic-gate */
3617c478bd9Sstevel@tonic-gate void
uexit(NODE * np)3627c478bd9Sstevel@tonic-gate uexit(NODE *np)
3637c478bd9Sstevel@tonic-gate {
364cb4658fbSceastha NODE *formal;
3657c478bd9Sstevel@tonic-gate
3667c478bd9Sstevel@tonic-gate while ((formal = getlist(&np)) != NNULL)
3677c478bd9Sstevel@tonic-gate delsymtab(formal, 0);
3687c478bd9Sstevel@tonic-gate }
3697c478bd9Sstevel@tonic-gate
3707c478bd9Sstevel@tonic-gate /*
3717c478bd9Sstevel@tonic-gate * The lexical analyzer.
3727c478bd9Sstevel@tonic-gate */
3737c478bd9Sstevel@tonic-gate int
yylex()3747c478bd9Sstevel@tonic-gate yylex()
3757c478bd9Sstevel@tonic-gate #ifdef DEBUG
3767c478bd9Sstevel@tonic-gate {
377cb4658fbSceastha int l;
3787c478bd9Sstevel@tonic-gate
3797c478bd9Sstevel@tonic-gate l = yyhex();
3807c478bd9Sstevel@tonic-gate if (dflag)
3817c478bd9Sstevel@tonic-gate (void) printf("%d\n", l);
3827c478bd9Sstevel@tonic-gate return (l);
3837c478bd9Sstevel@tonic-gate }
yyhex()3847c478bd9Sstevel@tonic-gate yyhex()
3857c478bd9Sstevel@tonic-gate #endif
3867c478bd9Sstevel@tonic-gate {
387cb4658fbSceastha wint_t c, c1;
3887c478bd9Sstevel@tonic-gate int i;
3897c478bd9Sstevel@tonic-gate static int savetoken = 0;
39088f3d729Sakaplan static int wasfield;
3917c478bd9Sstevel@tonic-gate static int isfuncdef;
3927c478bd9Sstevel@tonic-gate static int nbrace, nparen, nbracket;
3937c478bd9Sstevel@tonic-gate static struct ctosymstruct {
3947c478bd9Sstevel@tonic-gate wint_t c, sym;
3957c478bd9Sstevel@tonic-gate } ctosym[] = {
3967c478bd9Sstevel@tonic-gate { '|', BAR }, { '^', CARAT },
3977c478bd9Sstevel@tonic-gate { '~', TILDE }, { '<', LANGLE },
3987c478bd9Sstevel@tonic-gate { '>', RANGLE }, { '+', PLUSC },
3997c478bd9Sstevel@tonic-gate { '-', HYPHEN }, { '*', STAR },
4007c478bd9Sstevel@tonic-gate { '/', SLASH }, { '%', PERCENT },
4017c478bd9Sstevel@tonic-gate { '!', EXCLAMATION }, { '$', DOLLAR },
4027c478bd9Sstevel@tonic-gate { '[', LSQUARE }, { ']', RSQUARE },
4037c478bd9Sstevel@tonic-gate { '(', LPAREN }, { ')', RPAREN },
4047c478bd9Sstevel@tonic-gate { ';', SEMI }, { '{', LBRACE },
4057c478bd9Sstevel@tonic-gate { '}', RBRACE }, { 0, 0 }
4067c478bd9Sstevel@tonic-gate };
4077c478bd9Sstevel@tonic-gate
4087c478bd9Sstevel@tonic-gate if (savetoken) {
4097c478bd9Sstevel@tonic-gate c = savetoken;
4107c478bd9Sstevel@tonic-gate savetoken = 0;
4117c478bd9Sstevel@tonic-gate } else if (redelim != '\0') {
4127c478bd9Sstevel@tonic-gate c = redelim;
4137c478bd9Sstevel@tonic-gate redelim = 0;
4147c478bd9Sstevel@tonic-gate catterm = 0;
4157c478bd9Sstevel@tonic-gate savetoken = c;
4167c478bd9Sstevel@tonic-gate return (lexlast = lexregexp(c));
4177c478bd9Sstevel@tonic-gate } else while ((c = lexgetc()) != WEOF) {
4187c478bd9Sstevel@tonic-gate if (iswalpha(c) || c == '_') {
4197c478bd9Sstevel@tonic-gate c = lexid(c);
4207c478bd9Sstevel@tonic-gate } else if (iswdigit(c) || c == '.') {
4217c478bd9Sstevel@tonic-gate c = lexnumber(c);
4227c478bd9Sstevel@tonic-gate } else if (isWblank(c)) {
4237c478bd9Sstevel@tonic-gate continue;
4247c478bd9Sstevel@tonic-gate } else switch (c) {
4257c478bd9Sstevel@tonic-gate #if DOS || OS2
4267c478bd9Sstevel@tonic-gate case 032: /* ^Z */
4277c478bd9Sstevel@tonic-gate continue;
4287c478bd9Sstevel@tonic-gate #endif
4297c478bd9Sstevel@tonic-gate
4307c478bd9Sstevel@tonic-gate case '"':
4317c478bd9Sstevel@tonic-gate c = lexstring(c);
4327c478bd9Sstevel@tonic-gate break;
4337c478bd9Sstevel@tonic-gate
4347c478bd9Sstevel@tonic-gate case '#':
4357c478bd9Sstevel@tonic-gate while ((c = lexgetc()) != '\n' && c != WEOF)
4367c478bd9Sstevel@tonic-gate ;
4377c478bd9Sstevel@tonic-gate lexungetc(c);
4387c478bd9Sstevel@tonic-gate continue;
4397c478bd9Sstevel@tonic-gate
4407c478bd9Sstevel@tonic-gate case '+':
4417c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '+')
4427c478bd9Sstevel@tonic-gate c = INC;
4437c478bd9Sstevel@tonic-gate else if (c1 == '=')
4447c478bd9Sstevel@tonic-gate c = AADD;
4457c478bd9Sstevel@tonic-gate else
4467c478bd9Sstevel@tonic-gate lexungetc(c1);
4477c478bd9Sstevel@tonic-gate break;
4487c478bd9Sstevel@tonic-gate
4497c478bd9Sstevel@tonic-gate case '-':
4507c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '-')
4517c478bd9Sstevel@tonic-gate c = DEC;
4527c478bd9Sstevel@tonic-gate else if (c1 == '=')
4537c478bd9Sstevel@tonic-gate c = ASUB;
4547c478bd9Sstevel@tonic-gate else
4557c478bd9Sstevel@tonic-gate lexungetc(c1);
4567c478bd9Sstevel@tonic-gate break;
4577c478bd9Sstevel@tonic-gate
4587c478bd9Sstevel@tonic-gate case '*':
4597c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
4607c478bd9Sstevel@tonic-gate c = AMUL;
4617c478bd9Sstevel@tonic-gate else if (c1 == '*') {
4627c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
4637c478bd9Sstevel@tonic-gate c = AEXP;
4647c478bd9Sstevel@tonic-gate else {
4657c478bd9Sstevel@tonic-gate c = EXP;
4667c478bd9Sstevel@tonic-gate lexungetc(c1);
4677c478bd9Sstevel@tonic-gate }
4687c478bd9Sstevel@tonic-gate } else
4697c478bd9Sstevel@tonic-gate lexungetc(c1);
4707c478bd9Sstevel@tonic-gate break;
4717c478bd9Sstevel@tonic-gate
4727c478bd9Sstevel@tonic-gate case '^':
4737c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') {
4747c478bd9Sstevel@tonic-gate c = AEXP;
4757c478bd9Sstevel@tonic-gate } else {
4767c478bd9Sstevel@tonic-gate c = EXP;
4777c478bd9Sstevel@tonic-gate lexungetc(c1);
4787c478bd9Sstevel@tonic-gate }
4797c478bd9Sstevel@tonic-gate break;
4807c478bd9Sstevel@tonic-gate
4817c478bd9Sstevel@tonic-gate case '/':
482cb4658fbSceastha if ((c1 = lexgetc()) == '=' &&
483cb4658fbSceastha lexlast != RE && lexlast != NRE &&
484cb4658fbSceastha lexlast != ';' && lexlast != '\n' &&
485cb4658fbSceastha lexlast != ',' && lexlast != '(')
4867c478bd9Sstevel@tonic-gate c = ADIV;
4877c478bd9Sstevel@tonic-gate else
4887c478bd9Sstevel@tonic-gate lexungetc(c1);
4897c478bd9Sstevel@tonic-gate break;
4907c478bd9Sstevel@tonic-gate
4917c478bd9Sstevel@tonic-gate case '%':
4927c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
4937c478bd9Sstevel@tonic-gate c = AREM;
4947c478bd9Sstevel@tonic-gate else
4957c478bd9Sstevel@tonic-gate lexungetc(c1);
4967c478bd9Sstevel@tonic-gate break;
4977c478bd9Sstevel@tonic-gate
4987c478bd9Sstevel@tonic-gate case '&':
4997c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '&')
5007c478bd9Sstevel@tonic-gate c = AND;
5017c478bd9Sstevel@tonic-gate else
5027c478bd9Sstevel@tonic-gate lexungetc(c1);
5037c478bd9Sstevel@tonic-gate break;
5047c478bd9Sstevel@tonic-gate
5057c478bd9Sstevel@tonic-gate case '|':
5067c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '|')
5077c478bd9Sstevel@tonic-gate c = OR;
5087c478bd9Sstevel@tonic-gate else {
5097c478bd9Sstevel@tonic-gate lexungetc(c1);
5107c478bd9Sstevel@tonic-gate if (inprint)
5117c478bd9Sstevel@tonic-gate c = PIPE;
5127c478bd9Sstevel@tonic-gate }
5137c478bd9Sstevel@tonic-gate break;
5147c478bd9Sstevel@tonic-gate
5157c478bd9Sstevel@tonic-gate case '>':
5167c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
5177c478bd9Sstevel@tonic-gate c = GE;
5187c478bd9Sstevel@tonic-gate else if (c1 == '>')
5197c478bd9Sstevel@tonic-gate c = APPEND;
5207c478bd9Sstevel@tonic-gate else {
5217c478bd9Sstevel@tonic-gate lexungetc(c1);
5227c478bd9Sstevel@tonic-gate if (nparen == 0 && inprint)
5237c478bd9Sstevel@tonic-gate c = WRITE;
5247c478bd9Sstevel@tonic-gate }
5257c478bd9Sstevel@tonic-gate break;
5267c478bd9Sstevel@tonic-gate
5277c478bd9Sstevel@tonic-gate case '<':
5287c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
5297c478bd9Sstevel@tonic-gate c = LE;
5307c478bd9Sstevel@tonic-gate else
5317c478bd9Sstevel@tonic-gate lexungetc(c1);
5327c478bd9Sstevel@tonic-gate break;
5337c478bd9Sstevel@tonic-gate
5347c478bd9Sstevel@tonic-gate case '!':
5357c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
5367c478bd9Sstevel@tonic-gate c = NE;
5377c478bd9Sstevel@tonic-gate else if (c1 == '~')
5387c478bd9Sstevel@tonic-gate c = NRE;
5397c478bd9Sstevel@tonic-gate else
5407c478bd9Sstevel@tonic-gate lexungetc(c1);
5417c478bd9Sstevel@tonic-gate break;
5427c478bd9Sstevel@tonic-gate
5437c478bd9Sstevel@tonic-gate case '=':
5447c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=')
5457c478bd9Sstevel@tonic-gate c = EQ;
5467c478bd9Sstevel@tonic-gate else {
5477c478bd9Sstevel@tonic-gate lexungetc(c1);
5487c478bd9Sstevel@tonic-gate c = ASG;
5497c478bd9Sstevel@tonic-gate }
5507c478bd9Sstevel@tonic-gate break;
5517c478bd9Sstevel@tonic-gate
5527c478bd9Sstevel@tonic-gate case '\n':
5537c478bd9Sstevel@tonic-gate switch (lexlast) {
5547c478bd9Sstevel@tonic-gate case ')':
5557c478bd9Sstevel@tonic-gate if (catterm || inprint) {
5567c478bd9Sstevel@tonic-gate c = ';';
5577c478bd9Sstevel@tonic-gate break;
5587c478bd9Sstevel@tonic-gate }
559*79777a7dSnakanon /*FALLTHRU*/
5607c478bd9Sstevel@tonic-gate case AND:
5617c478bd9Sstevel@tonic-gate case OR:
5627c478bd9Sstevel@tonic-gate case COMMA:
5637c478bd9Sstevel@tonic-gate case '{':
5647c478bd9Sstevel@tonic-gate case ELSE:
5657c478bd9Sstevel@tonic-gate case ';':
5667c478bd9Sstevel@tonic-gate case DO:
5677c478bd9Sstevel@tonic-gate continue;
5687c478bd9Sstevel@tonic-gate
5697c478bd9Sstevel@tonic-gate case '}':
5707c478bd9Sstevel@tonic-gate if (nbrace != 0)
5717c478bd9Sstevel@tonic-gate continue;
5727c478bd9Sstevel@tonic-gate
5737c478bd9Sstevel@tonic-gate default:
5747c478bd9Sstevel@tonic-gate c = ';';
5757c478bd9Sstevel@tonic-gate break;
5767c478bd9Sstevel@tonic-gate }
5777c478bd9Sstevel@tonic-gate break;
5787c478bd9Sstevel@tonic-gate
5797c478bd9Sstevel@tonic-gate case ELSE:
5807c478bd9Sstevel@tonic-gate if (lexlast != ';') {
5817c478bd9Sstevel@tonic-gate savetoken = ELSE;
5827c478bd9Sstevel@tonic-gate c = ';';
5837c478bd9Sstevel@tonic-gate }
5847c478bd9Sstevel@tonic-gate break;
5857c478bd9Sstevel@tonic-gate
5867c478bd9Sstevel@tonic-gate case '(':
5877c478bd9Sstevel@tonic-gate ++nparen;
5887c478bd9Sstevel@tonic-gate break;
5897c478bd9Sstevel@tonic-gate
5907c478bd9Sstevel@tonic-gate case ')':
5917c478bd9Sstevel@tonic-gate if (--nparen < 0)
5927c478bd9Sstevel@tonic-gate awkerr(unbal, "()");
5937c478bd9Sstevel@tonic-gate break;
5947c478bd9Sstevel@tonic-gate
5957c478bd9Sstevel@tonic-gate case '{':
5967c478bd9Sstevel@tonic-gate nbrace++;
5977c478bd9Sstevel@tonic-gate break;
5987c478bd9Sstevel@tonic-gate
5997c478bd9Sstevel@tonic-gate case '}':
6007c478bd9Sstevel@tonic-gate if (--nbrace < 0) {
6017c478bd9Sstevel@tonic-gate char brk[3];
6027c478bd9Sstevel@tonic-gate
6037c478bd9Sstevel@tonic-gate brk[0] = '{';
6047c478bd9Sstevel@tonic-gate brk[1] = '}';
6057c478bd9Sstevel@tonic-gate brk[2] = '\0';
6067c478bd9Sstevel@tonic-gate awkerr(unbal, brk);
6077c478bd9Sstevel@tonic-gate }
6087c478bd9Sstevel@tonic-gate if (lexlast != ';') {
6097c478bd9Sstevel@tonic-gate savetoken = c;
6107c478bd9Sstevel@tonic-gate c = ';';
6117c478bd9Sstevel@tonic-gate }
6127c478bd9Sstevel@tonic-gate break;
6137c478bd9Sstevel@tonic-gate
6147c478bd9Sstevel@tonic-gate case '[':
6157c478bd9Sstevel@tonic-gate ++nbracket;
6167c478bd9Sstevel@tonic-gate break;
6177c478bd9Sstevel@tonic-gate
6187c478bd9Sstevel@tonic-gate case ']':
6197c478bd9Sstevel@tonic-gate if (--nbracket < 0) {
6207c478bd9Sstevel@tonic-gate char brk[3];
6217c478bd9Sstevel@tonic-gate
6227c478bd9Sstevel@tonic-gate brk[0] = '[';
6237c478bd9Sstevel@tonic-gate brk[1] = ']';
6247c478bd9Sstevel@tonic-gate brk[2] = '\0';
6257c478bd9Sstevel@tonic-gate awkerr(unbal, brk);
6267c478bd9Sstevel@tonic-gate }
6277c478bd9Sstevel@tonic-gate break;
6287c478bd9Sstevel@tonic-gate
6297c478bd9Sstevel@tonic-gate case '\\':
6307c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '\n')
6317c478bd9Sstevel@tonic-gate continue;
6327c478bd9Sstevel@tonic-gate lexungetc(c1);
6337c478bd9Sstevel@tonic-gate break;
6347c478bd9Sstevel@tonic-gate
6357c478bd9Sstevel@tonic-gate case ',':
6367c478bd9Sstevel@tonic-gate c = COMMA;
6377c478bd9Sstevel@tonic-gate break;
6387c478bd9Sstevel@tonic-gate
6397c478bd9Sstevel@tonic-gate case '?':
6407c478bd9Sstevel@tonic-gate c = QUEST;
6417c478bd9Sstevel@tonic-gate break;
6427c478bd9Sstevel@tonic-gate
6437c478bd9Sstevel@tonic-gate case ':':
6447c478bd9Sstevel@tonic-gate c = COLON;
6457c478bd9Sstevel@tonic-gate break;
6467c478bd9Sstevel@tonic-gate
6477c478bd9Sstevel@tonic-gate default:
6487c478bd9Sstevel@tonic-gate if (!iswprint(c))
6497c478bd9Sstevel@tonic-gate awkerr(
6507c478bd9Sstevel@tonic-gate gettext("invalid character \"%s\""),
6517c478bd9Sstevel@tonic-gate toprint(c));
6527c478bd9Sstevel@tonic-gate break;
6537c478bd9Sstevel@tonic-gate }
6547c478bd9Sstevel@tonic-gate break;
6557c478bd9Sstevel@tonic-gate }
6567c478bd9Sstevel@tonic-gate
6577c478bd9Sstevel@tonic-gate switch (c) {
6587c478bd9Sstevel@tonic-gate case ']':
6597c478bd9Sstevel@tonic-gate ++catterm;
6607c478bd9Sstevel@tonic-gate break;
6617c478bd9Sstevel@tonic-gate
6627c478bd9Sstevel@tonic-gate case VAR:
6637c478bd9Sstevel@tonic-gate if (catterm) {
6647c478bd9Sstevel@tonic-gate savetoken = c;
6657c478bd9Sstevel@tonic-gate c = CONCAT;
6667c478bd9Sstevel@tonic-gate catterm = 0;
6677c478bd9Sstevel@tonic-gate } else if (!isfuncdef) {
6687c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) != '(')
6697c478bd9Sstevel@tonic-gate ++catterm;
6707c478bd9Sstevel@tonic-gate lexungetc(c1);
6717c478bd9Sstevel@tonic-gate }
6727c478bd9Sstevel@tonic-gate isfuncdef = 0;
6737c478bd9Sstevel@tonic-gate break;
6747c478bd9Sstevel@tonic-gate
6757c478bd9Sstevel@tonic-gate case PARM:
6767c478bd9Sstevel@tonic-gate case CONSTANT:
6777c478bd9Sstevel@tonic-gate if (catterm) {
6787c478bd9Sstevel@tonic-gate savetoken = c;
6797c478bd9Sstevel@tonic-gate c = CONCAT;
6807c478bd9Sstevel@tonic-gate catterm = 0;
6817c478bd9Sstevel@tonic-gate } else {
6827c478bd9Sstevel@tonic-gate if (lexlast == '$')
6837c478bd9Sstevel@tonic-gate wasfield = 2;
6847c478bd9Sstevel@tonic-gate ++catterm;
6857c478bd9Sstevel@tonic-gate }
6867c478bd9Sstevel@tonic-gate break;
6877c478bd9Sstevel@tonic-gate
6887c478bd9Sstevel@tonic-gate case INC:
6897c478bd9Sstevel@tonic-gate case DEC:
6907c478bd9Sstevel@tonic-gate if (!catterm || lexlast != CONSTANT || wasfield)
6917c478bd9Sstevel@tonic-gate break;
6927c478bd9Sstevel@tonic-gate
693*79777a7dSnakanon /*FALLTHRU*/
6947c478bd9Sstevel@tonic-gate case UFUNC:
6957c478bd9Sstevel@tonic-gate case FUNC:
6967c478bd9Sstevel@tonic-gate case GETLINE:
6977c478bd9Sstevel@tonic-gate case '!':
6987c478bd9Sstevel@tonic-gate case '$':
6997c478bd9Sstevel@tonic-gate case '(':
7007c478bd9Sstevel@tonic-gate if (catterm) {
7017c478bd9Sstevel@tonic-gate savetoken = c;
7027c478bd9Sstevel@tonic-gate c = CONCAT;
7037c478bd9Sstevel@tonic-gate catterm = 0;
7047c478bd9Sstevel@tonic-gate }
7057c478bd9Sstevel@tonic-gate break;
7067c478bd9Sstevel@tonic-gate
7077c478bd9Sstevel@tonic-gate /* { */ case '}':
7087c478bd9Sstevel@tonic-gate if (nbrace == 0)
7097c478bd9Sstevel@tonic-gate savetoken = ';';
710*79777a7dSnakanon /*FALLTHRU*/
7117c478bd9Sstevel@tonic-gate case ';':
7127c478bd9Sstevel@tonic-gate inprint = 0;
713*79777a7dSnakanon /*FALLTHRU*/
7147c478bd9Sstevel@tonic-gate default:
7157c478bd9Sstevel@tonic-gate if (c == DEFFUNC)
7167c478bd9Sstevel@tonic-gate isfuncdef = 1;
7177c478bd9Sstevel@tonic-gate catterm = 0;
7187c478bd9Sstevel@tonic-gate }
7197c478bd9Sstevel@tonic-gate lexlast = c;
7207c478bd9Sstevel@tonic-gate if (wasfield)
7217c478bd9Sstevel@tonic-gate wasfield--;
7227c478bd9Sstevel@tonic-gate /*
7237c478bd9Sstevel@tonic-gate * Map character constants to symbolic names.
7247c478bd9Sstevel@tonic-gate */
7257c478bd9Sstevel@tonic-gate for (i = 0; ctosym[i].c != 0; i++)
7267c478bd9Sstevel@tonic-gate if (c == ctosym[i].c) {
7277c478bd9Sstevel@tonic-gate c = ctosym[i].sym;
7287c478bd9Sstevel@tonic-gate break;
7297c478bd9Sstevel@tonic-gate }
7307c478bd9Sstevel@tonic-gate return ((int)c);
7317c478bd9Sstevel@tonic-gate }
7327c478bd9Sstevel@tonic-gate
7337c478bd9Sstevel@tonic-gate /*
7347c478bd9Sstevel@tonic-gate * Read a number for the lexical analyzer.
7357c478bd9Sstevel@tonic-gate * Input is the first character of the number.
7367c478bd9Sstevel@tonic-gate * Return value is the lexical type.
7377c478bd9Sstevel@tonic-gate */
7387c478bd9Sstevel@tonic-gate static int
lexnumber(wint_t c)7397c478bd9Sstevel@tonic-gate lexnumber(wint_t c)
7407c478bd9Sstevel@tonic-gate {
741cb4658fbSceastha wchar_t *cp;
742cb4658fbSceastha int dotfound = 0;
743cb4658fbSceastha int efound = 0;
7447c478bd9Sstevel@tonic-gate INT number;
7457c478bd9Sstevel@tonic-gate
7467c478bd9Sstevel@tonic-gate cp = linebuf;
7477c478bd9Sstevel@tonic-gate do {
7487c478bd9Sstevel@tonic-gate if (iswdigit(c))
7497c478bd9Sstevel@tonic-gate ;
7507c478bd9Sstevel@tonic-gate else if (c == '.') {
7517c478bd9Sstevel@tonic-gate if (dotfound++)
7527c478bd9Sstevel@tonic-gate break;
7537c478bd9Sstevel@tonic-gate } else if (c == 'e' || c == 'E') {
7547c478bd9Sstevel@tonic-gate if ((c = lexgetc()) != '-' && c != '+') {
7557c478bd9Sstevel@tonic-gate lexungetc(c);
7567c478bd9Sstevel@tonic-gate c = 'e';
7577c478bd9Sstevel@tonic-gate } else
7587c478bd9Sstevel@tonic-gate *cp++ = 'e';
7597c478bd9Sstevel@tonic-gate if (efound++)
7607c478bd9Sstevel@tonic-gate break;
7617c478bd9Sstevel@tonic-gate } else
7627c478bd9Sstevel@tonic-gate break;
7637c478bd9Sstevel@tonic-gate *cp++ = c;
7647c478bd9Sstevel@tonic-gate } while ((c = lexgetc()) != WEOF);
7657c478bd9Sstevel@tonic-gate *cp = '\0';
7667c478bd9Sstevel@tonic-gate if (dotfound && cp == linebuf+1)
7677c478bd9Sstevel@tonic-gate return (DOT);
7687c478bd9Sstevel@tonic-gate lexungetc(c);
7697c478bd9Sstevel@tonic-gate errno = 0;
770cb4658fbSceastha if (!dotfound && !efound &&
771cb4658fbSceastha ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE))
7727c478bd9Sstevel@tonic-gate yylval.node = intnode(number);
7737c478bd9Sstevel@tonic-gate else
7747c478bd9Sstevel@tonic-gate yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
7757c478bd9Sstevel@tonic-gate return (CONSTANT);
7767c478bd9Sstevel@tonic-gate }
7777c478bd9Sstevel@tonic-gate
7787c478bd9Sstevel@tonic-gate /*
7797c478bd9Sstevel@tonic-gate * Read an identifier.
7807c478bd9Sstevel@tonic-gate * Input is first character of identifier.
7817c478bd9Sstevel@tonic-gate * Return VAR.
7827c478bd9Sstevel@tonic-gate */
7837c478bd9Sstevel@tonic-gate static int
lexid(wint_t c)7847c478bd9Sstevel@tonic-gate lexid(wint_t c)
7857c478bd9Sstevel@tonic-gate {
786cb4658fbSceastha wchar_t *cp;
787cb4658fbSceastha size_t i;
788cb4658fbSceastha NODE *np;
7897c478bd9Sstevel@tonic-gate
7907c478bd9Sstevel@tonic-gate cp = linebuf;
7917c478bd9Sstevel@tonic-gate do {
7927c478bd9Sstevel@tonic-gate *cp++ = c;
7937c478bd9Sstevel@tonic-gate c = lexgetc();
7947c478bd9Sstevel@tonic-gate } while (iswalpha(c) || iswdigit(c) || c == '_');
7957c478bd9Sstevel@tonic-gate *cp = '\0';
7967c478bd9Sstevel@tonic-gate lexungetc(c);
7977c478bd9Sstevel@tonic-gate yylval.node = np = vlook(linebuf);
7987c478bd9Sstevel@tonic-gate
7997c478bd9Sstevel@tonic-gate switch (np->n_type) {
8007c478bd9Sstevel@tonic-gate case KEYWORD:
8017c478bd9Sstevel@tonic-gate switch (np->n_keywtype) {
8027c478bd9Sstevel@tonic-gate case PRINT:
8037c478bd9Sstevel@tonic-gate case PRINTF:
8047c478bd9Sstevel@tonic-gate ++inprint;
8057c478bd9Sstevel@tonic-gate default:
8067c478bd9Sstevel@tonic-gate return ((int)np->n_keywtype);
8077c478bd9Sstevel@tonic-gate }
8087c478bd9Sstevel@tonic-gate /* NOTREACHED */
8097c478bd9Sstevel@tonic-gate
8107c478bd9Sstevel@tonic-gate case ARRAY:
8117c478bd9Sstevel@tonic-gate case VAR:
8127c478bd9Sstevel@tonic-gate /*
8137c478bd9Sstevel@tonic-gate * If reading the argument list, create a dummy node
8147c478bd9Sstevel@tonic-gate * for the duration of that function. These variables
8157c478bd9Sstevel@tonic-gate * can be removed from the symbol table at function end
8167c478bd9Sstevel@tonic-gate * but they must still exist because the execution tree
8177c478bd9Sstevel@tonic-gate * knows about them.
8187c478bd9Sstevel@tonic-gate */
8197c478bd9Sstevel@tonic-gate if (funparm) {
8207c478bd9Sstevel@tonic-gate do_funparm:
8217c478bd9Sstevel@tonic-gate np = emptynode(PARM, i = (cp-linebuf));
8227c478bd9Sstevel@tonic-gate np->n_flags = FSTRING;
8237c478bd9Sstevel@tonic-gate np->n_string = _null;
8247c478bd9Sstevel@tonic-gate np->n_strlen = 0;
8257c478bd9Sstevel@tonic-gate (void) memcpy(np->n_name, linebuf,
8267c478bd9Sstevel@tonic-gate (i+1) * sizeof (wchar_t));
8277c478bd9Sstevel@tonic-gate addsymtab(np);
8287c478bd9Sstevel@tonic-gate yylval.node = np;
8297c478bd9Sstevel@tonic-gate } else if (np == varNF || (np == varFS &&
8307c478bd9Sstevel@tonic-gate (!doing_begin || begin_getline))) {
8317c478bd9Sstevel@tonic-gate /*
8327c478bd9Sstevel@tonic-gate * If the user program references NF or sets
8337c478bd9Sstevel@tonic-gate * FS either outside of a begin block or
8347c478bd9Sstevel@tonic-gate * in a begin block after a getline then the
8357c478bd9Sstevel@tonic-gate * input line will be split immediately upon read
8367c478bd9Sstevel@tonic-gate * rather than when a field is first referenced.
8377c478bd9Sstevel@tonic-gate */
8387c478bd9Sstevel@tonic-gate needsplit = 1;
8397c478bd9Sstevel@tonic-gate } else if (np == varENVIRON)
8407c478bd9Sstevel@tonic-gate needenviron = 1;
841*79777a7dSnakanon /*FALLTHRU*/
8427c478bd9Sstevel@tonic-gate case PARM:
8437c478bd9Sstevel@tonic-gate return (VAR);
8447c478bd9Sstevel@tonic-gate
8457c478bd9Sstevel@tonic-gate case UFUNC:
8467c478bd9Sstevel@tonic-gate /*
8477c478bd9Sstevel@tonic-gate * It is ok to redefine functions as parameters
8487c478bd9Sstevel@tonic-gate */
8497c478bd9Sstevel@tonic-gate if (funparm) goto do_funparm;
850*79777a7dSnakanon /*FALLTHRU*/
8517c478bd9Sstevel@tonic-gate case FUNC:
8527c478bd9Sstevel@tonic-gate case GETLINE:
8537c478bd9Sstevel@tonic-gate /*
8547c478bd9Sstevel@tonic-gate * When a getline is encountered, clear the 'doing_begin' flag.
8557c478bd9Sstevel@tonic-gate * This will force the 'needsplit' flag to be set, even inside
8567c478bd9Sstevel@tonic-gate * a begin block, if FS is altered. (See VAR case above)
8577c478bd9Sstevel@tonic-gate */
8587c478bd9Sstevel@tonic-gate if (doing_begin)
8597c478bd9Sstevel@tonic-gate begin_getline = 1;
8607c478bd9Sstevel@tonic-gate return (np->n_type);
8617c478bd9Sstevel@tonic-gate }
8627c478bd9Sstevel@tonic-gate /* NOTREACHED */
86388f3d729Sakaplan return (0);
8647c478bd9Sstevel@tonic-gate }
8657c478bd9Sstevel@tonic-gate
8667c478bd9Sstevel@tonic-gate /*
8677c478bd9Sstevel@tonic-gate * Read a string for the lexical analyzer.
8687c478bd9Sstevel@tonic-gate * `endc' terminates the string.
8697c478bd9Sstevel@tonic-gate */
8707c478bd9Sstevel@tonic-gate static int
lexstring(wint_t endc)8717c478bd9Sstevel@tonic-gate lexstring(wint_t endc)
8727c478bd9Sstevel@tonic-gate {
873cb4658fbSceastha size_t length = lexescape(endc, 0, 0);
8747c478bd9Sstevel@tonic-gate
8757c478bd9Sstevel@tonic-gate yylval.node = stringnode(linebuf, FALLOC, length);
8767c478bd9Sstevel@tonic-gate return (CONSTANT);
8777c478bd9Sstevel@tonic-gate }
8787c478bd9Sstevel@tonic-gate
8797c478bd9Sstevel@tonic-gate /*
8807c478bd9Sstevel@tonic-gate * Read a regular expression.
8817c478bd9Sstevel@tonic-gate */
8827c478bd9Sstevel@tonic-gate static int
lexregexp(wint_t endc)8837c478bd9Sstevel@tonic-gate lexregexp(wint_t endc)
8847c478bd9Sstevel@tonic-gate {
8857c478bd9Sstevel@tonic-gate (void) lexescape(endc, 1, 0);
8867c478bd9Sstevel@tonic-gate yylval.node = renode(linebuf);
8877c478bd9Sstevel@tonic-gate return (URE);
8887c478bd9Sstevel@tonic-gate }
8897c478bd9Sstevel@tonic-gate
8907c478bd9Sstevel@tonic-gate /*
8917c478bd9Sstevel@tonic-gate * Process a string, converting the escape characters as required by
8927c478bd9Sstevel@tonic-gate * 1003.2. The processed string ends up in the global linebuf[]. This
8937c478bd9Sstevel@tonic-gate * routine also changes the value of 'progfd' - the program file
8947c478bd9Sstevel@tonic-gate * descriptor, so it should be used with some care. It is presently used to
8957c478bd9Sstevel@tonic-gate * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
8967c478bd9Sstevel@tonic-gate */
8977c478bd9Sstevel@tonic-gate void
strescape(wchar_t * str)8987c478bd9Sstevel@tonic-gate strescape(wchar_t *str)
8997c478bd9Sstevel@tonic-gate {
9007c478bd9Sstevel@tonic-gate progptr = str;
9017c478bd9Sstevel@tonic-gate proglen = wcslen(str) + 1; /* Include \0 */
9027c478bd9Sstevel@tonic-gate (void) lexescape('\0', 0, 1);
9037c478bd9Sstevel@tonic-gate progptr = NULL;
9047c478bd9Sstevel@tonic-gate }
9057c478bd9Sstevel@tonic-gate
9067c478bd9Sstevel@tonic-gate /*
9077c478bd9Sstevel@tonic-gate * Read a string or regular expression, terminated by ``endc'',
9087c478bd9Sstevel@tonic-gate * for lexical analyzer, processing escape sequences.
9097c478bd9Sstevel@tonic-gate * Return string length.
9107c478bd9Sstevel@tonic-gate */
9117c478bd9Sstevel@tonic-gate static size_t
lexescape(wint_t endc,int regx,int cmd_line_operand)9127c478bd9Sstevel@tonic-gate lexescape(wint_t endc, int regx, int cmd_line_operand)
9137c478bd9Sstevel@tonic-gate {
9147c478bd9Sstevel@tonic-gate static char nlre[256];
9157c478bd9Sstevel@tonic-gate static char nlstr[256];
9167c478bd9Sstevel@tonic-gate static char eofre[256];
9177c478bd9Sstevel@tonic-gate static char eofstr[256];
9187c478bd9Sstevel@tonic-gate int first_time = 1;
9197c478bd9Sstevel@tonic-gate wint_t c;
9207c478bd9Sstevel@tonic-gate wchar_t *cp;
9217c478bd9Sstevel@tonic-gate int n, max;
9227c478bd9Sstevel@tonic-gate
9237c478bd9Sstevel@tonic-gate if (first_time == 1) {
9247c478bd9Sstevel@tonic-gate (void) strcpy(nlre, gettext("Newline in regular expression\n"));
9257c478bd9Sstevel@tonic-gate (void) strcpy(nlstr, gettext("Newline in string\n"));
9267c478bd9Sstevel@tonic-gate (void) strcpy(eofre, gettext("EOF in regular expression\n"));
9277c478bd9Sstevel@tonic-gate (void) strcpy(eofstr, gettext("EOF in string\n"));
9287c478bd9Sstevel@tonic-gate first_time = 0;
9297c478bd9Sstevel@tonic-gate }
9307c478bd9Sstevel@tonic-gate
9317c478bd9Sstevel@tonic-gate cp = linebuf;
9327c478bd9Sstevel@tonic-gate while ((c = lexgetc()) != endc) {
9337c478bd9Sstevel@tonic-gate if (c == '\n')
9347c478bd9Sstevel@tonic-gate awkerr(regx ? nlre : nlstr);
9357c478bd9Sstevel@tonic-gate if (c == '\\') {
9367c478bd9Sstevel@tonic-gate switch (c = lexgetc(), c) {
9377c478bd9Sstevel@tonic-gate case '\\':
9387c478bd9Sstevel@tonic-gate if (regx)
9397c478bd9Sstevel@tonic-gate *cp++ = '\\';
9407c478bd9Sstevel@tonic-gate break;
9417c478bd9Sstevel@tonic-gate
9427c478bd9Sstevel@tonic-gate case '/':
9437c478bd9Sstevel@tonic-gate c = '/';
9447c478bd9Sstevel@tonic-gate break;
9457c478bd9Sstevel@tonic-gate
9467c478bd9Sstevel@tonic-gate case 'n':
9477c478bd9Sstevel@tonic-gate c = '\n';
9487c478bd9Sstevel@tonic-gate break;
9497c478bd9Sstevel@tonic-gate
9507c478bd9Sstevel@tonic-gate case 'b':
9517c478bd9Sstevel@tonic-gate c = '\b';
9527c478bd9Sstevel@tonic-gate break;
9537c478bd9Sstevel@tonic-gate
9547c478bd9Sstevel@tonic-gate case 't':
9557c478bd9Sstevel@tonic-gate c = '\t';
9567c478bd9Sstevel@tonic-gate break;
9577c478bd9Sstevel@tonic-gate
9587c478bd9Sstevel@tonic-gate case 'r':
9597c478bd9Sstevel@tonic-gate c = '\r';
9607c478bd9Sstevel@tonic-gate break;
9617c478bd9Sstevel@tonic-gate
9627c478bd9Sstevel@tonic-gate case 'f':
9637c478bd9Sstevel@tonic-gate c = '\f';
9647c478bd9Sstevel@tonic-gate break;
9657c478bd9Sstevel@tonic-gate
9667c478bd9Sstevel@tonic-gate case 'v':
9677c478bd9Sstevel@tonic-gate c = '\v';
9687c478bd9Sstevel@tonic-gate break;
9697c478bd9Sstevel@tonic-gate
9707c478bd9Sstevel@tonic-gate case 'a':
9717c478bd9Sstevel@tonic-gate c = (char)0x07;
9727c478bd9Sstevel@tonic-gate break;
9737c478bd9Sstevel@tonic-gate
9747c478bd9Sstevel@tonic-gate case 'x':
9757c478bd9Sstevel@tonic-gate n = 0;
9767c478bd9Sstevel@tonic-gate while (iswxdigit(c = lexgetc())) {
9777c478bd9Sstevel@tonic-gate if (iswdigit(c))
9787c478bd9Sstevel@tonic-gate c -= '0';
9797c478bd9Sstevel@tonic-gate else if (iswupper(c))
9807c478bd9Sstevel@tonic-gate c -= 'A'-10;
9817c478bd9Sstevel@tonic-gate else
9827c478bd9Sstevel@tonic-gate c -= 'a'-10;
9837c478bd9Sstevel@tonic-gate n = (n<<4) + c;
9847c478bd9Sstevel@tonic-gate }
9857c478bd9Sstevel@tonic-gate lexungetc(c);
9867c478bd9Sstevel@tonic-gate c = n;
9877c478bd9Sstevel@tonic-gate break;
9887c478bd9Sstevel@tonic-gate
9897c478bd9Sstevel@tonic-gate case '0':
9907c478bd9Sstevel@tonic-gate case '1':
9917c478bd9Sstevel@tonic-gate case '2':
9927c478bd9Sstevel@tonic-gate case '3':
9937c478bd9Sstevel@tonic-gate case '4':
9947c478bd9Sstevel@tonic-gate case '5':
9957c478bd9Sstevel@tonic-gate case '6':
9967c478bd9Sstevel@tonic-gate case '7':
9977c478bd9Sstevel@tonic-gate #if 0
9987c478bd9Sstevel@tonic-gate /*
9997c478bd9Sstevel@tonic-gate * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
10007c478bd9Sstevel@tonic-gate * requires processing of the octal escapes both in strings and
10017c478bd9Sstevel@tonic-gate * regular expressions. The following code is disabled instead of
10027c478bd9Sstevel@tonic-gate * removed as back-referencing may be reintroduced in a future draft
10037c478bd9Sstevel@tonic-gate * of the standard.
10047c478bd9Sstevel@tonic-gate */
10057c478bd9Sstevel@tonic-gate /*
10067c478bd9Sstevel@tonic-gate * For regular expressions, we disallow
10077c478bd9Sstevel@tonic-gate * \ooo to mean octal character, in favour
10087c478bd9Sstevel@tonic-gate * of back referencing.
10097c478bd9Sstevel@tonic-gate */
10107c478bd9Sstevel@tonic-gate if (regx) {
10117c478bd9Sstevel@tonic-gate *cp++ = '\\';
10127c478bd9Sstevel@tonic-gate break;
10137c478bd9Sstevel@tonic-gate }
10147c478bd9Sstevel@tonic-gate #endif
10157c478bd9Sstevel@tonic-gate max = 3;
10167c478bd9Sstevel@tonic-gate n = 0;
10177c478bd9Sstevel@tonic-gate do {
10187c478bd9Sstevel@tonic-gate n = (n<<3) + c-'0';
10197c478bd9Sstevel@tonic-gate if ((c = lexgetc()) > '7' || c < '0')
10207c478bd9Sstevel@tonic-gate break;
10217c478bd9Sstevel@tonic-gate } while (--max);
10227c478bd9Sstevel@tonic-gate lexungetc(c);
10237c478bd9Sstevel@tonic-gate /*
10247c478bd9Sstevel@tonic-gate * an octal escape sequence must have at least
10257c478bd9Sstevel@tonic-gate * 2 digits after the backslash, otherwise
10267c478bd9Sstevel@tonic-gate * it gets passed straight thru for possible
10277c478bd9Sstevel@tonic-gate * use in backreferencing.
10287c478bd9Sstevel@tonic-gate */
10297c478bd9Sstevel@tonic-gate if (max == 3) {
10307c478bd9Sstevel@tonic-gate *cp++ = '\\';
10317c478bd9Sstevel@tonic-gate n += '0';
10327c478bd9Sstevel@tonic-gate }
10337c478bd9Sstevel@tonic-gate c = n;
10347c478bd9Sstevel@tonic-gate break;
10357c478bd9Sstevel@tonic-gate
10367c478bd9Sstevel@tonic-gate case '\n':
10377c478bd9Sstevel@tonic-gate continue;
10387c478bd9Sstevel@tonic-gate
10397c478bd9Sstevel@tonic-gate default:
10407c478bd9Sstevel@tonic-gate if (c != endc || cmd_line_operand) {
10417c478bd9Sstevel@tonic-gate *cp++ = '\\';
10427c478bd9Sstevel@tonic-gate if (c == endc)
10437c478bd9Sstevel@tonic-gate lexungetc(c);
10447c478bd9Sstevel@tonic-gate }
10457c478bd9Sstevel@tonic-gate }
10467c478bd9Sstevel@tonic-gate }
10477c478bd9Sstevel@tonic-gate if (c == WEOF)
10487c478bd9Sstevel@tonic-gate awkerr(regx ? eofre : eofstr);
10497c478bd9Sstevel@tonic-gate *cp++ = c;
10507c478bd9Sstevel@tonic-gate }
10517c478bd9Sstevel@tonic-gate *cp = '\0';
10527c478bd9Sstevel@tonic-gate return (cp - linebuf);
10537c478bd9Sstevel@tonic-gate }
10547c478bd9Sstevel@tonic-gate
10557c478bd9Sstevel@tonic-gate /*
10567c478bd9Sstevel@tonic-gate * Build a regular expression NODE.
10577c478bd9Sstevel@tonic-gate * Argument is the string holding the expression.
10587c478bd9Sstevel@tonic-gate */
10597c478bd9Sstevel@tonic-gate NODE *
renode(wchar_t * s)10607c478bd9Sstevel@tonic-gate renode(wchar_t *s)
10617c478bd9Sstevel@tonic-gate {
1062cb4658fbSceastha NODE *np;
10637c478bd9Sstevel@tonic-gate int n;
10647c478bd9Sstevel@tonic-gate
10657c478bd9Sstevel@tonic-gate np = emptynode(RE, 0);
10667c478bd9Sstevel@tonic-gate np->n_left = np->n_right = NNULL;
1067*79777a7dSnakanon if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) {
10687c478bd9Sstevel@tonic-gate int m;
10697c478bd9Sstevel@tonic-gate char *p;
10707c478bd9Sstevel@tonic-gate
1071*79777a7dSnakanon m = REGWERROR(n, np->n_regexp, NULL, 0);
10727c478bd9Sstevel@tonic-gate p = (char *)emalloc(m);
1073*79777a7dSnakanon REGWERROR(n, np->n_regexp, p, m);
10747c478bd9Sstevel@tonic-gate awkerr("/%S/: %s", s, p);
10757c478bd9Sstevel@tonic-gate }
10767c478bd9Sstevel@tonic-gate return (np);
10777c478bd9Sstevel@tonic-gate }
10787c478bd9Sstevel@tonic-gate /*
10797c478bd9Sstevel@tonic-gate * Get a character for the lexical analyser routine.
10807c478bd9Sstevel@tonic-gate */
10817c478bd9Sstevel@tonic-gate static wint_t
lexgetc()10827c478bd9Sstevel@tonic-gate lexgetc()
10837c478bd9Sstevel@tonic-gate {
1084cb4658fbSceastha wint_t c;
10857c478bd9Sstevel@tonic-gate static char **files = &progfiles[0];
10867c478bd9Sstevel@tonic-gate
10877c478bd9Sstevel@tonic-gate if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF)
10887c478bd9Sstevel@tonic-gate ;
10897c478bd9Sstevel@tonic-gate else {
10907c478bd9Sstevel@tonic-gate if (progptr != NULL) {
10917c478bd9Sstevel@tonic-gate if (proglen-- <= 0)
10927c478bd9Sstevel@tonic-gate c = WEOF;
10937c478bd9Sstevel@tonic-gate else
10947c478bd9Sstevel@tonic-gate c = *progptr++;
10957c478bd9Sstevel@tonic-gate } else {
10967c478bd9Sstevel@tonic-gate if (progfp != FNULL)
10977c478bd9Sstevel@tonic-gate if (progfp != stdin)
10987c478bd9Sstevel@tonic-gate (void) fclose(progfp);
10997c478bd9Sstevel@tonic-gate else
11007c478bd9Sstevel@tonic-gate clearerr(progfp);
11017c478bd9Sstevel@tonic-gate progfp = FNULL;
11027c478bd9Sstevel@tonic-gate if (files < progfilep) {
11037c478bd9Sstevel@tonic-gate filename = *files++;
11047c478bd9Sstevel@tonic-gate lineno = 1;
11057c478bd9Sstevel@tonic-gate if (filename[0] == '-' && filename[1] == '\0')
11067c478bd9Sstevel@tonic-gate progfp = stdin;
1107cb4658fbSceastha else if ((progfp = fopen(filename, r))
1108cb4658fbSceastha == FNULL) {
11097c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
11107c478bd9Sstevel@tonic-gate gettext("script file \"%s\""), filename);
11117c478bd9Sstevel@tonic-gate exit(1);
11127c478bd9Sstevel@tonic-gate }
11137c478bd9Sstevel@tonic-gate c = fgetwc(progfp);
11147c478bd9Sstevel@tonic-gate }
11157c478bd9Sstevel@tonic-gate }
11167c478bd9Sstevel@tonic-gate }
11177c478bd9Sstevel@tonic-gate if (c == '\n')
11187c478bd9Sstevel@tonic-gate ++lineno;
11197c478bd9Sstevel@tonic-gate if (conptr >= &context[NCONTEXT])
11207c478bd9Sstevel@tonic-gate conptr = &context[0];
11217c478bd9Sstevel@tonic-gate if (c != WEOF)
11227c478bd9Sstevel@tonic-gate *conptr++ = c;
11237c478bd9Sstevel@tonic-gate return (c);
11247c478bd9Sstevel@tonic-gate }
11257c478bd9Sstevel@tonic-gate
11267c478bd9Sstevel@tonic-gate /*
11277c478bd9Sstevel@tonic-gate * Return a character for lexical analyser.
11287c478bd9Sstevel@tonic-gate * Only one returned character is (not enforced) legitimite.
11297c478bd9Sstevel@tonic-gate */
11307c478bd9Sstevel@tonic-gate static void
lexungetc(wint_t c)11317c478bd9Sstevel@tonic-gate lexungetc(wint_t c)
11327c478bd9Sstevel@tonic-gate {
11337c478bd9Sstevel@tonic-gate if (c == '\n')
11347c478bd9Sstevel@tonic-gate --lineno;
11357c478bd9Sstevel@tonic-gate if (c != WEOF) {
11367c478bd9Sstevel@tonic-gate if (conptr == &context[0])
11377c478bd9Sstevel@tonic-gate conptr = &context[NCONTEXT];
11387c478bd9Sstevel@tonic-gate *--conptr = '\0';
11397c478bd9Sstevel@tonic-gate }
11407c478bd9Sstevel@tonic-gate if (progfp != FNULL) {
11417c478bd9Sstevel@tonic-gate (void) ungetwc(c, progfp);
11427c478bd9Sstevel@tonic-gate return;
11437c478bd9Sstevel@tonic-gate }
11447c478bd9Sstevel@tonic-gate if (c == WEOF)
11457c478bd9Sstevel@tonic-gate return;
11467c478bd9Sstevel@tonic-gate *--progptr = c;
11477c478bd9Sstevel@tonic-gate proglen++;
11487c478bd9Sstevel@tonic-gate }
11497c478bd9Sstevel@tonic-gate
11507c478bd9Sstevel@tonic-gate /*
11517c478bd9Sstevel@tonic-gate * Syntax errors during parsing.
11527c478bd9Sstevel@tonic-gate */
11537c478bd9Sstevel@tonic-gate void
yyerror(char * s,...)11547c478bd9Sstevel@tonic-gate yyerror(char *s, ...)
11557c478bd9Sstevel@tonic-gate {
11567c478bd9Sstevel@tonic-gate if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD)
11577c478bd9Sstevel@tonic-gate if (lexlast == KEYWORD)
11587c478bd9Sstevel@tonic-gate awkerr(gettext("inadmissible use of reserved keyword"));
11597c478bd9Sstevel@tonic-gate else
11607c478bd9Sstevel@tonic-gate awkerr(gettext("attempt to redefine builtin function"));
11617c478bd9Sstevel@tonic-gate awkerr(s);
11627c478bd9Sstevel@tonic-gate }
11637c478bd9Sstevel@tonic-gate
/*
 * Error routine for all awk errors.
 * Takes a printf-style format plus arguments and forwards them to
 * awkierr() with errno reporting turned off.
 */
/* ARGSUSED */
void
awkerr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(0, fmt, ap);
	va_end(ap);
}
11777c478bd9Sstevel@tonic-gate
/*
 * Error routine like "awkerr" except that the message printed
 * also includes an errno-specific indication (awkierr() is called
 * with its `perr' argument set to 1).
 */
/* ARGSUSED */
void
awkperr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(1, fmt, ap);
	va_end(ap);
}
11927c478bd9Sstevel@tonic-gate
11937c478bd9Sstevel@tonic-gate /*
11947c478bd9Sstevel@tonic-gate * Common internal routine for awkerr, awkperr
11957c478bd9Sstevel@tonic-gate */
11967c478bd9Sstevel@tonic-gate static void
awkierr(int perr,char * fmt,va_list ap)11977c478bd9Sstevel@tonic-gate awkierr(int perr, char *fmt, va_list ap)
11987c478bd9Sstevel@tonic-gate {
11997c478bd9Sstevel@tonic-gate static char sep1[] = "\n>>>\t";
12007c478bd9Sstevel@tonic-gate static char sep2[] = "\t<<<";
12017c478bd9Sstevel@tonic-gate int saveerr = errno;
12027c478bd9Sstevel@tonic-gate
12037c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: ", _cmdname);
12047c478bd9Sstevel@tonic-gate if (running) {
12057c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u ("),
12067c478bd9Sstevel@tonic-gate curnode == NNULL ? 0 : curnode->n_lineno);
12077c478bd9Sstevel@tonic-gate if (phase == 0)
1208cb4658fbSceastha (void) fprintf(stderr, "NR=%lld): ",
1209cb4658fbSceastha (INT)exprint(varNR));
12107c478bd9Sstevel@tonic-gate else
12117c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s): ",
12127c478bd9Sstevel@tonic-gate phase == BEGIN ? s_BEGIN : s_END);
12137c478bd9Sstevel@tonic-gate } else if (lineno != 0) {
12147c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("file \"%s\": "), filename);
12157c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u: "), lineno);
12167c478bd9Sstevel@tonic-gate }
12177c478bd9Sstevel@tonic-gate (void) vfprintf(stderr, gettext(fmt), ap);
12187c478bd9Sstevel@tonic-gate if (perr == 1)
12197c478bd9Sstevel@tonic-gate (void) fprintf(stderr, ": %s", strerror(saveerr));
12207c478bd9Sstevel@tonic-gate if (perr != 2 && !running) {
1221cb4658fbSceastha wchar_t *cp;
1222cb4658fbSceastha int n;
1223cb4658fbSceastha int c;
12247c478bd9Sstevel@tonic-gate
12257c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext(" Context is:%s"), sep1);
12267c478bd9Sstevel@tonic-gate cp = conptr;
12277c478bd9Sstevel@tonic-gate n = NCONTEXT;
12287c478bd9Sstevel@tonic-gate do {
12297c478bd9Sstevel@tonic-gate if (cp >= &context[NCONTEXT])
12307c478bd9Sstevel@tonic-gate cp = &context[0];
12317c478bd9Sstevel@tonic-gate if ((c = *cp++) != '\0')
12327c478bd9Sstevel@tonic-gate (void) fputs(c == '\n' ? sep1 : toprint(c),
12337c478bd9Sstevel@tonic-gate stderr);
12347c478bd9Sstevel@tonic-gate } while (--n != 0);
12357c478bd9Sstevel@tonic-gate (void) fputs(sep2, stderr);
12367c478bd9Sstevel@tonic-gate }
12377c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "\n");
12387c478bd9Sstevel@tonic-gate exit(1);
12397c478bd9Sstevel@tonic-gate }
12407c478bd9Sstevel@tonic-gate
12417c478bd9Sstevel@tonic-gate wchar_t *
emalloc(unsigned n)12427c478bd9Sstevel@tonic-gate emalloc(unsigned n)
12437c478bd9Sstevel@tonic-gate {
12447c478bd9Sstevel@tonic-gate wchar_t *cp;
12457c478bd9Sstevel@tonic-gate
12467c478bd9Sstevel@tonic-gate if ((cp = malloc(n)) == NULL)
12477c478bd9Sstevel@tonic-gate awkerr(nomem);
1248cb4658fbSceastha return (cp);
12497c478bd9Sstevel@tonic-gate }
12507c478bd9Sstevel@tonic-gate
12517c478bd9Sstevel@tonic-gate wchar_t *
erealloc(wchar_t * p,unsigned n)12527c478bd9Sstevel@tonic-gate erealloc(wchar_t *p, unsigned n)
12537c478bd9Sstevel@tonic-gate {
12547c478bd9Sstevel@tonic-gate wchar_t *cp;
12557c478bd9Sstevel@tonic-gate
12567c478bd9Sstevel@tonic-gate if ((cp = realloc(p, n)) == NULL)
12577c478bd9Sstevel@tonic-gate awkerr(nomem);
1258cb4658fbSceastha return (cp);
12597c478bd9Sstevel@tonic-gate }
12607c478bd9Sstevel@tonic-gate
12617c478bd9Sstevel@tonic-gate
/*
 * usage message for awk
 * Prints the synopsis on stderr and returns 2.
 */
static int
usage()
{
	const char *synopsis;

	synopsis = gettext(
"Usage:	awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
"	awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n");
	(void) fprintf(stderr, synopsis);
	return (2);
}
12737c478bd9Sstevel@tonic-gate
12747c478bd9Sstevel@tonic-gate
/*
 * Convert a multibyte string to a wide string.
 * The result lives in a single buffer owned by this function: it is
 * freed and replaced on the next call.
 */
static wchar_t *
mbconvert(char *str)
{
	static wchar_t *last = 0;

	free(last);		/* free(NULL) is a no-op */
	last = mbstowcsdup(str);
	return (last);
}
12847c478bd9Sstevel@tonic-gate
/*
 * Convert a wide string to a multibyte string.
 * The result lives in a single buffer owned by this function: it is
 * freed and replaced on the next call.
 */
char *
mbunconvert(wchar_t *str)
{
	static char *last = 0;

	free(last);		/* free(NULL) is a no-op */
	last = wcstombsdup(str);
	return (last);
}
12947c478bd9Sstevel@tonic-gate
12957c478bd9Sstevel@tonic-gate /*
12967c478bd9Sstevel@tonic-gate * Solaris port - following functions are typical MKS functions written
12977c478bd9Sstevel@tonic-gate * to work for Solaris.
12987c478bd9Sstevel@tonic-gate */
12997c478bd9Sstevel@tonic-gate
/*
 * Return a newly malloc()ed wide-character copy of the multibyte
 * string `s'.  The caller owns the result and must free() it.
 * Returns NULL if memory cannot be allocated or if `s' contains an
 * invalid multibyte sequence.
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/*
	 * A wide string never has more characters than the multibyte
	 * string has bytes, so strlen(s) + 1 elements always suffice.
	 */
	n = strlen(s) + 1;
	if ((w = malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		/* do not leak the buffer on a conversion error */
		free(w);
		return (NULL);
	}
	return (w);
}
13167c478bd9Sstevel@tonic-gate
/*
 * Return a newly malloc()ed multibyte copy of the wide string `w'.
 * The caller owns the result and must free() it.
 * Returns NULL if memory cannot be allocated or if `w' contains a
 * character that cannot be converted; in the latter case errno is
 * left as set by wcstombs().
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t cap;
	size_t len;
	char *mb;
	char *shrunk;

	/* Fetch memory for worst case string length */
	cap = (wcslen(w) + 1) * MB_CUR_MAX;
	if ((mb = malloc(cap)) == NULL)
		return (NULL);

	/* Convert the string */
	len = wcstombs(mb, w, cap);
	if (len == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  If the shrink fails the original
	 * buffer is still valid, so hand that back instead of leaking it.
	 */
	shrunk = realloc(mb, len + 1);
	return (shrunk != NULL ? shrunk : mb);
}
13457c478bd9Sstevel@tonic-gate
/*
 * The upe_ctrls[] table contains the printable 'control-sequences' for the
 * character values 0..31 and 127.  The first entry is for value 127, thus the
 * entries for the remaining character values are from 1..32.
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
};


/*
 * Return a printable string corresponding to the given character value.  If
 * the character is printable, simply return it as the string.  If it is in
 * the range specified by table 5-101 in the UPE, return the corresponding
 * string.  Otherwise, return an octal escape sequence (one "\ooo" group per
 * byte of the multibyte encoding).
 *
 * The result points either into upe_ctrls[] or into one of two static
 * buffers, so it is overwritten by the next call and must not be freed.
 */
static const char *
toprint(wchar_t c)
{
	int n, len;
	unsigned char *ptr;
	static char mbch[MB_LEN_MAX+1];
	static char buf[5 * MB_LEN_MAX + 1];

	if ((n = wctomb(mbch, c)) == -1) {
		/*
		 * Should never happen.  Cast so the argument matches "%x"
		 * even where wchar_t is wider than, or differently signed
		 * from, unsigned int.
		 */
		(void) sprintf(buf, "\\%x", (unsigned int)c);
		return (buf);
	}
	mbch[n] = '\0';
	if (iswprint(c)) {
		return (mbch);
	} else if (c == 127) {
		return (upe_ctrls[0]);
	} else if (c >= 0 && c < 32) {
		/*
		 * Print as in Table 5-101 in the UPE.  The lower bound keeps
		 * a negative wchar_t from indexing before the table.
		 */
		return (upe_ctrls[c+1]);
	} else {
		/* Print as an octal escape sequence */
		buf[0] = '\0';	/* never hand back stale text */
		for (len = 0, ptr = (unsigned char *) mbch; 0 < n; --n, ++ptr)
			len += sprintf(buf+len, "\\%03o", *ptr);
	}
	return (buf);
}
13967c478bd9Sstevel@tonic-gate
/*
 * Translate a byte offset into the multibyte encoding of astring into the
 * corresponding wide-character index.
 *
 * Walks astring, accumulating the encoded length of each wide character
 * until at least `off' bytes have been accounted for, and returns how many
 * wide characters were consumed.  A character that cannot be encoded is
 * counted as one byte.  The walk never goes past the terminating null
 * character, so an offset beyond the end of the string yields the string
 * length.
 */
static int
wcoff(const wchar_t *astring, const int off)
{
	const wchar_t *s = astring;
	int c = 0;
	char mb[MB_LEN_MAX];

	/*
	 * wctomb() returns 1, not 0, for the null wide character, so the
	 * end of the string has to be tested for explicitly.
	 */
	while (c < off && *s != L'\0') {
		int n;

		if ((n = wctomb(mb, *s)) == 0)
			break;
		if (n == -1)
			n = 1;
		c += n;
		s++;
	}

	return ((int)(s - astring));
}
14167c478bd9Sstevel@tonic-gate
/*
 * Compiled regular expression cache.
 *
 * Every compiled pattern is kept in a struct regcache that sits on two
 * queues at once: the reglink list, which int_regwcomp() keeps ordered with
 * the most recently used entry first, and one of the reghash[] chains,
 * selected by regtxthash() of the pattern text.
 */
#define NREGHASH 64	/* number of hash chains in reghash[] */
#define NREGHOLD 1024 /* max number unused entries */

/* number of cached entries whose refcnt is currently zero */
static int nregunref;

/*
 * Hash chain link.  hq must stay the first member: the lookup code casts a
 * struct qelem pointer taken from a hash chain to struct reghashq.
 */
struct reghashq {
	struct qelem hq;		/* linkage within a reghash[] chain */
	struct regcache *regcachep;	/* entry this link belongs to */
};

/*
 * One cached pattern.  lq must stay the first member: the list walker casts
 * a struct qelem pointer taken from reglink to struct regcache.
 */
struct regcache {
	struct qelem lq;	/* linkage on the reglink list */
	wchar_t *pattern;	/* private copy of the pattern text */
	regex_t re;		/* compiled form of pattern */
	int refcnt;		/* number of outstanding users */
	struct reghashq hash;	/* linkage on the hash chain */
};

/* hash chain heads, and the head of the list of all cached entries */
static struct qelem reghash[NREGHASH], reglink;
1436*79777a7dSnakanon
1437*79777a7dSnakanon /*
1438*79777a7dSnakanon * Generate a hash value of the given wchar string.
1439*79777a7dSnakanon * The hashing method is similar to what Java does for strings.
1440*79777a7dSnakanon */
1441*79777a7dSnakanon static uint_t
regtxthash(const wchar_t * str)1442*79777a7dSnakanon regtxthash(const wchar_t *str)
14437c478bd9Sstevel@tonic-gate {
1444*79777a7dSnakanon int k = 0;
1445*79777a7dSnakanon
1446*79777a7dSnakanon while (*str != L'\0')
1447*79777a7dSnakanon k = (31 * k) + *str++;
1448*79777a7dSnakanon
1449*79777a7dSnakanon k += ~(k << 9);
1450*79777a7dSnakanon k ^= (k >> 14);
1451*79777a7dSnakanon k += (k << 4);
1452*79777a7dSnakanon k ^= (k >> 10);
1453*79777a7dSnakanon
1454*79777a7dSnakanon return (k % NREGHASH);
1455*79777a7dSnakanon }
1456*79777a7dSnakanon
1457*79777a7dSnakanon int
int_regwcomp(REGEXP * r,const wchar_t * pattern)1458*79777a7dSnakanon int_regwcomp(REGEXP *r, const wchar_t *pattern)
1459*79777a7dSnakanon {
1460*79777a7dSnakanon regex_t re;
14617c478bd9Sstevel@tonic-gate char *mbpattern;
14627c478bd9Sstevel@tonic-gate int ret;
1463*79777a7dSnakanon uint_t key;
1464*79777a7dSnakanon struct qelem *qp;
1465*79777a7dSnakanon struct regcache *rcp;
1466*79777a7dSnakanon
1467*79777a7dSnakanon key = regtxthash(pattern);
1468*79777a7dSnakanon for (qp = reghash[key].q_forw; qp != NULL; qp = qp->q_forw) {
1469*79777a7dSnakanon rcp = ((struct reghashq *)qp)->regcachep;
1470*79777a7dSnakanon if (*rcp->pattern == *pattern &&
1471*79777a7dSnakanon wcscmp(rcp->pattern, pattern) == 0)
1472*79777a7dSnakanon break;
1473*79777a7dSnakanon }
1474*79777a7dSnakanon if (qp != NULL) {
1475*79777a7dSnakanon /* update link. put this one at the beginning */
1476*79777a7dSnakanon if (rcp != (struct regcache *)reglink.q_forw) {
1477*79777a7dSnakanon remque(&rcp->lq);
1478*79777a7dSnakanon insque(&rcp->lq, ®link);
1479*79777a7dSnakanon }
1480*79777a7dSnakanon if (rcp->refcnt == 0)
1481*79777a7dSnakanon nregunref--; /* no longer unref'ed */
1482*79777a7dSnakanon rcp->refcnt++;
1483*79777a7dSnakanon *(struct regcache **)r = rcp;
1484*79777a7dSnakanon return (REG_OK);
1485*79777a7dSnakanon }
14867c478bd9Sstevel@tonic-gate
14877c478bd9Sstevel@tonic-gate if ((mbpattern = wcstombsdup((wchar_t *)pattern)) == NULL)
14887c478bd9Sstevel@tonic-gate return (REG_ESPACE);
14897c478bd9Sstevel@tonic-gate
1490*79777a7dSnakanon ret = regcomp(&re, mbpattern, REG_EXTENDED);
14917c478bd9Sstevel@tonic-gate
14927c478bd9Sstevel@tonic-gate free(mbpattern);
14937c478bd9Sstevel@tonic-gate
1494*79777a7dSnakanon if (ret != REG_OK)
1495*79777a7dSnakanon return (ret);
1496*79777a7dSnakanon
1497*79777a7dSnakanon if ((rcp = malloc(sizeof (struct regcache))) == NULL)
1498*79777a7dSnakanon return (REG_ESPACE);
1499*79777a7dSnakanon rcp->re = re;
1500*79777a7dSnakanon if ((rcp->pattern = wsdup(pattern)) == NULL) {
1501*79777a7dSnakanon regfree(&re);
1502*79777a7dSnakanon free(rcp);
1503*79777a7dSnakanon return (REG_ESPACE);
1504*79777a7dSnakanon }
1505*79777a7dSnakanon rcp->refcnt = 1;
1506*79777a7dSnakanon insque(&rcp->lq, ®link);
1507*79777a7dSnakanon insque(&rcp->hash.hq, ®hash[key]);
1508*79777a7dSnakanon rcp->hash.regcachep = rcp;
1509*79777a7dSnakanon
1510*79777a7dSnakanon *(struct regcache **)r = rcp;
15117c478bd9Sstevel@tonic-gate return (ret);
15127c478bd9Sstevel@tonic-gate }
15137c478bd9Sstevel@tonic-gate
1514*79777a7dSnakanon void
int_regwfree(REGEXP r)1515*79777a7dSnakanon int_regwfree(REGEXP r)
1516*79777a7dSnakanon {
1517*79777a7dSnakanon int cnt;
1518*79777a7dSnakanon struct qelem *qp, *nqp;
1519*79777a7dSnakanon struct regcache *rcp;
1520*79777a7dSnakanon
1521*79777a7dSnakanon rcp = (struct regcache *)r;
1522*79777a7dSnakanon
1523*79777a7dSnakanon if (--rcp->refcnt != 0)
1524*79777a7dSnakanon return;
1525*79777a7dSnakanon
1526*79777a7dSnakanon /* this cache has no reference */
1527*79777a7dSnakanon if (++nregunref < NREGHOLD)
1528*79777a7dSnakanon return;
1529*79777a7dSnakanon
1530*79777a7dSnakanon /*
1531*79777a7dSnakanon * We've got too much unref'ed regex. Free half of least
1532*79777a7dSnakanon * used regex.
1533*79777a7dSnakanon */
1534*79777a7dSnakanon cnt = 0;
1535*79777a7dSnakanon for (qp = reglink.q_forw; qp != NULL; qp = nqp) {
1536*79777a7dSnakanon nqp = qp->q_forw;
1537*79777a7dSnakanon rcp = (struct regcache *)qp;
1538*79777a7dSnakanon if (rcp->refcnt != 0)
1539*79777a7dSnakanon continue;
1540*79777a7dSnakanon
1541*79777a7dSnakanon /* free half of them */
1542*79777a7dSnakanon if (++cnt < (NREGHOLD / 2))
1543*79777a7dSnakanon continue;
1544*79777a7dSnakanon
1545*79777a7dSnakanon /* detach and free */
1546*79777a7dSnakanon remque(&rcp->lq);
1547*79777a7dSnakanon remque(&rcp->hash.hq);
1548*79777a7dSnakanon
1549*79777a7dSnakanon /* free up */
1550*79777a7dSnakanon free(rcp->pattern);
1551*79777a7dSnakanon regfree(&rcp->re);
1552*79777a7dSnakanon free(rcp);
1553*79777a7dSnakanon
1554*79777a7dSnakanon nregunref--;
1555*79777a7dSnakanon }
1556*79777a7dSnakanon }
1557*79777a7dSnakanon
1558*79777a7dSnakanon size_t
int_regwerror(int errcode,REGEXP r,char * errbuf,size_t bufsiz)1559*79777a7dSnakanon int_regwerror(int errcode, REGEXP r, char *errbuf, size_t bufsiz)
1560*79777a7dSnakanon {
1561*79777a7dSnakanon struct regcache *rcp;
1562*79777a7dSnakanon
1563*79777a7dSnakanon rcp = (struct regcache *)r;
1564*79777a7dSnakanon return (regerror(errcode, &rcp->re, errbuf, bufsiz));
1565*79777a7dSnakanon }
1566*79777a7dSnakanon
15677c478bd9Sstevel@tonic-gate int
int_regwexec(REGEXP r,const wchar_t * astring,size_t nsub,int_regwmatch_t * sub,int flags)1568*79777a7dSnakanon int_regwexec(REGEXP r, /* compiled RE */
15697c478bd9Sstevel@tonic-gate const wchar_t *astring, /* subject string */
15707c478bd9Sstevel@tonic-gate size_t nsub, /* number of subexpressions */
15717c478bd9Sstevel@tonic-gate int_regwmatch_t *sub, /* subexpression pointers */
15727c478bd9Sstevel@tonic-gate int flags)
15737c478bd9Sstevel@tonic-gate {
15747c478bd9Sstevel@tonic-gate char *mbs;
15757c478bd9Sstevel@tonic-gate regmatch_t *mbsub = NULL;
1576cb4658fbSceastha int i;
1577*79777a7dSnakanon struct regcache *rcp;
15787c478bd9Sstevel@tonic-gate
15797c478bd9Sstevel@tonic-gate if ((mbs = wcstombsdup((wchar_t *)astring)) == NULL)
15807c478bd9Sstevel@tonic-gate return (REG_ESPACE);
15817c478bd9Sstevel@tonic-gate
15827c478bd9Sstevel@tonic-gate if (nsub > 0 && sub) {
15837c478bd9Sstevel@tonic-gate if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
15847c478bd9Sstevel@tonic-gate return (REG_ESPACE);
15857c478bd9Sstevel@tonic-gate }
15867c478bd9Sstevel@tonic-gate
1587*79777a7dSnakanon rcp = (struct regcache *)r;
1588*79777a7dSnakanon
1589*79777a7dSnakanon i = regexec(&rcp->re, mbs, nsub, mbsub, flags);
15907c478bd9Sstevel@tonic-gate
15917c478bd9Sstevel@tonic-gate /* Now, adjust the pointers/counts in sub */
15927c478bd9Sstevel@tonic-gate if (i == REG_OK && nsub > 0 && mbsub) {
1593cb4658fbSceastha int j, k;
15947c478bd9Sstevel@tonic-gate
15957c478bd9Sstevel@tonic-gate for (j = 0; j < nsub; j++) {
15967c478bd9Sstevel@tonic-gate regmatch_t *ms = &mbsub[j];
15977c478bd9Sstevel@tonic-gate int_regwmatch_t *ws = &sub[j];
15987c478bd9Sstevel@tonic-gate
15997c478bd9Sstevel@tonic-gate if ((k = ms->rm_so) >= 0) {
16007c478bd9Sstevel@tonic-gate ws->rm_so = wcoff(astring, k);
16017c478bd9Sstevel@tonic-gate ws->rm_sp = astring + ws->rm_so;
16027c478bd9Sstevel@tonic-gate }
16037c478bd9Sstevel@tonic-gate if ((k = ms->rm_eo) >= 0) {
16047c478bd9Sstevel@tonic-gate ws->rm_eo = wcoff(astring, k);
16057c478bd9Sstevel@tonic-gate ws->rm_ep = astring + ws->rm_eo;
16067c478bd9Sstevel@tonic-gate }
16077c478bd9Sstevel@tonic-gate }
16087c478bd9Sstevel@tonic-gate }
16097c478bd9Sstevel@tonic-gate
16107c478bd9Sstevel@tonic-gate free(mbs);
16117c478bd9Sstevel@tonic-gate if (mbsub)
16127c478bd9Sstevel@tonic-gate free(mbsub);
16137c478bd9Sstevel@tonic-gate return (i);
16147c478bd9Sstevel@tonic-gate }
16157c478bd9Sstevel@tonic-gate
16167c478bd9Sstevel@tonic-gate int
int_regwdosuba(REGEXP rp,const wchar_t * rpl,const wchar_t * src,wchar_t ** dstp,int len,int * globp)1617*79777a7dSnakanon int_regwdosuba(REGEXP rp, /* compiled RE: Pattern */
16187c478bd9Sstevel@tonic-gate const wchar_t *rpl, /* replacement string: /rpl/ */
16197c478bd9Sstevel@tonic-gate const wchar_t *src, /* source string */
16207c478bd9Sstevel@tonic-gate wchar_t **dstp, /* destination string */
16217c478bd9Sstevel@tonic-gate int len, /* destination length */
16227c478bd9Sstevel@tonic-gate int *globp) /* IN: occurence, 0 for all; OUT: substitutions */
16237c478bd9Sstevel@tonic-gate {
16247c478bd9Sstevel@tonic-gate wchar_t *dst, *odst;
1625cb4658fbSceastha const wchar_t *ip, *xp;
1626cb4658fbSceastha wchar_t *op;
1627cb4658fbSceastha int i;
1628cb4658fbSceastha wchar_t c;
16297c478bd9Sstevel@tonic-gate int glob, iglob = *globp, oglob = 0;
16307c478bd9Sstevel@tonic-gate #define NSUB 10
16317c478bd9Sstevel@tonic-gate int_regwmatch_t rm[NSUB], *rmp;
16327c478bd9Sstevel@tonic-gate int flags;
16337c478bd9Sstevel@tonic-gate wchar_t *end;
16347c478bd9Sstevel@tonic-gate int regerr;
16357c478bd9Sstevel@tonic-gate
16367c478bd9Sstevel@tonic-gate /* handle overflow of dst. we need "i" more bytes */
16377c478bd9Sstevel@tonic-gate #ifdef OVERFLOW
16387c478bd9Sstevel@tonic-gate #undef OVERFLOW
1639*79777a7dSnakanon #define OVERFLOW(i) { \
16407c478bd9Sstevel@tonic-gate int pos = op - dst; \
16417c478bd9Sstevel@tonic-gate dst = (wchar_t *)realloc(odst = dst, \
16427c478bd9Sstevel@tonic-gate (len += len + i) * sizeof (wchar_t)); \
16437c478bd9Sstevel@tonic-gate if (dst == NULL) \
16447c478bd9Sstevel@tonic-gate goto nospace; \
16457c478bd9Sstevel@tonic-gate op = dst + pos; \
16467c478bd9Sstevel@tonic-gate end = dst + len; \
1647*79777a7dSnakanon }
16487c478bd9Sstevel@tonic-gate #endif
16497c478bd9Sstevel@tonic-gate
16507c478bd9Sstevel@tonic-gate *dstp = dst = (wchar_t *)malloc(len * sizeof (wchar_t));
16517c478bd9Sstevel@tonic-gate if (dst == NULL)
16527c478bd9Sstevel@tonic-gate return (REG_ESPACE);
16537c478bd9Sstevel@tonic-gate
16547c478bd9Sstevel@tonic-gate if (rp == NULL || rpl == NULL || src == NULL || dst == NULL)
16557c478bd9Sstevel@tonic-gate return (REG_EFATAL);
16567c478bd9Sstevel@tonic-gate
16577c478bd9Sstevel@tonic-gate glob = 0; /* match count */
16587c478bd9Sstevel@tonic-gate ip = src; /* source position */
16597c478bd9Sstevel@tonic-gate op = dst; /* destination position */
16607c478bd9Sstevel@tonic-gate end = dst + len;
16617c478bd9Sstevel@tonic-gate
16627c478bd9Sstevel@tonic-gate flags = 0;
16637c478bd9Sstevel@tonic-gate while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
16647c478bd9Sstevel@tonic-gate /* Copy text preceding match */
16657c478bd9Sstevel@tonic-gate if (op + (i = rm[0].rm_sp - ip) >= end)
1666*79777a7dSnakanon OVERFLOW(i)
16677c478bd9Sstevel@tonic-gate while (i--)
16687c478bd9Sstevel@tonic-gate *op++ = *ip++;
16697c478bd9Sstevel@tonic-gate
16707c478bd9Sstevel@tonic-gate if (iglob == 0 || ++glob == iglob) {
16717c478bd9Sstevel@tonic-gate oglob++;
16727c478bd9Sstevel@tonic-gate xp = rpl; /* do substitute */
16737c478bd9Sstevel@tonic-gate } else
16747c478bd9Sstevel@tonic-gate xp = L"&"; /* preserve text */
16757c478bd9Sstevel@tonic-gate
16767c478bd9Sstevel@tonic-gate /* Perform replacement of matched substing */
16777c478bd9Sstevel@tonic-gate while ((c = *xp++) != '\0') {
16787c478bd9Sstevel@tonic-gate rmp = NULL;
16797c478bd9Sstevel@tonic-gate if (c == '&')
16807c478bd9Sstevel@tonic-gate rmp = &rm[0];
16817c478bd9Sstevel@tonic-gate else if (c == '\\') {
16827c478bd9Sstevel@tonic-gate if ('0' <= *xp && *xp <= '9')
16837c478bd9Sstevel@tonic-gate rmp = &rm[*xp++ - '0'];
16847c478bd9Sstevel@tonic-gate else if (*xp != '\0')
16857c478bd9Sstevel@tonic-gate c = *xp++;
16867c478bd9Sstevel@tonic-gate }
16877c478bd9Sstevel@tonic-gate
16887c478bd9Sstevel@tonic-gate if (rmp == NULL) { /* Ordinary character. */
16897c478bd9Sstevel@tonic-gate *op++ = c;
16907c478bd9Sstevel@tonic-gate if (op >= end)
1691*79777a7dSnakanon OVERFLOW(1)
16927c478bd9Sstevel@tonic-gate } else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
16937c478bd9Sstevel@tonic-gate ip = rmp->rm_sp;
16947c478bd9Sstevel@tonic-gate if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
1695*79777a7dSnakanon OVERFLOW(i)
16967c478bd9Sstevel@tonic-gate while (i--)
16977c478bd9Sstevel@tonic-gate *op++ = *ip++;
16987c478bd9Sstevel@tonic-gate }
16997c478bd9Sstevel@tonic-gate }
17007c478bd9Sstevel@tonic-gate
17017c478bd9Sstevel@tonic-gate ip = rm[0].rm_ep;
17027c478bd9Sstevel@tonic-gate if (*ip == '\0') /* If at end break */
17037c478bd9Sstevel@tonic-gate break;
17047c478bd9Sstevel@tonic-gate else if (rm[0].rm_sp == rm[0].rm_ep) {
17057c478bd9Sstevel@tonic-gate /* If empty match copy next char */
17067c478bd9Sstevel@tonic-gate *op++ = *ip++;
17077c478bd9Sstevel@tonic-gate if (op >= end)
1708*79777a7dSnakanon OVERFLOW(1)
17097c478bd9Sstevel@tonic-gate }
17107c478bd9Sstevel@tonic-gate flags = REG_NOTBOL;
17117c478bd9Sstevel@tonic-gate }
17127c478bd9Sstevel@tonic-gate
17137c478bd9Sstevel@tonic-gate if (regerr != REG_OK && regerr != REG_NOMATCH)
17147c478bd9Sstevel@tonic-gate return (regerr);
17157c478bd9Sstevel@tonic-gate
17167c478bd9Sstevel@tonic-gate /* Copy rest of text */
17177c478bd9Sstevel@tonic-gate if (op + (i = wcslen(ip)) >= end)
1718*79777a7dSnakanon OVERFLOW(i)
17197c478bd9Sstevel@tonic-gate while (i--)
17207c478bd9Sstevel@tonic-gate *op++ = *ip++;
17217c478bd9Sstevel@tonic-gate *op++ = '\0';
17227c478bd9Sstevel@tonic-gate
17237c478bd9Sstevel@tonic-gate if ((*dstp = dst = (wchar_t *)realloc(odst = dst,
17247c478bd9Sstevel@tonic-gate sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
17257c478bd9Sstevel@tonic-gate nospace:
17267c478bd9Sstevel@tonic-gate free(odst);
17277c478bd9Sstevel@tonic-gate return (REG_ESPACE);
17287c478bd9Sstevel@tonic-gate }
17297c478bd9Sstevel@tonic-gate
17307c478bd9Sstevel@tonic-gate *globp = oglob;
17317c478bd9Sstevel@tonic-gate
17327c478bd9Sstevel@tonic-gate return ((oglob == 0) ? REG_NOMATCH : REG_OK);
17337c478bd9Sstevel@tonic-gate }
1734