17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*79777a7dSnakanon * Common Development and Distribution License (the "License"). 6*79777a7dSnakanon * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*79777a7dSnakanon * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 287c478bd9Sstevel@tonic-gate */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 317c478bd9Sstevel@tonic-gate 32cb4658fbSceastha /* 33cb4658fbSceastha * awk -- mainline, yylex, etc. 
34cb4658fbSceastha * 35cb4658fbSceastha * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 36cb4658fbSceastha */ 37cb4658fbSceastha 387c478bd9Sstevel@tonic-gate #include "awk.h" 397c478bd9Sstevel@tonic-gate #include "y.tab.h" 407c478bd9Sstevel@tonic-gate #include <stdarg.h> 417c478bd9Sstevel@tonic-gate #include <unistd.h> 427c478bd9Sstevel@tonic-gate #include <locale.h> 43*79777a7dSnakanon #include <search.h> 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate static char *progfiles[NPFILE]; /* Programmes files for yylex */ 467c478bd9Sstevel@tonic-gate static char **progfilep = &progfiles[0]; /* Pointer to last file */ 477c478bd9Sstevel@tonic-gate static wchar_t *progptr; /* In-memory programme */ 487c478bd9Sstevel@tonic-gate static int proglen; /* Length of progptr */ 497c478bd9Sstevel@tonic-gate static wchar_t context[NCONTEXT]; /* Circular buffer of context */ 507c478bd9Sstevel@tonic-gate static wchar_t *conptr = &context[0]; /* context ptr */ 517c478bd9Sstevel@tonic-gate static FILE *progfp; /* Stdio stream for programme */ 527c478bd9Sstevel@tonic-gate static char *filename; 537c478bd9Sstevel@tonic-gate #ifdef DEBUG 547c478bd9Sstevel@tonic-gate static int dflag; 557c478bd9Sstevel@tonic-gate #endif 567c478bd9Sstevel@tonic-gate 577c478bd9Sstevel@tonic-gate #define AWK_EXEC_MAGIC "<MKS AWKC>" 587c478bd9Sstevel@tonic-gate #define LEN_EXEC_MAGIC 10 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate static char unbal[] = "unbalanced E char"; 617c478bd9Sstevel@tonic-gate 627c478bd9Sstevel@tonic-gate static void awkarginit(int c, char **av); 637c478bd9Sstevel@tonic-gate static int lexid(wint_t c); 647c478bd9Sstevel@tonic-gate static int lexnumber(wint_t c); 657c478bd9Sstevel@tonic-gate static int lexstring(wint_t endc); 66cb4658fbSceastha static int lexregexp(wint_t endc); 677c478bd9Sstevel@tonic-gate 687c478bd9Sstevel@tonic-gate static void awkvarinit(void); 697c478bd9Sstevel@tonic-gate static wint_t lexgetc(void); 
707c478bd9Sstevel@tonic-gate static void lexungetc(wint_t c); 717c478bd9Sstevel@tonic-gate static size_t lexescape(wint_t endc, int regx, int cmd_line_operand); 727c478bd9Sstevel@tonic-gate static void awkierr(int perr, char *fmt, va_list ap); 737c478bd9Sstevel@tonic-gate static int usage(void); 747c478bd9Sstevel@tonic-gate void strescape(wchar_t *str); 757c478bd9Sstevel@tonic-gate static const char *toprint(wint_t); 767c478bd9Sstevel@tonic-gate char *_cmdname; 777c478bd9Sstevel@tonic-gate static wchar_t *mbconvert(char *str); 787c478bd9Sstevel@tonic-gate 79cb4658fbSceastha extern int isclvar(wchar_t *arg); 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* 827c478bd9Sstevel@tonic-gate * mainline for awk 837c478bd9Sstevel@tonic-gate */ 847c478bd9Sstevel@tonic-gate int 857c478bd9Sstevel@tonic-gate main(int argc, char *argv[]) 867c478bd9Sstevel@tonic-gate { 87cb4658fbSceastha wchar_t *ap; 88cb4658fbSceastha char *cmd; 897c478bd9Sstevel@tonic-gate 907c478bd9Sstevel@tonic-gate cmd = argv[0]; 917c478bd9Sstevel@tonic-gate _cmdname = cmd; 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate linebuf = emalloc(NLINE * sizeof (wchar_t)); 947c478bd9Sstevel@tonic-gate 95cb4658fbSceastha /* 967c478bd9Sstevel@tonic-gate * At this point only messaging should be internationalized. 977c478bd9Sstevel@tonic-gate * numbers are still scanned as in the Posix locale. 
987c478bd9Sstevel@tonic-gate */ 997c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 1007c478bd9Sstevel@tonic-gate (void) setlocale(LC_NUMERIC, "C"); 1017c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 1027c478bd9Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 1037c478bd9Sstevel@tonic-gate #endif 1047c478bd9Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate awkvarinit(); 1077c478bd9Sstevel@tonic-gate /* running = 1; */ 1087c478bd9Sstevel@tonic-gate while (argc > 1 && *argv[1] == '-') { 1097c478bd9Sstevel@tonic-gate void *save_ptr = NULL; 1107c478bd9Sstevel@tonic-gate ap = mbstowcsdup(&argv[1][1]); 1117c478bd9Sstevel@tonic-gate if (ap == NULL) 1127c478bd9Sstevel@tonic-gate break; 1137c478bd9Sstevel@tonic-gate if (*ap == '\0') { 1147c478bd9Sstevel@tonic-gate free(ap); 1157c478bd9Sstevel@tonic-gate break; 1167c478bd9Sstevel@tonic-gate } 1177c478bd9Sstevel@tonic-gate save_ptr = (void *) ap; 1187c478bd9Sstevel@tonic-gate ++argv; 1197c478bd9Sstevel@tonic-gate --argc; 1207c478bd9Sstevel@tonic-gate if (*ap == '-' && ap[1] == '\0') 1217c478bd9Sstevel@tonic-gate break; 1227c478bd9Sstevel@tonic-gate for (; *ap != '\0'; ++ap) { 1237c478bd9Sstevel@tonic-gate switch (*ap) { 1247c478bd9Sstevel@tonic-gate #ifdef DEBUG 1257c478bd9Sstevel@tonic-gate case 'd': 1267c478bd9Sstevel@tonic-gate dflag = 1; 1277c478bd9Sstevel@tonic-gate continue; 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate #endif 1307c478bd9Sstevel@tonic-gate case 'f': 1317c478bd9Sstevel@tonic-gate if (argc < 2) { 1327c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 1337c478bd9Sstevel@tonic-gate gettext("Missing script file\n")); 1347c478bd9Sstevel@tonic-gate return (1); 1357c478bd9Sstevel@tonic-gate } 1367c478bd9Sstevel@tonic-gate *progfilep++ = argv[1]; 1377c478bd9Sstevel@tonic-gate --argc; 1387c478bd9Sstevel@tonic-gate ++argv; 1397c478bd9Sstevel@tonic-gate continue; 1407c478bd9Sstevel@tonic-gate 1417c478bd9Sstevel@tonic-gate case 'F': 
1427c478bd9Sstevel@tonic-gate if (ap[1] == '\0') { 1437c478bd9Sstevel@tonic-gate if (argc < 2) { 1447c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 1457c478bd9Sstevel@tonic-gate gettext("Missing field separator\n")); 1467c478bd9Sstevel@tonic-gate return (1); 1477c478bd9Sstevel@tonic-gate } 1487c478bd9Sstevel@tonic-gate ap = mbstowcsdup(argv[1]); 1497c478bd9Sstevel@tonic-gate --argc; 1507c478bd9Sstevel@tonic-gate ++argv; 1517c478bd9Sstevel@tonic-gate } else 1527c478bd9Sstevel@tonic-gate ++ap; 1537c478bd9Sstevel@tonic-gate strescape(ap); 1547c478bd9Sstevel@tonic-gate strassign(varFS, linebuf, FALLOC, 1557c478bd9Sstevel@tonic-gate wcslen(linebuf)); 1567c478bd9Sstevel@tonic-gate break; 1577c478bd9Sstevel@tonic-gate 1587c478bd9Sstevel@tonic-gate case 'v': { 159cb4658fbSceastha wchar_t *vp; 160cb4658fbSceastha wchar_t *arg; 1617c478bd9Sstevel@tonic-gate 1627c478bd9Sstevel@tonic-gate if (argc < 2) { 1637c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 1647c478bd9Sstevel@tonic-gate gettext("Missing variable assignment\n")); 1657c478bd9Sstevel@tonic-gate return (1); 1667c478bd9Sstevel@tonic-gate } 1677c478bd9Sstevel@tonic-gate arg = mbconvert(argv[1]); 168cb4658fbSceastha /* 169cb4658fbSceastha * Ensure the variable expression 170cb4658fbSceastha * is valid (correct form). 
171cb4658fbSceastha */ 172cb4658fbSceastha if (((vp = wcschr(arg, '=')) != NULL) && 173cb4658fbSceastha isclvar(arg)) { 1747c478bd9Sstevel@tonic-gate *vp = '\0'; 1757c478bd9Sstevel@tonic-gate strescape(vp+1); 1767c478bd9Sstevel@tonic-gate strassign(vlook(arg), linebuf, 177cb4658fbSceastha FALLOC|FSENSE, 178cb4658fbSceastha wcslen(linebuf)); 1797c478bd9Sstevel@tonic-gate *vp = '='; 180cb4658fbSceastha } else { 181cb4658fbSceastha (void) fprintf(stderr, gettext( 182cb4658fbSceastha "Invalid form for variable " 183cb4658fbSceastha "assignment: %S\n"), arg); 184cb4658fbSceastha return (1); 1857c478bd9Sstevel@tonic-gate } 1867c478bd9Sstevel@tonic-gate --argc; 1877c478bd9Sstevel@tonic-gate ++argv; 1887c478bd9Sstevel@tonic-gate continue; 1897c478bd9Sstevel@tonic-gate } 1907c478bd9Sstevel@tonic-gate 1917c478bd9Sstevel@tonic-gate default: 1927c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 1937c478bd9Sstevel@tonic-gate gettext("Unknown option \"-%S\"\n"), ap); 1947c478bd9Sstevel@tonic-gate return (usage()); 1957c478bd9Sstevel@tonic-gate } 1967c478bd9Sstevel@tonic-gate break; 1977c478bd9Sstevel@tonic-gate } 1987c478bd9Sstevel@tonic-gate if (save_ptr) 1997c478bd9Sstevel@tonic-gate free(save_ptr); 2007c478bd9Sstevel@tonic-gate } 2017c478bd9Sstevel@tonic-gate if (progfilep == &progfiles[0]) { 2027c478bd9Sstevel@tonic-gate if (argc < 2) 2037c478bd9Sstevel@tonic-gate return (usage()); 2047c478bd9Sstevel@tonic-gate filename = "[command line]"; /* BUG: NEEDS TRANSLATION */ 2057c478bd9Sstevel@tonic-gate progptr = mbstowcsdup(argv[1]); 2067c478bd9Sstevel@tonic-gate proglen = wcslen(progptr); 2077c478bd9Sstevel@tonic-gate --argc; 2087c478bd9Sstevel@tonic-gate ++argv; 2097c478bd9Sstevel@tonic-gate } 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate argv[0] = cmd; 2127c478bd9Sstevel@tonic-gate 2137c478bd9Sstevel@tonic-gate awkarginit(argc, argv); 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* running = 0; */ 2167c478bd9Sstevel@tonic-gate (void) yyparse(); 
2177c478bd9Sstevel@tonic-gate 2187c478bd9Sstevel@tonic-gate lineno = 0; 2197c478bd9Sstevel@tonic-gate /* 2207c478bd9Sstevel@tonic-gate * Ok, done parsing, so now activate the rest of the nls stuff, set 2217c478bd9Sstevel@tonic-gate * the radix character. 2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 2247c478bd9Sstevel@tonic-gate radixpoint = *localeconv()->decimal_point; 2257c478bd9Sstevel@tonic-gate awk(); 2267c478bd9Sstevel@tonic-gate /* NOTREACHED */ 2277c478bd9Sstevel@tonic-gate return (0); 2287c478bd9Sstevel@tonic-gate } 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate /* 2317c478bd9Sstevel@tonic-gate * Do initial setup of buffers, etc. 2327c478bd9Sstevel@tonic-gate * This must be called before most processing 2337c478bd9Sstevel@tonic-gate * and especially before lexical analysis. 2347c478bd9Sstevel@tonic-gate * Variables initialised here will be overruled by command 2357c478bd9Sstevel@tonic-gate * line parameter initialisation. 
2367c478bd9Sstevel@tonic-gate */ 2377c478bd9Sstevel@tonic-gate static void 2387c478bd9Sstevel@tonic-gate awkvarinit() 2397c478bd9Sstevel@tonic-gate { 240cb4658fbSceastha NODE *np; 2417c478bd9Sstevel@tonic-gate 2427c478bd9Sstevel@tonic-gate (void) setvbuf(stderr, NULL, _IONBF, 0); 2437c478bd9Sstevel@tonic-gate 2447c478bd9Sstevel@tonic-gate if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) { 2457c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 2467c478bd9Sstevel@tonic-gate gettext("not enough available file descriptors")); 2477c478bd9Sstevel@tonic-gate exit(1); 2487c478bd9Sstevel@tonic-gate } 2497c478bd9Sstevel@tonic-gate ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM); 2507c478bd9Sstevel@tonic-gate #ifdef A_ZERO_POINTERS 2517c478bd9Sstevel@tonic-gate (void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM); 2527c478bd9Sstevel@tonic-gate #else 2537c478bd9Sstevel@tonic-gate { 2547c478bd9Sstevel@tonic-gate /* initialize file descriptor table */ 2557c478bd9Sstevel@tonic-gate OFILE *fp; 2567c478bd9Sstevel@tonic-gate for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) { 2577c478bd9Sstevel@tonic-gate fp->f_fp = FNULL; 2587c478bd9Sstevel@tonic-gate fp->f_mode = 0; 2597c478bd9Sstevel@tonic-gate fp->f_name = (char *)0; 2607c478bd9Sstevel@tonic-gate } 2617c478bd9Sstevel@tonic-gate } 2627c478bd9Sstevel@tonic-gate #endif 2637c478bd9Sstevel@tonic-gate constant = intnode((INT)0); 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate const0 = intnode((INT)0); 2667c478bd9Sstevel@tonic-gate const1 = intnode((INT)1); 2677c478bd9Sstevel@tonic-gate constundef = emptynode(CONSTANT, 0); 2687c478bd9Sstevel@tonic-gate constundef->n_flags = FSTRING|FVINT; 2697c478bd9Sstevel@tonic-gate constundef->n_string = _null; 2707c478bd9Sstevel@tonic-gate constundef->n_strlen = 0; 2717c478bd9Sstevel@tonic-gate inc_oper = emptynode(ADD, 0); 2727c478bd9Sstevel@tonic-gate inc_oper->n_right = const1; 2737c478bd9Sstevel@tonic-gate asn_oper = emptynode(ADD, 0); 2747c478bd9Sstevel@tonic-gate 
field0 = node(FIELD, const0, NNULL); 2757c478bd9Sstevel@tonic-gate 2767c478bd9Sstevel@tonic-gate { 277cb4658fbSceastha RESFUNC near*rp; 2787c478bd9Sstevel@tonic-gate 2797c478bd9Sstevel@tonic-gate for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) { 2807c478bd9Sstevel@tonic-gate np = finstall(rp->rf_name, rp->rf_func, rp->rf_type); 2817c478bd9Sstevel@tonic-gate } 2827c478bd9Sstevel@tonic-gate } 2837c478bd9Sstevel@tonic-gate { 284cb4658fbSceastha RESERVED near*rp; 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) { 2877c478bd9Sstevel@tonic-gate switch (rp->r_type) { 2887c478bd9Sstevel@tonic-gate case SVAR: 2897c478bd9Sstevel@tonic-gate case VAR: 2907c478bd9Sstevel@tonic-gate running = 1; 2917c478bd9Sstevel@tonic-gate np = vlook(rp->r_name); 2927c478bd9Sstevel@tonic-gate if (rp->r_type == SVAR) 2937c478bd9Sstevel@tonic-gate np->n_flags |= FSPECIAL; 2947c478bd9Sstevel@tonic-gate if (rp->r_svalue != NULL) 2957c478bd9Sstevel@tonic-gate strassign(np, rp->r_svalue, FSTATIC, 2967c478bd9Sstevel@tonic-gate (size_t)rp->r_ivalue); 2977c478bd9Sstevel@tonic-gate else { 2987c478bd9Sstevel@tonic-gate constant->n_int = rp->r_ivalue; 2997c478bd9Sstevel@tonic-gate (void) assign(np, constant); 3007c478bd9Sstevel@tonic-gate } 3017c478bd9Sstevel@tonic-gate running = 0; 3027c478bd9Sstevel@tonic-gate break; 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate case KEYWORD: 3057c478bd9Sstevel@tonic-gate kinstall(rp->r_name, (int)rp->r_ivalue); 3067c478bd9Sstevel@tonic-gate break; 3077c478bd9Sstevel@tonic-gate } 3087c478bd9Sstevel@tonic-gate } 3097c478bd9Sstevel@tonic-gate } 3107c478bd9Sstevel@tonic-gate 3117c478bd9Sstevel@tonic-gate varNR = vlook(s_NR); 3127c478bd9Sstevel@tonic-gate varFNR = vlook(s_FNR); 3137c478bd9Sstevel@tonic-gate varNF = vlook(s_NF); 3147c478bd9Sstevel@tonic-gate varOFMT = vlook(s_OFMT); 3157c478bd9Sstevel@tonic-gate varCONVFMT = vlook(s_CONVFMT); 3167c478bd9Sstevel@tonic-gate varOFS 
= vlook(s_OFS); 3177c478bd9Sstevel@tonic-gate varORS = vlook(s_ORS); 3187c478bd9Sstevel@tonic-gate varRS = vlook(s_RS); 3197c478bd9Sstevel@tonic-gate varFS = vlook(s_FS); 3207c478bd9Sstevel@tonic-gate varARGC = vlook(s_ARGC); 3217c478bd9Sstevel@tonic-gate varSUBSEP = vlook(s_SUBSEP); 3227c478bd9Sstevel@tonic-gate varENVIRON = vlook(s_ENVIRON); 3237c478bd9Sstevel@tonic-gate varFILENAME = vlook(s_FILENAME); 3247c478bd9Sstevel@tonic-gate varSYMTAB = vlook(s_SYMTAB); 3257c478bd9Sstevel@tonic-gate incNR = node(ASG, varNR, node(ADD, varNR, const1)); 3267c478bd9Sstevel@tonic-gate incFNR = node(ASG, varFNR, node(ADD, varFNR, const1)); 3277c478bd9Sstevel@tonic-gate clrFNR = node(ASG, varFNR, const0); 3287c478bd9Sstevel@tonic-gate } 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate /* 3317c478bd9Sstevel@tonic-gate * Initialise awk ARGC, ARGV variables. 3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate static void 3347c478bd9Sstevel@tonic-gate awkarginit(int ac, char **av) 3357c478bd9Sstevel@tonic-gate { 336cb4658fbSceastha int i; 337cb4658fbSceastha wchar_t *cp; 3387c478bd9Sstevel@tonic-gate 3397c478bd9Sstevel@tonic-gate ARGVsubi = node(INDEX, vlook(s_ARGV), constant); 3407c478bd9Sstevel@tonic-gate running = 1; 3417c478bd9Sstevel@tonic-gate constant->n_int = ac; 3427c478bd9Sstevel@tonic-gate (void) assign(varARGC, constant); 3437c478bd9Sstevel@tonic-gate for (i = 0; i < ac; ++i) { 3447c478bd9Sstevel@tonic-gate cp = mbstowcsdup(av[i]); 3457c478bd9Sstevel@tonic-gate constant->n_int = i; 3467c478bd9Sstevel@tonic-gate strassign(exprreduce(ARGVsubi), cp, 3477c478bd9Sstevel@tonic-gate FSTATIC|FSENSE, wcslen(cp)); 3487c478bd9Sstevel@tonic-gate } 3497c478bd9Sstevel@tonic-gate running = 0; 3507c478bd9Sstevel@tonic-gate } 3517c478bd9Sstevel@tonic-gate 3527c478bd9Sstevel@tonic-gate /* 3537c478bd9Sstevel@tonic-gate * Clean up when done parsing a function. 
3547c478bd9Sstevel@tonic-gate * All formal parameters, because of a deal (funparm) in 3557c478bd9Sstevel@tonic-gate * yylex, get put into the symbol table in front of any 3567c478bd9Sstevel@tonic-gate * global variable of the same name. When the entire 3577c478bd9Sstevel@tonic-gate * function is parsed, remove these formal dummy nodes 3587c478bd9Sstevel@tonic-gate * from the symbol table but retain the nodes because 3597c478bd9Sstevel@tonic-gate * the generated tree points at them. 3607c478bd9Sstevel@tonic-gate */ 3617c478bd9Sstevel@tonic-gate void 3627c478bd9Sstevel@tonic-gate uexit(NODE *np) 3637c478bd9Sstevel@tonic-gate { 364cb4658fbSceastha NODE *formal; 3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate while ((formal = getlist(&np)) != NNULL) 3677c478bd9Sstevel@tonic-gate delsymtab(formal, 0); 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate 3707c478bd9Sstevel@tonic-gate /* 3717c478bd9Sstevel@tonic-gate * The lexical analyzer. 3727c478bd9Sstevel@tonic-gate */ 3737c478bd9Sstevel@tonic-gate int 3747c478bd9Sstevel@tonic-gate yylex() 3757c478bd9Sstevel@tonic-gate #ifdef DEBUG 3767c478bd9Sstevel@tonic-gate { 377cb4658fbSceastha int l; 3787c478bd9Sstevel@tonic-gate 3797c478bd9Sstevel@tonic-gate l = yyhex(); 3807c478bd9Sstevel@tonic-gate if (dflag) 3817c478bd9Sstevel@tonic-gate (void) printf("%d\n", l); 3827c478bd9Sstevel@tonic-gate return (l); 3837c478bd9Sstevel@tonic-gate } 3847c478bd9Sstevel@tonic-gate yyhex() 3857c478bd9Sstevel@tonic-gate #endif 3867c478bd9Sstevel@tonic-gate { 387cb4658fbSceastha wint_t c, c1; 3887c478bd9Sstevel@tonic-gate int i; 3897c478bd9Sstevel@tonic-gate static int savetoken = 0; 39088f3d729Sakaplan static int wasfield; 3917c478bd9Sstevel@tonic-gate static int isfuncdef; 3927c478bd9Sstevel@tonic-gate static int nbrace, nparen, nbracket; 3937c478bd9Sstevel@tonic-gate static struct ctosymstruct { 3947c478bd9Sstevel@tonic-gate wint_t c, sym; 3957c478bd9Sstevel@tonic-gate } ctosym[] = { 3967c478bd9Sstevel@tonic-gate { '|', 
BAR }, { '^', CARAT }, 3977c478bd9Sstevel@tonic-gate { '~', TILDE }, { '<', LANGLE }, 3987c478bd9Sstevel@tonic-gate { '>', RANGLE }, { '+', PLUSC }, 3997c478bd9Sstevel@tonic-gate { '-', HYPHEN }, { '*', STAR }, 4007c478bd9Sstevel@tonic-gate { '/', SLASH }, { '%', PERCENT }, 4017c478bd9Sstevel@tonic-gate { '!', EXCLAMATION }, { '$', DOLLAR }, 4027c478bd9Sstevel@tonic-gate { '[', LSQUARE }, { ']', RSQUARE }, 4037c478bd9Sstevel@tonic-gate { '(', LPAREN }, { ')', RPAREN }, 4047c478bd9Sstevel@tonic-gate { ';', SEMI }, { '{', LBRACE }, 4057c478bd9Sstevel@tonic-gate { '}', RBRACE }, { 0, 0 } 4067c478bd9Sstevel@tonic-gate }; 4077c478bd9Sstevel@tonic-gate 4087c478bd9Sstevel@tonic-gate if (savetoken) { 4097c478bd9Sstevel@tonic-gate c = savetoken; 4107c478bd9Sstevel@tonic-gate savetoken = 0; 4117c478bd9Sstevel@tonic-gate } else if (redelim != '\0') { 4127c478bd9Sstevel@tonic-gate c = redelim; 4137c478bd9Sstevel@tonic-gate redelim = 0; 4147c478bd9Sstevel@tonic-gate catterm = 0; 4157c478bd9Sstevel@tonic-gate savetoken = c; 4167c478bd9Sstevel@tonic-gate return (lexlast = lexregexp(c)); 4177c478bd9Sstevel@tonic-gate } else while ((c = lexgetc()) != WEOF) { 4187c478bd9Sstevel@tonic-gate if (iswalpha(c) || c == '_') { 4197c478bd9Sstevel@tonic-gate c = lexid(c); 4207c478bd9Sstevel@tonic-gate } else if (iswdigit(c) || c == '.') { 4217c478bd9Sstevel@tonic-gate c = lexnumber(c); 4227c478bd9Sstevel@tonic-gate } else if (isWblank(c)) { 4237c478bd9Sstevel@tonic-gate continue; 4247c478bd9Sstevel@tonic-gate } else switch (c) { 4257c478bd9Sstevel@tonic-gate #if DOS || OS2 4267c478bd9Sstevel@tonic-gate case 032: /* ^Z */ 4277c478bd9Sstevel@tonic-gate continue; 4287c478bd9Sstevel@tonic-gate #endif 4297c478bd9Sstevel@tonic-gate 4307c478bd9Sstevel@tonic-gate case '"': 4317c478bd9Sstevel@tonic-gate c = lexstring(c); 4327c478bd9Sstevel@tonic-gate break; 4337c478bd9Sstevel@tonic-gate 4347c478bd9Sstevel@tonic-gate case '#': 4357c478bd9Sstevel@tonic-gate while ((c = lexgetc()) != '\n' && c != WEOF) 
4367c478bd9Sstevel@tonic-gate ; 4377c478bd9Sstevel@tonic-gate lexungetc(c); 4387c478bd9Sstevel@tonic-gate continue; 4397c478bd9Sstevel@tonic-gate 4407c478bd9Sstevel@tonic-gate case '+': 4417c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '+') 4427c478bd9Sstevel@tonic-gate c = INC; 4437c478bd9Sstevel@tonic-gate else if (c1 == '=') 4447c478bd9Sstevel@tonic-gate c = AADD; 4457c478bd9Sstevel@tonic-gate else 4467c478bd9Sstevel@tonic-gate lexungetc(c1); 4477c478bd9Sstevel@tonic-gate break; 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate case '-': 4507c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '-') 4517c478bd9Sstevel@tonic-gate c = DEC; 4527c478bd9Sstevel@tonic-gate else if (c1 == '=') 4537c478bd9Sstevel@tonic-gate c = ASUB; 4547c478bd9Sstevel@tonic-gate else 4557c478bd9Sstevel@tonic-gate lexungetc(c1); 4567c478bd9Sstevel@tonic-gate break; 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate case '*': 4597c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 4607c478bd9Sstevel@tonic-gate c = AMUL; 4617c478bd9Sstevel@tonic-gate else if (c1 == '*') { 4627c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 4637c478bd9Sstevel@tonic-gate c = AEXP; 4647c478bd9Sstevel@tonic-gate else { 4657c478bd9Sstevel@tonic-gate c = EXP; 4667c478bd9Sstevel@tonic-gate lexungetc(c1); 4677c478bd9Sstevel@tonic-gate } 4687c478bd9Sstevel@tonic-gate } else 4697c478bd9Sstevel@tonic-gate lexungetc(c1); 4707c478bd9Sstevel@tonic-gate break; 4717c478bd9Sstevel@tonic-gate 4727c478bd9Sstevel@tonic-gate case '^': 4737c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') { 4747c478bd9Sstevel@tonic-gate c = AEXP; 4757c478bd9Sstevel@tonic-gate } else { 4767c478bd9Sstevel@tonic-gate c = EXP; 4777c478bd9Sstevel@tonic-gate lexungetc(c1); 4787c478bd9Sstevel@tonic-gate } 4797c478bd9Sstevel@tonic-gate break; 4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate case '/': 482cb4658fbSceastha if ((c1 = lexgetc()) == '=' && 483cb4658fbSceastha lexlast != RE && lexlast != NRE && 
484cb4658fbSceastha lexlast != ';' && lexlast != '\n' && 485cb4658fbSceastha lexlast != ',' && lexlast != '(') 4867c478bd9Sstevel@tonic-gate c = ADIV; 4877c478bd9Sstevel@tonic-gate else 4887c478bd9Sstevel@tonic-gate lexungetc(c1); 4897c478bd9Sstevel@tonic-gate break; 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate case '%': 4927c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 4937c478bd9Sstevel@tonic-gate c = AREM; 4947c478bd9Sstevel@tonic-gate else 4957c478bd9Sstevel@tonic-gate lexungetc(c1); 4967c478bd9Sstevel@tonic-gate break; 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate case '&': 4997c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '&') 5007c478bd9Sstevel@tonic-gate c = AND; 5017c478bd9Sstevel@tonic-gate else 5027c478bd9Sstevel@tonic-gate lexungetc(c1); 5037c478bd9Sstevel@tonic-gate break; 5047c478bd9Sstevel@tonic-gate 5057c478bd9Sstevel@tonic-gate case '|': 5067c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '|') 5077c478bd9Sstevel@tonic-gate c = OR; 5087c478bd9Sstevel@tonic-gate else { 5097c478bd9Sstevel@tonic-gate lexungetc(c1); 5107c478bd9Sstevel@tonic-gate if (inprint) 5117c478bd9Sstevel@tonic-gate c = PIPE; 5127c478bd9Sstevel@tonic-gate } 5137c478bd9Sstevel@tonic-gate break; 5147c478bd9Sstevel@tonic-gate 5157c478bd9Sstevel@tonic-gate case '>': 5167c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 5177c478bd9Sstevel@tonic-gate c = GE; 5187c478bd9Sstevel@tonic-gate else if (c1 == '>') 5197c478bd9Sstevel@tonic-gate c = APPEND; 5207c478bd9Sstevel@tonic-gate else { 5217c478bd9Sstevel@tonic-gate lexungetc(c1); 5227c478bd9Sstevel@tonic-gate if (nparen == 0 && inprint) 5237c478bd9Sstevel@tonic-gate c = WRITE; 5247c478bd9Sstevel@tonic-gate } 5257c478bd9Sstevel@tonic-gate break; 5267c478bd9Sstevel@tonic-gate 5277c478bd9Sstevel@tonic-gate case '<': 5287c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 5297c478bd9Sstevel@tonic-gate c = LE; 5307c478bd9Sstevel@tonic-gate else 5317c478bd9Sstevel@tonic-gate lexungetc(c1); 
5327c478bd9Sstevel@tonic-gate break; 5337c478bd9Sstevel@tonic-gate 5347c478bd9Sstevel@tonic-gate case '!': 5357c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 5367c478bd9Sstevel@tonic-gate c = NE; 5377c478bd9Sstevel@tonic-gate else if (c1 == '~') 5387c478bd9Sstevel@tonic-gate c = NRE; 5397c478bd9Sstevel@tonic-gate else 5407c478bd9Sstevel@tonic-gate lexungetc(c1); 5417c478bd9Sstevel@tonic-gate break; 5427c478bd9Sstevel@tonic-gate 5437c478bd9Sstevel@tonic-gate case '=': 5447c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 5457c478bd9Sstevel@tonic-gate c = EQ; 5467c478bd9Sstevel@tonic-gate else { 5477c478bd9Sstevel@tonic-gate lexungetc(c1); 5487c478bd9Sstevel@tonic-gate c = ASG; 5497c478bd9Sstevel@tonic-gate } 5507c478bd9Sstevel@tonic-gate break; 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate case '\n': 5537c478bd9Sstevel@tonic-gate switch (lexlast) { 5547c478bd9Sstevel@tonic-gate case ')': 5557c478bd9Sstevel@tonic-gate if (catterm || inprint) { 5567c478bd9Sstevel@tonic-gate c = ';'; 5577c478bd9Sstevel@tonic-gate break; 5587c478bd9Sstevel@tonic-gate } 559*79777a7dSnakanon /*FALLTHRU*/ 5607c478bd9Sstevel@tonic-gate case AND: 5617c478bd9Sstevel@tonic-gate case OR: 5627c478bd9Sstevel@tonic-gate case COMMA: 5637c478bd9Sstevel@tonic-gate case '{': 5647c478bd9Sstevel@tonic-gate case ELSE: 5657c478bd9Sstevel@tonic-gate case ';': 5667c478bd9Sstevel@tonic-gate case DO: 5677c478bd9Sstevel@tonic-gate continue; 5687c478bd9Sstevel@tonic-gate 5697c478bd9Sstevel@tonic-gate case '}': 5707c478bd9Sstevel@tonic-gate if (nbrace != 0) 5717c478bd9Sstevel@tonic-gate continue; 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate default: 5747c478bd9Sstevel@tonic-gate c = ';'; 5757c478bd9Sstevel@tonic-gate break; 5767c478bd9Sstevel@tonic-gate } 5777c478bd9Sstevel@tonic-gate break; 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate case ELSE: 5807c478bd9Sstevel@tonic-gate if (lexlast != ';') { 5817c478bd9Sstevel@tonic-gate savetoken = ELSE; 
5827c478bd9Sstevel@tonic-gate c = ';'; 5837c478bd9Sstevel@tonic-gate } 5847c478bd9Sstevel@tonic-gate break; 5857c478bd9Sstevel@tonic-gate 5867c478bd9Sstevel@tonic-gate case '(': 5877c478bd9Sstevel@tonic-gate ++nparen; 5887c478bd9Sstevel@tonic-gate break; 5897c478bd9Sstevel@tonic-gate 5907c478bd9Sstevel@tonic-gate case ')': 5917c478bd9Sstevel@tonic-gate if (--nparen < 0) 5927c478bd9Sstevel@tonic-gate awkerr(unbal, "()"); 5937c478bd9Sstevel@tonic-gate break; 5947c478bd9Sstevel@tonic-gate 5957c478bd9Sstevel@tonic-gate case '{': 5967c478bd9Sstevel@tonic-gate nbrace++; 5977c478bd9Sstevel@tonic-gate break; 5987c478bd9Sstevel@tonic-gate 5997c478bd9Sstevel@tonic-gate case '}': 6007c478bd9Sstevel@tonic-gate if (--nbrace < 0) { 6017c478bd9Sstevel@tonic-gate char brk[3]; 6027c478bd9Sstevel@tonic-gate 6037c478bd9Sstevel@tonic-gate brk[0] = '{'; 6047c478bd9Sstevel@tonic-gate brk[1] = '}'; 6057c478bd9Sstevel@tonic-gate brk[2] = '\0'; 6067c478bd9Sstevel@tonic-gate awkerr(unbal, brk); 6077c478bd9Sstevel@tonic-gate } 6087c478bd9Sstevel@tonic-gate if (lexlast != ';') { 6097c478bd9Sstevel@tonic-gate savetoken = c; 6107c478bd9Sstevel@tonic-gate c = ';'; 6117c478bd9Sstevel@tonic-gate } 6127c478bd9Sstevel@tonic-gate break; 6137c478bd9Sstevel@tonic-gate 6147c478bd9Sstevel@tonic-gate case '[': 6157c478bd9Sstevel@tonic-gate ++nbracket; 6167c478bd9Sstevel@tonic-gate break; 6177c478bd9Sstevel@tonic-gate 6187c478bd9Sstevel@tonic-gate case ']': 6197c478bd9Sstevel@tonic-gate if (--nbracket < 0) { 6207c478bd9Sstevel@tonic-gate char brk[3]; 6217c478bd9Sstevel@tonic-gate 6227c478bd9Sstevel@tonic-gate brk[0] = '['; 6237c478bd9Sstevel@tonic-gate brk[1] = ']'; 6247c478bd9Sstevel@tonic-gate brk[2] = '\0'; 6257c478bd9Sstevel@tonic-gate awkerr(unbal, brk); 6267c478bd9Sstevel@tonic-gate } 6277c478bd9Sstevel@tonic-gate break; 6287c478bd9Sstevel@tonic-gate 6297c478bd9Sstevel@tonic-gate case '\\': 6307c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '\n') 6317c478bd9Sstevel@tonic-gate continue; 
6327c478bd9Sstevel@tonic-gate lexungetc(c1); 6337c478bd9Sstevel@tonic-gate break; 6347c478bd9Sstevel@tonic-gate 6357c478bd9Sstevel@tonic-gate case ',': 6367c478bd9Sstevel@tonic-gate c = COMMA; 6377c478bd9Sstevel@tonic-gate break; 6387c478bd9Sstevel@tonic-gate 6397c478bd9Sstevel@tonic-gate case '?': 6407c478bd9Sstevel@tonic-gate c = QUEST; 6417c478bd9Sstevel@tonic-gate break; 6427c478bd9Sstevel@tonic-gate 6437c478bd9Sstevel@tonic-gate case ':': 6447c478bd9Sstevel@tonic-gate c = COLON; 6457c478bd9Sstevel@tonic-gate break; 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate default: 6487c478bd9Sstevel@tonic-gate if (!iswprint(c)) 6497c478bd9Sstevel@tonic-gate awkerr( 6507c478bd9Sstevel@tonic-gate gettext("invalid character \"%s\""), 6517c478bd9Sstevel@tonic-gate toprint(c)); 6527c478bd9Sstevel@tonic-gate break; 6537c478bd9Sstevel@tonic-gate } 6547c478bd9Sstevel@tonic-gate break; 6557c478bd9Sstevel@tonic-gate } 6567c478bd9Sstevel@tonic-gate 6577c478bd9Sstevel@tonic-gate switch (c) { 6587c478bd9Sstevel@tonic-gate case ']': 6597c478bd9Sstevel@tonic-gate ++catterm; 6607c478bd9Sstevel@tonic-gate break; 6617c478bd9Sstevel@tonic-gate 6627c478bd9Sstevel@tonic-gate case VAR: 6637c478bd9Sstevel@tonic-gate if (catterm) { 6647c478bd9Sstevel@tonic-gate savetoken = c; 6657c478bd9Sstevel@tonic-gate c = CONCAT; 6667c478bd9Sstevel@tonic-gate catterm = 0; 6677c478bd9Sstevel@tonic-gate } else if (!isfuncdef) { 6687c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) != '(') 6697c478bd9Sstevel@tonic-gate ++catterm; 6707c478bd9Sstevel@tonic-gate lexungetc(c1); 6717c478bd9Sstevel@tonic-gate } 6727c478bd9Sstevel@tonic-gate isfuncdef = 0; 6737c478bd9Sstevel@tonic-gate break; 6747c478bd9Sstevel@tonic-gate 6757c478bd9Sstevel@tonic-gate case PARM: 6767c478bd9Sstevel@tonic-gate case CONSTANT: 6777c478bd9Sstevel@tonic-gate if (catterm) { 6787c478bd9Sstevel@tonic-gate savetoken = c; 6797c478bd9Sstevel@tonic-gate c = CONCAT; 6807c478bd9Sstevel@tonic-gate catterm = 0; 6817c478bd9Sstevel@tonic-gate 
} else { 6827c478bd9Sstevel@tonic-gate if (lexlast == '$') 6837c478bd9Sstevel@tonic-gate wasfield = 2; 6847c478bd9Sstevel@tonic-gate ++catterm; 6857c478bd9Sstevel@tonic-gate } 6867c478bd9Sstevel@tonic-gate break; 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate case INC: 6897c478bd9Sstevel@tonic-gate case DEC: 6907c478bd9Sstevel@tonic-gate if (!catterm || lexlast != CONSTANT || wasfield) 6917c478bd9Sstevel@tonic-gate break; 6927c478bd9Sstevel@tonic-gate 693*79777a7dSnakanon /*FALLTHRU*/ 6947c478bd9Sstevel@tonic-gate case UFUNC: 6957c478bd9Sstevel@tonic-gate case FUNC: 6967c478bd9Sstevel@tonic-gate case GETLINE: 6977c478bd9Sstevel@tonic-gate case '!': 6987c478bd9Sstevel@tonic-gate case '$': 6997c478bd9Sstevel@tonic-gate case '(': 7007c478bd9Sstevel@tonic-gate if (catterm) { 7017c478bd9Sstevel@tonic-gate savetoken = c; 7027c478bd9Sstevel@tonic-gate c = CONCAT; 7037c478bd9Sstevel@tonic-gate catterm = 0; 7047c478bd9Sstevel@tonic-gate } 7057c478bd9Sstevel@tonic-gate break; 7067c478bd9Sstevel@tonic-gate 7077c478bd9Sstevel@tonic-gate /* { */ case '}': 7087c478bd9Sstevel@tonic-gate if (nbrace == 0) 7097c478bd9Sstevel@tonic-gate savetoken = ';'; 710*79777a7dSnakanon /*FALLTHRU*/ 7117c478bd9Sstevel@tonic-gate case ';': 7127c478bd9Sstevel@tonic-gate inprint = 0; 713*79777a7dSnakanon /*FALLTHRU*/ 7147c478bd9Sstevel@tonic-gate default: 7157c478bd9Sstevel@tonic-gate if (c == DEFFUNC) 7167c478bd9Sstevel@tonic-gate isfuncdef = 1; 7177c478bd9Sstevel@tonic-gate catterm = 0; 7187c478bd9Sstevel@tonic-gate } 7197c478bd9Sstevel@tonic-gate lexlast = c; 7207c478bd9Sstevel@tonic-gate if (wasfield) 7217c478bd9Sstevel@tonic-gate wasfield--; 7227c478bd9Sstevel@tonic-gate /* 7237c478bd9Sstevel@tonic-gate * Map character constants to symbolic names. 
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
	return ((int)c);
}

/*
 * Read a number for the lexical analyzer.
 * Input is the first character of the number.
 * Return value is the lexical type.
 *
 * The text of the number is collected in the global linebuf[].  A lone
 * '.' yields DOT.  Otherwise CONSTANT is returned with yylval.node set to
 * an integer node (no '.' or exponent seen and wcstol() did not report
 * ERANGE) or to a real node built from wcstod().
 */
static int
lexnumber(wint_t c)
{
	wchar_t *cp;
	int dotfound = 0;
	int efound = 0;
	INT number;

	cp = linebuf;
	do {
		if (iswdigit(c))
			;
		else if (c == '.') {
			/* A second '.' ends the number. */
			if (dotfound++)
				break;
		} else if (c == 'e' || c == 'E') {
			/*
			 * Look one character past the exponent marker.  With
			 * a sign, 'e' is stored here and the sign is stored
			 * by the common store below; without one the
			 * lookahead is pushed back and 'e' itself is stored
			 * below.
			 */
			if ((c = lexgetc()) != '-' && c != '+') {
				lexungetc(c);
				c = 'e';
			} else
				*cp++ = 'e';
			/* A second exponent marker ends the number. */
			if (efound++)
				break;
		} else
			break;
		*cp++ = c;
	} while ((c = lexgetc()) != WEOF);
	*cp = '\0';
	/*
	 * Only a single '.' was collected.
	 * NOTE(review): the lookahead character in c is not pushed back on
	 * this path -- confirm that this is intended.
	 */
	if (dotfound && cp == linebuf+1)
		return (DOT);
	lexungetc(c);
	/* errno is cleared so that ERANGE can only come from wcstol(). */
	errno = 0;
	if (!dotfound && !efound &&
	    ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE))
		yylval.node = intnode(number);
	else
		yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
	return (CONSTANT);
}

/*
 * Read an identifier.
 * Input is first character of identifier.
 * The name is collected in linebuf[] and looked up with vlook(); the
 * resulting node is left in yylval.node.
 * Returns the keyword type for keywords, VAR for variables, arrays and
 * parameters, and the node type for functions and getline.
 */
static int
lexid(wint_t c)
{
	wchar_t *cp;
	size_t i;
	NODE *np;

	cp = linebuf;
	do {
		*cp++ = c;
		c = lexgetc();
	} while (iswalpha(c) || iswdigit(c) || c == '_');
	*cp = '\0';
	/* The character that ended the identifier is not part of it. */
	lexungetc(c);
	yylval.node = np = vlook(linebuf);

	switch (np->n_type) {
	case KEYWORD:
		switch (np->n_keywtype) {
		case PRINT:
		case PRINTF:
			++inprint;
			/*FALLTHRU*/
		default:
			return ((int)np->n_keywtype);
		}
		/* NOTREACHED */

	case ARRAY:
	case VAR:
		/*
		 * If reading the argument list, create a dummy node
		 * for the duration of that function. These variables
		 * can be removed from the symbol table at function end
		 * but they must still exist because the execution tree
		 * knows about them.
		 */
		if (funparm) {
		/* Also entered by goto from the UFUNC case below. */
		do_funparm:
			np = emptynode(PARM, i = (cp-linebuf));
			np->n_flags = FSTRING;
			np->n_string = _null;
			np->n_strlen = 0;
			/* i+1 so that the terminating null is copied too. */
			(void) memcpy(np->n_name, linebuf,
			    (i+1) * sizeof (wchar_t));
			addsymtab(np);
			yylval.node = np;
		} else if (np == varNF || (np == varFS &&
		    (!doing_begin || begin_getline))) {
			/*
			 * If the user program references NF or sets
			 * FS either outside of a begin block or
			 * in a begin block after a getline then the
			 * input line will be split immediately upon read
			 * rather than when a field is first referenced.
			 */
			needsplit = 1;
		} else if (np == varENVIRON)
			needenviron = 1;
		/*FALLTHRU*/
	case PARM:
		return (VAR);

	case UFUNC:
		/*
		 * It is ok to redefine functions as parameters
		 */
		if (funparm) goto do_funparm;
		/*FALLTHRU*/
	case FUNC:
	case GETLINE:
		/*
		 * When one of these tokens is encountered inside a begin
		 * block, set the 'begin_getline' flag.  This will force the
		 * 'needsplit' flag to be set, even inside a begin block, if
		 * FS is referenced afterwards. (See VAR case above)
		 */
		if (doing_begin)
			begin_getline = 1;
		return (np->n_type);
	}
	/* NOTREACHED */
	return (0);
}

/*
 * Read a string for the lexical analyzer.
 * `endc' terminates the string.
8697c478bd9Sstevel@tonic-gate */ 8707c478bd9Sstevel@tonic-gate static int 8717c478bd9Sstevel@tonic-gate lexstring(wint_t endc) 8727c478bd9Sstevel@tonic-gate { 873cb4658fbSceastha size_t length = lexescape(endc, 0, 0); 8747c478bd9Sstevel@tonic-gate 8757c478bd9Sstevel@tonic-gate yylval.node = stringnode(linebuf, FALLOC, length); 8767c478bd9Sstevel@tonic-gate return (CONSTANT); 8777c478bd9Sstevel@tonic-gate } 8787c478bd9Sstevel@tonic-gate 8797c478bd9Sstevel@tonic-gate /* 8807c478bd9Sstevel@tonic-gate * Read a regular expression. 8817c478bd9Sstevel@tonic-gate */ 8827c478bd9Sstevel@tonic-gate static int 8837c478bd9Sstevel@tonic-gate lexregexp(wint_t endc) 8847c478bd9Sstevel@tonic-gate { 8857c478bd9Sstevel@tonic-gate (void) lexescape(endc, 1, 0); 8867c478bd9Sstevel@tonic-gate yylval.node = renode(linebuf); 8877c478bd9Sstevel@tonic-gate return (URE); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate /* 8917c478bd9Sstevel@tonic-gate * Process a string, converting the escape characters as required by 8927c478bd9Sstevel@tonic-gate * 1003.2. The processed string ends up in the global linebuf[]. This 8937c478bd9Sstevel@tonic-gate * routine also changes the value of 'progfd' - the program file 8947c478bd9Sstevel@tonic-gate * descriptor, so it should be used with some care. It is presently used to 8957c478bd9Sstevel@tonic-gate * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()). 
8967c478bd9Sstevel@tonic-gate */ 8977c478bd9Sstevel@tonic-gate void 8987c478bd9Sstevel@tonic-gate strescape(wchar_t *str) 8997c478bd9Sstevel@tonic-gate { 9007c478bd9Sstevel@tonic-gate progptr = str; 9017c478bd9Sstevel@tonic-gate proglen = wcslen(str) + 1; /* Include \0 */ 9027c478bd9Sstevel@tonic-gate (void) lexescape('\0', 0, 1); 9037c478bd9Sstevel@tonic-gate progptr = NULL; 9047c478bd9Sstevel@tonic-gate } 9057c478bd9Sstevel@tonic-gate 9067c478bd9Sstevel@tonic-gate /* 9077c478bd9Sstevel@tonic-gate * Read a string or regular expression, terminated by ``endc'', 9087c478bd9Sstevel@tonic-gate * for lexical analyzer, processing escape sequences. 9097c478bd9Sstevel@tonic-gate * Return string length. 9107c478bd9Sstevel@tonic-gate */ 9117c478bd9Sstevel@tonic-gate static size_t 9127c478bd9Sstevel@tonic-gate lexescape(wint_t endc, int regx, int cmd_line_operand) 9137c478bd9Sstevel@tonic-gate { 9147c478bd9Sstevel@tonic-gate static char nlre[256]; 9157c478bd9Sstevel@tonic-gate static char nlstr[256]; 9167c478bd9Sstevel@tonic-gate static char eofre[256]; 9177c478bd9Sstevel@tonic-gate static char eofstr[256]; 9187c478bd9Sstevel@tonic-gate int first_time = 1; 9197c478bd9Sstevel@tonic-gate wint_t c; 9207c478bd9Sstevel@tonic-gate wchar_t *cp; 9217c478bd9Sstevel@tonic-gate int n, max; 9227c478bd9Sstevel@tonic-gate 9237c478bd9Sstevel@tonic-gate if (first_time == 1) { 9247c478bd9Sstevel@tonic-gate (void) strcpy(nlre, gettext("Newline in regular expression\n")); 9257c478bd9Sstevel@tonic-gate (void) strcpy(nlstr, gettext("Newline in string\n")); 9267c478bd9Sstevel@tonic-gate (void) strcpy(eofre, gettext("EOF in regular expression\n")); 9277c478bd9Sstevel@tonic-gate (void) strcpy(eofstr, gettext("EOF in string\n")); 9287c478bd9Sstevel@tonic-gate first_time = 0; 9297c478bd9Sstevel@tonic-gate } 9307c478bd9Sstevel@tonic-gate 9317c478bd9Sstevel@tonic-gate cp = linebuf; 9327c478bd9Sstevel@tonic-gate while ((c = lexgetc()) != endc) { 9337c478bd9Sstevel@tonic-gate if (c == '\n') 
9347c478bd9Sstevel@tonic-gate awkerr(regx ? nlre : nlstr); 9357c478bd9Sstevel@tonic-gate if (c == '\\') { 9367c478bd9Sstevel@tonic-gate switch (c = lexgetc(), c) { 9377c478bd9Sstevel@tonic-gate case '\\': 9387c478bd9Sstevel@tonic-gate if (regx) 9397c478bd9Sstevel@tonic-gate *cp++ = '\\'; 9407c478bd9Sstevel@tonic-gate break; 9417c478bd9Sstevel@tonic-gate 9427c478bd9Sstevel@tonic-gate case '/': 9437c478bd9Sstevel@tonic-gate c = '/'; 9447c478bd9Sstevel@tonic-gate break; 9457c478bd9Sstevel@tonic-gate 9467c478bd9Sstevel@tonic-gate case 'n': 9477c478bd9Sstevel@tonic-gate c = '\n'; 9487c478bd9Sstevel@tonic-gate break; 9497c478bd9Sstevel@tonic-gate 9507c478bd9Sstevel@tonic-gate case 'b': 9517c478bd9Sstevel@tonic-gate c = '\b'; 9527c478bd9Sstevel@tonic-gate break; 9537c478bd9Sstevel@tonic-gate 9547c478bd9Sstevel@tonic-gate case 't': 9557c478bd9Sstevel@tonic-gate c = '\t'; 9567c478bd9Sstevel@tonic-gate break; 9577c478bd9Sstevel@tonic-gate 9587c478bd9Sstevel@tonic-gate case 'r': 9597c478bd9Sstevel@tonic-gate c = '\r'; 9607c478bd9Sstevel@tonic-gate break; 9617c478bd9Sstevel@tonic-gate 9627c478bd9Sstevel@tonic-gate case 'f': 9637c478bd9Sstevel@tonic-gate c = '\f'; 9647c478bd9Sstevel@tonic-gate break; 9657c478bd9Sstevel@tonic-gate 9667c478bd9Sstevel@tonic-gate case 'v': 9677c478bd9Sstevel@tonic-gate c = '\v'; 9687c478bd9Sstevel@tonic-gate break; 9697c478bd9Sstevel@tonic-gate 9707c478bd9Sstevel@tonic-gate case 'a': 9717c478bd9Sstevel@tonic-gate c = (char)0x07; 9727c478bd9Sstevel@tonic-gate break; 9737c478bd9Sstevel@tonic-gate 9747c478bd9Sstevel@tonic-gate case 'x': 9757c478bd9Sstevel@tonic-gate n = 0; 9767c478bd9Sstevel@tonic-gate while (iswxdigit(c = lexgetc())) { 9777c478bd9Sstevel@tonic-gate if (iswdigit(c)) 9787c478bd9Sstevel@tonic-gate c -= '0'; 9797c478bd9Sstevel@tonic-gate else if (iswupper(c)) 9807c478bd9Sstevel@tonic-gate c -= 'A'-10; 9817c478bd9Sstevel@tonic-gate else 9827c478bd9Sstevel@tonic-gate c -= 'a'-10; 9837c478bd9Sstevel@tonic-gate n = (n<<4) + c; 
9847c478bd9Sstevel@tonic-gate } 9857c478bd9Sstevel@tonic-gate lexungetc(c); 9867c478bd9Sstevel@tonic-gate c = n; 9877c478bd9Sstevel@tonic-gate break; 9887c478bd9Sstevel@tonic-gate 9897c478bd9Sstevel@tonic-gate case '0': 9907c478bd9Sstevel@tonic-gate case '1': 9917c478bd9Sstevel@tonic-gate case '2': 9927c478bd9Sstevel@tonic-gate case '3': 9937c478bd9Sstevel@tonic-gate case '4': 9947c478bd9Sstevel@tonic-gate case '5': 9957c478bd9Sstevel@tonic-gate case '6': 9967c478bd9Sstevel@tonic-gate case '7': 9977c478bd9Sstevel@tonic-gate #if 0 9987c478bd9Sstevel@tonic-gate /* 9997c478bd9Sstevel@tonic-gate * Posix.2 draft 10 disallows the use of back-referencing - it explicitly 10007c478bd9Sstevel@tonic-gate * requires processing of the octal escapes both in strings and 10017c478bd9Sstevel@tonic-gate * regular expressions. The following code is disabled instead of 10027c478bd9Sstevel@tonic-gate * removed as back-referencing may be reintroduced in a future draft 10037c478bd9Sstevel@tonic-gate * of the standard. 10047c478bd9Sstevel@tonic-gate */ 10057c478bd9Sstevel@tonic-gate /* 10067c478bd9Sstevel@tonic-gate * For regular expressions, we disallow 10077c478bd9Sstevel@tonic-gate * \ooo to mean octal character, in favour 10087c478bd9Sstevel@tonic-gate * of back referencing. 
10097c478bd9Sstevel@tonic-gate */ 10107c478bd9Sstevel@tonic-gate if (regx) { 10117c478bd9Sstevel@tonic-gate *cp++ = '\\'; 10127c478bd9Sstevel@tonic-gate break; 10137c478bd9Sstevel@tonic-gate } 10147c478bd9Sstevel@tonic-gate #endif 10157c478bd9Sstevel@tonic-gate max = 3; 10167c478bd9Sstevel@tonic-gate n = 0; 10177c478bd9Sstevel@tonic-gate do { 10187c478bd9Sstevel@tonic-gate n = (n<<3) + c-'0'; 10197c478bd9Sstevel@tonic-gate if ((c = lexgetc()) > '7' || c < '0') 10207c478bd9Sstevel@tonic-gate break; 10217c478bd9Sstevel@tonic-gate } while (--max); 10227c478bd9Sstevel@tonic-gate lexungetc(c); 10237c478bd9Sstevel@tonic-gate /* 10247c478bd9Sstevel@tonic-gate * an octal escape sequence must have at least 10257c478bd9Sstevel@tonic-gate * 2 digits after the backslash, otherwise 10267c478bd9Sstevel@tonic-gate * it gets passed straight thru for possible 10277c478bd9Sstevel@tonic-gate * use in backreferencing. 10287c478bd9Sstevel@tonic-gate */ 10297c478bd9Sstevel@tonic-gate if (max == 3) { 10307c478bd9Sstevel@tonic-gate *cp++ = '\\'; 10317c478bd9Sstevel@tonic-gate n += '0'; 10327c478bd9Sstevel@tonic-gate } 10337c478bd9Sstevel@tonic-gate c = n; 10347c478bd9Sstevel@tonic-gate break; 10357c478bd9Sstevel@tonic-gate 10367c478bd9Sstevel@tonic-gate case '\n': 10377c478bd9Sstevel@tonic-gate continue; 10387c478bd9Sstevel@tonic-gate 10397c478bd9Sstevel@tonic-gate default: 10407c478bd9Sstevel@tonic-gate if (c != endc || cmd_line_operand) { 10417c478bd9Sstevel@tonic-gate *cp++ = '\\'; 10427c478bd9Sstevel@tonic-gate if (c == endc) 10437c478bd9Sstevel@tonic-gate lexungetc(c); 10447c478bd9Sstevel@tonic-gate } 10457c478bd9Sstevel@tonic-gate } 10467c478bd9Sstevel@tonic-gate } 10477c478bd9Sstevel@tonic-gate if (c == WEOF) 10487c478bd9Sstevel@tonic-gate awkerr(regx ? 
eofre : eofstr); 10497c478bd9Sstevel@tonic-gate *cp++ = c; 10507c478bd9Sstevel@tonic-gate } 10517c478bd9Sstevel@tonic-gate *cp = '\0'; 10527c478bd9Sstevel@tonic-gate return (cp - linebuf); 10537c478bd9Sstevel@tonic-gate } 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate /* 10567c478bd9Sstevel@tonic-gate * Build a regular expression NODE. 10577c478bd9Sstevel@tonic-gate * Argument is the string holding the expression. 10587c478bd9Sstevel@tonic-gate */ 10597c478bd9Sstevel@tonic-gate NODE * 10607c478bd9Sstevel@tonic-gate renode(wchar_t *s) 10617c478bd9Sstevel@tonic-gate { 1062cb4658fbSceastha NODE *np; 10637c478bd9Sstevel@tonic-gate int n; 10647c478bd9Sstevel@tonic-gate 10657c478bd9Sstevel@tonic-gate np = emptynode(RE, 0); 10667c478bd9Sstevel@tonic-gate np->n_left = np->n_right = NNULL; 1067*79777a7dSnakanon if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) { 10687c478bd9Sstevel@tonic-gate int m; 10697c478bd9Sstevel@tonic-gate char *p; 10707c478bd9Sstevel@tonic-gate 1071*79777a7dSnakanon m = REGWERROR(n, np->n_regexp, NULL, 0); 10727c478bd9Sstevel@tonic-gate p = (char *)emalloc(m); 1073*79777a7dSnakanon REGWERROR(n, np->n_regexp, p, m); 10747c478bd9Sstevel@tonic-gate awkerr("/%S/: %s", s, p); 10757c478bd9Sstevel@tonic-gate } 10767c478bd9Sstevel@tonic-gate return (np); 10777c478bd9Sstevel@tonic-gate } 10787c478bd9Sstevel@tonic-gate /* 10797c478bd9Sstevel@tonic-gate * Get a character for the lexical analyser routine. 
10807c478bd9Sstevel@tonic-gate */ 10817c478bd9Sstevel@tonic-gate static wint_t 10827c478bd9Sstevel@tonic-gate lexgetc() 10837c478bd9Sstevel@tonic-gate { 1084cb4658fbSceastha wint_t c; 10857c478bd9Sstevel@tonic-gate static char **files = &progfiles[0]; 10867c478bd9Sstevel@tonic-gate 10877c478bd9Sstevel@tonic-gate if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF) 10887c478bd9Sstevel@tonic-gate ; 10897c478bd9Sstevel@tonic-gate else { 10907c478bd9Sstevel@tonic-gate if (progptr != NULL) { 10917c478bd9Sstevel@tonic-gate if (proglen-- <= 0) 10927c478bd9Sstevel@tonic-gate c = WEOF; 10937c478bd9Sstevel@tonic-gate else 10947c478bd9Sstevel@tonic-gate c = *progptr++; 10957c478bd9Sstevel@tonic-gate } else { 10967c478bd9Sstevel@tonic-gate if (progfp != FNULL) 10977c478bd9Sstevel@tonic-gate if (progfp != stdin) 10987c478bd9Sstevel@tonic-gate (void) fclose(progfp); 10997c478bd9Sstevel@tonic-gate else 11007c478bd9Sstevel@tonic-gate clearerr(progfp); 11017c478bd9Sstevel@tonic-gate progfp = FNULL; 11027c478bd9Sstevel@tonic-gate if (files < progfilep) { 11037c478bd9Sstevel@tonic-gate filename = *files++; 11047c478bd9Sstevel@tonic-gate lineno = 1; 11057c478bd9Sstevel@tonic-gate if (filename[0] == '-' && filename[1] == '\0') 11067c478bd9Sstevel@tonic-gate progfp = stdin; 1107cb4658fbSceastha else if ((progfp = fopen(filename, r)) 1108cb4658fbSceastha == FNULL) { 11097c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 11107c478bd9Sstevel@tonic-gate gettext("script file \"%s\""), filename); 11117c478bd9Sstevel@tonic-gate exit(1); 11127c478bd9Sstevel@tonic-gate } 11137c478bd9Sstevel@tonic-gate c = fgetwc(progfp); 11147c478bd9Sstevel@tonic-gate } 11157c478bd9Sstevel@tonic-gate } 11167c478bd9Sstevel@tonic-gate } 11177c478bd9Sstevel@tonic-gate if (c == '\n') 11187c478bd9Sstevel@tonic-gate ++lineno; 11197c478bd9Sstevel@tonic-gate if (conptr >= &context[NCONTEXT]) 11207c478bd9Sstevel@tonic-gate conptr = &context[0]; 11217c478bd9Sstevel@tonic-gate if (c != WEOF) 11227c478bd9Sstevel@tonic-gate 
*conptr++ = c; 11237c478bd9Sstevel@tonic-gate return (c); 11247c478bd9Sstevel@tonic-gate } 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate /* 11277c478bd9Sstevel@tonic-gate * Return a character for lexical analyser. 11287c478bd9Sstevel@tonic-gate * Only one returned character is (not enforced) legitimite. 11297c478bd9Sstevel@tonic-gate */ 11307c478bd9Sstevel@tonic-gate static void 11317c478bd9Sstevel@tonic-gate lexungetc(wint_t c) 11327c478bd9Sstevel@tonic-gate { 11337c478bd9Sstevel@tonic-gate if (c == '\n') 11347c478bd9Sstevel@tonic-gate --lineno; 11357c478bd9Sstevel@tonic-gate if (c != WEOF) { 11367c478bd9Sstevel@tonic-gate if (conptr == &context[0]) 11377c478bd9Sstevel@tonic-gate conptr = &context[NCONTEXT]; 11387c478bd9Sstevel@tonic-gate *--conptr = '\0'; 11397c478bd9Sstevel@tonic-gate } 11407c478bd9Sstevel@tonic-gate if (progfp != FNULL) { 11417c478bd9Sstevel@tonic-gate (void) ungetwc(c, progfp); 11427c478bd9Sstevel@tonic-gate return; 11437c478bd9Sstevel@tonic-gate } 11447c478bd9Sstevel@tonic-gate if (c == WEOF) 11457c478bd9Sstevel@tonic-gate return; 11467c478bd9Sstevel@tonic-gate *--progptr = c; 11477c478bd9Sstevel@tonic-gate proglen++; 11487c478bd9Sstevel@tonic-gate } 11497c478bd9Sstevel@tonic-gate 11507c478bd9Sstevel@tonic-gate /* 11517c478bd9Sstevel@tonic-gate * Syntax errors during parsing. 11527c478bd9Sstevel@tonic-gate */ 11537c478bd9Sstevel@tonic-gate void 11547c478bd9Sstevel@tonic-gate yyerror(char *s, ...) 
11557c478bd9Sstevel@tonic-gate { 11567c478bd9Sstevel@tonic-gate if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD) 11577c478bd9Sstevel@tonic-gate if (lexlast == KEYWORD) 11587c478bd9Sstevel@tonic-gate awkerr(gettext("inadmissible use of reserved keyword")); 11597c478bd9Sstevel@tonic-gate else 11607c478bd9Sstevel@tonic-gate awkerr(gettext("attempt to redefine builtin function")); 11617c478bd9Sstevel@tonic-gate awkerr(s); 11627c478bd9Sstevel@tonic-gate } 11637c478bd9Sstevel@tonic-gate 11647c478bd9Sstevel@tonic-gate /* 11657c478bd9Sstevel@tonic-gate * Error routine for all awk errors. 11667c478bd9Sstevel@tonic-gate */ 11677c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11687c478bd9Sstevel@tonic-gate void 11697c478bd9Sstevel@tonic-gate awkerr(char *fmt, ...) 11707c478bd9Sstevel@tonic-gate { 11717c478bd9Sstevel@tonic-gate va_list args; 11727c478bd9Sstevel@tonic-gate 11737c478bd9Sstevel@tonic-gate va_start(args, fmt); 11747c478bd9Sstevel@tonic-gate awkierr(0, fmt, args); 11757c478bd9Sstevel@tonic-gate va_end(args); 11767c478bd9Sstevel@tonic-gate } 11777c478bd9Sstevel@tonic-gate 11787c478bd9Sstevel@tonic-gate /* 11797c478bd9Sstevel@tonic-gate * Error routine like "awkerr" except that it prints out 11807c478bd9Sstevel@tonic-gate * a message that includes an errno-specific indication. 11817c478bd9Sstevel@tonic-gate */ 11827c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11837c478bd9Sstevel@tonic-gate void 11847c478bd9Sstevel@tonic-gate awkperr(char *fmt, ...) 
11857c478bd9Sstevel@tonic-gate { 11867c478bd9Sstevel@tonic-gate va_list args; 11877c478bd9Sstevel@tonic-gate 11887c478bd9Sstevel@tonic-gate va_start(args, fmt); 11897c478bd9Sstevel@tonic-gate awkierr(1, fmt, args); 11907c478bd9Sstevel@tonic-gate va_end(args); 11917c478bd9Sstevel@tonic-gate } 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate /* 11947c478bd9Sstevel@tonic-gate * Common internal routine for awkerr, awkperr 11957c478bd9Sstevel@tonic-gate */ 11967c478bd9Sstevel@tonic-gate static void 11977c478bd9Sstevel@tonic-gate awkierr(int perr, char *fmt, va_list ap) 11987c478bd9Sstevel@tonic-gate { 11997c478bd9Sstevel@tonic-gate static char sep1[] = "\n>>>\t"; 12007c478bd9Sstevel@tonic-gate static char sep2[] = "\t<<<"; 12017c478bd9Sstevel@tonic-gate int saveerr = errno; 12027c478bd9Sstevel@tonic-gate 12037c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: ", _cmdname); 12047c478bd9Sstevel@tonic-gate if (running) { 12057c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u ("), 12067c478bd9Sstevel@tonic-gate curnode == NNULL ? 0 : curnode->n_lineno); 12077c478bd9Sstevel@tonic-gate if (phase == 0) 1208cb4658fbSceastha (void) fprintf(stderr, "NR=%lld): ", 1209cb4658fbSceastha (INT)exprint(varNR)); 12107c478bd9Sstevel@tonic-gate else 12117c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s): ", 12127c478bd9Sstevel@tonic-gate phase == BEGIN ? 
s_BEGIN : s_END); 12137c478bd9Sstevel@tonic-gate } else if (lineno != 0) { 12147c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("file \"%s\": "), filename); 12157c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u: "), lineno); 12167c478bd9Sstevel@tonic-gate } 12177c478bd9Sstevel@tonic-gate (void) vfprintf(stderr, gettext(fmt), ap); 12187c478bd9Sstevel@tonic-gate if (perr == 1) 12197c478bd9Sstevel@tonic-gate (void) fprintf(stderr, ": %s", strerror(saveerr)); 12207c478bd9Sstevel@tonic-gate if (perr != 2 && !running) { 1221cb4658fbSceastha wchar_t *cp; 1222cb4658fbSceastha int n; 1223cb4658fbSceastha int c; 12247c478bd9Sstevel@tonic-gate 12257c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext(" Context is:%s"), sep1); 12267c478bd9Sstevel@tonic-gate cp = conptr; 12277c478bd9Sstevel@tonic-gate n = NCONTEXT; 12287c478bd9Sstevel@tonic-gate do { 12297c478bd9Sstevel@tonic-gate if (cp >= &context[NCONTEXT]) 12307c478bd9Sstevel@tonic-gate cp = &context[0]; 12317c478bd9Sstevel@tonic-gate if ((c = *cp++) != '\0') 12327c478bd9Sstevel@tonic-gate (void) fputs(c == '\n' ? 
sep1 : toprint(c), 12337c478bd9Sstevel@tonic-gate stderr); 12347c478bd9Sstevel@tonic-gate } while (--n != 0); 12357c478bd9Sstevel@tonic-gate (void) fputs(sep2, stderr); 12367c478bd9Sstevel@tonic-gate } 12377c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "\n"); 12387c478bd9Sstevel@tonic-gate exit(1); 12397c478bd9Sstevel@tonic-gate } 12407c478bd9Sstevel@tonic-gate 12417c478bd9Sstevel@tonic-gate wchar_t * 12427c478bd9Sstevel@tonic-gate emalloc(unsigned n) 12437c478bd9Sstevel@tonic-gate { 12447c478bd9Sstevel@tonic-gate wchar_t *cp; 12457c478bd9Sstevel@tonic-gate 12467c478bd9Sstevel@tonic-gate if ((cp = malloc(n)) == NULL) 12477c478bd9Sstevel@tonic-gate awkerr(nomem); 1248cb4658fbSceastha return (cp); 12497c478bd9Sstevel@tonic-gate } 12507c478bd9Sstevel@tonic-gate 12517c478bd9Sstevel@tonic-gate wchar_t * 12527c478bd9Sstevel@tonic-gate erealloc(wchar_t *p, unsigned n) 12537c478bd9Sstevel@tonic-gate { 12547c478bd9Sstevel@tonic-gate wchar_t *cp; 12557c478bd9Sstevel@tonic-gate 12567c478bd9Sstevel@tonic-gate if ((cp = realloc(p, n)) == NULL) 12577c478bd9Sstevel@tonic-gate awkerr(nomem); 1258cb4658fbSceastha return (cp); 12597c478bd9Sstevel@tonic-gate } 12607c478bd9Sstevel@tonic-gate 12617c478bd9Sstevel@tonic-gate 12627c478bd9Sstevel@tonic-gate /* 12637c478bd9Sstevel@tonic-gate * usage message for awk 12647c478bd9Sstevel@tonic-gate */ 12657c478bd9Sstevel@tonic-gate static int 12667c478bd9Sstevel@tonic-gate usage() 12677c478bd9Sstevel@tonic-gate { 12687c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext( 12697c478bd9Sstevel@tonic-gate "Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n" 12707c478bd9Sstevel@tonic-gate " awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] 
[file ...]\n")); 12717c478bd9Sstevel@tonic-gate return (2); 12727c478bd9Sstevel@tonic-gate } 12737c478bd9Sstevel@tonic-gate 12747c478bd9Sstevel@tonic-gate 12757c478bd9Sstevel@tonic-gate static wchar_t * 12767c478bd9Sstevel@tonic-gate mbconvert(char *str) 12777c478bd9Sstevel@tonic-gate { 12787c478bd9Sstevel@tonic-gate static wchar_t *op = 0; 12797c478bd9Sstevel@tonic-gate 12807c478bd9Sstevel@tonic-gate if (op != 0) 12817c478bd9Sstevel@tonic-gate free(op); 12827c478bd9Sstevel@tonic-gate return (op = mbstowcsdup(str)); 12837c478bd9Sstevel@tonic-gate } 12847c478bd9Sstevel@tonic-gate 12857c478bd9Sstevel@tonic-gate char * 12867c478bd9Sstevel@tonic-gate mbunconvert(wchar_t *str) 12877c478bd9Sstevel@tonic-gate { 12887c478bd9Sstevel@tonic-gate static char *op = 0; 12897c478bd9Sstevel@tonic-gate 12907c478bd9Sstevel@tonic-gate if (op != 0) 12917c478bd9Sstevel@tonic-gate free(op); 12927c478bd9Sstevel@tonic-gate return (op = wcstombsdup(str)); 12937c478bd9Sstevel@tonic-gate } 12947c478bd9Sstevel@tonic-gate 12957c478bd9Sstevel@tonic-gate /* 12967c478bd9Sstevel@tonic-gate * Solaris port - following functions are typical MKS functions written 12977c478bd9Sstevel@tonic-gate * to work for Solaris. 
12987c478bd9Sstevel@tonic-gate */ 12997c478bd9Sstevel@tonic-gate 13007c478bd9Sstevel@tonic-gate wchar_t * 13017c478bd9Sstevel@tonic-gate mbstowcsdup(s) 13027c478bd9Sstevel@tonic-gate char *s; 13037c478bd9Sstevel@tonic-gate { 13047c478bd9Sstevel@tonic-gate int n; 13057c478bd9Sstevel@tonic-gate wchar_t *w; 13067c478bd9Sstevel@tonic-gate 13077c478bd9Sstevel@tonic-gate n = strlen(s) + 1; 13087c478bd9Sstevel@tonic-gate if ((w = (wchar_t *)malloc(n * sizeof (wchar_t))) == NULL) 13097c478bd9Sstevel@tonic-gate return (NULL); 13107c478bd9Sstevel@tonic-gate 1311*79777a7dSnakanon if (mbstowcs(w, s, n) == (size_t)-1) 13127c478bd9Sstevel@tonic-gate return (NULL); 13137c478bd9Sstevel@tonic-gate return (w); 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate } 13167c478bd9Sstevel@tonic-gate 13177c478bd9Sstevel@tonic-gate char * 13187c478bd9Sstevel@tonic-gate wcstombsdup(wchar_t *w) 13197c478bd9Sstevel@tonic-gate { 13207c478bd9Sstevel@tonic-gate int n; 13217c478bd9Sstevel@tonic-gate char *mb; 13227c478bd9Sstevel@tonic-gate 13237c478bd9Sstevel@tonic-gate /* Fetch memory for worst case string length */ 13247c478bd9Sstevel@tonic-gate n = wslen(w) + 1; 13257c478bd9Sstevel@tonic-gate n *= MB_CUR_MAX; 13267c478bd9Sstevel@tonic-gate if ((mb = (char *)malloc(n)) == NULL) { 13277c478bd9Sstevel@tonic-gate return (NULL); 13287c478bd9Sstevel@tonic-gate } 13297c478bd9Sstevel@tonic-gate 13307c478bd9Sstevel@tonic-gate /* Convert the string */ 13317c478bd9Sstevel@tonic-gate if ((n = wcstombs(mb, w, n)) == -1) { 13327c478bd9Sstevel@tonic-gate int saverr = errno; 13337c478bd9Sstevel@tonic-gate 13347c478bd9Sstevel@tonic-gate free(mb); 13357c478bd9Sstevel@tonic-gate errno = saverr; 13367c478bd9Sstevel@tonic-gate return (0); 13377c478bd9Sstevel@tonic-gate } 13387c478bd9Sstevel@tonic-gate 13397c478bd9Sstevel@tonic-gate /* Shrink the string down */ 13407c478bd9Sstevel@tonic-gate if ((mb = (char *)realloc(mb, strlen(mb)+1)) == NULL) { 13417c478bd9Sstevel@tonic-gate return (NULL); 
13427c478bd9Sstevel@tonic-gate } 13437c478bd9Sstevel@tonic-gate return (mb); 13447c478bd9Sstevel@tonic-gate } 13457c478bd9Sstevel@tonic-gate 13467c478bd9Sstevel@tonic-gate /* 13477c478bd9Sstevel@tonic-gate * The upe_ctrls[] table contains the printable 'control-sequences' for the 13487c478bd9Sstevel@tonic-gate * character values 0..31 and 127. The first entry is for value 127, thus the 13497c478bd9Sstevel@tonic-gate * entries for the remaining character values are from 1..32. 13507c478bd9Sstevel@tonic-gate */ 13517c478bd9Sstevel@tonic-gate static const char *const upe_ctrls[] = 13527c478bd9Sstevel@tonic-gate { 13537c478bd9Sstevel@tonic-gate "^?", 13547c478bd9Sstevel@tonic-gate "^@", "^A", "^B", "^C", "^D", "^E", "^F", "^G", 13557c478bd9Sstevel@tonic-gate "^H", "^I", "^J", "^K", "^L", "^M", "^N", "^O", 13567c478bd9Sstevel@tonic-gate "^P", "^Q", "^R", "^S", "^T", "^U", "^V", "^W", 13577c478bd9Sstevel@tonic-gate "^X", "^Y", "^Z", "^[", "^\\", "^]", "^^", "^_" 13587c478bd9Sstevel@tonic-gate }; 13597c478bd9Sstevel@tonic-gate 13607c478bd9Sstevel@tonic-gate 13617c478bd9Sstevel@tonic-gate /* 13627c478bd9Sstevel@tonic-gate * Return a printable string corresponding to the given character value. If 13637c478bd9Sstevel@tonic-gate * the character is printable, simply return it as the string. If it is in 13647c478bd9Sstevel@tonic-gate * the range specified by table 5-101 in the UPE, return the corresponding 13657c478bd9Sstevel@tonic-gate * string. Otherwise, return an octal escape sequence. 
13667c478bd9Sstevel@tonic-gate */ 13677c478bd9Sstevel@tonic-gate static const char * 13687c478bd9Sstevel@tonic-gate toprint(c) 13697c478bd9Sstevel@tonic-gate wchar_t c; 13707c478bd9Sstevel@tonic-gate { 13717c478bd9Sstevel@tonic-gate int n, len; 13727c478bd9Sstevel@tonic-gate unsigned char *ptr; 13737c478bd9Sstevel@tonic-gate static char mbch[MB_LEN_MAX+1]; 13747c478bd9Sstevel@tonic-gate static char buf[5 * MB_LEN_MAX + 1]; 13757c478bd9Sstevel@tonic-gate 13767c478bd9Sstevel@tonic-gate if ((n = wctomb(mbch, c)) == -1) { 13777c478bd9Sstevel@tonic-gate /* Should never happen */ 13787c478bd9Sstevel@tonic-gate (void) sprintf(buf, "\\%x", c); 13797c478bd9Sstevel@tonic-gate return (buf); 13807c478bd9Sstevel@tonic-gate } 13817c478bd9Sstevel@tonic-gate mbch[n] = '\0'; 13827c478bd9Sstevel@tonic-gate if (iswprint(c)) { 13837c478bd9Sstevel@tonic-gate return (mbch); 13847c478bd9Sstevel@tonic-gate } else if (c == 127) { 13857c478bd9Sstevel@tonic-gate return (upe_ctrls[0]); 13867c478bd9Sstevel@tonic-gate } else if (c < 32) { 13877c478bd9Sstevel@tonic-gate /* Print as in Table 5-101 in the UPE */ 13887c478bd9Sstevel@tonic-gate return (upe_ctrls[c+1]); 13897c478bd9Sstevel@tonic-gate } else { 13907c478bd9Sstevel@tonic-gate /* Print as an octal escape sequence */ 13917c478bd9Sstevel@tonic-gate for (len = 0, ptr = (unsigned char *) mbch; 0 < n; --n, ++ptr) 13927c478bd9Sstevel@tonic-gate len += sprintf(buf+len, "\\%03o", *ptr); 13937c478bd9Sstevel@tonic-gate } 13947c478bd9Sstevel@tonic-gate return (buf); 13957c478bd9Sstevel@tonic-gate } 13967c478bd9Sstevel@tonic-gate 13977c478bd9Sstevel@tonic-gate static int 13987c478bd9Sstevel@tonic-gate wcoff(const wchar_t *astring, const int off) 13997c478bd9Sstevel@tonic-gate { 14007c478bd9Sstevel@tonic-gate const wchar_t *s = astring; 14017c478bd9Sstevel@tonic-gate int c = 0; 14027c478bd9Sstevel@tonic-gate char mb[MB_LEN_MAX]; 14037c478bd9Sstevel@tonic-gate 14047c478bd9Sstevel@tonic-gate while (c < off) { 14057c478bd9Sstevel@tonic-gate int n; 
14067c478bd9Sstevel@tonic-gate if ((n = wctomb(mb, *s)) == 0) 14077c478bd9Sstevel@tonic-gate break; 14087c478bd9Sstevel@tonic-gate if (n == -1) 14097c478bd9Sstevel@tonic-gate n = 1; 14107c478bd9Sstevel@tonic-gate c += n; 14117c478bd9Sstevel@tonic-gate s++; 14127c478bd9Sstevel@tonic-gate } 14137c478bd9Sstevel@tonic-gate 14147c478bd9Sstevel@tonic-gate return (s - astring); 14157c478bd9Sstevel@tonic-gate } 14167c478bd9Sstevel@tonic-gate 1417*79777a7dSnakanon #define NREGHASH 64 1418*79777a7dSnakanon #define NREGHOLD 1024 /* max number unused entries */ 1419*79777a7dSnakanon 1420*79777a7dSnakanon static int nregunref; 1421*79777a7dSnakanon 1422*79777a7dSnakanon struct reghashq { 1423*79777a7dSnakanon struct qelem hq; 1424*79777a7dSnakanon struct regcache *regcachep; 1425*79777a7dSnakanon }; 1426*79777a7dSnakanon 1427*79777a7dSnakanon struct regcache { 1428*79777a7dSnakanon struct qelem lq; 1429*79777a7dSnakanon wchar_t *pattern; 1430*79777a7dSnakanon regex_t re; 1431*79777a7dSnakanon int refcnt; 1432*79777a7dSnakanon struct reghashq hash; 1433*79777a7dSnakanon }; 1434*79777a7dSnakanon 1435*79777a7dSnakanon static struct qelem reghash[NREGHASH], reglink; 1436*79777a7dSnakanon 1437*79777a7dSnakanon /* 1438*79777a7dSnakanon * Generate a hash value of the given wchar string. 1439*79777a7dSnakanon * The hashing method is similar to what Java does for strings. 
1440*79777a7dSnakanon */ 1441*79777a7dSnakanon static uint_t 1442*79777a7dSnakanon regtxthash(const wchar_t *str) 14437c478bd9Sstevel@tonic-gate { 1444*79777a7dSnakanon int k = 0; 1445*79777a7dSnakanon 1446*79777a7dSnakanon while (*str != L'\0') 1447*79777a7dSnakanon k = (31 * k) + *str++; 1448*79777a7dSnakanon 1449*79777a7dSnakanon k += ~(k << 9); 1450*79777a7dSnakanon k ^= (k >> 14); 1451*79777a7dSnakanon k += (k << 4); 1452*79777a7dSnakanon k ^= (k >> 10); 1453*79777a7dSnakanon 1454*79777a7dSnakanon return (k % NREGHASH); 1455*79777a7dSnakanon } 1456*79777a7dSnakanon 1457*79777a7dSnakanon int 1458*79777a7dSnakanon int_regwcomp(REGEXP *r, const wchar_t *pattern) 1459*79777a7dSnakanon { 1460*79777a7dSnakanon regex_t re; 14617c478bd9Sstevel@tonic-gate char *mbpattern; 14627c478bd9Sstevel@tonic-gate int ret; 1463*79777a7dSnakanon uint_t key; 1464*79777a7dSnakanon struct qelem *qp; 1465*79777a7dSnakanon struct regcache *rcp; 1466*79777a7dSnakanon 1467*79777a7dSnakanon key = regtxthash(pattern); 1468*79777a7dSnakanon for (qp = reghash[key].q_forw; qp != NULL; qp = qp->q_forw) { 1469*79777a7dSnakanon rcp = ((struct reghashq *)qp)->regcachep; 1470*79777a7dSnakanon if (*rcp->pattern == *pattern && 1471*79777a7dSnakanon wcscmp(rcp->pattern, pattern) == 0) 1472*79777a7dSnakanon break; 1473*79777a7dSnakanon } 1474*79777a7dSnakanon if (qp != NULL) { 1475*79777a7dSnakanon /* update link. 
put this one at the beginning */ 1476*79777a7dSnakanon if (rcp != (struct regcache *)reglink.q_forw) { 1477*79777a7dSnakanon remque(&rcp->lq); 1478*79777a7dSnakanon insque(&rcp->lq, ®link); 1479*79777a7dSnakanon } 1480*79777a7dSnakanon if (rcp->refcnt == 0) 1481*79777a7dSnakanon nregunref--; /* no longer unref'ed */ 1482*79777a7dSnakanon rcp->refcnt++; 1483*79777a7dSnakanon *(struct regcache **)r = rcp; 1484*79777a7dSnakanon return (REG_OK); 1485*79777a7dSnakanon } 14867c478bd9Sstevel@tonic-gate 14877c478bd9Sstevel@tonic-gate if ((mbpattern = wcstombsdup((wchar_t *)pattern)) == NULL) 14887c478bd9Sstevel@tonic-gate return (REG_ESPACE); 14897c478bd9Sstevel@tonic-gate 1490*79777a7dSnakanon ret = regcomp(&re, mbpattern, REG_EXTENDED); 14917c478bd9Sstevel@tonic-gate 14927c478bd9Sstevel@tonic-gate free(mbpattern); 14937c478bd9Sstevel@tonic-gate 1494*79777a7dSnakanon if (ret != REG_OK) 1495*79777a7dSnakanon return (ret); 1496*79777a7dSnakanon 1497*79777a7dSnakanon if ((rcp = malloc(sizeof (struct regcache))) == NULL) 1498*79777a7dSnakanon return (REG_ESPACE); 1499*79777a7dSnakanon rcp->re = re; 1500*79777a7dSnakanon if ((rcp->pattern = wsdup(pattern)) == NULL) { 1501*79777a7dSnakanon regfree(&re); 1502*79777a7dSnakanon free(rcp); 1503*79777a7dSnakanon return (REG_ESPACE); 1504*79777a7dSnakanon } 1505*79777a7dSnakanon rcp->refcnt = 1; 1506*79777a7dSnakanon insque(&rcp->lq, ®link); 1507*79777a7dSnakanon insque(&rcp->hash.hq, ®hash[key]); 1508*79777a7dSnakanon rcp->hash.regcachep = rcp; 1509*79777a7dSnakanon 1510*79777a7dSnakanon *(struct regcache **)r = rcp; 15117c478bd9Sstevel@tonic-gate return (ret); 15127c478bd9Sstevel@tonic-gate } 15137c478bd9Sstevel@tonic-gate 1514*79777a7dSnakanon void 1515*79777a7dSnakanon int_regwfree(REGEXP r) 1516*79777a7dSnakanon { 1517*79777a7dSnakanon int cnt; 1518*79777a7dSnakanon struct qelem *qp, *nqp; 1519*79777a7dSnakanon struct regcache *rcp; 1520*79777a7dSnakanon 1521*79777a7dSnakanon rcp = (struct regcache *)r; 1522*79777a7dSnakanon 
1523*79777a7dSnakanon if (--rcp->refcnt != 0) 1524*79777a7dSnakanon return; 1525*79777a7dSnakanon 1526*79777a7dSnakanon /* this cache has no reference */ 1527*79777a7dSnakanon if (++nregunref < NREGHOLD) 1528*79777a7dSnakanon return; 1529*79777a7dSnakanon 1530*79777a7dSnakanon /* 1531*79777a7dSnakanon * We've got too much unref'ed regex. Free half of least 1532*79777a7dSnakanon * used regex. 1533*79777a7dSnakanon */ 1534*79777a7dSnakanon cnt = 0; 1535*79777a7dSnakanon for (qp = reglink.q_forw; qp != NULL; qp = nqp) { 1536*79777a7dSnakanon nqp = qp->q_forw; 1537*79777a7dSnakanon rcp = (struct regcache *)qp; 1538*79777a7dSnakanon if (rcp->refcnt != 0) 1539*79777a7dSnakanon continue; 1540*79777a7dSnakanon 1541*79777a7dSnakanon /* free half of them */ 1542*79777a7dSnakanon if (++cnt < (NREGHOLD / 2)) 1543*79777a7dSnakanon continue; 1544*79777a7dSnakanon 1545*79777a7dSnakanon /* detach and free */ 1546*79777a7dSnakanon remque(&rcp->lq); 1547*79777a7dSnakanon remque(&rcp->hash.hq); 1548*79777a7dSnakanon 1549*79777a7dSnakanon /* free up */ 1550*79777a7dSnakanon free(rcp->pattern); 1551*79777a7dSnakanon regfree(&rcp->re); 1552*79777a7dSnakanon free(rcp); 1553*79777a7dSnakanon 1554*79777a7dSnakanon nregunref--; 1555*79777a7dSnakanon } 1556*79777a7dSnakanon } 1557*79777a7dSnakanon 1558*79777a7dSnakanon size_t 1559*79777a7dSnakanon int_regwerror(int errcode, REGEXP r, char *errbuf, size_t bufsiz) 1560*79777a7dSnakanon { 1561*79777a7dSnakanon struct regcache *rcp; 1562*79777a7dSnakanon 1563*79777a7dSnakanon rcp = (struct regcache *)r; 1564*79777a7dSnakanon return (regerror(errcode, &rcp->re, errbuf, bufsiz)); 1565*79777a7dSnakanon } 1566*79777a7dSnakanon 15677c478bd9Sstevel@tonic-gate int 1568*79777a7dSnakanon int_regwexec(REGEXP r, /* compiled RE */ 15697c478bd9Sstevel@tonic-gate const wchar_t *astring, /* subject string */ 15707c478bd9Sstevel@tonic-gate size_t nsub, /* number of subexpressions */ 15717c478bd9Sstevel@tonic-gate int_regwmatch_t *sub, /* subexpression pointers 
*/ 15727c478bd9Sstevel@tonic-gate int flags) 15737c478bd9Sstevel@tonic-gate { 15747c478bd9Sstevel@tonic-gate char *mbs; 15757c478bd9Sstevel@tonic-gate regmatch_t *mbsub = NULL; 1576cb4658fbSceastha int i; 1577*79777a7dSnakanon struct regcache *rcp; 15787c478bd9Sstevel@tonic-gate 15797c478bd9Sstevel@tonic-gate if ((mbs = wcstombsdup((wchar_t *)astring)) == NULL) 15807c478bd9Sstevel@tonic-gate return (REG_ESPACE); 15817c478bd9Sstevel@tonic-gate 15827c478bd9Sstevel@tonic-gate if (nsub > 0 && sub) { 15837c478bd9Sstevel@tonic-gate if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL) 15847c478bd9Sstevel@tonic-gate return (REG_ESPACE); 15857c478bd9Sstevel@tonic-gate } 15867c478bd9Sstevel@tonic-gate 1587*79777a7dSnakanon rcp = (struct regcache *)r; 1588*79777a7dSnakanon 1589*79777a7dSnakanon i = regexec(&rcp->re, mbs, nsub, mbsub, flags); 15907c478bd9Sstevel@tonic-gate 15917c478bd9Sstevel@tonic-gate /* Now, adjust the pointers/counts in sub */ 15927c478bd9Sstevel@tonic-gate if (i == REG_OK && nsub > 0 && mbsub) { 1593cb4658fbSceastha int j, k; 15947c478bd9Sstevel@tonic-gate 15957c478bd9Sstevel@tonic-gate for (j = 0; j < nsub; j++) { 15967c478bd9Sstevel@tonic-gate regmatch_t *ms = &mbsub[j]; 15977c478bd9Sstevel@tonic-gate int_regwmatch_t *ws = &sub[j]; 15987c478bd9Sstevel@tonic-gate 15997c478bd9Sstevel@tonic-gate if ((k = ms->rm_so) >= 0) { 16007c478bd9Sstevel@tonic-gate ws->rm_so = wcoff(astring, k); 16017c478bd9Sstevel@tonic-gate ws->rm_sp = astring + ws->rm_so; 16027c478bd9Sstevel@tonic-gate } 16037c478bd9Sstevel@tonic-gate if ((k = ms->rm_eo) >= 0) { 16047c478bd9Sstevel@tonic-gate ws->rm_eo = wcoff(astring, k); 16057c478bd9Sstevel@tonic-gate ws->rm_ep = astring + ws->rm_eo; 16067c478bd9Sstevel@tonic-gate } 16077c478bd9Sstevel@tonic-gate } 16087c478bd9Sstevel@tonic-gate } 16097c478bd9Sstevel@tonic-gate 16107c478bd9Sstevel@tonic-gate free(mbs); 16117c478bd9Sstevel@tonic-gate if (mbsub) 16127c478bd9Sstevel@tonic-gate free(mbsub); 16137c478bd9Sstevel@tonic-gate return 
(i); 16147c478bd9Sstevel@tonic-gate } 16157c478bd9Sstevel@tonic-gate 16167c478bd9Sstevel@tonic-gate int 1617*79777a7dSnakanon int_regwdosuba(REGEXP rp, /* compiled RE: Pattern */ 16187c478bd9Sstevel@tonic-gate const wchar_t *rpl, /* replacement string: /rpl/ */ 16197c478bd9Sstevel@tonic-gate const wchar_t *src, /* source string */ 16207c478bd9Sstevel@tonic-gate wchar_t **dstp, /* destination string */ 16217c478bd9Sstevel@tonic-gate int len, /* destination length */ 16227c478bd9Sstevel@tonic-gate int *globp) /* IN: occurence, 0 for all; OUT: substitutions */ 16237c478bd9Sstevel@tonic-gate { 16247c478bd9Sstevel@tonic-gate wchar_t *dst, *odst; 1625cb4658fbSceastha const wchar_t *ip, *xp; 1626cb4658fbSceastha wchar_t *op; 1627cb4658fbSceastha int i; 1628cb4658fbSceastha wchar_t c; 16297c478bd9Sstevel@tonic-gate int glob, iglob = *globp, oglob = 0; 16307c478bd9Sstevel@tonic-gate #define NSUB 10 16317c478bd9Sstevel@tonic-gate int_regwmatch_t rm[NSUB], *rmp; 16327c478bd9Sstevel@tonic-gate int flags; 16337c478bd9Sstevel@tonic-gate wchar_t *end; 16347c478bd9Sstevel@tonic-gate int regerr; 16357c478bd9Sstevel@tonic-gate 16367c478bd9Sstevel@tonic-gate /* handle overflow of dst. 
we need "i" more bytes */ 16377c478bd9Sstevel@tonic-gate #ifdef OVERFLOW 16387c478bd9Sstevel@tonic-gate #undef OVERFLOW 1639*79777a7dSnakanon #define OVERFLOW(i) { \ 16407c478bd9Sstevel@tonic-gate int pos = op - dst; \ 16417c478bd9Sstevel@tonic-gate dst = (wchar_t *)realloc(odst = dst, \ 16427c478bd9Sstevel@tonic-gate (len += len + i) * sizeof (wchar_t)); \ 16437c478bd9Sstevel@tonic-gate if (dst == NULL) \ 16447c478bd9Sstevel@tonic-gate goto nospace; \ 16457c478bd9Sstevel@tonic-gate op = dst + pos; \ 16467c478bd9Sstevel@tonic-gate end = dst + len; \ 1647*79777a7dSnakanon } 16487c478bd9Sstevel@tonic-gate #endif 16497c478bd9Sstevel@tonic-gate 16507c478bd9Sstevel@tonic-gate *dstp = dst = (wchar_t *)malloc(len * sizeof (wchar_t)); 16517c478bd9Sstevel@tonic-gate if (dst == NULL) 16527c478bd9Sstevel@tonic-gate return (REG_ESPACE); 16537c478bd9Sstevel@tonic-gate 16547c478bd9Sstevel@tonic-gate if (rp == NULL || rpl == NULL || src == NULL || dst == NULL) 16557c478bd9Sstevel@tonic-gate return (REG_EFATAL); 16567c478bd9Sstevel@tonic-gate 16577c478bd9Sstevel@tonic-gate glob = 0; /* match count */ 16587c478bd9Sstevel@tonic-gate ip = src; /* source position */ 16597c478bd9Sstevel@tonic-gate op = dst; /* destination position */ 16607c478bd9Sstevel@tonic-gate end = dst + len; 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate flags = 0; 16637c478bd9Sstevel@tonic-gate while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) { 16647c478bd9Sstevel@tonic-gate /* Copy text preceding match */ 16657c478bd9Sstevel@tonic-gate if (op + (i = rm[0].rm_sp - ip) >= end) 1666*79777a7dSnakanon OVERFLOW(i) 16677c478bd9Sstevel@tonic-gate while (i--) 16687c478bd9Sstevel@tonic-gate *op++ = *ip++; 16697c478bd9Sstevel@tonic-gate 16707c478bd9Sstevel@tonic-gate if (iglob == 0 || ++glob == iglob) { 16717c478bd9Sstevel@tonic-gate oglob++; 16727c478bd9Sstevel@tonic-gate xp = rpl; /* do substitute */ 16737c478bd9Sstevel@tonic-gate } else 16747c478bd9Sstevel@tonic-gate xp = L"&"; /* 
preserve text */ 16757c478bd9Sstevel@tonic-gate 16767c478bd9Sstevel@tonic-gate /* Perform replacement of matched substing */ 16777c478bd9Sstevel@tonic-gate while ((c = *xp++) != '\0') { 16787c478bd9Sstevel@tonic-gate rmp = NULL; 16797c478bd9Sstevel@tonic-gate if (c == '&') 16807c478bd9Sstevel@tonic-gate rmp = &rm[0]; 16817c478bd9Sstevel@tonic-gate else if (c == '\\') { 16827c478bd9Sstevel@tonic-gate if ('0' <= *xp && *xp <= '9') 16837c478bd9Sstevel@tonic-gate rmp = &rm[*xp++ - '0']; 16847c478bd9Sstevel@tonic-gate else if (*xp != '\0') 16857c478bd9Sstevel@tonic-gate c = *xp++; 16867c478bd9Sstevel@tonic-gate } 16877c478bd9Sstevel@tonic-gate 16887c478bd9Sstevel@tonic-gate if (rmp == NULL) { /* Ordinary character. */ 16897c478bd9Sstevel@tonic-gate *op++ = c; 16907c478bd9Sstevel@tonic-gate if (op >= end) 1691*79777a7dSnakanon OVERFLOW(1) 16927c478bd9Sstevel@tonic-gate } else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) { 16937c478bd9Sstevel@tonic-gate ip = rmp->rm_sp; 16947c478bd9Sstevel@tonic-gate if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end) 1695*79777a7dSnakanon OVERFLOW(i) 16967c478bd9Sstevel@tonic-gate while (i--) 16977c478bd9Sstevel@tonic-gate *op++ = *ip++; 16987c478bd9Sstevel@tonic-gate } 16997c478bd9Sstevel@tonic-gate } 17007c478bd9Sstevel@tonic-gate 17017c478bd9Sstevel@tonic-gate ip = rm[0].rm_ep; 17027c478bd9Sstevel@tonic-gate if (*ip == '\0') /* If at end break */ 17037c478bd9Sstevel@tonic-gate break; 17047c478bd9Sstevel@tonic-gate else if (rm[0].rm_sp == rm[0].rm_ep) { 17057c478bd9Sstevel@tonic-gate /* If empty match copy next char */ 17067c478bd9Sstevel@tonic-gate *op++ = *ip++; 17077c478bd9Sstevel@tonic-gate if (op >= end) 1708*79777a7dSnakanon OVERFLOW(1) 17097c478bd9Sstevel@tonic-gate } 17107c478bd9Sstevel@tonic-gate flags = REG_NOTBOL; 17117c478bd9Sstevel@tonic-gate } 17127c478bd9Sstevel@tonic-gate 17137c478bd9Sstevel@tonic-gate if (regerr != REG_OK && regerr != REG_NOMATCH) 17147c478bd9Sstevel@tonic-gate return (regerr); 
17157c478bd9Sstevel@tonic-gate 17167c478bd9Sstevel@tonic-gate /* Copy rest of text */ 17177c478bd9Sstevel@tonic-gate if (op + (i = wcslen(ip)) >= end) 1718*79777a7dSnakanon OVERFLOW(i) 17197c478bd9Sstevel@tonic-gate while (i--) 17207c478bd9Sstevel@tonic-gate *op++ = *ip++; 17217c478bd9Sstevel@tonic-gate *op++ = '\0'; 17227c478bd9Sstevel@tonic-gate 17237c478bd9Sstevel@tonic-gate if ((*dstp = dst = (wchar_t *)realloc(odst = dst, 17247c478bd9Sstevel@tonic-gate sizeof (wchar_t) * (size_t)(op - dst))) == NULL) { 17257c478bd9Sstevel@tonic-gate nospace: 17267c478bd9Sstevel@tonic-gate free(odst); 17277c478bd9Sstevel@tonic-gate return (REG_ESPACE); 17287c478bd9Sstevel@tonic-gate } 17297c478bd9Sstevel@tonic-gate 17307c478bd9Sstevel@tonic-gate *globp = oglob; 17317c478bd9Sstevel@tonic-gate 17327c478bd9Sstevel@tonic-gate return ((oglob == 0) ? REG_NOMATCH : REG_OK); 17337c478bd9Sstevel@tonic-gate } 1734