1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* 28*7c478bd9Sstevel@tonic-gate * awk -- mainline, yylex, etc. 29*7c478bd9Sstevel@tonic-gate * 30*7c478bd9Sstevel@tonic-gate * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved. 
31*7c478bd9Sstevel@tonic-gate * 32*7c478bd9Sstevel@tonic-gate * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes 33*7c478bd9Sstevel@tonic-gate */ 34*7c478bd9Sstevel@tonic-gate 35*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 36*7c478bd9Sstevel@tonic-gate 37*7c478bd9Sstevel@tonic-gate #include "awk.h" 38*7c478bd9Sstevel@tonic-gate #include "y.tab.h" 39*7c478bd9Sstevel@tonic-gate #include <stdarg.h> 40*7c478bd9Sstevel@tonic-gate #include <unistd.h> 41*7c478bd9Sstevel@tonic-gate #include <locale.h> 42*7c478bd9Sstevel@tonic-gate 43*7c478bd9Sstevel@tonic-gate static char *progfiles[NPFILE]; /* Programmes files for yylex */ 44*7c478bd9Sstevel@tonic-gate static char **progfilep = &progfiles[0]; /* Pointer to last file */ 45*7c478bd9Sstevel@tonic-gate static wchar_t *progptr; /* In-memory programme */ 46*7c478bd9Sstevel@tonic-gate static int proglen; /* Length of progptr */ 47*7c478bd9Sstevel@tonic-gate static wchar_t context[NCONTEXT]; /* Circular buffer of context */ 48*7c478bd9Sstevel@tonic-gate static wchar_t *conptr = &context[0]; /* context ptr */ 49*7c478bd9Sstevel@tonic-gate static FILE *progfp; /* Stdio stream for programme */ 50*7c478bd9Sstevel@tonic-gate static char *filename; 51*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 52*7c478bd9Sstevel@tonic-gate static int dflag; 53*7c478bd9Sstevel@tonic-gate #endif 54*7c478bd9Sstevel@tonic-gate 55*7c478bd9Sstevel@tonic-gate #define AWK_EXEC_MAGIC "<MKS AWKC>" 56*7c478bd9Sstevel@tonic-gate #define LEN_EXEC_MAGIC 10 57*7c478bd9Sstevel@tonic-gate 58*7c478bd9Sstevel@tonic-gate static char unbal[] = "unbalanced E char"; 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate static void awkarginit(int c, char **av); 61*7c478bd9Sstevel@tonic-gate static int lexid(wint_t c); 62*7c478bd9Sstevel@tonic-gate static int lexnumber(wint_t c); 63*7c478bd9Sstevel@tonic-gate static int lexstring(wint_t endc); 64*7c478bd9Sstevel@tonic-gate static int lexregexp(register wint_t endc); 
65*7c478bd9Sstevel@tonic-gate 66*7c478bd9Sstevel@tonic-gate static void awkvarinit(void); 67*7c478bd9Sstevel@tonic-gate static wint_t lexgetc(void); 68*7c478bd9Sstevel@tonic-gate static void lexungetc(wint_t c); 69*7c478bd9Sstevel@tonic-gate static size_t lexescape(wint_t endc, int regx, int cmd_line_operand); 70*7c478bd9Sstevel@tonic-gate static void awkierr(int perr, char *fmt, va_list ap); 71*7c478bd9Sstevel@tonic-gate static int usage(void); 72*7c478bd9Sstevel@tonic-gate void strescape(wchar_t *str); 73*7c478bd9Sstevel@tonic-gate static const char *toprint(wint_t); 74*7c478bd9Sstevel@tonic-gate char *_cmdname; 75*7c478bd9Sstevel@tonic-gate static wchar_t *mbconvert(char *str); 76*7c478bd9Sstevel@tonic-gate 77*7c478bd9Sstevel@tonic-gate 78*7c478bd9Sstevel@tonic-gate /* 79*7c478bd9Sstevel@tonic-gate * mainline for awk 80*7c478bd9Sstevel@tonic-gate */ 81*7c478bd9Sstevel@tonic-gate int 82*7c478bd9Sstevel@tonic-gate main(int argc, char *argv[]) 83*7c478bd9Sstevel@tonic-gate { 84*7c478bd9Sstevel@tonic-gate register wchar_t *ap; 85*7c478bd9Sstevel@tonic-gate register char *cmd; 86*7c478bd9Sstevel@tonic-gate 87*7c478bd9Sstevel@tonic-gate cmd = argv[0]; 88*7c478bd9Sstevel@tonic-gate _cmdname = cmd; 89*7c478bd9Sstevel@tonic-gate 90*7c478bd9Sstevel@tonic-gate linebuf = emalloc(NLINE * sizeof(wchar_t)); 91*7c478bd9Sstevel@tonic-gate 92*7c478bd9Sstevel@tonic-gate /*l 93*7c478bd9Sstevel@tonic-gate * At this point only messaging should be internationalized. 94*7c478bd9Sstevel@tonic-gate * numbers are still scanned as in the Posix locale. 
95*7c478bd9Sstevel@tonic-gate */ 96*7c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL,""); 97*7c478bd9Sstevel@tonic-gate (void) setlocale(LC_NUMERIC,"C"); 98*7c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 99*7c478bd9Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 100*7c478bd9Sstevel@tonic-gate #endif 101*7c478bd9Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 102*7c478bd9Sstevel@tonic-gate 103*7c478bd9Sstevel@tonic-gate awkvarinit(); 104*7c478bd9Sstevel@tonic-gate /*running = 1;*/ 105*7c478bd9Sstevel@tonic-gate while (argc>1 && *argv[1]=='-') { 106*7c478bd9Sstevel@tonic-gate void *save_ptr = NULL; 107*7c478bd9Sstevel@tonic-gate ap = mbstowcsdup(&argv[1][1]); 108*7c478bd9Sstevel@tonic-gate if (ap == NULL) 109*7c478bd9Sstevel@tonic-gate break; 110*7c478bd9Sstevel@tonic-gate if (*ap == '\0') { 111*7c478bd9Sstevel@tonic-gate free(ap); 112*7c478bd9Sstevel@tonic-gate break; 113*7c478bd9Sstevel@tonic-gate } 114*7c478bd9Sstevel@tonic-gate save_ptr = (void *) ap; 115*7c478bd9Sstevel@tonic-gate ++argv; 116*7c478bd9Sstevel@tonic-gate --argc; 117*7c478bd9Sstevel@tonic-gate if (*ap=='-' && ap[1]=='\0') 118*7c478bd9Sstevel@tonic-gate break; 119*7c478bd9Sstevel@tonic-gate for ( ; *ap != '\0'; ++ap) { 120*7c478bd9Sstevel@tonic-gate switch (*ap) { 121*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 122*7c478bd9Sstevel@tonic-gate case 'd': 123*7c478bd9Sstevel@tonic-gate dflag = 1; 124*7c478bd9Sstevel@tonic-gate continue; 125*7c478bd9Sstevel@tonic-gate 126*7c478bd9Sstevel@tonic-gate #endif 127*7c478bd9Sstevel@tonic-gate case 'f': 128*7c478bd9Sstevel@tonic-gate if (argc < 2) { 129*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 130*7c478bd9Sstevel@tonic-gate gettext("Missing script file\n")); 131*7c478bd9Sstevel@tonic-gate return (1); 132*7c478bd9Sstevel@tonic-gate } 133*7c478bd9Sstevel@tonic-gate *progfilep++ = argv[1]; 134*7c478bd9Sstevel@tonic-gate --argc; 135*7c478bd9Sstevel@tonic-gate ++argv; 136*7c478bd9Sstevel@tonic-gate continue; 137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate case 'F': 139*7c478bd9Sstevel@tonic-gate if (ap[1] == '\0') { 140*7c478bd9Sstevel@tonic-gate if (argc < 2) { 141*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 142*7c478bd9Sstevel@tonic-gate gettext("Missing field separator\n")); 143*7c478bd9Sstevel@tonic-gate return (1); 144*7c478bd9Sstevel@tonic-gate } 145*7c478bd9Sstevel@tonic-gate ap = mbstowcsdup(argv[1]); 146*7c478bd9Sstevel@tonic-gate --argc; 147*7c478bd9Sstevel@tonic-gate ++argv; 148*7c478bd9Sstevel@tonic-gate } else 149*7c478bd9Sstevel@tonic-gate ++ap; 150*7c478bd9Sstevel@tonic-gate strescape(ap); 151*7c478bd9Sstevel@tonic-gate strassign(varFS, linebuf, FALLOC, 152*7c478bd9Sstevel@tonic-gate wcslen(linebuf)); 153*7c478bd9Sstevel@tonic-gate break; 154*7c478bd9Sstevel@tonic-gate 155*7c478bd9Sstevel@tonic-gate case 'v': { 156*7c478bd9Sstevel@tonic-gate register wchar_t *vp; 157*7c478bd9Sstevel@tonic-gate register wchar_t *arg; 158*7c478bd9Sstevel@tonic-gate 159*7c478bd9Sstevel@tonic-gate if (argc < 2) { 160*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 161*7c478bd9Sstevel@tonic-gate gettext("Missing variable assignment\n")); 162*7c478bd9Sstevel@tonic-gate return (1); 163*7c478bd9Sstevel@tonic-gate } 164*7c478bd9Sstevel@tonic-gate arg = mbconvert(argv[1]); 165*7c478bd9Sstevel@tonic-gate if ((vp = wcschr(arg, '=')) != NULL) { 166*7c478bd9Sstevel@tonic-gate *vp = '\0'; 167*7c478bd9Sstevel@tonic-gate strescape(vp+1); 168*7c478bd9Sstevel@tonic-gate strassign(vlook(arg), linebuf, 169*7c478bd9Sstevel@tonic-gate FALLOC|FSENSE, wcslen(linebuf)); 170*7c478bd9Sstevel@tonic-gate *vp = '='; 171*7c478bd9Sstevel@tonic-gate } 172*7c478bd9Sstevel@tonic-gate --argc; 173*7c478bd9Sstevel@tonic-gate ++argv; 174*7c478bd9Sstevel@tonic-gate continue; 175*7c478bd9Sstevel@tonic-gate } 176*7c478bd9Sstevel@tonic-gate 177*7c478bd9Sstevel@tonic-gate default: 178*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 179*7c478bd9Sstevel@tonic-gate gettext("Unknown option \"-%S\"\n"), ap); 
180*7c478bd9Sstevel@tonic-gate return (usage()); 181*7c478bd9Sstevel@tonic-gate } 182*7c478bd9Sstevel@tonic-gate break; 183*7c478bd9Sstevel@tonic-gate } 184*7c478bd9Sstevel@tonic-gate if (save_ptr) 185*7c478bd9Sstevel@tonic-gate free(save_ptr); 186*7c478bd9Sstevel@tonic-gate } 187*7c478bd9Sstevel@tonic-gate if (progfilep == &progfiles[0]) { 188*7c478bd9Sstevel@tonic-gate if (argc < 2) 189*7c478bd9Sstevel@tonic-gate return (usage()); 190*7c478bd9Sstevel@tonic-gate filename = "[command line]"; /* BUG: NEEDS TRANSLATION */ 191*7c478bd9Sstevel@tonic-gate progptr = mbstowcsdup(argv[1]); 192*7c478bd9Sstevel@tonic-gate proglen = wcslen(progptr); 193*7c478bd9Sstevel@tonic-gate --argc; 194*7c478bd9Sstevel@tonic-gate ++argv; 195*7c478bd9Sstevel@tonic-gate } 196*7c478bd9Sstevel@tonic-gate 197*7c478bd9Sstevel@tonic-gate argv[0] = cmd; 198*7c478bd9Sstevel@tonic-gate 199*7c478bd9Sstevel@tonic-gate awkarginit(argc, argv); 200*7c478bd9Sstevel@tonic-gate 201*7c478bd9Sstevel@tonic-gate /*running = 0;*/ 202*7c478bd9Sstevel@tonic-gate (void)yyparse(); 203*7c478bd9Sstevel@tonic-gate 204*7c478bd9Sstevel@tonic-gate lineno = 0; 205*7c478bd9Sstevel@tonic-gate /* 206*7c478bd9Sstevel@tonic-gate * Ok, done parsing, so now activate the rest of the nls stuff, set 207*7c478bd9Sstevel@tonic-gate * the radix character. 208*7c478bd9Sstevel@tonic-gate */ 209*7c478bd9Sstevel@tonic-gate (void) setlocale(LC_ALL,""); 210*7c478bd9Sstevel@tonic-gate radixpoint = *localeconv()->decimal_point; 211*7c478bd9Sstevel@tonic-gate awk(); 212*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 213*7c478bd9Sstevel@tonic-gate return (0); 214*7c478bd9Sstevel@tonic-gate } 215*7c478bd9Sstevel@tonic-gate 216*7c478bd9Sstevel@tonic-gate /* 217*7c478bd9Sstevel@tonic-gate * Do initial setup of buffers, etc. 218*7c478bd9Sstevel@tonic-gate * This must be called before most processing 219*7c478bd9Sstevel@tonic-gate * and especially before lexical analysis. 
220*7c478bd9Sstevel@tonic-gate * Variables initialised here will be overruled by command 221*7c478bd9Sstevel@tonic-gate * line parameter initialisation. 222*7c478bd9Sstevel@tonic-gate */ 223*7c478bd9Sstevel@tonic-gate static void 224*7c478bd9Sstevel@tonic-gate awkvarinit() 225*7c478bd9Sstevel@tonic-gate { 226*7c478bd9Sstevel@tonic-gate register NODE *np; 227*7c478bd9Sstevel@tonic-gate 228*7c478bd9Sstevel@tonic-gate (void) setvbuf(stderr, NULL, _IONBF, 0); 229*7c478bd9Sstevel@tonic-gate 230*7c478bd9Sstevel@tonic-gate if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) { 231*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 232*7c478bd9Sstevel@tonic-gate gettext("not enough available file descriptors")); 233*7c478bd9Sstevel@tonic-gate exit(1); 234*7c478bd9Sstevel@tonic-gate } 235*7c478bd9Sstevel@tonic-gate ofiles = (OFILE *) emalloc(sizeof(OFILE)*NIOSTREAM); 236*7c478bd9Sstevel@tonic-gate #ifdef A_ZERO_POINTERS 237*7c478bd9Sstevel@tonic-gate (void) memset((wchar_t *) ofiles, 0, sizeof(OFILE) * NIOSTREAM); 238*7c478bd9Sstevel@tonic-gate #else 239*7c478bd9Sstevel@tonic-gate { 240*7c478bd9Sstevel@tonic-gate /* initialize file descriptor table */ 241*7c478bd9Sstevel@tonic-gate OFILE *fp; 242*7c478bd9Sstevel@tonic-gate for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) { 243*7c478bd9Sstevel@tonic-gate fp->f_fp = FNULL; 244*7c478bd9Sstevel@tonic-gate fp->f_mode = 0; 245*7c478bd9Sstevel@tonic-gate fp->f_name = (char *)0; 246*7c478bd9Sstevel@tonic-gate } 247*7c478bd9Sstevel@tonic-gate } 248*7c478bd9Sstevel@tonic-gate #endif 249*7c478bd9Sstevel@tonic-gate constant = intnode((INT)0); 250*7c478bd9Sstevel@tonic-gate 251*7c478bd9Sstevel@tonic-gate const0 = intnode((INT)0); 252*7c478bd9Sstevel@tonic-gate const1 = intnode((INT)1); 253*7c478bd9Sstevel@tonic-gate constundef = emptynode(CONSTANT, 0); 254*7c478bd9Sstevel@tonic-gate constundef->n_flags = FSTRING|FVINT; 255*7c478bd9Sstevel@tonic-gate constundef->n_string = _null; 256*7c478bd9Sstevel@tonic-gate constundef->n_strlen = 0; 
257*7c478bd9Sstevel@tonic-gate inc_oper = emptynode(ADD, 0); 258*7c478bd9Sstevel@tonic-gate inc_oper->n_right = const1; 259*7c478bd9Sstevel@tonic-gate asn_oper = emptynode(ADD, 0); 260*7c478bd9Sstevel@tonic-gate field0 = node(FIELD, const0, NNULL); 261*7c478bd9Sstevel@tonic-gate 262*7c478bd9Sstevel@tonic-gate { 263*7c478bd9Sstevel@tonic-gate register RESFUNC near*rp; 264*7c478bd9Sstevel@tonic-gate 265*7c478bd9Sstevel@tonic-gate for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) { 266*7c478bd9Sstevel@tonic-gate np = finstall(rp->rf_name, rp->rf_func, rp->rf_type); 267*7c478bd9Sstevel@tonic-gate } 268*7c478bd9Sstevel@tonic-gate } 269*7c478bd9Sstevel@tonic-gate { 270*7c478bd9Sstevel@tonic-gate register RESERVED near*rp; 271*7c478bd9Sstevel@tonic-gate 272*7c478bd9Sstevel@tonic-gate for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) { 273*7c478bd9Sstevel@tonic-gate switch (rp->r_type) { 274*7c478bd9Sstevel@tonic-gate case SVAR: 275*7c478bd9Sstevel@tonic-gate case VAR: 276*7c478bd9Sstevel@tonic-gate running = 1; 277*7c478bd9Sstevel@tonic-gate np = vlook(rp->r_name); 278*7c478bd9Sstevel@tonic-gate if (rp->r_type == SVAR) 279*7c478bd9Sstevel@tonic-gate np->n_flags |= FSPECIAL; 280*7c478bd9Sstevel@tonic-gate if (rp->r_svalue != NULL) 281*7c478bd9Sstevel@tonic-gate strassign(np, rp->r_svalue, FSTATIC, 282*7c478bd9Sstevel@tonic-gate (size_t)rp->r_ivalue); 283*7c478bd9Sstevel@tonic-gate else { 284*7c478bd9Sstevel@tonic-gate constant->n_int = rp->r_ivalue; 285*7c478bd9Sstevel@tonic-gate (void)assign(np, constant); 286*7c478bd9Sstevel@tonic-gate } 287*7c478bd9Sstevel@tonic-gate running = 0; 288*7c478bd9Sstevel@tonic-gate break; 289*7c478bd9Sstevel@tonic-gate 290*7c478bd9Sstevel@tonic-gate case KEYWORD: 291*7c478bd9Sstevel@tonic-gate kinstall(rp->r_name, (int)rp->r_ivalue); 292*7c478bd9Sstevel@tonic-gate break; 293*7c478bd9Sstevel@tonic-gate } 294*7c478bd9Sstevel@tonic-gate } 295*7c478bd9Sstevel@tonic-gate } 296*7c478bd9Sstevel@tonic-gate 
297*7c478bd9Sstevel@tonic-gate varNR = vlook(s_NR); 298*7c478bd9Sstevel@tonic-gate varFNR = vlook(s_FNR); 299*7c478bd9Sstevel@tonic-gate varNF = vlook(s_NF); 300*7c478bd9Sstevel@tonic-gate varOFMT = vlook(s_OFMT); 301*7c478bd9Sstevel@tonic-gate varCONVFMT = vlook(s_CONVFMT); 302*7c478bd9Sstevel@tonic-gate varOFS = vlook(s_OFS); 303*7c478bd9Sstevel@tonic-gate varORS = vlook(s_ORS); 304*7c478bd9Sstevel@tonic-gate varRS = vlook(s_RS); 305*7c478bd9Sstevel@tonic-gate varFS = vlook(s_FS); 306*7c478bd9Sstevel@tonic-gate varARGC = vlook(s_ARGC); 307*7c478bd9Sstevel@tonic-gate varSUBSEP = vlook(s_SUBSEP); 308*7c478bd9Sstevel@tonic-gate varENVIRON = vlook(s_ENVIRON); 309*7c478bd9Sstevel@tonic-gate varFILENAME = vlook(s_FILENAME); 310*7c478bd9Sstevel@tonic-gate varSYMTAB = vlook(s_SYMTAB); 311*7c478bd9Sstevel@tonic-gate incNR = node(ASG, varNR, node(ADD, varNR, const1)); 312*7c478bd9Sstevel@tonic-gate incFNR = node(ASG, varFNR, node(ADD, varFNR, const1)); 313*7c478bd9Sstevel@tonic-gate clrFNR = node(ASG, varFNR, const0); 314*7c478bd9Sstevel@tonic-gate } 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate /* 317*7c478bd9Sstevel@tonic-gate * Initialise awk ARGC, ARGV variables. 
318*7c478bd9Sstevel@tonic-gate */ 319*7c478bd9Sstevel@tonic-gate static void 320*7c478bd9Sstevel@tonic-gate awkarginit(int ac, char **av) 321*7c478bd9Sstevel@tonic-gate { 322*7c478bd9Sstevel@tonic-gate register int i; 323*7c478bd9Sstevel@tonic-gate register wchar_t *cp; 324*7c478bd9Sstevel@tonic-gate 325*7c478bd9Sstevel@tonic-gate ARGVsubi = node(INDEX, vlook(s_ARGV), constant); 326*7c478bd9Sstevel@tonic-gate running = 1; 327*7c478bd9Sstevel@tonic-gate constant->n_int = ac; 328*7c478bd9Sstevel@tonic-gate (void)assign(varARGC, constant); 329*7c478bd9Sstevel@tonic-gate for (i = 0; i < ac; ++i) { 330*7c478bd9Sstevel@tonic-gate cp = mbstowcsdup(av[i]); 331*7c478bd9Sstevel@tonic-gate constant->n_int = i; 332*7c478bd9Sstevel@tonic-gate strassign(exprreduce(ARGVsubi), cp, 333*7c478bd9Sstevel@tonic-gate FSTATIC|FSENSE, wcslen(cp)); 334*7c478bd9Sstevel@tonic-gate } 335*7c478bd9Sstevel@tonic-gate running = 0; 336*7c478bd9Sstevel@tonic-gate } 337*7c478bd9Sstevel@tonic-gate 338*7c478bd9Sstevel@tonic-gate /* 339*7c478bd9Sstevel@tonic-gate * Clean up when done parsing a function. 340*7c478bd9Sstevel@tonic-gate * All formal parameters, because of a deal (funparm) in 341*7c478bd9Sstevel@tonic-gate * yylex, get put into the symbol table in front of any 342*7c478bd9Sstevel@tonic-gate * global variable of the same name. When the entire 343*7c478bd9Sstevel@tonic-gate * function is parsed, remove these formal dummy nodes 344*7c478bd9Sstevel@tonic-gate * from the symbol table but retain the nodes because 345*7c478bd9Sstevel@tonic-gate * the generated tree points at them. 
346*7c478bd9Sstevel@tonic-gate */ 347*7c478bd9Sstevel@tonic-gate void 348*7c478bd9Sstevel@tonic-gate uexit(NODE *np) 349*7c478bd9Sstevel@tonic-gate { 350*7c478bd9Sstevel@tonic-gate register NODE *formal; 351*7c478bd9Sstevel@tonic-gate 352*7c478bd9Sstevel@tonic-gate while ((formal = getlist(&np)) != NNULL) 353*7c478bd9Sstevel@tonic-gate delsymtab(formal, 0); 354*7c478bd9Sstevel@tonic-gate } 355*7c478bd9Sstevel@tonic-gate 356*7c478bd9Sstevel@tonic-gate /* 357*7c478bd9Sstevel@tonic-gate * The lexical analyzer. 358*7c478bd9Sstevel@tonic-gate */ 359*7c478bd9Sstevel@tonic-gate int 360*7c478bd9Sstevel@tonic-gate yylex() 361*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 362*7c478bd9Sstevel@tonic-gate { 363*7c478bd9Sstevel@tonic-gate register int l; 364*7c478bd9Sstevel@tonic-gate 365*7c478bd9Sstevel@tonic-gate l = yyhex(); 366*7c478bd9Sstevel@tonic-gate if (dflag) 367*7c478bd9Sstevel@tonic-gate (void) printf("%d\n", l); 368*7c478bd9Sstevel@tonic-gate return (l); 369*7c478bd9Sstevel@tonic-gate } 370*7c478bd9Sstevel@tonic-gate yyhex() 371*7c478bd9Sstevel@tonic-gate #endif 372*7c478bd9Sstevel@tonic-gate { 373*7c478bd9Sstevel@tonic-gate register wint_t c, c1; 374*7c478bd9Sstevel@tonic-gate int i; 375*7c478bd9Sstevel@tonic-gate static int savetoken = 0; 376*7c478bd9Sstevel@tonic-gate static wasfield; 377*7c478bd9Sstevel@tonic-gate static int isfuncdef; 378*7c478bd9Sstevel@tonic-gate static int nbrace, nparen, nbracket; 379*7c478bd9Sstevel@tonic-gate static struct ctosymstruct { 380*7c478bd9Sstevel@tonic-gate wint_t c, sym; 381*7c478bd9Sstevel@tonic-gate } ctosym[] = { 382*7c478bd9Sstevel@tonic-gate { '|', BAR }, { '^', CARAT }, 383*7c478bd9Sstevel@tonic-gate { '~', TILDE }, { '<', LANGLE }, 384*7c478bd9Sstevel@tonic-gate { '>', RANGLE }, { '+', PLUSC }, 385*7c478bd9Sstevel@tonic-gate { '-', HYPHEN }, { '*', STAR }, 386*7c478bd9Sstevel@tonic-gate { '/', SLASH }, { '%', PERCENT }, 387*7c478bd9Sstevel@tonic-gate { '!', EXCLAMATION }, { '$', DOLLAR }, 388*7c478bd9Sstevel@tonic-gate { '[', 
LSQUARE }, { ']', RSQUARE }, 389*7c478bd9Sstevel@tonic-gate { '(', LPAREN }, { ')', RPAREN }, 390*7c478bd9Sstevel@tonic-gate { ';', SEMI }, { '{', LBRACE }, 391*7c478bd9Sstevel@tonic-gate { '}', RBRACE }, { 0, 0 } 392*7c478bd9Sstevel@tonic-gate }; 393*7c478bd9Sstevel@tonic-gate 394*7c478bd9Sstevel@tonic-gate if (savetoken) { 395*7c478bd9Sstevel@tonic-gate c = savetoken; 396*7c478bd9Sstevel@tonic-gate savetoken = 0; 397*7c478bd9Sstevel@tonic-gate } else if (redelim != '\0') { 398*7c478bd9Sstevel@tonic-gate c = redelim; 399*7c478bd9Sstevel@tonic-gate redelim = 0; 400*7c478bd9Sstevel@tonic-gate catterm = 0; 401*7c478bd9Sstevel@tonic-gate savetoken = c; 402*7c478bd9Sstevel@tonic-gate return (lexlast = lexregexp(c)); 403*7c478bd9Sstevel@tonic-gate } else while ((c = lexgetc()) != WEOF) { 404*7c478bd9Sstevel@tonic-gate if (iswalpha(c) || c=='_') { 405*7c478bd9Sstevel@tonic-gate c = lexid(c); 406*7c478bd9Sstevel@tonic-gate } else if (iswdigit(c) || c=='.') { 407*7c478bd9Sstevel@tonic-gate c = lexnumber(c); 408*7c478bd9Sstevel@tonic-gate } else if (isWblank(c)) { 409*7c478bd9Sstevel@tonic-gate continue; 410*7c478bd9Sstevel@tonic-gate } else switch (c) { 411*7c478bd9Sstevel@tonic-gate #if DOS || OS2 412*7c478bd9Sstevel@tonic-gate case 032: /* ^Z */ 413*7c478bd9Sstevel@tonic-gate continue; 414*7c478bd9Sstevel@tonic-gate #endif 415*7c478bd9Sstevel@tonic-gate 416*7c478bd9Sstevel@tonic-gate case '"': 417*7c478bd9Sstevel@tonic-gate c = lexstring(c); 418*7c478bd9Sstevel@tonic-gate break; 419*7c478bd9Sstevel@tonic-gate 420*7c478bd9Sstevel@tonic-gate case '#': 421*7c478bd9Sstevel@tonic-gate while ((c = lexgetc())!='\n' && c!=WEOF) 422*7c478bd9Sstevel@tonic-gate ; 423*7c478bd9Sstevel@tonic-gate lexungetc(c); 424*7c478bd9Sstevel@tonic-gate continue; 425*7c478bd9Sstevel@tonic-gate 426*7c478bd9Sstevel@tonic-gate case '+': 427*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '+') 428*7c478bd9Sstevel@tonic-gate c = INC; 429*7c478bd9Sstevel@tonic-gate else if (c1 == '=') 
430*7c478bd9Sstevel@tonic-gate c = AADD; 431*7c478bd9Sstevel@tonic-gate else 432*7c478bd9Sstevel@tonic-gate lexungetc(c1); 433*7c478bd9Sstevel@tonic-gate break; 434*7c478bd9Sstevel@tonic-gate 435*7c478bd9Sstevel@tonic-gate case '-': 436*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '-') 437*7c478bd9Sstevel@tonic-gate c = DEC; 438*7c478bd9Sstevel@tonic-gate else if (c1 == '=') 439*7c478bd9Sstevel@tonic-gate c = ASUB; 440*7c478bd9Sstevel@tonic-gate else 441*7c478bd9Sstevel@tonic-gate lexungetc(c1); 442*7c478bd9Sstevel@tonic-gate break; 443*7c478bd9Sstevel@tonic-gate 444*7c478bd9Sstevel@tonic-gate case '*': 445*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 446*7c478bd9Sstevel@tonic-gate c = AMUL; 447*7c478bd9Sstevel@tonic-gate else if (c1 == '*') { 448*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 449*7c478bd9Sstevel@tonic-gate c = AEXP; 450*7c478bd9Sstevel@tonic-gate else { 451*7c478bd9Sstevel@tonic-gate c = EXP; 452*7c478bd9Sstevel@tonic-gate lexungetc(c1); 453*7c478bd9Sstevel@tonic-gate } 454*7c478bd9Sstevel@tonic-gate } else 455*7c478bd9Sstevel@tonic-gate lexungetc(c1); 456*7c478bd9Sstevel@tonic-gate break; 457*7c478bd9Sstevel@tonic-gate 458*7c478bd9Sstevel@tonic-gate case '^': 459*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') { 460*7c478bd9Sstevel@tonic-gate c = AEXP; 461*7c478bd9Sstevel@tonic-gate } else { 462*7c478bd9Sstevel@tonic-gate c = EXP; 463*7c478bd9Sstevel@tonic-gate lexungetc(c1); 464*7c478bd9Sstevel@tonic-gate } 465*7c478bd9Sstevel@tonic-gate break; 466*7c478bd9Sstevel@tonic-gate 467*7c478bd9Sstevel@tonic-gate case '/': 468*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=' 469*7c478bd9Sstevel@tonic-gate && lexlast!=RE && lexlast!=NRE 470*7c478bd9Sstevel@tonic-gate && lexlast!=';' && lexlast!='\n' 471*7c478bd9Sstevel@tonic-gate && lexlast!=',' && lexlast!='(') 472*7c478bd9Sstevel@tonic-gate c = ADIV; 473*7c478bd9Sstevel@tonic-gate else 474*7c478bd9Sstevel@tonic-gate lexungetc(c1); 475*7c478bd9Sstevel@tonic-gate 
break; 476*7c478bd9Sstevel@tonic-gate 477*7c478bd9Sstevel@tonic-gate case '%': 478*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 479*7c478bd9Sstevel@tonic-gate c = AREM; 480*7c478bd9Sstevel@tonic-gate else 481*7c478bd9Sstevel@tonic-gate lexungetc(c1); 482*7c478bd9Sstevel@tonic-gate break; 483*7c478bd9Sstevel@tonic-gate 484*7c478bd9Sstevel@tonic-gate case '&': 485*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '&') 486*7c478bd9Sstevel@tonic-gate c = AND; 487*7c478bd9Sstevel@tonic-gate else 488*7c478bd9Sstevel@tonic-gate lexungetc(c1); 489*7c478bd9Sstevel@tonic-gate break; 490*7c478bd9Sstevel@tonic-gate 491*7c478bd9Sstevel@tonic-gate case '|': 492*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '|') 493*7c478bd9Sstevel@tonic-gate c = OR; 494*7c478bd9Sstevel@tonic-gate else { 495*7c478bd9Sstevel@tonic-gate lexungetc(c1); 496*7c478bd9Sstevel@tonic-gate if (inprint) 497*7c478bd9Sstevel@tonic-gate c = PIPE; 498*7c478bd9Sstevel@tonic-gate } 499*7c478bd9Sstevel@tonic-gate break; 500*7c478bd9Sstevel@tonic-gate 501*7c478bd9Sstevel@tonic-gate case '>': 502*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 503*7c478bd9Sstevel@tonic-gate c = GE; 504*7c478bd9Sstevel@tonic-gate else if (c1 == '>') 505*7c478bd9Sstevel@tonic-gate c = APPEND; 506*7c478bd9Sstevel@tonic-gate else { 507*7c478bd9Sstevel@tonic-gate lexungetc(c1); 508*7c478bd9Sstevel@tonic-gate if (nparen==0 && inprint) 509*7c478bd9Sstevel@tonic-gate c = WRITE; 510*7c478bd9Sstevel@tonic-gate } 511*7c478bd9Sstevel@tonic-gate break; 512*7c478bd9Sstevel@tonic-gate 513*7c478bd9Sstevel@tonic-gate case '<': 514*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 515*7c478bd9Sstevel@tonic-gate c = LE; 516*7c478bd9Sstevel@tonic-gate else 517*7c478bd9Sstevel@tonic-gate lexungetc(c1); 518*7c478bd9Sstevel@tonic-gate break; 519*7c478bd9Sstevel@tonic-gate 520*7c478bd9Sstevel@tonic-gate case '!': 521*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 522*7c478bd9Sstevel@tonic-gate c = NE; 
523*7c478bd9Sstevel@tonic-gate else if (c1 == '~') 524*7c478bd9Sstevel@tonic-gate c = NRE; 525*7c478bd9Sstevel@tonic-gate else 526*7c478bd9Sstevel@tonic-gate lexungetc(c1); 527*7c478bd9Sstevel@tonic-gate break; 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate case '=': 530*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '=') 531*7c478bd9Sstevel@tonic-gate c = EQ; 532*7c478bd9Sstevel@tonic-gate else { 533*7c478bd9Sstevel@tonic-gate lexungetc(c1); 534*7c478bd9Sstevel@tonic-gate c = ASG; 535*7c478bd9Sstevel@tonic-gate } 536*7c478bd9Sstevel@tonic-gate break; 537*7c478bd9Sstevel@tonic-gate 538*7c478bd9Sstevel@tonic-gate case '\n': 539*7c478bd9Sstevel@tonic-gate switch (lexlast) { 540*7c478bd9Sstevel@tonic-gate case ')': 541*7c478bd9Sstevel@tonic-gate if (catterm || inprint) { 542*7c478bd9Sstevel@tonic-gate c = ';'; 543*7c478bd9Sstevel@tonic-gate break; 544*7c478bd9Sstevel@tonic-gate } 545*7c478bd9Sstevel@tonic-gate case AND: 546*7c478bd9Sstevel@tonic-gate case OR: 547*7c478bd9Sstevel@tonic-gate case COMMA: 548*7c478bd9Sstevel@tonic-gate case '{': 549*7c478bd9Sstevel@tonic-gate case ELSE: 550*7c478bd9Sstevel@tonic-gate case ';': 551*7c478bd9Sstevel@tonic-gate case DO: 552*7c478bd9Sstevel@tonic-gate continue; 553*7c478bd9Sstevel@tonic-gate 554*7c478bd9Sstevel@tonic-gate case '}': 555*7c478bd9Sstevel@tonic-gate if (nbrace != 0) 556*7c478bd9Sstevel@tonic-gate continue; 557*7c478bd9Sstevel@tonic-gate 558*7c478bd9Sstevel@tonic-gate default: 559*7c478bd9Sstevel@tonic-gate c = ';'; 560*7c478bd9Sstevel@tonic-gate break; 561*7c478bd9Sstevel@tonic-gate } 562*7c478bd9Sstevel@tonic-gate break; 563*7c478bd9Sstevel@tonic-gate 564*7c478bd9Sstevel@tonic-gate case ELSE: 565*7c478bd9Sstevel@tonic-gate if (lexlast != ';') { 566*7c478bd9Sstevel@tonic-gate savetoken = ELSE; 567*7c478bd9Sstevel@tonic-gate c = ';'; 568*7c478bd9Sstevel@tonic-gate } 569*7c478bd9Sstevel@tonic-gate break; 570*7c478bd9Sstevel@tonic-gate 571*7c478bd9Sstevel@tonic-gate case '(': 
572*7c478bd9Sstevel@tonic-gate ++nparen; 573*7c478bd9Sstevel@tonic-gate break; 574*7c478bd9Sstevel@tonic-gate 575*7c478bd9Sstevel@tonic-gate case ')': 576*7c478bd9Sstevel@tonic-gate if (--nparen < 0) 577*7c478bd9Sstevel@tonic-gate awkerr(unbal, "()"); 578*7c478bd9Sstevel@tonic-gate break; 579*7c478bd9Sstevel@tonic-gate 580*7c478bd9Sstevel@tonic-gate case '{': 581*7c478bd9Sstevel@tonic-gate nbrace++; 582*7c478bd9Sstevel@tonic-gate break; 583*7c478bd9Sstevel@tonic-gate 584*7c478bd9Sstevel@tonic-gate case '}': 585*7c478bd9Sstevel@tonic-gate if (--nbrace < 0) { 586*7c478bd9Sstevel@tonic-gate char brk[3]; 587*7c478bd9Sstevel@tonic-gate 588*7c478bd9Sstevel@tonic-gate brk[0] = '{'; 589*7c478bd9Sstevel@tonic-gate brk[1] = '}'; 590*7c478bd9Sstevel@tonic-gate brk[2] = '\0'; 591*7c478bd9Sstevel@tonic-gate awkerr(unbal, brk); 592*7c478bd9Sstevel@tonic-gate } 593*7c478bd9Sstevel@tonic-gate if (lexlast != ';') { 594*7c478bd9Sstevel@tonic-gate savetoken = c; 595*7c478bd9Sstevel@tonic-gate c = ';'; 596*7c478bd9Sstevel@tonic-gate } 597*7c478bd9Sstevel@tonic-gate break; 598*7c478bd9Sstevel@tonic-gate 599*7c478bd9Sstevel@tonic-gate case '[': 600*7c478bd9Sstevel@tonic-gate ++nbracket; 601*7c478bd9Sstevel@tonic-gate break; 602*7c478bd9Sstevel@tonic-gate 603*7c478bd9Sstevel@tonic-gate case ']': 604*7c478bd9Sstevel@tonic-gate if (--nbracket < 0) { 605*7c478bd9Sstevel@tonic-gate char brk[3]; 606*7c478bd9Sstevel@tonic-gate 607*7c478bd9Sstevel@tonic-gate brk[0] = '['; 608*7c478bd9Sstevel@tonic-gate brk[1] = ']'; 609*7c478bd9Sstevel@tonic-gate brk[2] = '\0'; 610*7c478bd9Sstevel@tonic-gate awkerr(unbal, brk); 611*7c478bd9Sstevel@tonic-gate } 612*7c478bd9Sstevel@tonic-gate break; 613*7c478bd9Sstevel@tonic-gate 614*7c478bd9Sstevel@tonic-gate case '\\': 615*7c478bd9Sstevel@tonic-gate if ((c1 = lexgetc()) == '\n') 616*7c478bd9Sstevel@tonic-gate continue; 617*7c478bd9Sstevel@tonic-gate lexungetc(c1); 618*7c478bd9Sstevel@tonic-gate break; 619*7c478bd9Sstevel@tonic-gate 
620*7c478bd9Sstevel@tonic-gate case ',': 621*7c478bd9Sstevel@tonic-gate c = COMMA; 622*7c478bd9Sstevel@tonic-gate break; 623*7c478bd9Sstevel@tonic-gate 624*7c478bd9Sstevel@tonic-gate case '?': 625*7c478bd9Sstevel@tonic-gate c = QUEST; 626*7c478bd9Sstevel@tonic-gate break; 627*7c478bd9Sstevel@tonic-gate 628*7c478bd9Sstevel@tonic-gate case ':': 629*7c478bd9Sstevel@tonic-gate c = COLON; 630*7c478bd9Sstevel@tonic-gate break; 631*7c478bd9Sstevel@tonic-gate 632*7c478bd9Sstevel@tonic-gate default: 633*7c478bd9Sstevel@tonic-gate if (!iswprint(c)) 634*7c478bd9Sstevel@tonic-gate awkerr( 635*7c478bd9Sstevel@tonic-gate gettext("invalid character \"%s\""), 636*7c478bd9Sstevel@tonic-gate toprint(c)); 637*7c478bd9Sstevel@tonic-gate break; 638*7c478bd9Sstevel@tonic-gate } 639*7c478bd9Sstevel@tonic-gate break; 640*7c478bd9Sstevel@tonic-gate } 641*7c478bd9Sstevel@tonic-gate 642*7c478bd9Sstevel@tonic-gate switch (c) { 643*7c478bd9Sstevel@tonic-gate case ']': 644*7c478bd9Sstevel@tonic-gate ++catterm; 645*7c478bd9Sstevel@tonic-gate break; 646*7c478bd9Sstevel@tonic-gate 647*7c478bd9Sstevel@tonic-gate case VAR: 648*7c478bd9Sstevel@tonic-gate if (catterm) { 649*7c478bd9Sstevel@tonic-gate savetoken = c; 650*7c478bd9Sstevel@tonic-gate c = CONCAT; 651*7c478bd9Sstevel@tonic-gate catterm = 0; 652*7c478bd9Sstevel@tonic-gate } else if (!isfuncdef) { 653*7c478bd9Sstevel@tonic-gate if ((c1=lexgetc()) != '(') 654*7c478bd9Sstevel@tonic-gate ++catterm; 655*7c478bd9Sstevel@tonic-gate lexungetc(c1); 656*7c478bd9Sstevel@tonic-gate } 657*7c478bd9Sstevel@tonic-gate isfuncdef = 0; 658*7c478bd9Sstevel@tonic-gate break; 659*7c478bd9Sstevel@tonic-gate 660*7c478bd9Sstevel@tonic-gate case PARM: 661*7c478bd9Sstevel@tonic-gate case CONSTANT: 662*7c478bd9Sstevel@tonic-gate if (catterm) { 663*7c478bd9Sstevel@tonic-gate savetoken = c; 664*7c478bd9Sstevel@tonic-gate c = CONCAT; 665*7c478bd9Sstevel@tonic-gate catterm = 0; 666*7c478bd9Sstevel@tonic-gate } else { 667*7c478bd9Sstevel@tonic-gate if (lexlast == '$') 
668*7c478bd9Sstevel@tonic-gate wasfield = 2; 669*7c478bd9Sstevel@tonic-gate ++catterm; 670*7c478bd9Sstevel@tonic-gate } 671*7c478bd9Sstevel@tonic-gate break; 672*7c478bd9Sstevel@tonic-gate 673*7c478bd9Sstevel@tonic-gate case INC: 674*7c478bd9Sstevel@tonic-gate case DEC: 675*7c478bd9Sstevel@tonic-gate if (!catterm || lexlast!=CONSTANT || wasfield) 676*7c478bd9Sstevel@tonic-gate break; 677*7c478bd9Sstevel@tonic-gate 678*7c478bd9Sstevel@tonic-gate case UFUNC: 679*7c478bd9Sstevel@tonic-gate case FUNC: 680*7c478bd9Sstevel@tonic-gate case GETLINE: 681*7c478bd9Sstevel@tonic-gate case '!': 682*7c478bd9Sstevel@tonic-gate case '$': 683*7c478bd9Sstevel@tonic-gate case '(': 684*7c478bd9Sstevel@tonic-gate if (catterm) { 685*7c478bd9Sstevel@tonic-gate savetoken = c; 686*7c478bd9Sstevel@tonic-gate c = CONCAT; 687*7c478bd9Sstevel@tonic-gate catterm = 0; 688*7c478bd9Sstevel@tonic-gate } 689*7c478bd9Sstevel@tonic-gate break; 690*7c478bd9Sstevel@tonic-gate 691*7c478bd9Sstevel@tonic-gate /*{*/case '}': 692*7c478bd9Sstevel@tonic-gate if (nbrace == 0) 693*7c478bd9Sstevel@tonic-gate savetoken = ';'; 694*7c478bd9Sstevel@tonic-gate case ';': 695*7c478bd9Sstevel@tonic-gate inprint = 0; 696*7c478bd9Sstevel@tonic-gate default: 697*7c478bd9Sstevel@tonic-gate if (c == DEFFUNC) 698*7c478bd9Sstevel@tonic-gate isfuncdef = 1; 699*7c478bd9Sstevel@tonic-gate catterm = 0; 700*7c478bd9Sstevel@tonic-gate } 701*7c478bd9Sstevel@tonic-gate lexlast = c; 702*7c478bd9Sstevel@tonic-gate if (wasfield) 703*7c478bd9Sstevel@tonic-gate wasfield--; 704*7c478bd9Sstevel@tonic-gate /* 705*7c478bd9Sstevel@tonic-gate * Map character constants to symbolic names. 
706*7c478bd9Sstevel@tonic-gate */ 707*7c478bd9Sstevel@tonic-gate for (i = 0; ctosym[i].c != 0; i++) 708*7c478bd9Sstevel@tonic-gate if (c == ctosym[i].c) { 709*7c478bd9Sstevel@tonic-gate c = ctosym[i].sym; 710*7c478bd9Sstevel@tonic-gate break; 711*7c478bd9Sstevel@tonic-gate } 712*7c478bd9Sstevel@tonic-gate return ((int)c); 713*7c478bd9Sstevel@tonic-gate } 714*7c478bd9Sstevel@tonic-gate 715*7c478bd9Sstevel@tonic-gate /* 716*7c478bd9Sstevel@tonic-gate * Read a number for the lexical analyzer. 717*7c478bd9Sstevel@tonic-gate * Input is the first character of the number. 718*7c478bd9Sstevel@tonic-gate * Return value is the lexical type. 719*7c478bd9Sstevel@tonic-gate */ 720*7c478bd9Sstevel@tonic-gate static int 721*7c478bd9Sstevel@tonic-gate lexnumber(wint_t c) 722*7c478bd9Sstevel@tonic-gate { 723*7c478bd9Sstevel@tonic-gate register wchar_t *cp; 724*7c478bd9Sstevel@tonic-gate register int dotfound = 0; 725*7c478bd9Sstevel@tonic-gate register int efound = 0; 726*7c478bd9Sstevel@tonic-gate INT number; 727*7c478bd9Sstevel@tonic-gate 728*7c478bd9Sstevel@tonic-gate cp = linebuf; 729*7c478bd9Sstevel@tonic-gate do { 730*7c478bd9Sstevel@tonic-gate if (iswdigit(c)) 731*7c478bd9Sstevel@tonic-gate ; 732*7c478bd9Sstevel@tonic-gate else if (c == '.') { 733*7c478bd9Sstevel@tonic-gate if (dotfound++) 734*7c478bd9Sstevel@tonic-gate break; 735*7c478bd9Sstevel@tonic-gate } else if (c=='e' || c=='E') { 736*7c478bd9Sstevel@tonic-gate if ((c = lexgetc())!='-' && c!='+') { 737*7c478bd9Sstevel@tonic-gate lexungetc(c); 738*7c478bd9Sstevel@tonic-gate c = 'e'; 739*7c478bd9Sstevel@tonic-gate } else 740*7c478bd9Sstevel@tonic-gate *cp++ = 'e'; 741*7c478bd9Sstevel@tonic-gate if (efound++) 742*7c478bd9Sstevel@tonic-gate break; 743*7c478bd9Sstevel@tonic-gate } else 744*7c478bd9Sstevel@tonic-gate break; 745*7c478bd9Sstevel@tonic-gate *cp++ = c; 746*7c478bd9Sstevel@tonic-gate } while ((c = lexgetc()) != WEOF); 747*7c478bd9Sstevel@tonic-gate *cp = '\0'; 748*7c478bd9Sstevel@tonic-gate if (dotfound && 
cp==linebuf+1) 749*7c478bd9Sstevel@tonic-gate return (DOT); 750*7c478bd9Sstevel@tonic-gate lexungetc(c); 751*7c478bd9Sstevel@tonic-gate errno = 0; 752*7c478bd9Sstevel@tonic-gate if (!dotfound 753*7c478bd9Sstevel@tonic-gate && !efound 754*7c478bd9Sstevel@tonic-gate && ((number=wcstol(linebuf, (wchar_t **)0, 10)), errno!=ERANGE)) 755*7c478bd9Sstevel@tonic-gate yylval.node = intnode(number); 756*7c478bd9Sstevel@tonic-gate else 757*7c478bd9Sstevel@tonic-gate yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0)); 758*7c478bd9Sstevel@tonic-gate return (CONSTANT); 759*7c478bd9Sstevel@tonic-gate } 760*7c478bd9Sstevel@tonic-gate 761*7c478bd9Sstevel@tonic-gate /* 762*7c478bd9Sstevel@tonic-gate * Read an identifier. 763*7c478bd9Sstevel@tonic-gate * Input is first character of identifier. 764*7c478bd9Sstevel@tonic-gate * Return VAR. 765*7c478bd9Sstevel@tonic-gate */ 766*7c478bd9Sstevel@tonic-gate static int 767*7c478bd9Sstevel@tonic-gate lexid(wint_t c) 768*7c478bd9Sstevel@tonic-gate { 769*7c478bd9Sstevel@tonic-gate register wchar_t *cp; 770*7c478bd9Sstevel@tonic-gate register size_t i; 771*7c478bd9Sstevel@tonic-gate register NODE *np; 772*7c478bd9Sstevel@tonic-gate 773*7c478bd9Sstevel@tonic-gate cp = linebuf; 774*7c478bd9Sstevel@tonic-gate do { 775*7c478bd9Sstevel@tonic-gate *cp++ = c; 776*7c478bd9Sstevel@tonic-gate c = lexgetc(); 777*7c478bd9Sstevel@tonic-gate } while (iswalpha(c) || iswdigit(c) || c=='_'); 778*7c478bd9Sstevel@tonic-gate *cp = '\0'; 779*7c478bd9Sstevel@tonic-gate lexungetc(c); 780*7c478bd9Sstevel@tonic-gate yylval.node = np = vlook(linebuf); 781*7c478bd9Sstevel@tonic-gate 782*7c478bd9Sstevel@tonic-gate switch(np->n_type) { 783*7c478bd9Sstevel@tonic-gate case KEYWORD: 784*7c478bd9Sstevel@tonic-gate switch (np->n_keywtype) { 785*7c478bd9Sstevel@tonic-gate case PRINT: 786*7c478bd9Sstevel@tonic-gate case PRINTF: 787*7c478bd9Sstevel@tonic-gate ++inprint; 788*7c478bd9Sstevel@tonic-gate default: 789*7c478bd9Sstevel@tonic-gate return ((int)np->n_keywtype); 
790*7c478bd9Sstevel@tonic-gate } 791*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 792*7c478bd9Sstevel@tonic-gate 793*7c478bd9Sstevel@tonic-gate case ARRAY: 794*7c478bd9Sstevel@tonic-gate case VAR: 795*7c478bd9Sstevel@tonic-gate /* 796*7c478bd9Sstevel@tonic-gate * If reading the argument list, create a dummy node 797*7c478bd9Sstevel@tonic-gate * for the duration of that function. These variables 798*7c478bd9Sstevel@tonic-gate * can be removed from the symbol table at function end 799*7c478bd9Sstevel@tonic-gate * but they must still exist because the execution tree 800*7c478bd9Sstevel@tonic-gate * knows about them. 801*7c478bd9Sstevel@tonic-gate */ 802*7c478bd9Sstevel@tonic-gate if (funparm) { 803*7c478bd9Sstevel@tonic-gate do_funparm: 804*7c478bd9Sstevel@tonic-gate np = emptynode(PARM, i=(cp-linebuf)); 805*7c478bd9Sstevel@tonic-gate np->n_flags = FSTRING; 806*7c478bd9Sstevel@tonic-gate np->n_string = _null; 807*7c478bd9Sstevel@tonic-gate np->n_strlen = 0; 808*7c478bd9Sstevel@tonic-gate (void) memcpy(np->n_name, linebuf, 809*7c478bd9Sstevel@tonic-gate (i+1) * sizeof(wchar_t)); 810*7c478bd9Sstevel@tonic-gate addsymtab(np); 811*7c478bd9Sstevel@tonic-gate yylval.node = np; 812*7c478bd9Sstevel@tonic-gate } else if (np == varNF || (np == varFS && 813*7c478bd9Sstevel@tonic-gate (!doing_begin || begin_getline))) { 814*7c478bd9Sstevel@tonic-gate /* 815*7c478bd9Sstevel@tonic-gate * If the user program references NF or sets 816*7c478bd9Sstevel@tonic-gate * FS either outside of a begin block or 817*7c478bd9Sstevel@tonic-gate * in a begin block after a getline then the 818*7c478bd9Sstevel@tonic-gate * input line will be split immediately upon read 819*7c478bd9Sstevel@tonic-gate * rather than when a field is first referenced. 
820*7c478bd9Sstevel@tonic-gate */ 821*7c478bd9Sstevel@tonic-gate needsplit = 1; 822*7c478bd9Sstevel@tonic-gate } else if (np == varENVIRON) 823*7c478bd9Sstevel@tonic-gate needenviron = 1; 824*7c478bd9Sstevel@tonic-gate case PARM: 825*7c478bd9Sstevel@tonic-gate return (VAR); 826*7c478bd9Sstevel@tonic-gate 827*7c478bd9Sstevel@tonic-gate case UFUNC: 828*7c478bd9Sstevel@tonic-gate /* 829*7c478bd9Sstevel@tonic-gate * It is ok to redefine functions as parameters 830*7c478bd9Sstevel@tonic-gate */ 831*7c478bd9Sstevel@tonic-gate if (funparm) goto do_funparm; 832*7c478bd9Sstevel@tonic-gate case FUNC: 833*7c478bd9Sstevel@tonic-gate case GETLINE: 834*7c478bd9Sstevel@tonic-gate /* 835*7c478bd9Sstevel@tonic-gate * When a getline is encountered, clear the 'doing_begin' flag. 836*7c478bd9Sstevel@tonic-gate * This will force the 'needsplit' flag to be set, even inside 837*7c478bd9Sstevel@tonic-gate * a begin block, if FS is altered. (See VAR case above) 838*7c478bd9Sstevel@tonic-gate */ 839*7c478bd9Sstevel@tonic-gate if (doing_begin) 840*7c478bd9Sstevel@tonic-gate begin_getline = 1; 841*7c478bd9Sstevel@tonic-gate return (np->n_type); 842*7c478bd9Sstevel@tonic-gate } 843*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 844*7c478bd9Sstevel@tonic-gate } 845*7c478bd9Sstevel@tonic-gate 846*7c478bd9Sstevel@tonic-gate /* 847*7c478bd9Sstevel@tonic-gate * Read a string for the lexical analyzer. 848*7c478bd9Sstevel@tonic-gate * `endc' terminates the string. 
849*7c478bd9Sstevel@tonic-gate */ 850*7c478bd9Sstevel@tonic-gate static int 851*7c478bd9Sstevel@tonic-gate lexstring(wint_t endc) 852*7c478bd9Sstevel@tonic-gate { 853*7c478bd9Sstevel@tonic-gate register size_t length = lexescape(endc, 0, 0); 854*7c478bd9Sstevel@tonic-gate 855*7c478bd9Sstevel@tonic-gate yylval.node = stringnode(linebuf, FALLOC, length); 856*7c478bd9Sstevel@tonic-gate return (CONSTANT); 857*7c478bd9Sstevel@tonic-gate } 858*7c478bd9Sstevel@tonic-gate 859*7c478bd9Sstevel@tonic-gate /* 860*7c478bd9Sstevel@tonic-gate * Read a regular expression. 861*7c478bd9Sstevel@tonic-gate */ 862*7c478bd9Sstevel@tonic-gate static int 863*7c478bd9Sstevel@tonic-gate lexregexp(wint_t endc) 864*7c478bd9Sstevel@tonic-gate { 865*7c478bd9Sstevel@tonic-gate (void) lexescape(endc, 1, 0); 866*7c478bd9Sstevel@tonic-gate yylval.node = renode(linebuf); 867*7c478bd9Sstevel@tonic-gate return (URE); 868*7c478bd9Sstevel@tonic-gate } 869*7c478bd9Sstevel@tonic-gate 870*7c478bd9Sstevel@tonic-gate /* 871*7c478bd9Sstevel@tonic-gate * Process a string, converting the escape characters as required by 872*7c478bd9Sstevel@tonic-gate * 1003.2. The processed string ends up in the global linebuf[]. This 873*7c478bd9Sstevel@tonic-gate * routine also changes the value of 'progfd' - the program file 874*7c478bd9Sstevel@tonic-gate * descriptor, so it should be used with some care. It is presently used to 875*7c478bd9Sstevel@tonic-gate * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()). 
876*7c478bd9Sstevel@tonic-gate */ 877*7c478bd9Sstevel@tonic-gate void 878*7c478bd9Sstevel@tonic-gate strescape(wchar_t *str) 879*7c478bd9Sstevel@tonic-gate { 880*7c478bd9Sstevel@tonic-gate progptr = str; 881*7c478bd9Sstevel@tonic-gate proglen = wcslen(str) + 1; /* Include \0 */ 882*7c478bd9Sstevel@tonic-gate (void) lexescape('\0', 0, 1); 883*7c478bd9Sstevel@tonic-gate progptr = NULL; 884*7c478bd9Sstevel@tonic-gate } 885*7c478bd9Sstevel@tonic-gate 886*7c478bd9Sstevel@tonic-gate /* 887*7c478bd9Sstevel@tonic-gate * Read a string or regular expression, terminated by ``endc'', 888*7c478bd9Sstevel@tonic-gate * for lexical analyzer, processing escape sequences. 889*7c478bd9Sstevel@tonic-gate * Return string length. 890*7c478bd9Sstevel@tonic-gate */ 891*7c478bd9Sstevel@tonic-gate static size_t 892*7c478bd9Sstevel@tonic-gate lexescape(wint_t endc, int regx, int cmd_line_operand) 893*7c478bd9Sstevel@tonic-gate { 894*7c478bd9Sstevel@tonic-gate static char nlre[256]; 895*7c478bd9Sstevel@tonic-gate static char nlstr[256]; 896*7c478bd9Sstevel@tonic-gate static char eofre[256]; 897*7c478bd9Sstevel@tonic-gate static char eofstr[256]; 898*7c478bd9Sstevel@tonic-gate int first_time = 1; 899*7c478bd9Sstevel@tonic-gate wint_t c; 900*7c478bd9Sstevel@tonic-gate wchar_t *cp; 901*7c478bd9Sstevel@tonic-gate int n, max; 902*7c478bd9Sstevel@tonic-gate 903*7c478bd9Sstevel@tonic-gate if (first_time == 1) { 904*7c478bd9Sstevel@tonic-gate (void) strcpy(nlre, gettext("Newline in regular expression\n")); 905*7c478bd9Sstevel@tonic-gate (void) strcpy(nlstr, gettext("Newline in string\n")); 906*7c478bd9Sstevel@tonic-gate (void) strcpy(eofre, gettext("EOF in regular expression\n")); 907*7c478bd9Sstevel@tonic-gate (void) strcpy(eofstr, gettext("EOF in string\n")); 908*7c478bd9Sstevel@tonic-gate first_time = 0; 909*7c478bd9Sstevel@tonic-gate } 910*7c478bd9Sstevel@tonic-gate 911*7c478bd9Sstevel@tonic-gate cp = linebuf; 912*7c478bd9Sstevel@tonic-gate while ((c = lexgetc()) != endc) { 
913*7c478bd9Sstevel@tonic-gate if (c == '\n') 914*7c478bd9Sstevel@tonic-gate awkerr(regx ? nlre : nlstr); 915*7c478bd9Sstevel@tonic-gate if (c == '\\') { 916*7c478bd9Sstevel@tonic-gate switch (c = lexgetc(), c) { 917*7c478bd9Sstevel@tonic-gate case '\\': 918*7c478bd9Sstevel@tonic-gate if (regx) 919*7c478bd9Sstevel@tonic-gate *cp++ = '\\'; 920*7c478bd9Sstevel@tonic-gate break; 921*7c478bd9Sstevel@tonic-gate 922*7c478bd9Sstevel@tonic-gate case '/': 923*7c478bd9Sstevel@tonic-gate c = '/'; 924*7c478bd9Sstevel@tonic-gate break; 925*7c478bd9Sstevel@tonic-gate 926*7c478bd9Sstevel@tonic-gate case 'n': 927*7c478bd9Sstevel@tonic-gate c = '\n'; 928*7c478bd9Sstevel@tonic-gate break; 929*7c478bd9Sstevel@tonic-gate 930*7c478bd9Sstevel@tonic-gate case 'b': 931*7c478bd9Sstevel@tonic-gate c = '\b'; 932*7c478bd9Sstevel@tonic-gate break; 933*7c478bd9Sstevel@tonic-gate 934*7c478bd9Sstevel@tonic-gate case 't': 935*7c478bd9Sstevel@tonic-gate c = '\t'; 936*7c478bd9Sstevel@tonic-gate break; 937*7c478bd9Sstevel@tonic-gate 938*7c478bd9Sstevel@tonic-gate case 'r': 939*7c478bd9Sstevel@tonic-gate c = '\r'; 940*7c478bd9Sstevel@tonic-gate break; 941*7c478bd9Sstevel@tonic-gate 942*7c478bd9Sstevel@tonic-gate case 'f': 943*7c478bd9Sstevel@tonic-gate c = '\f'; 944*7c478bd9Sstevel@tonic-gate break; 945*7c478bd9Sstevel@tonic-gate 946*7c478bd9Sstevel@tonic-gate case 'v': 947*7c478bd9Sstevel@tonic-gate c = '\v'; 948*7c478bd9Sstevel@tonic-gate break; 949*7c478bd9Sstevel@tonic-gate 950*7c478bd9Sstevel@tonic-gate case 'a': 951*7c478bd9Sstevel@tonic-gate c = (char) 0x07; 952*7c478bd9Sstevel@tonic-gate break; 953*7c478bd9Sstevel@tonic-gate 954*7c478bd9Sstevel@tonic-gate case 'x': 955*7c478bd9Sstevel@tonic-gate n = 0; 956*7c478bd9Sstevel@tonic-gate while (iswxdigit(c = lexgetc())) { 957*7c478bd9Sstevel@tonic-gate if (iswdigit(c)) 958*7c478bd9Sstevel@tonic-gate c -= '0'; 959*7c478bd9Sstevel@tonic-gate else if (iswupper(c)) 960*7c478bd9Sstevel@tonic-gate c -= 'A'-10; 961*7c478bd9Sstevel@tonic-gate else 
962*7c478bd9Sstevel@tonic-gate c -= 'a'-10; 963*7c478bd9Sstevel@tonic-gate n = (n<<4) + c; 964*7c478bd9Sstevel@tonic-gate } 965*7c478bd9Sstevel@tonic-gate lexungetc(c); 966*7c478bd9Sstevel@tonic-gate c = n; 967*7c478bd9Sstevel@tonic-gate break; 968*7c478bd9Sstevel@tonic-gate 969*7c478bd9Sstevel@tonic-gate case '0': 970*7c478bd9Sstevel@tonic-gate case '1': 971*7c478bd9Sstevel@tonic-gate case '2': 972*7c478bd9Sstevel@tonic-gate case '3': 973*7c478bd9Sstevel@tonic-gate case '4': 974*7c478bd9Sstevel@tonic-gate case '5': 975*7c478bd9Sstevel@tonic-gate case '6': 976*7c478bd9Sstevel@tonic-gate case '7': 977*7c478bd9Sstevel@tonic-gate #if 0 978*7c478bd9Sstevel@tonic-gate /* 979*7c478bd9Sstevel@tonic-gate * Posix.2 draft 10 disallows the use of back-referencing - it explicitly 980*7c478bd9Sstevel@tonic-gate * requires processing of the octal escapes both in strings and 981*7c478bd9Sstevel@tonic-gate * regular expressions. The following code is disabled instead of 982*7c478bd9Sstevel@tonic-gate * removed as back-referencing may be reintroduced in a future draft 983*7c478bd9Sstevel@tonic-gate * of the standard. 984*7c478bd9Sstevel@tonic-gate */ 985*7c478bd9Sstevel@tonic-gate /* 986*7c478bd9Sstevel@tonic-gate * For regular expressions, we disallow 987*7c478bd9Sstevel@tonic-gate * \ooo to mean octal character, in favour 988*7c478bd9Sstevel@tonic-gate * of back referencing. 
989*7c478bd9Sstevel@tonic-gate */ 990*7c478bd9Sstevel@tonic-gate if (regx) { 991*7c478bd9Sstevel@tonic-gate *cp++ = '\\'; 992*7c478bd9Sstevel@tonic-gate break; 993*7c478bd9Sstevel@tonic-gate } 994*7c478bd9Sstevel@tonic-gate #endif 995*7c478bd9Sstevel@tonic-gate max = 3; 996*7c478bd9Sstevel@tonic-gate n = 0; 997*7c478bd9Sstevel@tonic-gate do { 998*7c478bd9Sstevel@tonic-gate n = (n<<3) + c-'0'; 999*7c478bd9Sstevel@tonic-gate if ((c = lexgetc())>'7' || c<'0') 1000*7c478bd9Sstevel@tonic-gate break; 1001*7c478bd9Sstevel@tonic-gate } while (--max); 1002*7c478bd9Sstevel@tonic-gate lexungetc(c); 1003*7c478bd9Sstevel@tonic-gate /* 1004*7c478bd9Sstevel@tonic-gate * an octal escape sequence must have at least 1005*7c478bd9Sstevel@tonic-gate * 2 digits after the backslash, otherwise 1006*7c478bd9Sstevel@tonic-gate * it gets passed straight thru for possible 1007*7c478bd9Sstevel@tonic-gate * use in backreferencing. 1008*7c478bd9Sstevel@tonic-gate */ 1009*7c478bd9Sstevel@tonic-gate if (max == 3) { 1010*7c478bd9Sstevel@tonic-gate *cp++ = '\\'; 1011*7c478bd9Sstevel@tonic-gate n += '0'; 1012*7c478bd9Sstevel@tonic-gate } 1013*7c478bd9Sstevel@tonic-gate c = n; 1014*7c478bd9Sstevel@tonic-gate break; 1015*7c478bd9Sstevel@tonic-gate 1016*7c478bd9Sstevel@tonic-gate case '\n': 1017*7c478bd9Sstevel@tonic-gate continue; 1018*7c478bd9Sstevel@tonic-gate 1019*7c478bd9Sstevel@tonic-gate default: 1020*7c478bd9Sstevel@tonic-gate if (c != endc || cmd_line_operand) { 1021*7c478bd9Sstevel@tonic-gate *cp++ = '\\'; 1022*7c478bd9Sstevel@tonic-gate if (c == endc) 1023*7c478bd9Sstevel@tonic-gate lexungetc(c); 1024*7c478bd9Sstevel@tonic-gate } 1025*7c478bd9Sstevel@tonic-gate } 1026*7c478bd9Sstevel@tonic-gate } 1027*7c478bd9Sstevel@tonic-gate if (c == WEOF) 1028*7c478bd9Sstevel@tonic-gate awkerr(regx ? 
eofre : eofstr); 1029*7c478bd9Sstevel@tonic-gate *cp++ = c; 1030*7c478bd9Sstevel@tonic-gate } 1031*7c478bd9Sstevel@tonic-gate *cp = '\0'; 1032*7c478bd9Sstevel@tonic-gate return (cp - linebuf); 1033*7c478bd9Sstevel@tonic-gate } 1034*7c478bd9Sstevel@tonic-gate 1035*7c478bd9Sstevel@tonic-gate /* 1036*7c478bd9Sstevel@tonic-gate * Build a regular expression NODE. 1037*7c478bd9Sstevel@tonic-gate * Argument is the string holding the expression. 1038*7c478bd9Sstevel@tonic-gate */ 1039*7c478bd9Sstevel@tonic-gate NODE * 1040*7c478bd9Sstevel@tonic-gate renode(wchar_t *s) 1041*7c478bd9Sstevel@tonic-gate { 1042*7c478bd9Sstevel@tonic-gate register NODE *np; 1043*7c478bd9Sstevel@tonic-gate int n; 1044*7c478bd9Sstevel@tonic-gate 1045*7c478bd9Sstevel@tonic-gate np = emptynode(RE, 0); 1046*7c478bd9Sstevel@tonic-gate np->n_left = np->n_right = NNULL; 1047*7c478bd9Sstevel@tonic-gate np->n_regexp = (REGEXP)emalloc(sizeof(regex_t)); 1048*7c478bd9Sstevel@tonic-gate if ((n = REGWCOMP(np->n_regexp, s, REG_EXTENDED)) != REG_OK) { 1049*7c478bd9Sstevel@tonic-gate int m; 1050*7c478bd9Sstevel@tonic-gate char *p; 1051*7c478bd9Sstevel@tonic-gate 1052*7c478bd9Sstevel@tonic-gate m = regerror(n, np->n_regexp, NULL, 0); 1053*7c478bd9Sstevel@tonic-gate p = (char *)emalloc(m); 1054*7c478bd9Sstevel@tonic-gate regerror(n, np->n_regexp, p, m); 1055*7c478bd9Sstevel@tonic-gate awkerr("/%S/: %s", s, p); 1056*7c478bd9Sstevel@tonic-gate } 1057*7c478bd9Sstevel@tonic-gate return (np); 1058*7c478bd9Sstevel@tonic-gate } 1059*7c478bd9Sstevel@tonic-gate /* 1060*7c478bd9Sstevel@tonic-gate * Get a character for the lexical analyser routine. 
1061*7c478bd9Sstevel@tonic-gate */ 1062*7c478bd9Sstevel@tonic-gate static wint_t 1063*7c478bd9Sstevel@tonic-gate lexgetc() 1064*7c478bd9Sstevel@tonic-gate { 1065*7c478bd9Sstevel@tonic-gate register wint_t c; 1066*7c478bd9Sstevel@tonic-gate static char **files = &progfiles[0]; 1067*7c478bd9Sstevel@tonic-gate 1068*7c478bd9Sstevel@tonic-gate if (progfp!=FNULL && (c = fgetwc(progfp))!=WEOF) 1069*7c478bd9Sstevel@tonic-gate ; 1070*7c478bd9Sstevel@tonic-gate else { 1071*7c478bd9Sstevel@tonic-gate if (progptr != NULL) { 1072*7c478bd9Sstevel@tonic-gate if (proglen-- <= 0) 1073*7c478bd9Sstevel@tonic-gate c = WEOF; 1074*7c478bd9Sstevel@tonic-gate else 1075*7c478bd9Sstevel@tonic-gate c = *progptr++; 1076*7c478bd9Sstevel@tonic-gate } else { 1077*7c478bd9Sstevel@tonic-gate if (progfp != FNULL) 1078*7c478bd9Sstevel@tonic-gate if (progfp != stdin) 1079*7c478bd9Sstevel@tonic-gate (void)fclose(progfp); 1080*7c478bd9Sstevel@tonic-gate else 1081*7c478bd9Sstevel@tonic-gate clearerr(progfp); 1082*7c478bd9Sstevel@tonic-gate progfp = FNULL; 1083*7c478bd9Sstevel@tonic-gate if (files < progfilep) { 1084*7c478bd9Sstevel@tonic-gate filename = *files++; 1085*7c478bd9Sstevel@tonic-gate lineno = 1; 1086*7c478bd9Sstevel@tonic-gate if (filename[0]=='-' && filename[1]=='\0') 1087*7c478bd9Sstevel@tonic-gate progfp = stdin; 1088*7c478bd9Sstevel@tonic-gate else if ((progfp=fopen(filename, r)) == FNULL) { 1089*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, 1090*7c478bd9Sstevel@tonic-gate gettext("script file \"%s\""), filename); 1091*7c478bd9Sstevel@tonic-gate exit(1); 1092*7c478bd9Sstevel@tonic-gate } 1093*7c478bd9Sstevel@tonic-gate c = fgetwc(progfp); 1094*7c478bd9Sstevel@tonic-gate } 1095*7c478bd9Sstevel@tonic-gate } 1096*7c478bd9Sstevel@tonic-gate } 1097*7c478bd9Sstevel@tonic-gate if (c == '\n') 1098*7c478bd9Sstevel@tonic-gate ++lineno; 1099*7c478bd9Sstevel@tonic-gate if (conptr >= &context[NCONTEXT]) 1100*7c478bd9Sstevel@tonic-gate conptr = &context[0]; 1101*7c478bd9Sstevel@tonic-gate if (c != 
WEOF) 1102*7c478bd9Sstevel@tonic-gate *conptr++ = c; 1103*7c478bd9Sstevel@tonic-gate return (c); 1104*7c478bd9Sstevel@tonic-gate } 1105*7c478bd9Sstevel@tonic-gate 1106*7c478bd9Sstevel@tonic-gate /* 1107*7c478bd9Sstevel@tonic-gate * Return a character for lexical analyser. 1108*7c478bd9Sstevel@tonic-gate * Only one returned character is (not enforced) legitimite. 1109*7c478bd9Sstevel@tonic-gate */ 1110*7c478bd9Sstevel@tonic-gate static void 1111*7c478bd9Sstevel@tonic-gate lexungetc(wint_t c) 1112*7c478bd9Sstevel@tonic-gate { 1113*7c478bd9Sstevel@tonic-gate if (c == '\n') 1114*7c478bd9Sstevel@tonic-gate --lineno; 1115*7c478bd9Sstevel@tonic-gate if (c != WEOF) { 1116*7c478bd9Sstevel@tonic-gate if (conptr == &context[0]) 1117*7c478bd9Sstevel@tonic-gate conptr = &context[NCONTEXT]; 1118*7c478bd9Sstevel@tonic-gate *--conptr = '\0'; 1119*7c478bd9Sstevel@tonic-gate } 1120*7c478bd9Sstevel@tonic-gate if (progfp != FNULL) { 1121*7c478bd9Sstevel@tonic-gate (void)ungetwc(c, progfp); 1122*7c478bd9Sstevel@tonic-gate return; 1123*7c478bd9Sstevel@tonic-gate } 1124*7c478bd9Sstevel@tonic-gate if (c == WEOF) 1125*7c478bd9Sstevel@tonic-gate return; 1126*7c478bd9Sstevel@tonic-gate *--progptr = c; 1127*7c478bd9Sstevel@tonic-gate proglen++; 1128*7c478bd9Sstevel@tonic-gate } 1129*7c478bd9Sstevel@tonic-gate 1130*7c478bd9Sstevel@tonic-gate /* 1131*7c478bd9Sstevel@tonic-gate * Syntax errors during parsing. 1132*7c478bd9Sstevel@tonic-gate */ 1133*7c478bd9Sstevel@tonic-gate void 1134*7c478bd9Sstevel@tonic-gate yyerror(char *s, ...) 
1135*7c478bd9Sstevel@tonic-gate { 1136*7c478bd9Sstevel@tonic-gate if (lexlast==FUNC || lexlast==GETLINE || lexlast==KEYWORD) 1137*7c478bd9Sstevel@tonic-gate if (lexlast == KEYWORD) 1138*7c478bd9Sstevel@tonic-gate awkerr(gettext("inadmissible use of reserved keyword")); 1139*7c478bd9Sstevel@tonic-gate else 1140*7c478bd9Sstevel@tonic-gate awkerr(gettext("attempt to redefine builtin function")); 1141*7c478bd9Sstevel@tonic-gate awkerr(s); 1142*7c478bd9Sstevel@tonic-gate } 1143*7c478bd9Sstevel@tonic-gate 1144*7c478bd9Sstevel@tonic-gate /* 1145*7c478bd9Sstevel@tonic-gate * Error routine for all awk errors. 1146*7c478bd9Sstevel@tonic-gate */ 1147*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1148*7c478bd9Sstevel@tonic-gate void 1149*7c478bd9Sstevel@tonic-gate awkerr(char *fmt, ...) 1150*7c478bd9Sstevel@tonic-gate { 1151*7c478bd9Sstevel@tonic-gate va_list args; 1152*7c478bd9Sstevel@tonic-gate 1153*7c478bd9Sstevel@tonic-gate va_start(args, fmt); 1154*7c478bd9Sstevel@tonic-gate awkierr(0, fmt, args); 1155*7c478bd9Sstevel@tonic-gate va_end(args); 1156*7c478bd9Sstevel@tonic-gate } 1157*7c478bd9Sstevel@tonic-gate 1158*7c478bd9Sstevel@tonic-gate /* 1159*7c478bd9Sstevel@tonic-gate * Error routine like "awkerr" except that it prints out 1160*7c478bd9Sstevel@tonic-gate * a message that includes an errno-specific indication. 1161*7c478bd9Sstevel@tonic-gate */ 1162*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1163*7c478bd9Sstevel@tonic-gate void 1164*7c478bd9Sstevel@tonic-gate awkperr(char *fmt, ...) 
1165*7c478bd9Sstevel@tonic-gate { 1166*7c478bd9Sstevel@tonic-gate va_list args; 1167*7c478bd9Sstevel@tonic-gate 1168*7c478bd9Sstevel@tonic-gate va_start(args, fmt); 1169*7c478bd9Sstevel@tonic-gate awkierr(1, fmt, args); 1170*7c478bd9Sstevel@tonic-gate va_end(args); 1171*7c478bd9Sstevel@tonic-gate } 1172*7c478bd9Sstevel@tonic-gate 1173*7c478bd9Sstevel@tonic-gate /* 1174*7c478bd9Sstevel@tonic-gate * Common internal routine for awkerr, awkperr 1175*7c478bd9Sstevel@tonic-gate */ 1176*7c478bd9Sstevel@tonic-gate static void 1177*7c478bd9Sstevel@tonic-gate awkierr(int perr, char *fmt, va_list ap) 1178*7c478bd9Sstevel@tonic-gate { 1179*7c478bd9Sstevel@tonic-gate static char sep1[] = "\n>>>\t"; 1180*7c478bd9Sstevel@tonic-gate static char sep2[] = "\t<<<"; 1181*7c478bd9Sstevel@tonic-gate int saveerr = errno; 1182*7c478bd9Sstevel@tonic-gate 1183*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: ", _cmdname); 1184*7c478bd9Sstevel@tonic-gate if (running) { 1185*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u ("), 1186*7c478bd9Sstevel@tonic-gate curnode==NNULL ? 0 : curnode->n_lineno); 1187*7c478bd9Sstevel@tonic-gate if (phase == 0) 1188*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "NR=%lld): ", (INT)exprint(varNR)); 1189*7c478bd9Sstevel@tonic-gate else 1190*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s): ", 1191*7c478bd9Sstevel@tonic-gate phase==BEGIN ? 
s_BEGIN : s_END); 1192*7c478bd9Sstevel@tonic-gate } else if (lineno != 0) { 1193*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("file \"%s\": "), filename); 1194*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext("line %u: "), lineno); 1195*7c478bd9Sstevel@tonic-gate } 1196*7c478bd9Sstevel@tonic-gate (void) vfprintf(stderr, gettext(fmt), ap); 1197*7c478bd9Sstevel@tonic-gate if (perr == 1) 1198*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, ": %s", strerror(saveerr)); 1199*7c478bd9Sstevel@tonic-gate if (perr != 2 && !running) { 1200*7c478bd9Sstevel@tonic-gate register wchar_t *cp; 1201*7c478bd9Sstevel@tonic-gate register int n; 1202*7c478bd9Sstevel@tonic-gate register int c; 1203*7c478bd9Sstevel@tonic-gate 1204*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext(" Context is:%s"), sep1); 1205*7c478bd9Sstevel@tonic-gate cp = conptr; 1206*7c478bd9Sstevel@tonic-gate n = NCONTEXT; 1207*7c478bd9Sstevel@tonic-gate do { 1208*7c478bd9Sstevel@tonic-gate if (cp >= &context[NCONTEXT]) 1209*7c478bd9Sstevel@tonic-gate cp = &context[0]; 1210*7c478bd9Sstevel@tonic-gate if ((c = *cp++) != '\0') 1211*7c478bd9Sstevel@tonic-gate (void)fputs(c=='\n' ? 
sep1 : toprint(c), 1212*7c478bd9Sstevel@tonic-gate stderr); 1213*7c478bd9Sstevel@tonic-gate } while (--n != 0); 1214*7c478bd9Sstevel@tonic-gate (void)fputs(sep2, stderr); 1215*7c478bd9Sstevel@tonic-gate } 1216*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "\n"); 1217*7c478bd9Sstevel@tonic-gate exit(1); 1218*7c478bd9Sstevel@tonic-gate } 1219*7c478bd9Sstevel@tonic-gate 1220*7c478bd9Sstevel@tonic-gate wchar_t * 1221*7c478bd9Sstevel@tonic-gate emalloc(unsigned n) 1222*7c478bd9Sstevel@tonic-gate { 1223*7c478bd9Sstevel@tonic-gate wchar_t *cp; 1224*7c478bd9Sstevel@tonic-gate 1225*7c478bd9Sstevel@tonic-gate if ((cp = malloc(n)) == NULL) 1226*7c478bd9Sstevel@tonic-gate awkerr(nomem); 1227*7c478bd9Sstevel@tonic-gate return cp; 1228*7c478bd9Sstevel@tonic-gate } 1229*7c478bd9Sstevel@tonic-gate 1230*7c478bd9Sstevel@tonic-gate wchar_t * 1231*7c478bd9Sstevel@tonic-gate erealloc(wchar_t *p, unsigned n) 1232*7c478bd9Sstevel@tonic-gate { 1233*7c478bd9Sstevel@tonic-gate wchar_t *cp; 1234*7c478bd9Sstevel@tonic-gate 1235*7c478bd9Sstevel@tonic-gate if ((cp = realloc(p, n)) == NULL) 1236*7c478bd9Sstevel@tonic-gate awkerr(nomem); 1237*7c478bd9Sstevel@tonic-gate return cp; 1238*7c478bd9Sstevel@tonic-gate } 1239*7c478bd9Sstevel@tonic-gate 1240*7c478bd9Sstevel@tonic-gate 1241*7c478bd9Sstevel@tonic-gate /* 1242*7c478bd9Sstevel@tonic-gate * usage message for awk 1243*7c478bd9Sstevel@tonic-gate */ 1244*7c478bd9Sstevel@tonic-gate static int 1245*7c478bd9Sstevel@tonic-gate usage() 1246*7c478bd9Sstevel@tonic-gate { 1247*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, gettext( 1248*7c478bd9Sstevel@tonic-gate "Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n" 1249*7c478bd9Sstevel@tonic-gate " awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] 
[file ...]\n")); 1250*7c478bd9Sstevel@tonic-gate return (2); 1251*7c478bd9Sstevel@tonic-gate } 1252*7c478bd9Sstevel@tonic-gate 1253*7c478bd9Sstevel@tonic-gate 1254*7c478bd9Sstevel@tonic-gate static wchar_t * 1255*7c478bd9Sstevel@tonic-gate mbconvert(char *str) 1256*7c478bd9Sstevel@tonic-gate { 1257*7c478bd9Sstevel@tonic-gate static wchar_t *op = 0; 1258*7c478bd9Sstevel@tonic-gate 1259*7c478bd9Sstevel@tonic-gate if (op != 0) 1260*7c478bd9Sstevel@tonic-gate free(op); 1261*7c478bd9Sstevel@tonic-gate return (op = mbstowcsdup(str)); 1262*7c478bd9Sstevel@tonic-gate } 1263*7c478bd9Sstevel@tonic-gate 1264*7c478bd9Sstevel@tonic-gate char * 1265*7c478bd9Sstevel@tonic-gate mbunconvert(wchar_t *str) 1266*7c478bd9Sstevel@tonic-gate { 1267*7c478bd9Sstevel@tonic-gate static char *op = 0; 1268*7c478bd9Sstevel@tonic-gate 1269*7c478bd9Sstevel@tonic-gate if (op != 0) 1270*7c478bd9Sstevel@tonic-gate free(op); 1271*7c478bd9Sstevel@tonic-gate return (op = wcstombsdup(str)); 1272*7c478bd9Sstevel@tonic-gate } 1273*7c478bd9Sstevel@tonic-gate 1274*7c478bd9Sstevel@tonic-gate /* 1275*7c478bd9Sstevel@tonic-gate * Solaris port - following functions are typical MKS functions written 1276*7c478bd9Sstevel@tonic-gate * to work for Solaris. 
/*
 * mbstowcsdup -- convert the multibyte string `s' to a newly allocated
 * wide character string.
 *
 * Returns the new string, which the caller must free(), or NULL when
 * memory cannot be obtained or `s' contains an invalid multibyte
 * sequence.  On a conversion failure the buffer is released and errno is
 * left as mbstowcs() set it.
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/* One wide character per byte is the worst case, plus the NUL */
	n = strlen(s) + 1;
	if ((w = malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		int saverr = errno;

		free(w);
		errno = saverr;
		return (NULL);
	}
	return (w);
}

/*
 * wcstombsdup -- convert the wide character string `w' to a newly
 * allocated multibyte string.
 *
 * Returns the new string, which the caller must free(), or NULL when
 * memory cannot be obtained or `w' contains a character that cannot be
 * converted.  On a conversion failure errno is left as wcstombs() set it.
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t n;
	char *mb;
	char *shrunk;

	/* Fetch memory for worst case string length */
	n = (wcslen(w) + 1) * MB_CUR_MAX;
	if ((mb = malloc(n)) == NULL)
		return (NULL);

	/* Convert the string */
	if (wcstombs(mb, w, n) == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  If that fails the oversized buffer is
	 * still valid, so hand it back rather than leaking it.
	 */
	if ((shrunk = realloc(mb, strlen(mb) + 1)) == NULL)
		return (mb);
	return (shrunk);
}

/*
 * The upe_ctrls[] table contains the printable 'control-sequences' for the
 * character values 0..31 and 127. The first entry is for value 127, thus the
 * entries for the remaining character values are from 1..32.
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@", "^A", "^B", "^C", "^D", "^E", "^F", "^G",
	"^H", "^I", "^J", "^K", "^L", "^M", "^N", "^O",
	"^P", "^Q", "^R", "^S", "^T", "^U", "^V", "^W",
	"^X", "^Y", "^Z", "^[", "^\\", "^]", "^^", "^_"
};


/*
 * Return a printable string corresponding to the given character value. If
 * the character is printable, simply return it as the string. If it is in
 * the range specified by table 5-101 in the UPE, return the corresponding
 * string. Otherwise, return an octal escape sequence.
 */
1345*7c478bd9Sstevel@tonic-gate */ 1346*7c478bd9Sstevel@tonic-gate static const char * 1347*7c478bd9Sstevel@tonic-gate toprint(c) 1348*7c478bd9Sstevel@tonic-gate wchar_t c; 1349*7c478bd9Sstevel@tonic-gate { 1350*7c478bd9Sstevel@tonic-gate int n, len; 1351*7c478bd9Sstevel@tonic-gate unsigned char *ptr; 1352*7c478bd9Sstevel@tonic-gate static char mbch[MB_LEN_MAX+1]; 1353*7c478bd9Sstevel@tonic-gate static char buf[5 * MB_LEN_MAX + 1]; 1354*7c478bd9Sstevel@tonic-gate 1355*7c478bd9Sstevel@tonic-gate if ((n = wctomb(mbch, c)) == -1) { 1356*7c478bd9Sstevel@tonic-gate /* Should never happen */ 1357*7c478bd9Sstevel@tonic-gate (void) sprintf(buf, "\\%x", c); 1358*7c478bd9Sstevel@tonic-gate return (buf); 1359*7c478bd9Sstevel@tonic-gate } 1360*7c478bd9Sstevel@tonic-gate mbch[n] = '\0'; 1361*7c478bd9Sstevel@tonic-gate if (iswprint(c)) { 1362*7c478bd9Sstevel@tonic-gate return (mbch); 1363*7c478bd9Sstevel@tonic-gate } else if (c == 127) { 1364*7c478bd9Sstevel@tonic-gate return (upe_ctrls[0]); 1365*7c478bd9Sstevel@tonic-gate } else if (c < 32) { 1366*7c478bd9Sstevel@tonic-gate /* Print as in Table 5-101 in the UPE */ 1367*7c478bd9Sstevel@tonic-gate return (upe_ctrls[c+1]); 1368*7c478bd9Sstevel@tonic-gate } else { 1369*7c478bd9Sstevel@tonic-gate /* Print as an octal escape sequence */ 1370*7c478bd9Sstevel@tonic-gate for (len = 0, ptr = (unsigned char *) mbch; 0 < n; --n, ++ptr) 1371*7c478bd9Sstevel@tonic-gate len += sprintf(buf+len, "\\%03o", *ptr); 1372*7c478bd9Sstevel@tonic-gate } 1373*7c478bd9Sstevel@tonic-gate return (buf); 1374*7c478bd9Sstevel@tonic-gate } 1375*7c478bd9Sstevel@tonic-gate 1376*7c478bd9Sstevel@tonic-gate static int 1377*7c478bd9Sstevel@tonic-gate wcoff(const wchar_t *astring, const int off) 1378*7c478bd9Sstevel@tonic-gate { 1379*7c478bd9Sstevel@tonic-gate const wchar_t *s = astring; 1380*7c478bd9Sstevel@tonic-gate int c = 0; 1381*7c478bd9Sstevel@tonic-gate char mb[MB_LEN_MAX]; 1382*7c478bd9Sstevel@tonic-gate 1383*7c478bd9Sstevel@tonic-gate while (c < off) { 
1384*7c478bd9Sstevel@tonic-gate int n; 1385*7c478bd9Sstevel@tonic-gate if ((n = wctomb(mb, *s)) == 0) 1386*7c478bd9Sstevel@tonic-gate break; 1387*7c478bd9Sstevel@tonic-gate if (n == -1) 1388*7c478bd9Sstevel@tonic-gate n = 1; 1389*7c478bd9Sstevel@tonic-gate c += n; 1390*7c478bd9Sstevel@tonic-gate s++; 1391*7c478bd9Sstevel@tonic-gate } 1392*7c478bd9Sstevel@tonic-gate 1393*7c478bd9Sstevel@tonic-gate return (s - astring); 1394*7c478bd9Sstevel@tonic-gate } 1395*7c478bd9Sstevel@tonic-gate 1396*7c478bd9Sstevel@tonic-gate int 1397*7c478bd9Sstevel@tonic-gate int_regwcomp(register regex_t *r, const wchar_t *pattern, int uflags) 1398*7c478bd9Sstevel@tonic-gate { 1399*7c478bd9Sstevel@tonic-gate char *mbpattern; 1400*7c478bd9Sstevel@tonic-gate int ret; 1401*7c478bd9Sstevel@tonic-gate 1402*7c478bd9Sstevel@tonic-gate if ((mbpattern = wcstombsdup((wchar_t *) pattern)) == NULL) 1403*7c478bd9Sstevel@tonic-gate return (REG_ESPACE); 1404*7c478bd9Sstevel@tonic-gate 1405*7c478bd9Sstevel@tonic-gate ret = regcomp(r, mbpattern, uflags); 1406*7c478bd9Sstevel@tonic-gate 1407*7c478bd9Sstevel@tonic-gate free(mbpattern); 1408*7c478bd9Sstevel@tonic-gate 1409*7c478bd9Sstevel@tonic-gate return (ret); 1410*7c478bd9Sstevel@tonic-gate } 1411*7c478bd9Sstevel@tonic-gate 1412*7c478bd9Sstevel@tonic-gate int 1413*7c478bd9Sstevel@tonic-gate int_regwexec(const regex_t *r, /* compiled RE */ 1414*7c478bd9Sstevel@tonic-gate const wchar_t *astring, /* subject string */ 1415*7c478bd9Sstevel@tonic-gate size_t nsub, /* number of subexpressions */ 1416*7c478bd9Sstevel@tonic-gate int_regwmatch_t *sub, /* subexpression pointers */ 1417*7c478bd9Sstevel@tonic-gate int flags) 1418*7c478bd9Sstevel@tonic-gate { 1419*7c478bd9Sstevel@tonic-gate char *mbs; 1420*7c478bd9Sstevel@tonic-gate regmatch_t *mbsub = NULL; 1421*7c478bd9Sstevel@tonic-gate register int i; 1422*7c478bd9Sstevel@tonic-gate 1423*7c478bd9Sstevel@tonic-gate if ((mbs = wcstombsdup((wchar_t *) astring)) == NULL) 1424*7c478bd9Sstevel@tonic-gate return 
(REG_ESPACE); 1425*7c478bd9Sstevel@tonic-gate 1426*7c478bd9Sstevel@tonic-gate if (nsub > 0 && sub) { 1427*7c478bd9Sstevel@tonic-gate if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL) 1428*7c478bd9Sstevel@tonic-gate return (REG_ESPACE); 1429*7c478bd9Sstevel@tonic-gate } 1430*7c478bd9Sstevel@tonic-gate 1431*7c478bd9Sstevel@tonic-gate i = regexec(r, mbs, nsub, mbsub, flags); 1432*7c478bd9Sstevel@tonic-gate 1433*7c478bd9Sstevel@tonic-gate /* Now, adjust the pointers/counts in sub */ 1434*7c478bd9Sstevel@tonic-gate if (i == REG_OK && nsub > 0 && mbsub) { 1435*7c478bd9Sstevel@tonic-gate register int j, k; 1436*7c478bd9Sstevel@tonic-gate 1437*7c478bd9Sstevel@tonic-gate for (j = 0; j < nsub; j++) { 1438*7c478bd9Sstevel@tonic-gate regmatch_t *ms = &mbsub[j]; 1439*7c478bd9Sstevel@tonic-gate int_regwmatch_t *ws = &sub[j]; 1440*7c478bd9Sstevel@tonic-gate 1441*7c478bd9Sstevel@tonic-gate if ((k = ms->rm_so) >= 0) { 1442*7c478bd9Sstevel@tonic-gate ws->rm_so = wcoff(astring, k); 1443*7c478bd9Sstevel@tonic-gate ws->rm_sp = astring + ws->rm_so; 1444*7c478bd9Sstevel@tonic-gate } 1445*7c478bd9Sstevel@tonic-gate if ((k = ms->rm_eo) >= 0) { 1446*7c478bd9Sstevel@tonic-gate ws->rm_eo = wcoff(astring, k); 1447*7c478bd9Sstevel@tonic-gate ws->rm_ep = astring + ws->rm_eo; 1448*7c478bd9Sstevel@tonic-gate } 1449*7c478bd9Sstevel@tonic-gate } 1450*7c478bd9Sstevel@tonic-gate } 1451*7c478bd9Sstevel@tonic-gate 1452*7c478bd9Sstevel@tonic-gate free(mbs); 1453*7c478bd9Sstevel@tonic-gate if (mbsub) 1454*7c478bd9Sstevel@tonic-gate free(mbsub); 1455*7c478bd9Sstevel@tonic-gate return (i); 1456*7c478bd9Sstevel@tonic-gate } 1457*7c478bd9Sstevel@tonic-gate 1458*7c478bd9Sstevel@tonic-gate int 1459*7c478bd9Sstevel@tonic-gate int_regwdosuba(register regex_t *rp, /* compiled RE: Pattern */ 1460*7c478bd9Sstevel@tonic-gate const wchar_t *rpl, /* replacement string: /rpl/ */ 1461*7c478bd9Sstevel@tonic-gate const wchar_t *src, /* source string */ 1462*7c478bd9Sstevel@tonic-gate wchar_t **dstp, /* destination 
string */ 1463*7c478bd9Sstevel@tonic-gate int len, /* destination length */ 1464*7c478bd9Sstevel@tonic-gate int *globp) /* IN: occurence, 0 for all; OUT: substitutions */ 1465*7c478bd9Sstevel@tonic-gate { 1466*7c478bd9Sstevel@tonic-gate wchar_t *dst, *odst; 1467*7c478bd9Sstevel@tonic-gate register const wchar_t *ip, *xp; 1468*7c478bd9Sstevel@tonic-gate register wchar_t *op; 1469*7c478bd9Sstevel@tonic-gate register int i; 1470*7c478bd9Sstevel@tonic-gate register wchar_t c; 1471*7c478bd9Sstevel@tonic-gate int glob, iglob = *globp, oglob = 0; 1472*7c478bd9Sstevel@tonic-gate #define NSUB 10 1473*7c478bd9Sstevel@tonic-gate int_regwmatch_t rm[NSUB], *rmp; 1474*7c478bd9Sstevel@tonic-gate int flags; 1475*7c478bd9Sstevel@tonic-gate wchar_t *end; 1476*7c478bd9Sstevel@tonic-gate int regerr; 1477*7c478bd9Sstevel@tonic-gate 1478*7c478bd9Sstevel@tonic-gate /* handle overflow of dst. we need "i" more bytes */ 1479*7c478bd9Sstevel@tonic-gate #ifdef OVERFLOW 1480*7c478bd9Sstevel@tonic-gate #undef OVERFLOW 1481*7c478bd9Sstevel@tonic-gate #define OVERFLOW(i) if (1) { \ 1482*7c478bd9Sstevel@tonic-gate int pos = op - dst; \ 1483*7c478bd9Sstevel@tonic-gate dst = (wchar_t *) realloc(odst = dst, \ 1484*7c478bd9Sstevel@tonic-gate (len += len + i) * sizeof (wchar_t)); \ 1485*7c478bd9Sstevel@tonic-gate if (dst == NULL) \ 1486*7c478bd9Sstevel@tonic-gate goto nospace; \ 1487*7c478bd9Sstevel@tonic-gate op = dst + pos; \ 1488*7c478bd9Sstevel@tonic-gate end = dst + len; \ 1489*7c478bd9Sstevel@tonic-gate } else 1490*7c478bd9Sstevel@tonic-gate #endif 1491*7c478bd9Sstevel@tonic-gate 1492*7c478bd9Sstevel@tonic-gate *dstp = dst = (wchar_t *) malloc(len * sizeof (wchar_t)); 1493*7c478bd9Sstevel@tonic-gate if (dst == NULL) 1494*7c478bd9Sstevel@tonic-gate return (REG_ESPACE); 1495*7c478bd9Sstevel@tonic-gate 1496*7c478bd9Sstevel@tonic-gate if (rp == NULL || rpl == NULL || src == NULL || dst == NULL) 1497*7c478bd9Sstevel@tonic-gate return (REG_EFATAL); 1498*7c478bd9Sstevel@tonic-gate 
1499*7c478bd9Sstevel@tonic-gate glob = 0; /* match count */ 1500*7c478bd9Sstevel@tonic-gate ip = src; /* source position */ 1501*7c478bd9Sstevel@tonic-gate op = dst; /* destination position */ 1502*7c478bd9Sstevel@tonic-gate end = dst + len; 1503*7c478bd9Sstevel@tonic-gate 1504*7c478bd9Sstevel@tonic-gate flags = 0; 1505*7c478bd9Sstevel@tonic-gate while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) { 1506*7c478bd9Sstevel@tonic-gate /* Copy text preceding match */ 1507*7c478bd9Sstevel@tonic-gate if (op + (i = rm[0].rm_sp - ip) >= end) 1508*7c478bd9Sstevel@tonic-gate OVERFLOW(i); 1509*7c478bd9Sstevel@tonic-gate while (i--) 1510*7c478bd9Sstevel@tonic-gate *op++ = *ip++; 1511*7c478bd9Sstevel@tonic-gate 1512*7c478bd9Sstevel@tonic-gate if (iglob == 0 || ++glob == iglob) { 1513*7c478bd9Sstevel@tonic-gate oglob++; 1514*7c478bd9Sstevel@tonic-gate xp = rpl; /* do substitute */ 1515*7c478bd9Sstevel@tonic-gate } else 1516*7c478bd9Sstevel@tonic-gate xp = L"&"; /* preserve text */ 1517*7c478bd9Sstevel@tonic-gate 1518*7c478bd9Sstevel@tonic-gate /* Perform replacement of matched substing */ 1519*7c478bd9Sstevel@tonic-gate while ((c = *xp++) != '\0') { 1520*7c478bd9Sstevel@tonic-gate rmp = NULL; 1521*7c478bd9Sstevel@tonic-gate if (c == '&') 1522*7c478bd9Sstevel@tonic-gate rmp = &rm[0]; 1523*7c478bd9Sstevel@tonic-gate else if (c == '\\') { 1524*7c478bd9Sstevel@tonic-gate if ('0' <= *xp && *xp <= '9') 1525*7c478bd9Sstevel@tonic-gate rmp = &rm[*xp++ - '0']; 1526*7c478bd9Sstevel@tonic-gate else if (*xp != '\0') 1527*7c478bd9Sstevel@tonic-gate c = *xp++; 1528*7c478bd9Sstevel@tonic-gate } 1529*7c478bd9Sstevel@tonic-gate 1530*7c478bd9Sstevel@tonic-gate if (rmp == NULL) { /* Ordinary character. 
*/ 1531*7c478bd9Sstevel@tonic-gate *op++ = c; 1532*7c478bd9Sstevel@tonic-gate if (op >= end) 1533*7c478bd9Sstevel@tonic-gate OVERFLOW(1); 1534*7c478bd9Sstevel@tonic-gate } else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) { 1535*7c478bd9Sstevel@tonic-gate ip = rmp->rm_sp; 1536*7c478bd9Sstevel@tonic-gate if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end) 1537*7c478bd9Sstevel@tonic-gate OVERFLOW(i); 1538*7c478bd9Sstevel@tonic-gate while (i--) 1539*7c478bd9Sstevel@tonic-gate *op++ = *ip++; 1540*7c478bd9Sstevel@tonic-gate } 1541*7c478bd9Sstevel@tonic-gate } 1542*7c478bd9Sstevel@tonic-gate 1543*7c478bd9Sstevel@tonic-gate ip = rm[0].rm_ep; 1544*7c478bd9Sstevel@tonic-gate if (*ip == '\0') /* If at end break */ 1545*7c478bd9Sstevel@tonic-gate break; 1546*7c478bd9Sstevel@tonic-gate else if (rm[0].rm_sp == rm[0].rm_ep) { 1547*7c478bd9Sstevel@tonic-gate /* If empty match copy next char */ 1548*7c478bd9Sstevel@tonic-gate *op++ = *ip++; 1549*7c478bd9Sstevel@tonic-gate if (op >= end) 1550*7c478bd9Sstevel@tonic-gate OVERFLOW(1); 1551*7c478bd9Sstevel@tonic-gate } 1552*7c478bd9Sstevel@tonic-gate flags = REG_NOTBOL; 1553*7c478bd9Sstevel@tonic-gate } 1554*7c478bd9Sstevel@tonic-gate 1555*7c478bd9Sstevel@tonic-gate if (regerr != REG_OK && regerr != REG_NOMATCH) 1556*7c478bd9Sstevel@tonic-gate return (regerr); 1557*7c478bd9Sstevel@tonic-gate 1558*7c478bd9Sstevel@tonic-gate /* Copy rest of text */ 1559*7c478bd9Sstevel@tonic-gate if (op + (i = wcslen(ip)) >= end) 1560*7c478bd9Sstevel@tonic-gate OVERFLOW(i); 1561*7c478bd9Sstevel@tonic-gate while (i--) 1562*7c478bd9Sstevel@tonic-gate *op++ = *ip++; 1563*7c478bd9Sstevel@tonic-gate *op++ = '\0'; 1564*7c478bd9Sstevel@tonic-gate 1565*7c478bd9Sstevel@tonic-gate if ((*dstp = dst = (wchar_t *) realloc(odst = dst, 1566*7c478bd9Sstevel@tonic-gate sizeof (wchar_t) * (size_t)(op - dst))) == NULL) { 1567*7c478bd9Sstevel@tonic-gate nospace: 1568*7c478bd9Sstevel@tonic-gate free(odst); 1569*7c478bd9Sstevel@tonic-gate return (REG_ESPACE); 
1570*7c478bd9Sstevel@tonic-gate } 1571*7c478bd9Sstevel@tonic-gate 1572*7c478bd9Sstevel@tonic-gate *globp = oglob; 1573*7c478bd9Sstevel@tonic-gate 1574*7c478bd9Sstevel@tonic-gate return ((oglob == 0) ? REG_NOMATCH : REG_OK); 1575*7c478bd9Sstevel@tonic-gate } 1576