/*
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 1992 Diomidis Spinellis.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Diomidis Spinellis of Imperial College, University of London.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26*84441f85SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27*84441f85SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28*84441f85SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29*84441f85SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30*84441f85SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31*84441f85SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32*84441f85SGarrett D'Amore * SUCH DAMAGE. 33*84441f85SGarrett D'Amore */ 34*84441f85SGarrett D'Amore 35*84441f85SGarrett D'Amore #include <sys/types.h> 36*84441f85SGarrett D'Amore #include <sys/stat.h> 37*84441f85SGarrett D'Amore 38*84441f85SGarrett D'Amore #include <ctype.h> 39*84441f85SGarrett D'Amore #include <err.h> 40*84441f85SGarrett D'Amore #include <errno.h> 41*84441f85SGarrett D'Amore #include <fcntl.h> 42*84441f85SGarrett D'Amore #include <limits.h> 43*84441f85SGarrett D'Amore #include <regex.h> 44*84441f85SGarrett D'Amore #include <stdio.h> 45*84441f85SGarrett D'Amore #include <stdlib.h> 46*84441f85SGarrett D'Amore #include <string.h> 47*84441f85SGarrett D'Amore #include <wchar.h> 48*84441f85SGarrett D'Amore #include <libintl.h> 49*84441f85SGarrett D'Amore #include <note.h> 50*84441f85SGarrett D'Amore 51*84441f85SGarrett D'Amore #include "defs.h" 52*84441f85SGarrett D'Amore #include "extern.h" 53*84441f85SGarrett D'Amore 54*84441f85SGarrett D'Amore #define LHSZ 128 55*84441f85SGarrett D'Amore #define LHMASK (LHSZ - 1) 56*84441f85SGarrett D'Amore static struct labhash { 57*84441f85SGarrett D'Amore struct labhash *lh_next; 58*84441f85SGarrett D'Amore uint_t lh_hash; 59*84441f85SGarrett D'Amore struct s_command *lh_cmd; 60*84441f85SGarrett D'Amore int lh_ref; 61*84441f85SGarrett D'Amore } 
*labels[LHSZ]; 62*84441f85SGarrett D'Amore 63*84441f85SGarrett D'Amore static char *compile_addr(char *, struct s_addr *); 64*84441f85SGarrett D'Amore static char *compile_ccl(char **, char *); 65*84441f85SGarrett D'Amore static char *compile_delimited(char *, char *, int); 66*84441f85SGarrett D'Amore static char *compile_flags(char *, struct s_subst *); 67*84441f85SGarrett D'Amore static regex_t *compile_re(char *, int); 68*84441f85SGarrett D'Amore static char *compile_subst(char *, struct s_subst *); 69*84441f85SGarrett D'Amore static char *compile_text(void); 70*84441f85SGarrett D'Amore static char *compile_tr(char *, struct s_tr **); 71*84441f85SGarrett D'Amore static struct s_command 72*84441f85SGarrett D'Amore **compile_stream(struct s_command **); 73*84441f85SGarrett D'Amore static char *duptoeol(char *, const char *); 74*84441f85SGarrett D'Amore static void enterlabel(struct s_command *); 75*84441f85SGarrett D'Amore static struct s_command 76*84441f85SGarrett D'Amore *findlabel(char *); 77*84441f85SGarrett D'Amore static void fixuplabel(struct s_command *, struct s_command *); 78*84441f85SGarrett D'Amore static void uselabel(void); 79*84441f85SGarrett D'Amore 80*84441f85SGarrett D'Amore /* 81*84441f85SGarrett D'Amore * Command specification. This is used to drive the command parser. 
82*84441f85SGarrett D'Amore */ 83*84441f85SGarrett D'Amore struct s_format { 84*84441f85SGarrett D'Amore char code; /* Command code */ 85*84441f85SGarrett D'Amore int naddr; /* Number of address args */ 86*84441f85SGarrett D'Amore enum e_args args; /* Argument type */ 87*84441f85SGarrett D'Amore }; 88*84441f85SGarrett D'Amore 89*84441f85SGarrett D'Amore static struct s_format cmd_fmts[] = { 90*84441f85SGarrett D'Amore {'{', 2, GROUP}, 91*84441f85SGarrett D'Amore {'}', 0, ENDGROUP}, 92*84441f85SGarrett D'Amore {'a', 1, TEXT}, 93*84441f85SGarrett D'Amore {'b', 2, BRANCH}, 94*84441f85SGarrett D'Amore {'c', 2, TEXT}, 95*84441f85SGarrett D'Amore {'d', 2, EMPTY}, 96*84441f85SGarrett D'Amore {'D', 2, EMPTY}, 97*84441f85SGarrett D'Amore {'g', 2, EMPTY}, 98*84441f85SGarrett D'Amore {'G', 2, EMPTY}, 99*84441f85SGarrett D'Amore {'h', 2, EMPTY}, 100*84441f85SGarrett D'Amore {'H', 2, EMPTY}, 101*84441f85SGarrett D'Amore {'i', 1, TEXT}, 102*84441f85SGarrett D'Amore {'l', 2, EMPTY}, 103*84441f85SGarrett D'Amore {'n', 2, EMPTY}, 104*84441f85SGarrett D'Amore {'N', 2, EMPTY}, 105*84441f85SGarrett D'Amore {'p', 2, EMPTY}, 106*84441f85SGarrett D'Amore {'P', 2, EMPTY}, 107*84441f85SGarrett D'Amore {'q', 1, EMPTY}, 108*84441f85SGarrett D'Amore {'r', 1, RFILE}, 109*84441f85SGarrett D'Amore {'s', 2, SUBST}, 110*84441f85SGarrett D'Amore {'t', 2, BRANCH}, 111*84441f85SGarrett D'Amore {'w', 2, WFILE}, 112*84441f85SGarrett D'Amore {'x', 2, EMPTY}, 113*84441f85SGarrett D'Amore {'y', 2, TR}, 114*84441f85SGarrett D'Amore {'!', 2, NONSEL}, 115*84441f85SGarrett D'Amore {':', 0, LABEL}, 116*84441f85SGarrett D'Amore {'#', 0, COMMENT}, 117*84441f85SGarrett D'Amore {'=', 1, EMPTY}, 118*84441f85SGarrett D'Amore {'\0', 0, COMMENT}, 119*84441f85SGarrett D'Amore }; 120*84441f85SGarrett D'Amore 121*84441f85SGarrett D'Amore /* The compiled program. 
*/ 122*84441f85SGarrett D'Amore struct s_command *prog; 123*84441f85SGarrett D'Amore 124*84441f85SGarrett D'Amore /* 125*84441f85SGarrett D'Amore * Compile the program into prog. 126*84441f85SGarrett D'Amore * Initialise appends. 127*84441f85SGarrett D'Amore */ 128*84441f85SGarrett D'Amore void 129*84441f85SGarrett D'Amore compile(void) 130*84441f85SGarrett D'Amore { 131*84441f85SGarrett D'Amore *compile_stream(&prog) = NULL; 132*84441f85SGarrett D'Amore fixuplabel(prog, NULL); 133*84441f85SGarrett D'Amore uselabel(); 134*84441f85SGarrett D'Amore if (appendnum == 0) 135*84441f85SGarrett D'Amore appends = NULL; 136*84441f85SGarrett D'Amore else if ((appends = malloc(sizeof (struct s_appends) * appendnum)) == 137*84441f85SGarrett D'Amore NULL) 138*84441f85SGarrett D'Amore err(1, "malloc"); 139*84441f85SGarrett D'Amore if ((match = malloc((maxnsub + 1) * sizeof (regmatch_t))) == NULL) 140*84441f85SGarrett D'Amore err(1, "malloc"); 141*84441f85SGarrett D'Amore } 142*84441f85SGarrett D'Amore 143*84441f85SGarrett D'Amore #define EATSPACE() do { \ 144*84441f85SGarrett D'Amore if (p) \ 145*84441f85SGarrett D'Amore while (*p && isspace((unsigned char)*p)) \ 146*84441f85SGarrett D'Amore p++; \ 147*84441f85SGarrett D'Amore _NOTE(CONSTCOND) \ 148*84441f85SGarrett D'Amore } while (0) 149*84441f85SGarrett D'Amore 150*84441f85SGarrett D'Amore static struct s_command ** 151*84441f85SGarrett D'Amore compile_stream(struct s_command **link) 152*84441f85SGarrett D'Amore { 153*84441f85SGarrett D'Amore char *p; 154*84441f85SGarrett D'Amore static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 155*84441f85SGarrett D'Amore struct s_command *cmd, *cmd2, *stack; 156*84441f85SGarrett D'Amore struct s_format *fp; 157*84441f85SGarrett D'Amore char re[_POSIX2_LINE_MAX + 1]; 158*84441f85SGarrett D'Amore int naddr; /* Number of addresses */ 159*84441f85SGarrett D'Amore 160*84441f85SGarrett D'Amore stack = 0; 161*84441f85SGarrett D'Amore for (;;) { 162*84441f85SGarrett D'Amore if ((p = 
cu_fgets(lbuf, sizeof (lbuf), NULL)) == NULL) { 163*84441f85SGarrett D'Amore if (stack != 0) 164*84441f85SGarrett D'Amore fatal(_("unexpected EOF (pending }'s)")); 165*84441f85SGarrett D'Amore return (link); 166*84441f85SGarrett D'Amore } 167*84441f85SGarrett D'Amore 168*84441f85SGarrett D'Amore semicolon: EATSPACE(); 169*84441f85SGarrett D'Amore if (p) { 170*84441f85SGarrett D'Amore if (*p == '#' || *p == '\0') 171*84441f85SGarrett D'Amore continue; 172*84441f85SGarrett D'Amore else if (*p == ';') { 173*84441f85SGarrett D'Amore p++; 174*84441f85SGarrett D'Amore goto semicolon; 175*84441f85SGarrett D'Amore } 176*84441f85SGarrett D'Amore } 177*84441f85SGarrett D'Amore if ((*link = cmd = malloc(sizeof (struct s_command))) == NULL) 178*84441f85SGarrett D'Amore err(1, "malloc"); 179*84441f85SGarrett D'Amore link = &cmd->next; 180*84441f85SGarrett D'Amore cmd->startline = cmd->nonsel = 0; 181*84441f85SGarrett D'Amore /* First parse the addresses */ 182*84441f85SGarrett D'Amore naddr = 0; 183*84441f85SGarrett D'Amore 184*84441f85SGarrett D'Amore /* Valid characters to start an address */ 185*84441f85SGarrett D'Amore #define addrchar(c) (strchr("0123456789/\\$", (c))) 186*84441f85SGarrett D'Amore if (addrchar(*p)) { 187*84441f85SGarrett D'Amore naddr++; 188*84441f85SGarrett D'Amore if ((cmd->a1 = malloc(sizeof (struct s_addr))) == NULL) 189*84441f85SGarrett D'Amore err(1, "malloc"); 190*84441f85SGarrett D'Amore p = compile_addr(p, cmd->a1); 191*84441f85SGarrett D'Amore EATSPACE(); /* EXTENSION */ 192*84441f85SGarrett D'Amore if (*p == ',') { 193*84441f85SGarrett D'Amore p++; 194*84441f85SGarrett D'Amore EATSPACE(); /* EXTENSION */ 195*84441f85SGarrett D'Amore naddr++; 196*84441f85SGarrett D'Amore if ((cmd->a2 = malloc(sizeof (struct s_addr))) 197*84441f85SGarrett D'Amore == NULL) 198*84441f85SGarrett D'Amore err(1, "malloc"); 199*84441f85SGarrett D'Amore p = compile_addr(p, cmd->a2); 200*84441f85SGarrett D'Amore EATSPACE(); 201*84441f85SGarrett D'Amore } else 
202*84441f85SGarrett D'Amore cmd->a2 = 0; 203*84441f85SGarrett D'Amore } else 204*84441f85SGarrett D'Amore cmd->a1 = cmd->a2 = 0; 205*84441f85SGarrett D'Amore 206*84441f85SGarrett D'Amore nonsel: /* Now parse the command */ 207*84441f85SGarrett D'Amore if (!*p) 208*84441f85SGarrett D'Amore fatal(_("command expected")); 209*84441f85SGarrett D'Amore cmd->code = *p; 210*84441f85SGarrett D'Amore for (fp = cmd_fmts; fp->code; fp++) 211*84441f85SGarrett D'Amore if (fp->code == *p) 212*84441f85SGarrett D'Amore break; 213*84441f85SGarrett D'Amore if (!fp->code) 214*84441f85SGarrett D'Amore fatal(_("invalid command code %c"), *p); 215*84441f85SGarrett D'Amore if (naddr > fp->naddr) 216*84441f85SGarrett D'Amore fatal(_("command %c expects up to %d address(es), " 217*84441f85SGarrett D'Amore "found %d"), *p, fp->naddr, naddr); 218*84441f85SGarrett D'Amore switch (fp->args) { 219*84441f85SGarrett D'Amore case NONSEL: /* ! */ 220*84441f85SGarrett D'Amore p++; 221*84441f85SGarrett D'Amore EATSPACE(); 222*84441f85SGarrett D'Amore cmd->nonsel = 1; 223*84441f85SGarrett D'Amore goto nonsel; 224*84441f85SGarrett D'Amore case GROUP: /* { */ 225*84441f85SGarrett D'Amore p++; 226*84441f85SGarrett D'Amore EATSPACE(); 227*84441f85SGarrett D'Amore cmd->next = stack; 228*84441f85SGarrett D'Amore stack = cmd; 229*84441f85SGarrett D'Amore link = &cmd->u.c; 230*84441f85SGarrett D'Amore if (*p) 231*84441f85SGarrett D'Amore goto semicolon; 232*84441f85SGarrett D'Amore break; 233*84441f85SGarrett D'Amore case ENDGROUP: 234*84441f85SGarrett D'Amore /* 235*84441f85SGarrett D'Amore * Short-circuit command processing, since end of 236*84441f85SGarrett D'Amore * group is really just a noop. 
237*84441f85SGarrett D'Amore */ 238*84441f85SGarrett D'Amore cmd->nonsel = 1; 239*84441f85SGarrett D'Amore if (stack == 0) 240*84441f85SGarrett D'Amore fatal(_("unexpected }")); 241*84441f85SGarrett D'Amore cmd2 = stack; 242*84441f85SGarrett D'Amore stack = cmd2->next; 243*84441f85SGarrett D'Amore cmd2->next = cmd; 244*84441f85SGarrett D'Amore /*FALLTHROUGH*/ 245*84441f85SGarrett D'Amore case EMPTY: /* d D g G h H l n N p P q x = \0 */ 246*84441f85SGarrett D'Amore p++; 247*84441f85SGarrett D'Amore EATSPACE(); 248*84441f85SGarrett D'Amore if (*p == ';') { 249*84441f85SGarrett D'Amore p++; 250*84441f85SGarrett D'Amore link = &cmd->next; 251*84441f85SGarrett D'Amore goto semicolon; 252*84441f85SGarrett D'Amore } 253*84441f85SGarrett D'Amore if (*p) 254*84441f85SGarrett D'Amore fatal(_("extra characters at the end of %c " 255*84441f85SGarrett D'Amore "command"), cmd->code); 256*84441f85SGarrett D'Amore break; 257*84441f85SGarrett D'Amore case TEXT: /* a c i */ 258*84441f85SGarrett D'Amore p++; 259*84441f85SGarrett D'Amore EATSPACE(); 260*84441f85SGarrett D'Amore if (*p != '\\') 261*84441f85SGarrett D'Amore fatal(_("command %c expects \\ " 262*84441f85SGarrett D'Amore "followed by text"), cmd->code); 263*84441f85SGarrett D'Amore p++; 264*84441f85SGarrett D'Amore EATSPACE(); 265*84441f85SGarrett D'Amore if (*p) 266*84441f85SGarrett D'Amore fatal(_("extra characters after \\ " 267*84441f85SGarrett D'Amore "at the end of %c command"), 268*84441f85SGarrett D'Amore cmd->code); 269*84441f85SGarrett D'Amore cmd->t = compile_text(); 270*84441f85SGarrett D'Amore break; 271*84441f85SGarrett D'Amore case COMMENT: /* \0 # */ 272*84441f85SGarrett D'Amore break; 273*84441f85SGarrett D'Amore case WFILE: /* w */ 274*84441f85SGarrett D'Amore p++; 275*84441f85SGarrett D'Amore EATSPACE(); 276*84441f85SGarrett D'Amore if (*p == '\0') 277*84441f85SGarrett D'Amore fatal(_("filename expected")); 278*84441f85SGarrett D'Amore cmd->t = duptoeol(p, "w command"); 279*84441f85SGarrett D'Amore if 
(aflag) 280*84441f85SGarrett D'Amore cmd->u.fd = -1; 281*84441f85SGarrett D'Amore else if ((cmd->u.fd = open(p, 282*84441f85SGarrett D'Amore O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1) 283*84441f85SGarrett D'Amore err(1, "%s", p); 284*84441f85SGarrett D'Amore break; 285*84441f85SGarrett D'Amore case RFILE: /* r */ 286*84441f85SGarrett D'Amore p++; 287*84441f85SGarrett D'Amore EATSPACE(); 288*84441f85SGarrett D'Amore if (*p == '\0') 289*84441f85SGarrett D'Amore fatal(_("filename expected")); 290*84441f85SGarrett D'Amore else 291*84441f85SGarrett D'Amore cmd->t = duptoeol(p, "read command"); 292*84441f85SGarrett D'Amore break; 293*84441f85SGarrett D'Amore case BRANCH: /* b t */ 294*84441f85SGarrett D'Amore p++; 295*84441f85SGarrett D'Amore EATSPACE(); 296*84441f85SGarrett D'Amore if (*p == '\0') 297*84441f85SGarrett D'Amore cmd->t = NULL; 298*84441f85SGarrett D'Amore else 299*84441f85SGarrett D'Amore cmd->t = duptoeol(p, "branch"); 300*84441f85SGarrett D'Amore break; 301*84441f85SGarrett D'Amore case LABEL: /* : */ 302*84441f85SGarrett D'Amore p++; 303*84441f85SGarrett D'Amore EATSPACE(); 304*84441f85SGarrett D'Amore cmd->t = duptoeol(p, "label"); 305*84441f85SGarrett D'Amore if (strlen(p) == 0) 306*84441f85SGarrett D'Amore fatal(_("empty label")); 307*84441f85SGarrett D'Amore enterlabel(cmd); 308*84441f85SGarrett D'Amore break; 309*84441f85SGarrett D'Amore case SUBST: /* s */ 310*84441f85SGarrett D'Amore p++; 311*84441f85SGarrett D'Amore if (*p == '\0' || *p == '\\') 312*84441f85SGarrett D'Amore fatal(_("substitute pattern can not " 313*84441f85SGarrett D'Amore "be delimited by newline or backslash")); 314*84441f85SGarrett D'Amore if ((cmd->u.s = calloc(1, sizeof (struct s_subst))) == 315*84441f85SGarrett D'Amore NULL) 316*84441f85SGarrett D'Amore err(1, "malloc"); 317*84441f85SGarrett D'Amore p = compile_delimited(p, re, 0); 318*84441f85SGarrett D'Amore if (p == NULL) 319*84441f85SGarrett D'Amore fatal(_("unterminated substitute pattern")); 
320*84441f85SGarrett D'Amore 321*84441f85SGarrett D'Amore /* Compile RE with no case sensitivity temporarily */ 322*84441f85SGarrett D'Amore if (*re == '\0') 323*84441f85SGarrett D'Amore cmd->u.s->re = NULL; 324*84441f85SGarrett D'Amore else 325*84441f85SGarrett D'Amore cmd->u.s->re = compile_re(re, 0); 326*84441f85SGarrett D'Amore --p; 327*84441f85SGarrett D'Amore p = compile_subst(p, cmd->u.s); 328*84441f85SGarrett D'Amore p = compile_flags(p, cmd->u.s); 329*84441f85SGarrett D'Amore 330*84441f85SGarrett D'Amore /* Recompile RE with case sens. from "I" flag if any */ 331*84441f85SGarrett D'Amore if (*re == '\0') 332*84441f85SGarrett D'Amore cmd->u.s->re = NULL; 333*84441f85SGarrett D'Amore else 334*84441f85SGarrett D'Amore cmd->u.s->re = compile_re(re, cmd->u.s->icase); 335*84441f85SGarrett D'Amore EATSPACE(); 336*84441f85SGarrett D'Amore if (*p == ';') { 337*84441f85SGarrett D'Amore p++; 338*84441f85SGarrett D'Amore link = &cmd->next; 339*84441f85SGarrett D'Amore goto semicolon; 340*84441f85SGarrett D'Amore } 341*84441f85SGarrett D'Amore break; 342*84441f85SGarrett D'Amore case TR: /* y */ 343*84441f85SGarrett D'Amore p++; 344*84441f85SGarrett D'Amore p = compile_tr(p, &cmd->u.y); 345*84441f85SGarrett D'Amore EATSPACE(); 346*84441f85SGarrett D'Amore if (*p == ';') { 347*84441f85SGarrett D'Amore p++; 348*84441f85SGarrett D'Amore link = &cmd->next; 349*84441f85SGarrett D'Amore goto semicolon; 350*84441f85SGarrett D'Amore } 351*84441f85SGarrett D'Amore if (*p) 352*84441f85SGarrett D'Amore fatal(_("extra text at the end of a " 353*84441f85SGarrett D'Amore "transform command")); 354*84441f85SGarrett D'Amore break; 355*84441f85SGarrett D'Amore } 356*84441f85SGarrett D'Amore } 357*84441f85SGarrett D'Amore } 358*84441f85SGarrett D'Amore 359*84441f85SGarrett D'Amore /* 360*84441f85SGarrett D'Amore * Get a delimited string. P points to the delimeter of the string; d points 361*84441f85SGarrett D'Amore * to a buffer area. 
Newline and delimiter escapes are processed; other 362*84441f85SGarrett D'Amore * escapes are ignored. 363*84441f85SGarrett D'Amore * 364*84441f85SGarrett D'Amore * Returns a pointer to the first character after the final delimiter or NULL 365*84441f85SGarrett D'Amore * in the case of a non-terminated string. The character array d is filled 366*84441f85SGarrett D'Amore * with the processed string. 367*84441f85SGarrett D'Amore */ 368*84441f85SGarrett D'Amore static char * 369*84441f85SGarrett D'Amore compile_delimited(char *p, char *d, int is_tr) 370*84441f85SGarrett D'Amore { 371*84441f85SGarrett D'Amore char c; 372*84441f85SGarrett D'Amore 373*84441f85SGarrett D'Amore c = *p++; 374*84441f85SGarrett D'Amore if (c == '\0') 375*84441f85SGarrett D'Amore return (NULL); 376*84441f85SGarrett D'Amore else if (c == '\\') 377*84441f85SGarrett D'Amore fatal(_("\\ can not be used as a string delimiter")); 378*84441f85SGarrett D'Amore else if (c == '\n') 379*84441f85SGarrett D'Amore fatal(_("newline can not be used as a string delimiter")); 380*84441f85SGarrett D'Amore while (*p) { 381*84441f85SGarrett D'Amore if (*p == '[' && *p != c) { 382*84441f85SGarrett D'Amore if ((d = compile_ccl(&p, d)) == NULL) 383*84441f85SGarrett D'Amore fatal(_("unbalanced brackets ([])")); 384*84441f85SGarrett D'Amore continue; 385*84441f85SGarrett D'Amore } else if (*p == '\\' && p[1] == '[') { 386*84441f85SGarrett D'Amore *d++ = *p++; 387*84441f85SGarrett D'Amore } else if (*p == '\\' && p[1] == c) 388*84441f85SGarrett D'Amore p++; 389*84441f85SGarrett D'Amore else if (*p == '\\' && p[1] == 'n') { 390*84441f85SGarrett D'Amore *d++ = '\n'; 391*84441f85SGarrett D'Amore p += 2; 392*84441f85SGarrett D'Amore continue; 393*84441f85SGarrett D'Amore } else if (*p == '\\' && p[1] == '\\') { 394*84441f85SGarrett D'Amore if (is_tr) 395*84441f85SGarrett D'Amore p++; 396*84441f85SGarrett D'Amore else 397*84441f85SGarrett D'Amore *d++ = *p++; 398*84441f85SGarrett D'Amore } else if (*p == c) { 
399*84441f85SGarrett D'Amore *d = '\0'; 400*84441f85SGarrett D'Amore return (p + 1); 401*84441f85SGarrett D'Amore } 402*84441f85SGarrett D'Amore *d++ = *p++; 403*84441f85SGarrett D'Amore } 404*84441f85SGarrett D'Amore return (NULL); 405*84441f85SGarrett D'Amore } 406*84441f85SGarrett D'Amore 407*84441f85SGarrett D'Amore 408*84441f85SGarrett D'Amore /* compile_ccl: expand a POSIX character class */ 409*84441f85SGarrett D'Amore static char * 410*84441f85SGarrett D'Amore compile_ccl(char **sp, char *t) 411*84441f85SGarrett D'Amore { 412*84441f85SGarrett D'Amore int c, d; 413*84441f85SGarrett D'Amore char *s = *sp; 414*84441f85SGarrett D'Amore 415*84441f85SGarrett D'Amore *t++ = *s++; 416*84441f85SGarrett D'Amore if (*s == '^') 417*84441f85SGarrett D'Amore *t++ = *s++; 418*84441f85SGarrett D'Amore if (*s == ']') 419*84441f85SGarrett D'Amore *t++ = *s++; 420*84441f85SGarrett D'Amore for (; *s && (*t = *s) != ']'; s++, t++) 421*84441f85SGarrett D'Amore if (*s == '[' && 422*84441f85SGarrett D'Amore ((d = *(s+1)) == '.' || d == ':' || d == '=')) { 423*84441f85SGarrett D'Amore *++t = *++s, t++, s++; 424*84441f85SGarrett D'Amore for (c = *s; (*t = *s) != ']' || c != d; s++, t++) 425*84441f85SGarrett D'Amore if ((c = *s) == '\0') 426*84441f85SGarrett D'Amore return (NULL); 427*84441f85SGarrett D'Amore } 428*84441f85SGarrett D'Amore return ((*s == ']') ? *sp = ++s, ++t : NULL); 429*84441f85SGarrett D'Amore } 430*84441f85SGarrett D'Amore 431*84441f85SGarrett D'Amore /* 432*84441f85SGarrett D'Amore * Compiles the regular expression in RE and returns a pointer to the compiled 433*84441f85SGarrett D'Amore * regular expression. 434*84441f85SGarrett D'Amore * Cflags are passed to regcomp. 
435*84441f85SGarrett D'Amore */ 436*84441f85SGarrett D'Amore static regex_t * 437*84441f85SGarrett D'Amore compile_re(char *re, int case_insensitive) 438*84441f85SGarrett D'Amore { 439*84441f85SGarrett D'Amore regex_t *rep; 440*84441f85SGarrett D'Amore int eval, flags; 441*84441f85SGarrett D'Amore 442*84441f85SGarrett D'Amore 443*84441f85SGarrett D'Amore flags = rflags; 444*84441f85SGarrett D'Amore if (case_insensitive) 445*84441f85SGarrett D'Amore flags |= REG_ICASE; 446*84441f85SGarrett D'Amore if ((rep = malloc(sizeof (regex_t))) == NULL) 447*84441f85SGarrett D'Amore err(1, "malloc"); 448*84441f85SGarrett D'Amore if ((eval = regcomp(rep, re, flags)) != 0) 449*84441f85SGarrett D'Amore fatal(_("RE error: %s"), strregerror(eval, rep)); 450*84441f85SGarrett D'Amore if (maxnsub < rep->re_nsub) 451*84441f85SGarrett D'Amore maxnsub = rep->re_nsub; 452*84441f85SGarrett D'Amore return (rep); 453*84441f85SGarrett D'Amore } 454*84441f85SGarrett D'Amore 455*84441f85SGarrett D'Amore /* 456*84441f85SGarrett D'Amore * Compile the substitution string of a regular expression and set res to 457*84441f85SGarrett D'Amore * point to a saved copy of it. Nsub is the number of parenthesized regular 458*84441f85SGarrett D'Amore * expressions. 
459*84441f85SGarrett D'Amore */ 460*84441f85SGarrett D'Amore static char * 461*84441f85SGarrett D'Amore compile_subst(char *p, struct s_subst *s) 462*84441f85SGarrett D'Amore { 463*84441f85SGarrett D'Amore static char lbuf[_POSIX2_LINE_MAX + 1]; 464*84441f85SGarrett D'Amore int asize; 465*84441f85SGarrett D'Amore uintptr_t size; 466*84441f85SGarrett D'Amore uchar_t ref; 467*84441f85SGarrett D'Amore char c, *text, *op, *sp; 468*84441f85SGarrett D'Amore int more = 1, sawesc = 0; 469*84441f85SGarrett D'Amore 470*84441f85SGarrett D'Amore c = *p++; /* Terminator character */ 471*84441f85SGarrett D'Amore if (c == '\0') 472*84441f85SGarrett D'Amore return (NULL); 473*84441f85SGarrett D'Amore 474*84441f85SGarrett D'Amore s->maxbref = 0; 475*84441f85SGarrett D'Amore s->linenum = linenum; 476*84441f85SGarrett D'Amore asize = 2 * _POSIX2_LINE_MAX + 1; 477*84441f85SGarrett D'Amore if ((text = malloc(asize)) == NULL) 478*84441f85SGarrett D'Amore err(1, "malloc"); 479*84441f85SGarrett D'Amore size = 0; 480*84441f85SGarrett D'Amore do { 481*84441f85SGarrett D'Amore op = sp = text + size; 482*84441f85SGarrett D'Amore for (; *p; p++) { 483*84441f85SGarrett D'Amore if (*p == '\\' || sawesc) { 484*84441f85SGarrett D'Amore /* 485*84441f85SGarrett D'Amore * If this is a continuation from the last 486*84441f85SGarrett D'Amore * buffer, we won't have a character to 487*84441f85SGarrett D'Amore * skip over. 488*84441f85SGarrett D'Amore */ 489*84441f85SGarrett D'Amore if (sawesc) 490*84441f85SGarrett D'Amore sawesc = 0; 491*84441f85SGarrett D'Amore else 492*84441f85SGarrett D'Amore p++; 493*84441f85SGarrett D'Amore 494*84441f85SGarrett D'Amore if (*p == '\0') { 495*84441f85SGarrett D'Amore /* 496*84441f85SGarrett D'Amore * This escaped character is continued 497*84441f85SGarrett D'Amore * in the next part of the line. 
Note 498*84441f85SGarrett D'Amore * this fact, then cause the loop to 499*84441f85SGarrett D'Amore * exit w/ normal EOL case and reenter 500*84441f85SGarrett D'Amore * above with the new buffer. 501*84441f85SGarrett D'Amore */ 502*84441f85SGarrett D'Amore sawesc = 1; 503*84441f85SGarrett D'Amore p--; 504*84441f85SGarrett D'Amore continue; 505*84441f85SGarrett D'Amore } else if (strchr("123456789", *p) != NULL) { 506*84441f85SGarrett D'Amore *sp++ = '\\'; 507*84441f85SGarrett D'Amore ref = *p - '0'; 508*84441f85SGarrett D'Amore if (s->re != NULL && 509*84441f85SGarrett D'Amore ref > s->re->re_nsub) 510*84441f85SGarrett D'Amore fatal(_("not defined in " 511*84441f85SGarrett D'Amore "the RE: \\%c"), *p); 512*84441f85SGarrett D'Amore if (s->maxbref < ref) 513*84441f85SGarrett D'Amore s->maxbref = ref; 514*84441f85SGarrett D'Amore } else if (*p == '&' || *p == '\\') 515*84441f85SGarrett D'Amore *sp++ = '\\'; 516*84441f85SGarrett D'Amore } else if (*p == c) { 517*84441f85SGarrett D'Amore if (*++p == '\0' && more) { 518*84441f85SGarrett D'Amore if (cu_fgets(lbuf, sizeof (lbuf), 519*84441f85SGarrett D'Amore &more)) 520*84441f85SGarrett D'Amore p = lbuf; 521*84441f85SGarrett D'Amore } 522*84441f85SGarrett D'Amore *sp++ = '\0'; 523*84441f85SGarrett D'Amore size += (uintptr_t)sp - (uintptr_t)op; 524*84441f85SGarrett D'Amore if ((s->new = realloc(text, size)) == NULL) 525*84441f85SGarrett D'Amore err(1, "realloc"); 526*84441f85SGarrett D'Amore return (p); 527*84441f85SGarrett D'Amore } else if (*p == '\n') { 528*84441f85SGarrett D'Amore fatal(_("unescaped newline inside " 529*84441f85SGarrett D'Amore "substitute pattern")); 530*84441f85SGarrett D'Amore /* NOTREACHED */ 531*84441f85SGarrett D'Amore } 532*84441f85SGarrett D'Amore *sp++ = *p; 533*84441f85SGarrett D'Amore } 534*84441f85SGarrett D'Amore size += (uintptr_t)sp - (uintptr_t)op; 535*84441f85SGarrett D'Amore if (asize - size < _POSIX2_LINE_MAX + 1) { 536*84441f85SGarrett D'Amore asize *= 2; 537*84441f85SGarrett D'Amore 
if ((text = realloc(text, asize)) == NULL) 538*84441f85SGarrett D'Amore err(1, "realloc"); 539*84441f85SGarrett D'Amore } 540*84441f85SGarrett D'Amore } while (cu_fgets(p = lbuf, sizeof (lbuf), &more)); 541*84441f85SGarrett D'Amore fatal(_("unterminated substitute in regular expression")); 542*84441f85SGarrett D'Amore return (NULL); 543*84441f85SGarrett D'Amore } 544*84441f85SGarrett D'Amore 545*84441f85SGarrett D'Amore /* 546*84441f85SGarrett D'Amore * Compile the flags of the s command 547*84441f85SGarrett D'Amore */ 548*84441f85SGarrett D'Amore static char * 549*84441f85SGarrett D'Amore compile_flags(char *p, struct s_subst *s) 550*84441f85SGarrett D'Amore { 551*84441f85SGarrett D'Amore int gn; /* True if we have seen g or n */ 552*84441f85SGarrett D'Amore unsigned long nval; 553*84441f85SGarrett D'Amore char wfile[_POSIX2_LINE_MAX + 1], *q; 554*84441f85SGarrett D'Amore 555*84441f85SGarrett D'Amore s->n = 1; /* Default */ 556*84441f85SGarrett D'Amore s->p = 0; 557*84441f85SGarrett D'Amore s->wfile = NULL; 558*84441f85SGarrett D'Amore s->wfd = -1; 559*84441f85SGarrett D'Amore s->icase = 0; 560*84441f85SGarrett D'Amore gn = 0; 561*84441f85SGarrett D'Amore for (;;) { 562*84441f85SGarrett D'Amore EATSPACE(); /* EXTENSION */ 563*84441f85SGarrett D'Amore switch (*p) { 564*84441f85SGarrett D'Amore case 'g': 565*84441f85SGarrett D'Amore if (gn) 566*84441f85SGarrett D'Amore fatal(_("more than one number or " 567*84441f85SGarrett D'Amore "'g' in substitute flags")); 568*84441f85SGarrett D'Amore gn = 1; 569*84441f85SGarrett D'Amore s->n = 0; 570*84441f85SGarrett D'Amore break; 571*84441f85SGarrett D'Amore case '\0': 572*84441f85SGarrett D'Amore case '\n': 573*84441f85SGarrett D'Amore case ';': 574*84441f85SGarrett D'Amore return (p); 575*84441f85SGarrett D'Amore case 'p': 576*84441f85SGarrett D'Amore s->p = 1; 577*84441f85SGarrett D'Amore break; 578*84441f85SGarrett D'Amore case 'I': 579*84441f85SGarrett D'Amore s->icase = 1; 580*84441f85SGarrett D'Amore break; 
581*84441f85SGarrett D'Amore case '1': case '2': case '3': 582*84441f85SGarrett D'Amore case '4': case '5': case '6': 583*84441f85SGarrett D'Amore case '7': case '8': case '9': 584*84441f85SGarrett D'Amore if (gn) 585*84441f85SGarrett D'Amore fatal(_("more than one number or " 586*84441f85SGarrett D'Amore "'g' in substitute flags")); 587*84441f85SGarrett D'Amore gn = 1; 588*84441f85SGarrett D'Amore errno = 0; 589*84441f85SGarrett D'Amore nval = strtol(p, &p, 10); 590*84441f85SGarrett D'Amore if (errno == ERANGE || nval > INT_MAX) 591*84441f85SGarrett D'Amore fatal(_("overflow in the 'N' substitute flag")); 592*84441f85SGarrett D'Amore s->n = nval; 593*84441f85SGarrett D'Amore p--; 594*84441f85SGarrett D'Amore break; 595*84441f85SGarrett D'Amore case 'w': 596*84441f85SGarrett D'Amore p++; 597*84441f85SGarrett D'Amore #ifdef HISTORIC_PRACTICE 598*84441f85SGarrett D'Amore if (*p != ' ') { 599*84441f85SGarrett D'Amore fatal(_("space missing before w wfile")); 600*84441f85SGarrett D'Amore return (p); 601*84441f85SGarrett D'Amore } 602*84441f85SGarrett D'Amore #endif 603*84441f85SGarrett D'Amore EATSPACE(); 604*84441f85SGarrett D'Amore q = wfile; 605*84441f85SGarrett D'Amore while (*p) { 606*84441f85SGarrett D'Amore if (*p == '\n') 607*84441f85SGarrett D'Amore break; 608*84441f85SGarrett D'Amore *q++ = *p++; 609*84441f85SGarrett D'Amore } 610*84441f85SGarrett D'Amore *q = '\0'; 611*84441f85SGarrett D'Amore if (q == wfile) 612*84441f85SGarrett D'Amore fatal(_("no wfile specified")); 613*84441f85SGarrett D'Amore s->wfile = strdup(wfile); 614*84441f85SGarrett D'Amore if (!aflag && (s->wfd = open(wfile, 615*84441f85SGarrett D'Amore O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1) 616*84441f85SGarrett D'Amore err(1, "%s", wfile); 617*84441f85SGarrett D'Amore return (p); 618*84441f85SGarrett D'Amore default: 619*84441f85SGarrett D'Amore fatal(_("bad flag in substitute command: '%c'"), *p); 620*84441f85SGarrett D'Amore break; 621*84441f85SGarrett D'Amore } 622*84441f85SGarrett 
D'Amore p++; 623*84441f85SGarrett D'Amore } 624*84441f85SGarrett D'Amore } 625*84441f85SGarrett D'Amore 626*84441f85SGarrett D'Amore /* 627*84441f85SGarrett D'Amore * Compile a translation set of strings into a lookup table. 628*84441f85SGarrett D'Amore */ 629*84441f85SGarrett D'Amore static char * 630*84441f85SGarrett D'Amore compile_tr(char *p, struct s_tr **py) 631*84441f85SGarrett D'Amore { 632*84441f85SGarrett D'Amore struct s_tr *y; 633*84441f85SGarrett D'Amore int i; 634*84441f85SGarrett D'Amore const char *op, *np; 635*84441f85SGarrett D'Amore char old[_POSIX2_LINE_MAX + 1]; 636*84441f85SGarrett D'Amore char new[_POSIX2_LINE_MAX + 1]; 637*84441f85SGarrett D'Amore size_t oclen, oldlen, nclen, newlen; 638*84441f85SGarrett D'Amore mbstate_t mbs1, mbs2; 639*84441f85SGarrett D'Amore 640*84441f85SGarrett D'Amore if ((*py = y = malloc(sizeof (*y))) == NULL) 641*84441f85SGarrett D'Amore err(1, NULL); 642*84441f85SGarrett D'Amore y->multis = NULL; 643*84441f85SGarrett D'Amore y->nmultis = 0; 644*84441f85SGarrett D'Amore 645*84441f85SGarrett D'Amore if (*p == '\0' || *p == '\\') 646*84441f85SGarrett D'Amore fatal(_("transform pattern can not be delimited by " 647*84441f85SGarrett D'Amore "newline or backslash")); 648*84441f85SGarrett D'Amore p = compile_delimited(p, old, 1); 649*84441f85SGarrett D'Amore if (p == NULL) 650*84441f85SGarrett D'Amore fatal(_("unterminated transform source string")); 651*84441f85SGarrett D'Amore p = compile_delimited(p - 1, new, 1); 652*84441f85SGarrett D'Amore if (p == NULL) 653*84441f85SGarrett D'Amore fatal(_("unterminated transform target string")); 654*84441f85SGarrett D'Amore EATSPACE(); 655*84441f85SGarrett D'Amore op = old; 656*84441f85SGarrett D'Amore oldlen = mbsrtowcs(NULL, &op, 0, NULL); 657*84441f85SGarrett D'Amore if (oldlen == (size_t)-1) 658*84441f85SGarrett D'Amore err(1, NULL); 659*84441f85SGarrett D'Amore np = new; 660*84441f85SGarrett D'Amore newlen = mbsrtowcs(NULL, &np, 0, NULL); 661*84441f85SGarrett D'Amore if 
(newlen == (size_t)-1) 662*84441f85SGarrett D'Amore err(1, NULL); 663*84441f85SGarrett D'Amore if (newlen != oldlen) 664*84441f85SGarrett D'Amore fatal(_("transform strings are not the same length")); 665*84441f85SGarrett D'Amore if (MB_CUR_MAX == 1) { 666*84441f85SGarrett D'Amore /* 667*84441f85SGarrett D'Amore * The single-byte encoding case is easy: generate a 668*84441f85SGarrett D'Amore * lookup table. 669*84441f85SGarrett D'Amore */ 670*84441f85SGarrett D'Amore for (i = 0; i <= UCHAR_MAX; i++) 671*84441f85SGarrett D'Amore y->bytetab[i] = (char)i; 672*84441f85SGarrett D'Amore for (; *op; op++, np++) 673*84441f85SGarrett D'Amore y->bytetab[(uchar_t)*op] = *np; 674*84441f85SGarrett D'Amore } else { 675*84441f85SGarrett D'Amore /* 676*84441f85SGarrett D'Amore * Multi-byte encoding case: generate a lookup table as 677*84441f85SGarrett D'Amore * above, but only for single-byte characters. The first 678*84441f85SGarrett D'Amore * bytes of multi-byte characters have their lookup table 679*84441f85SGarrett D'Amore * entries set to 0, which causes do_tr() to search through 680*84441f85SGarrett D'Amore * an auxiliary vector of multi-byte mappings. 681*84441f85SGarrett D'Amore */ 682*84441f85SGarrett D'Amore (void) memset(&mbs1, 0, sizeof (mbs1)); 683*84441f85SGarrett D'Amore (void) memset(&mbs2, 0, sizeof (mbs2)); 684*84441f85SGarrett D'Amore for (i = 0; i <= UCHAR_MAX; i++) 685*84441f85SGarrett D'Amore y->bytetab[i] = (btowc(i) != WEOF) ? 
(uchar_t)i : 0; 686*84441f85SGarrett D'Amore while (*op != '\0') { 687*84441f85SGarrett D'Amore oclen = mbrlen(op, MB_LEN_MAX, &mbs1); 688*84441f85SGarrett D'Amore if (oclen == (size_t)-1 || oclen == (size_t)-2) 689*84441f85SGarrett D'Amore errx(1, "%s", strerror(EILSEQ)); 690*84441f85SGarrett D'Amore nclen = mbrlen(np, MB_LEN_MAX, &mbs2); 691*84441f85SGarrett D'Amore if (nclen == (size_t)-1 || nclen == (size_t)-2) 692*84441f85SGarrett D'Amore errx(1, "%s", strerror(EILSEQ)); 693*84441f85SGarrett D'Amore if (oclen == 1 && nclen == 1) 694*84441f85SGarrett D'Amore y->bytetab[(uchar_t)*op] = *np; 695*84441f85SGarrett D'Amore else { 696*84441f85SGarrett D'Amore y->bytetab[(uchar_t)*op] = 0; 697*84441f85SGarrett D'Amore y->multis = realloc(y->multis, 698*84441f85SGarrett D'Amore (y->nmultis + 1) * sizeof (*y->multis)); 699*84441f85SGarrett D'Amore if (y->multis == NULL) 700*84441f85SGarrett D'Amore err(1, NULL); 701*84441f85SGarrett D'Amore i = y->nmultis++; 702*84441f85SGarrett D'Amore y->multis[i].fromlen = oclen; 703*84441f85SGarrett D'Amore (void) memcpy(y->multis[i].from, op, oclen); 704*84441f85SGarrett D'Amore y->multis[i].tolen = nclen; 705*84441f85SGarrett D'Amore (void) memcpy(y->multis[i].to, np, nclen); 706*84441f85SGarrett D'Amore } 707*84441f85SGarrett D'Amore op += oclen; 708*84441f85SGarrett D'Amore np += nclen; 709*84441f85SGarrett D'Amore } 710*84441f85SGarrett D'Amore } 711*84441f85SGarrett D'Amore return (p); 712*84441f85SGarrett D'Amore } 713*84441f85SGarrett D'Amore 714*84441f85SGarrett D'Amore /* 715*84441f85SGarrett D'Amore * Compile the text following an a or i command. 
716*84441f85SGarrett D'Amore */ 717*84441f85SGarrett D'Amore static char * 718*84441f85SGarrett D'Amore compile_text(void) 719*84441f85SGarrett D'Amore { 720*84441f85SGarrett D'Amore int esc_nl; 721*84441f85SGarrett D'Amore uintptr_t size, asize; 722*84441f85SGarrett D'Amore char *text, *p, *op, *s; 723*84441f85SGarrett D'Amore char lbuf[_POSIX2_LINE_MAX + 1]; 724*84441f85SGarrett D'Amore 725*84441f85SGarrett D'Amore asize = 2 * _POSIX2_LINE_MAX + 1; 726*84441f85SGarrett D'Amore if ((text = malloc(asize)) == NULL) 727*84441f85SGarrett D'Amore err(1, "malloc"); 728*84441f85SGarrett D'Amore size = 0; 729*84441f85SGarrett D'Amore while (cu_fgets(lbuf, sizeof (lbuf), NULL)) { 730*84441f85SGarrett D'Amore op = s = text + size; 731*84441f85SGarrett D'Amore p = lbuf; 732*84441f85SGarrett D'Amore EATSPACE(); 733*84441f85SGarrett D'Amore for (esc_nl = 0; *p != '\0'; p++) { 734*84441f85SGarrett D'Amore if (*p == '\\' && p[1] != '\0' && *++p == '\n') 735*84441f85SGarrett D'Amore esc_nl = 1; 736*84441f85SGarrett D'Amore *s++ = *p; 737*84441f85SGarrett D'Amore } 738*84441f85SGarrett D'Amore size += (uintptr_t)s - (uintptr_t)op; 739*84441f85SGarrett D'Amore if (!esc_nl) { 740*84441f85SGarrett D'Amore *s = '\0'; 741*84441f85SGarrett D'Amore break; 742*84441f85SGarrett D'Amore } 743*84441f85SGarrett D'Amore if (asize - size < _POSIX2_LINE_MAX + 1) { 744*84441f85SGarrett D'Amore asize *= 2; 745*84441f85SGarrett D'Amore if ((text = realloc(text, asize)) == NULL) 746*84441f85SGarrett D'Amore err(1, "realloc"); 747*84441f85SGarrett D'Amore } 748*84441f85SGarrett D'Amore } 749*84441f85SGarrett D'Amore text[size] = '\0'; 750*84441f85SGarrett D'Amore if ((p = realloc(text, size + 1)) == NULL) 751*84441f85SGarrett D'Amore err(1, "realloc"); 752*84441f85SGarrett D'Amore return (p); 753*84441f85SGarrett D'Amore } 754*84441f85SGarrett D'Amore 755*84441f85SGarrett D'Amore /* 756*84441f85SGarrett D'Amore * Get an address and return a pointer to the first character after 757*84441f85SGarrett 
D'Amore * it. Fill the structure pointed to according to the address. 758*84441f85SGarrett D'Amore */ 759*84441f85SGarrett D'Amore static char * 760*84441f85SGarrett D'Amore compile_addr(char *p, struct s_addr *a) 761*84441f85SGarrett D'Amore { 762*84441f85SGarrett D'Amore char *end, re[_POSIX2_LINE_MAX + 1]; 763*84441f85SGarrett D'Amore int icase; 764*84441f85SGarrett D'Amore 765*84441f85SGarrett D'Amore icase = 0; 766*84441f85SGarrett D'Amore 767*84441f85SGarrett D'Amore a->type = 0; 768*84441f85SGarrett D'Amore switch (*p) { 769*84441f85SGarrett D'Amore case '\\': /* Context address */ 770*84441f85SGarrett D'Amore ++p; 771*84441f85SGarrett D'Amore /* FALLTHROUGH */ 772*84441f85SGarrett D'Amore case '/': /* Context address */ 773*84441f85SGarrett D'Amore p = compile_delimited(p, re, 0); 774*84441f85SGarrett D'Amore if (p == NULL) 775*84441f85SGarrett D'Amore fatal(_("unterminated regular expression")); 776*84441f85SGarrett D'Amore 777*84441f85SGarrett D'Amore /* Check for case insensitive regexp flag */ 778*84441f85SGarrett D'Amore if (*p == 'I') { 779*84441f85SGarrett D'Amore icase = 1; 780*84441f85SGarrett D'Amore p++; 781*84441f85SGarrett D'Amore } 782*84441f85SGarrett D'Amore if (*re == '\0') 783*84441f85SGarrett D'Amore a->u.r = NULL; 784*84441f85SGarrett D'Amore else 785*84441f85SGarrett D'Amore a->u.r = compile_re(re, icase); 786*84441f85SGarrett D'Amore a->type = AT_RE; 787*84441f85SGarrett D'Amore return (p); 788*84441f85SGarrett D'Amore 789*84441f85SGarrett D'Amore case '$': /* Last line */ 790*84441f85SGarrett D'Amore a->type = AT_LAST; 791*84441f85SGarrett D'Amore return (p + 1); 792*84441f85SGarrett D'Amore 793*84441f85SGarrett D'Amore case '+': /* Relative line number */ 794*84441f85SGarrett D'Amore a->type = AT_RELLINE; 795*84441f85SGarrett D'Amore p++; 796*84441f85SGarrett D'Amore /* FALLTHROUGH */ 797*84441f85SGarrett D'Amore /* Line number */ 798*84441f85SGarrett D'Amore case '0': case '1': case '2': case '3': case '4': 799*84441f85SGarrett 
D'Amore case '5': case '6': case '7': case '8': case '9': 800*84441f85SGarrett D'Amore if (a->type == 0) 801*84441f85SGarrett D'Amore a->type = AT_LINE; 802*84441f85SGarrett D'Amore a->u.l = strtol(p, &end, 10); 803*84441f85SGarrett D'Amore return (end); 804*84441f85SGarrett D'Amore default: 805*84441f85SGarrett D'Amore fatal(_("expected context address")); 806*84441f85SGarrett D'Amore return (NULL); 807*84441f85SGarrett D'Amore } 808*84441f85SGarrett D'Amore } 809*84441f85SGarrett D'Amore 810*84441f85SGarrett D'Amore /* 811*84441f85SGarrett D'Amore * duptoeol -- 812*84441f85SGarrett D'Amore * Return a copy of all the characters up to \n or \0. 813*84441f85SGarrett D'Amore */ 814*84441f85SGarrett D'Amore static char * 815*84441f85SGarrett D'Amore duptoeol(char *s, const char *ctype) 816*84441f85SGarrett D'Amore { 817*84441f85SGarrett D'Amore size_t len; 818*84441f85SGarrett D'Amore int ws; 819*84441f85SGarrett D'Amore char *p, *start; 820*84441f85SGarrett D'Amore 821*84441f85SGarrett D'Amore ws = 0; 822*84441f85SGarrett D'Amore for (start = s; *s != '\0' && *s != '\n'; ++s) 823*84441f85SGarrett D'Amore ws = isspace((unsigned char)*s); 824*84441f85SGarrett D'Amore *s = '\0'; 825*84441f85SGarrett D'Amore if (ws) 826*84441f85SGarrett D'Amore warnx(_("%lu: %s: whitespace after %s"), linenum, fname, ctype); 827*84441f85SGarrett D'Amore len = (uintptr_t)s - (uintptr_t)start + 1; 828*84441f85SGarrett D'Amore if ((p = malloc(len)) == NULL) 829*84441f85SGarrett D'Amore err(1, "malloc"); 830*84441f85SGarrett D'Amore return (memmove(p, start, len)); 831*84441f85SGarrett D'Amore } 832*84441f85SGarrett D'Amore 833*84441f85SGarrett D'Amore /* 834*84441f85SGarrett D'Amore * Convert goto label names to addresses, and count a and r commands, in 835*84441f85SGarrett D'Amore * the given subset of the script. Free the memory used by labels in b 836*84441f85SGarrett D'Amore * and t commands (but not by :). 
837*84441f85SGarrett D'Amore * 838*84441f85SGarrett D'Amore * TODO: Remove } nodes 839*84441f85SGarrett D'Amore */ 840*84441f85SGarrett D'Amore static void 841*84441f85SGarrett D'Amore fixuplabel(struct s_command *cp, struct s_command *end) 842*84441f85SGarrett D'Amore { 843*84441f85SGarrett D'Amore 844*84441f85SGarrett D'Amore for (; cp != end; cp = cp->next) 845*84441f85SGarrett D'Amore switch (cp->code) { 846*84441f85SGarrett D'Amore case 'a': 847*84441f85SGarrett D'Amore case 'r': 848*84441f85SGarrett D'Amore appendnum++; 849*84441f85SGarrett D'Amore break; 850*84441f85SGarrett D'Amore case 'b': 851*84441f85SGarrett D'Amore case 't': 852*84441f85SGarrett D'Amore /* Resolve branch target. */ 853*84441f85SGarrett D'Amore if (cp->t == NULL) { 854*84441f85SGarrett D'Amore cp->u.c = NULL; 855*84441f85SGarrett D'Amore break; 856*84441f85SGarrett D'Amore } 857*84441f85SGarrett D'Amore if ((cp->u.c = findlabel(cp->t)) == NULL) 858*84441f85SGarrett D'Amore fatal(_("undefined label '%s'"), cp->t); 859*84441f85SGarrett D'Amore free(cp->t); 860*84441f85SGarrett D'Amore break; 861*84441f85SGarrett D'Amore case '{': 862*84441f85SGarrett D'Amore /* Do interior commands. */ 863*84441f85SGarrett D'Amore fixuplabel(cp->u.c, cp->next); 864*84441f85SGarrett D'Amore break; 865*84441f85SGarrett D'Amore } 866*84441f85SGarrett D'Amore } 867*84441f85SGarrett D'Amore 868*84441f85SGarrett D'Amore /* 869*84441f85SGarrett D'Amore * Associate the given command label for later lookup. 
870*84441f85SGarrett D'Amore */ 871*84441f85SGarrett D'Amore static void 872*84441f85SGarrett D'Amore enterlabel(struct s_command *cp) 873*84441f85SGarrett D'Amore { 874*84441f85SGarrett D'Amore struct labhash **lhp, *lh; 875*84441f85SGarrett D'Amore uchar_t *p; 876*84441f85SGarrett D'Amore uint_t h, c; 877*84441f85SGarrett D'Amore 878*84441f85SGarrett D'Amore for (h = 0, p = (uchar_t *)cp->t; (c = *p) != 0; p++) 879*84441f85SGarrett D'Amore h = (h << 5) + h + c; 880*84441f85SGarrett D'Amore lhp = &labels[h & LHMASK]; 881*84441f85SGarrett D'Amore for (lh = *lhp; lh != NULL; lh = lh->lh_next) 882*84441f85SGarrett D'Amore if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) 883*84441f85SGarrett D'Amore fatal(_("duplicate label '%s'"), cp->t); 884*84441f85SGarrett D'Amore if ((lh = malloc(sizeof (*lh))) == NULL) 885*84441f85SGarrett D'Amore err(1, "malloc"); 886*84441f85SGarrett D'Amore lh->lh_next = *lhp; 887*84441f85SGarrett D'Amore lh->lh_hash = h; 888*84441f85SGarrett D'Amore lh->lh_cmd = cp; 889*84441f85SGarrett D'Amore lh->lh_ref = 0; 890*84441f85SGarrett D'Amore *lhp = lh; 891*84441f85SGarrett D'Amore } 892*84441f85SGarrett D'Amore 893*84441f85SGarrett D'Amore /* 894*84441f85SGarrett D'Amore * Find the label contained in the command l in the command linked 895*84441f85SGarrett D'Amore * list cp. L is excluded from the search. Return NULL if not found. 
896*84441f85SGarrett D'Amore */ 897*84441f85SGarrett D'Amore static struct s_command * 898*84441f85SGarrett D'Amore findlabel(char *name) 899*84441f85SGarrett D'Amore { 900*84441f85SGarrett D'Amore struct labhash *lh; 901*84441f85SGarrett D'Amore uchar_t *p; 902*84441f85SGarrett D'Amore uint_t h, c; 903*84441f85SGarrett D'Amore 904*84441f85SGarrett D'Amore for (h = 0, p = (uchar_t *)name; (c = *p) != 0; p++) 905*84441f85SGarrett D'Amore h = (h << 5) + h + c; 906*84441f85SGarrett D'Amore for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { 907*84441f85SGarrett D'Amore if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { 908*84441f85SGarrett D'Amore lh->lh_ref = 1; 909*84441f85SGarrett D'Amore return (lh->lh_cmd); 910*84441f85SGarrett D'Amore } 911*84441f85SGarrett D'Amore } 912*84441f85SGarrett D'Amore return (NULL); 913*84441f85SGarrett D'Amore } 914*84441f85SGarrett D'Amore 915*84441f85SGarrett D'Amore /* 916*84441f85SGarrett D'Amore * Warn about any unused labels. As a side effect, release the label hash 917*84441f85SGarrett D'Amore * table space. 918*84441f85SGarrett D'Amore */ 919*84441f85SGarrett D'Amore static void 920*84441f85SGarrett D'Amore uselabel(void) 921*84441f85SGarrett D'Amore { 922*84441f85SGarrett D'Amore struct labhash *lh, *next; 923*84441f85SGarrett D'Amore int i; 924*84441f85SGarrett D'Amore 925*84441f85SGarrett D'Amore for (i = 0; i < LHSZ; i++) { 926*84441f85SGarrett D'Amore for (lh = labels[i]; lh != NULL; lh = next) { 927*84441f85SGarrett D'Amore next = lh->lh_next; 928*84441f85SGarrett D'Amore if (!lh->lh_ref) 929*84441f85SGarrett D'Amore warnx(_("%lu: %s: unused label '%s'"), 930*84441f85SGarrett D'Amore linenum, fname, lh->lh_cmd->t); 931*84441f85SGarrett D'Amore free(lh); 932*84441f85SGarrett D'Amore } 933*84441f85SGarrett D'Amore } 934*84441f85SGarrett D'Amore } 935