19b50d902SRodney W. Grimes /*-
28a16b7a1SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause
38a16b7a1SPedro F. Giffuni *
49b50d902SRodney W. Grimes * Copyright (c) 1992 Diomidis Spinellis.
59b50d902SRodney W. Grimes * Copyright (c) 1992, 1993
69b50d902SRodney W. Grimes * The Regents of the University of California. All rights reserved.
79b50d902SRodney W. Grimes *
89b50d902SRodney W. Grimes * This code is derived from software contributed to Berkeley by
99b50d902SRodney W. Grimes * Diomidis Spinellis of Imperial College, University of London.
109b50d902SRodney W. Grimes *
119b50d902SRodney W. Grimes * Redistribution and use in source and binary forms, with or without
129b50d902SRodney W. Grimes * modification, are permitted provided that the following conditions
139b50d902SRodney W. Grimes * are met:
149b50d902SRodney W. Grimes * 1. Redistributions of source code must retain the above copyright
159b50d902SRodney W. Grimes * notice, this list of conditions and the following disclaimer.
169b50d902SRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright
179b50d902SRodney W. Grimes * notice, this list of conditions and the following disclaimer in the
189b50d902SRodney W. Grimes * documentation and/or other materials provided with the distribution.
19fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors
209b50d902SRodney W. Grimes * may be used to endorse or promote products derived from this software
219b50d902SRodney W. Grimes * without specific prior written permission.
229b50d902SRodney W. Grimes *
239b50d902SRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
249b50d902SRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
259b50d902SRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
269b50d902SRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
279b50d902SRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
289b50d902SRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
299b50d902SRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
309b50d902SRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
319b50d902SRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
329b50d902SRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
339b50d902SRodney W. Grimes * SUCH DAMAGE.
349b50d902SRodney W. Grimes */
359b50d902SRodney W. Grimes
36e74bf75fSMark Murray #include <sys/cdefs.h>
379b50d902SRodney W. Grimes
389b50d902SRodney W. Grimes #include <sys/types.h>
399b50d902SRodney W. Grimes #include <sys/stat.h>
409b50d902SRodney W. Grimes
419b50d902SRodney W. Grimes #include <ctype.h>
4273a08bb2SPhilippe Charnier #include <err.h>
4381a8648aSTim J. Robbins #include <errno.h>
449b50d902SRodney W. Grimes #include <fcntl.h>
459b50d902SRodney W. Grimes #include <limits.h>
469b50d902SRodney W. Grimes #include <regex.h>
473a4d70c5SKyle Evans #include <stdbool.h>
489b50d902SRodney W. Grimes #include <stdio.h>
499b50d902SRodney W. Grimes #include <stdlib.h>
509b50d902SRodney W. Grimes #include <string.h>
5181a8648aSTim J. Robbins #include <wchar.h>
529b50d902SRodney W. Grimes
539b50d902SRodney W. Grimes #include "defs.h"
549b50d902SRodney W. Grimes #include "extern.h"
559b50d902SRodney W. Grimes
569b50d902SRodney W. Grimes #define LHSZ 128
579b50d902SRodney W. Grimes #define LHMASK (LHSZ - 1)
589b50d902SRodney W. Grimes static struct labhash {
599b50d902SRodney W. Grimes struct labhash *lh_next;
609b50d902SRodney W. Grimes u_int lh_hash;
619b50d902SRodney W. Grimes struct s_command *lh_cmd;
629b50d902SRodney W. Grimes int lh_ref;
639b50d902SRodney W. Grimes } *labels[LHSZ];
649b50d902SRodney W. Grimes
/* Forward declarations of the file-local helpers, one per line. */
static char		 *compile_addr(char *, struct s_addr *);
static char		 *compile_ccl(char **, char *);
static char		 *compile_delimited(char *, char *, int);
static char		 *compile_flags(char *, struct s_subst *);
static regex_t		 *compile_re(char *, int);
static char		 *compile_subst(char *, struct s_subst *);
static char		 *compile_text(void);
static char		 *compile_tr(char *, struct s_tr **);
static struct s_command	**compile_stream(struct s_command **);
static char		 *duptoeol(char *, const char *);
static void		  enterlabel(struct s_command *);
static struct s_command	 *findlabel(char *);
static void		  fixuplabel(struct s_command *, struct s_command *);
static void		  uselabel(void);
819b50d902SRodney W. Grimes
829b50d902SRodney W. Grimes /*
839b50d902SRodney W. Grimes * Command specification. This is used to drive the command parser.
849b50d902SRodney W. Grimes */
859b50d902SRodney W. Grimes struct s_format {
869b50d902SRodney W. Grimes char code; /* Command code */
879b50d902SRodney W. Grimes int naddr; /* Number of address args */
889b50d902SRodney W. Grimes enum e_args args; /* Argument type */
899b50d902SRodney W. Grimes };
909b50d902SRodney W. Grimes
919b50d902SRodney W. Grimes static struct s_format cmd_fmts[] = {
929b50d902SRodney W. Grimes {'{', 2, GROUP},
93ce19262dSJordan K. Hubbard {'}', 0, ENDGROUP},
949b50d902SRodney W. Grimes {'a', 1, TEXT},
959b50d902SRodney W. Grimes {'b', 2, BRANCH},
969b50d902SRodney W. Grimes {'c', 2, TEXT},
979b50d902SRodney W. Grimes {'d', 2, EMPTY},
989b50d902SRodney W. Grimes {'D', 2, EMPTY},
999b50d902SRodney W. Grimes {'g', 2, EMPTY},
1009b50d902SRodney W. Grimes {'G', 2, EMPTY},
1019b50d902SRodney W. Grimes {'h', 2, EMPTY},
1029b50d902SRodney W. Grimes {'H', 2, EMPTY},
1039b50d902SRodney W. Grimes {'i', 1, TEXT},
1049b50d902SRodney W. Grimes {'l', 2, EMPTY},
1059b50d902SRodney W. Grimes {'n', 2, EMPTY},
1069b50d902SRodney W. Grimes {'N', 2, EMPTY},
1079b50d902SRodney W. Grimes {'p', 2, EMPTY},
1089b50d902SRodney W. Grimes {'P', 2, EMPTY},
1099b50d902SRodney W. Grimes {'q', 1, EMPTY},
1109b50d902SRodney W. Grimes {'r', 1, RFILE},
1119b50d902SRodney W. Grimes {'s', 2, SUBST},
1129b50d902SRodney W. Grimes {'t', 2, BRANCH},
1139b50d902SRodney W. Grimes {'w', 2, WFILE},
1149b50d902SRodney W. Grimes {'x', 2, EMPTY},
1159b50d902SRodney W. Grimes {'y', 2, TR},
1169b50d902SRodney W. Grimes {'!', 2, NONSEL},
1179b50d902SRodney W. Grimes {':', 0, LABEL},
1189b50d902SRodney W. Grimes {'#', 0, COMMENT},
1199b50d902SRodney W. Grimes {'=', 1, EMPTY},
1209b50d902SRodney W. Grimes {'\0', 0, COMMENT},
1219b50d902SRodney W. Grimes };
1229b50d902SRodney W. Grimes
/*
 * The compiled program: head of the command list that compile() builds.
 */
struct s_command *prog;
1259b50d902SRodney W. Grimes
1269b50d902SRodney W. Grimes /*
1279b50d902SRodney W. Grimes * Compile the program into prog.
1289b50d902SRodney W. Grimes * Initialise appends.
1299b50d902SRodney W. Grimes */
1309b50d902SRodney W. Grimes void
compile(void)131e6478125SDag-Erling Smørgrav compile(void)
1329b50d902SRodney W. Grimes {
133ce19262dSJordan K. Hubbard *compile_stream(&prog) = NULL;
1349b50d902SRodney W. Grimes fixuplabel(prog, NULL);
1359b50d902SRodney W. Grimes uselabel();
1360ea56610SMike Heffner if (appendnum == 0)
1370ea56610SMike Heffner appends = NULL;
1380ea56610SMike Heffner else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) ==
1390ea56610SMike Heffner NULL)
1408e33c0a0SDavid E. O'Brien err(1, "malloc");
1418e33c0a0SDavid E. O'Brien if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL)
1428e33c0a0SDavid E. O'Brien err(1, "malloc");
1439b50d902SRodney W. Grimes }
1449b50d902SRodney W. Grimes
/*
 * Advance the local variable p past any white space.  A NULL p is left
 * untouched, and the scan never moves past the terminating NUL.
 */
#define	EATSPACE() do {							\
	while (p != NULL && *p != '\0' && isspace((unsigned char)*p))	\
		p++;							\
} while (0)
1509b50d902SRodney W. Grimes
1519b50d902SRodney W. Grimes static struct s_command **
compile_stream(struct s_command ** link)152e6478125SDag-Erling Smørgrav compile_stream(struct s_command **link)
1539b50d902SRodney W. Grimes {
154249a8a80SPedro F. Giffuni char *p;
155249a8a80SPedro F. Giffuni static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
156ce19262dSJordan K. Hubbard struct s_command *cmd, *cmd2, *stack;
1579b50d902SRodney W. Grimes struct s_format *fp;
158bdd72b70SSuleiman Souhlal char re[_POSIX2_LINE_MAX + 1];
1599b50d902SRodney W. Grimes int naddr; /* Number of addresses */
1609b50d902SRodney W. Grimes
161e55a6575SPedro F. Giffuni stack = NULL;
1629b50d902SRodney W. Grimes for (;;) {
163249a8a80SPedro F. Giffuni if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
164e55a6575SPedro F. Giffuni if (stack != NULL)
16573a08bb2SPhilippe Charnier errx(1, "%lu: %s: unexpected EOF (pending }'s)",
16673a08bb2SPhilippe Charnier linenum, fname);
1679b50d902SRodney W. Grimes return (link);
1689b50d902SRodney W. Grimes }
1699b50d902SRodney W. Grimes
170249a8a80SPedro F. Giffuni semicolon: EATSPACE();
171249a8a80SPedro F. Giffuni if (p) {
172249a8a80SPedro F. Giffuni if (*p == '#' || *p == '\0')
173249a8a80SPedro F. Giffuni continue;
174249a8a80SPedro F. Giffuni else if (*p == ';') {
1750467aed3STim J. Robbins p++;
1760467aed3STim J. Robbins goto semicolon;
1770467aed3STim J. Robbins }
178249a8a80SPedro F. Giffuni }
1798e33c0a0SDavid E. O'Brien if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL)
1808e33c0a0SDavid E. O'Brien err(1, "malloc");
1819b50d902SRodney W. Grimes link = &cmd->next;
182f879e8d9SBrian Somers cmd->startline = cmd->nonsel = 0;
1839b50d902SRodney W. Grimes /* First parse the addresses */
1849b50d902SRodney W. Grimes naddr = 0;
1859b50d902SRodney W. Grimes
1869b50d902SRodney W. Grimes /* Valid characters to start an address */
1879b50d902SRodney W. Grimes #define addrchar(c) (strchr("0123456789/\\$", (c)))
1889b50d902SRodney W. Grimes if (addrchar(*p)) {
1899b50d902SRodney W. Grimes naddr++;
1908e33c0a0SDavid E. O'Brien if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL)
1918e33c0a0SDavid E. O'Brien err(1, "malloc");
1929b50d902SRodney W. Grimes p = compile_addr(p, cmd->a1);
1939b50d902SRodney W. Grimes EATSPACE(); /* EXTENSION */
1949b50d902SRodney W. Grimes if (*p == ',') {
1959b50d902SRodney W. Grimes p++;
1969b50d902SRodney W. Grimes EATSPACE(); /* EXTENSION */
197ce19262dSJordan K. Hubbard naddr++;
1988e33c0a0SDavid E. O'Brien if ((cmd->a2 = malloc(sizeof(struct s_addr)))
1998e33c0a0SDavid E. O'Brien == NULL)
2008e33c0a0SDavid E. O'Brien err(1, "malloc");
2019b50d902SRodney W. Grimes p = compile_addr(p, cmd->a2);
202ce19262dSJordan K. Hubbard EATSPACE();
203ce19262dSJordan K. Hubbard } else
204e55a6575SPedro F. Giffuni cmd->a2 = NULL;
205ce19262dSJordan K. Hubbard } else
206e55a6575SPedro F. Giffuni cmd->a1 = cmd->a2 = NULL;
2079b50d902SRodney W. Grimes
2089b50d902SRodney W. Grimes nonsel: /* Now parse the command */
209249a8a80SPedro F. Giffuni if (!*p)
21073a08bb2SPhilippe Charnier errx(1, "%lu: %s: command expected", linenum, fname);
2119b50d902SRodney W. Grimes cmd->code = *p;
2129b50d902SRodney W. Grimes for (fp = cmd_fmts; fp->code; fp++)
2139b50d902SRodney W. Grimes if (fp->code == *p)
2149b50d902SRodney W. Grimes break;
2159b50d902SRodney W. Grimes if (!fp->code)
216249a8a80SPedro F. Giffuni errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
2179b50d902SRodney W. Grimes if (naddr > fp->naddr)
21873a08bb2SPhilippe Charnier errx(1,
21973a08bb2SPhilippe Charnier "%lu: %s: command %c expects up to %d address(es), found %d",
22073a08bb2SPhilippe Charnier linenum, fname, *p, fp->naddr, naddr);
2219b50d902SRodney W. Grimes switch (fp->args) {
2229b50d902SRodney W. Grimes case NONSEL: /* ! */
2239b50d902SRodney W. Grimes p++;
224ce19262dSJordan K. Hubbard EATSPACE();
22546da6c48SDiomidis Spinellis cmd->nonsel = 1;
2269b50d902SRodney W. Grimes goto nonsel;
2279b50d902SRodney W. Grimes case GROUP: /* { */
2289b50d902SRodney W. Grimes p++;
229249a8a80SPedro F. Giffuni EATSPACE();
230ce19262dSJordan K. Hubbard cmd->next = stack;
231ce19262dSJordan K. Hubbard stack = cmd;
232ce19262dSJordan K. Hubbard link = &cmd->u.c;
233249a8a80SPedro F. Giffuni if (*p)
234ce19262dSJordan K. Hubbard goto semicolon;
2359b50d902SRodney W. Grimes break;
236ce19262dSJordan K. Hubbard case ENDGROUP:
237ce19262dSJordan K. Hubbard /*
238ce19262dSJordan K. Hubbard * Short-circuit command processing, since end of
239ce19262dSJordan K. Hubbard * group is really just a noop.
240ce19262dSJordan K. Hubbard */
241ce19262dSJordan K. Hubbard cmd->nonsel = 1;
242e55a6575SPedro F. Giffuni if (stack == NULL)
24373a08bb2SPhilippe Charnier errx(1, "%lu: %s: unexpected }", linenum, fname);
244ce19262dSJordan K. Hubbard cmd2 = stack;
245ce19262dSJordan K. Hubbard stack = cmd2->next;
246ce19262dSJordan K. Hubbard cmd2->next = cmd;
247ce19262dSJordan K. Hubbard /*FALLTHROUGH*/
2489b50d902SRodney W. Grimes case EMPTY: /* d D g G h H l n N p P q x = \0 */
2499b50d902SRodney W. Grimes p++;
250249a8a80SPedro F. Giffuni EATSPACE();
2519b50d902SRodney W. Grimes if (*p == ';') {
2529b50d902SRodney W. Grimes p++;
2539b50d902SRodney W. Grimes link = &cmd->next;
2549b50d902SRodney W. Grimes goto semicolon;
2559b50d902SRodney W. Grimes }
256249a8a80SPedro F. Giffuni if (*p)
25773a08bb2SPhilippe Charnier errx(1, "%lu: %s: extra characters at the end of %c command",
25873a08bb2SPhilippe Charnier linenum, fname, cmd->code);
2599b50d902SRodney W. Grimes break;
2609b50d902SRodney W. Grimes case TEXT: /* a c i */
2619b50d902SRodney W. Grimes p++;
2629b50d902SRodney W. Grimes EATSPACE();
2639b50d902SRodney W. Grimes if (*p != '\\')
26473a08bb2SPhilippe Charnier errx(1,
26573a08bb2SPhilippe Charnier "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
2669b50d902SRodney W. Grimes p++;
267249a8a80SPedro F. Giffuni EATSPACE();
268249a8a80SPedro F. Giffuni if (*p)
26973a08bb2SPhilippe Charnier errx(1,
270249a8a80SPedro F. Giffuni "%lu: %s: extra characters after \\ at the end of %c command",
271249a8a80SPedro F. Giffuni linenum, fname, cmd->code);
272249a8a80SPedro F. Giffuni cmd->t = compile_text();
2739b50d902SRodney W. Grimes break;
2749b50d902SRodney W. Grimes case COMMENT: /* \0 # */
2759b50d902SRodney W. Grimes break;
2769b50d902SRodney W. Grimes case WFILE: /* w */
2779b50d902SRodney W. Grimes p++;
2789b50d902SRodney W. Grimes EATSPACE();
2799b50d902SRodney W. Grimes if (*p == '\0')
28073a08bb2SPhilippe Charnier errx(1, "%lu: %s: filename expected", linenum, fname);
281249a8a80SPedro F. Giffuni cmd->t = duptoeol(p, "w command");
2829b50d902SRodney W. Grimes if (aflag)
2839b50d902SRodney W. Grimes cmd->u.fd = -1;
284249a8a80SPedro F. Giffuni else if ((cmd->u.fd = open(p,
2859b50d902SRodney W. Grimes O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
2869b50d902SRodney W. Grimes DEFFILEMODE)) == -1)
28773a08bb2SPhilippe Charnier err(1, "%s", p);
2889b50d902SRodney W. Grimes break;
2899b50d902SRodney W. Grimes case RFILE: /* r */
2909b50d902SRodney W. Grimes p++;
2919b50d902SRodney W. Grimes EATSPACE();
2929b50d902SRodney W. Grimes if (*p == '\0')
29373a08bb2SPhilippe Charnier errx(1, "%lu: %s: filename expected", linenum, fname);
2949b50d902SRodney W. Grimes else
295249a8a80SPedro F. Giffuni cmd->t = duptoeol(p, "read command");
2969b50d902SRodney W. Grimes break;
2979b50d902SRodney W. Grimes case BRANCH: /* b t */
2989b50d902SRodney W. Grimes p++;
299249a8a80SPedro F. Giffuni EATSPACE();
300249a8a80SPedro F. Giffuni if (*p == '\0')
3019b50d902SRodney W. Grimes cmd->t = NULL;
3029b50d902SRodney W. Grimes else
303249a8a80SPedro F. Giffuni cmd->t = duptoeol(p, "branch");
3049b50d902SRodney W. Grimes break;
3059b50d902SRodney W. Grimes case LABEL: /* : */
3069b50d902SRodney W. Grimes p++;
3079b50d902SRodney W. Grimes EATSPACE();
308249a8a80SPedro F. Giffuni cmd->t = duptoeol(p, "label");
309249a8a80SPedro F. Giffuni if (strlen(p) == 0)
31073a08bb2SPhilippe Charnier errx(1, "%lu: %s: empty label", linenum, fname);
3119b50d902SRodney W. Grimes enterlabel(cmd);
3129b50d902SRodney W. Grimes break;
3139b50d902SRodney W. Grimes case SUBST: /* s */
3149b50d902SRodney W. Grimes p++;
315249a8a80SPedro F. Giffuni if (*p == '\0' || *p == '\\')
31673a08bb2SPhilippe Charnier errx(1,
31773a08bb2SPhilippe Charnier "%lu: %s: substitute pattern can not be delimited by newline or backslash",
31873a08bb2SPhilippe Charnier linenum, fname);
319870945d8SXin LI if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL)
3208e33c0a0SDavid E. O'Brien err(1, "malloc");
32176570d0aSDiomidis Spinellis p = compile_delimited(p, re, 0);
3229b50d902SRodney W. Grimes if (p == NULL)
32373a08bb2SPhilippe Charnier errx(1,
32473a08bb2SPhilippe Charnier "%lu: %s: unterminated substitute pattern", linenum, fname);
325d3d0d3a3SHiroki Sato
326249a8a80SPedro F. Giffuni /* Compile RE with no case sensitivity temporarily */
327249a8a80SPedro F. Giffuni if (*re == '\0')
328249a8a80SPedro F. Giffuni cmd->u.s->re = NULL;
329249a8a80SPedro F. Giffuni else
330249a8a80SPedro F. Giffuni cmd->u.s->re = compile_re(re, 0);
331870945d8SXin LI --p;
332870945d8SXin LI p = compile_subst(p, cmd->u.s);
333870945d8SXin LI p = compile_flags(p, cmd->u.s);
334d3d0d3a3SHiroki Sato
335249a8a80SPedro F. Giffuni /* Recompile RE with case sensitivity from "I" flag if any */
336249a8a80SPedro F. Giffuni if (*re == '\0')
337249a8a80SPedro F. Giffuni cmd->u.s->re = NULL;
338249a8a80SPedro F. Giffuni else
339bdd72b70SSuleiman Souhlal cmd->u.s->re = compile_re(re, cmd->u.s->icase);
3409b50d902SRodney W. Grimes EATSPACE();
3419b50d902SRodney W. Grimes if (*p == ';') {
3429b50d902SRodney W. Grimes p++;
3439b50d902SRodney W. Grimes link = &cmd->next;
3449b50d902SRodney W. Grimes goto semicolon;
3459b50d902SRodney W. Grimes }
3469b50d902SRodney W. Grimes break;
3479b50d902SRodney W. Grimes case TR: /* y */
3489b50d902SRodney W. Grimes p++;
34981a8648aSTim J. Robbins p = compile_tr(p, &cmd->u.y);
3509b50d902SRodney W. Grimes EATSPACE();
3519b50d902SRodney W. Grimes if (*p == ';') {
3529b50d902SRodney W. Grimes p++;
3539b50d902SRodney W. Grimes link = &cmd->next;
3549b50d902SRodney W. Grimes goto semicolon;
3559b50d902SRodney W. Grimes }
3569b50d902SRodney W. Grimes if (*p)
35773a08bb2SPhilippe Charnier errx(1,
35873a08bb2SPhilippe Charnier "%lu: %s: extra text at the end of a transform command", linenum, fname);
3599b50d902SRodney W. Grimes break;
3609b50d902SRodney W. Grimes }
3619b50d902SRodney W. Grimes }
3629b50d902SRodney W. Grimes }
3639b50d902SRodney W. Grimes
/*
 * Convert one or two hexadecimal digits into a character.
 *
 * in	points to the digits; in[1] is only read when len > 1.
 * out	receives the decoded character on success and is untouched otherwise.
 * len	number of digits to convert (values above 1 mean two digits).
 *
 * Returns 0 on success and ERANGE when the input is not made up entirely of
 * hexadecimal digits.
 */
static int
hex2char(const char *in, char *out, int len)
{
	long ord;
	char *endptr, hexbuf[3];

	/*
	 * strtol() would silently accept an empty string, leading white
	 * space or a sign, so insist on a digit up front.  Any bad second
	 * character is caught by the endptr check below.
	 */
	if (!isxdigit((unsigned char)in[0]))
		return (ERANGE);

	hexbuf[0] = in[0];
	hexbuf[1] = len > 1 ? in[1] : '\0';
	hexbuf[2] = '\0';

	errno = 0;
	ord = strtol(hexbuf, &endptr, 16);
	if (*endptr != '\0' || errno != 0)
		return (ERANGE);
	*out = (char)ord;
	return (0);
}
3813a4d70c5SKyle Evans
/*
 * Return true when c is a hexadecimal digit (0-9, a-f or A-F).
 *
 * The argument is converted to unsigned char first: handing a negative
 * plain char to a <ctype.h> function is undefined behaviour.
 */
static bool
hexdigit(char c)
{

	return (isxdigit((unsigned char)c) != 0);
}
3903a4d70c5SKyle Evans
/*
 * Decode the one or two hexadecimal digits found at in.
 *
 * On success the decoded character is stored in *out, the number of digits
 * consumed (1 or 2) is stored in *len and true is returned.  When in does
 * not start with a hexadecimal digit, or the conversion fails, false is
 * returned and *len is left alone.
 */
static bool
dohex(const char *in, char *out, int *len)
{
	int ndigits;

	if (!hexdigit(in[0]))
		return (false);

	/* Take a second digit only when one is actually there. */
	ndigits = hexdigit(in[1]) ? 2 : 1;
	if (hex2char(in, out, ndigits) != 0)
		return (false);

	*len = ndigits;
	return (true);
}
4083a4d70c5SKyle Evans
4099b50d902SRodney W. Grimes /*
410463a577bSEitan Adler * Get a delimited string. P points to the delimiter of the string; d points
4119b50d902SRodney W. Grimes * to a buffer area. Newline and delimiter escapes are processed; other
4129b50d902SRodney W. Grimes * escapes are ignored.
4139b50d902SRodney W. Grimes *
4149b50d902SRodney W. Grimes * Returns a pointer to the first character after the final delimiter or NULL
4159b50d902SRodney W. Grimes * in the case of a non-terminated string. The character array d is filled
4169b50d902SRodney W. Grimes * with the processed string.
4179b50d902SRodney W. Grimes */
418249a8a80SPedro F. Giffuni static char *
compile_delimited(char * p,char * d,int is_tr)419249a8a80SPedro F. Giffuni compile_delimited(char *p, char *d, int is_tr)
4209b50d902SRodney W. Grimes {
4213a4d70c5SKyle Evans int hexlen;
4229b50d902SRodney W. Grimes char c;
4239b50d902SRodney W. Grimes
4249b50d902SRodney W. Grimes c = *p++;
4259b50d902SRodney W. Grimes if (c == '\0')
4269b50d902SRodney W. Grimes return (NULL);
4279b50d902SRodney W. Grimes else if (c == '\\')
42873a08bb2SPhilippe Charnier errx(1, "%lu: %s: \\ can not be used as a string delimiter",
42973a08bb2SPhilippe Charnier linenum, fname);
4309b50d902SRodney W. Grimes else if (c == '\n')
43173a08bb2SPhilippe Charnier errx(1, "%lu: %s: newline can not be used as a string delimiter",
43273a08bb2SPhilippe Charnier linenum, fname);
4339b50d902SRodney W. Grimes while (*p) {
434128e6a12SDiomidis Spinellis if (*p == '[' && *p != c) {
435*14fdf163SYuri Pankov if (!is_tr) {
436*14fdf163SYuri Pankov if ((d = compile_ccl(&p, d)) == NULL) {
437*14fdf163SYuri Pankov errx(1,
438*14fdf163SYuri Pankov "%lu: %s: unbalanced brackets ([])",
439*14fdf163SYuri Pankov linenum, fname);
440*14fdf163SYuri Pankov }
441ce19262dSJordan K. Hubbard continue;
442*14fdf163SYuri Pankov }
443ce19262dSJordan K. Hubbard } else if (*p == '\\' && p[1] == '[') {
444*14fdf163SYuri Pankov if (is_tr)
445*14fdf163SYuri Pankov p++;
446*14fdf163SYuri Pankov else
447ce19262dSJordan K. Hubbard *d++ = *p++;
4486e816d87SKyle Evans } else if (*p == '\\' && p[1] == c) {
44943daed47SPedro F. Giffuni p++;
4506e816d87SKyle Evans } else if (*p == '\\' &&
4516e816d87SKyle Evans (p[1] == 'n' || p[1] == 'r' || p[1] == 't')) {
4526e816d87SKyle Evans switch (p[1]) {
4536e816d87SKyle Evans case 'n':
4549b50d902SRodney W. Grimes *d++ = '\n';
4556e816d87SKyle Evans break;
4566e816d87SKyle Evans case 'r':
4576e816d87SKyle Evans *d++ = '\r';
4586e816d87SKyle Evans break;
4596e816d87SKyle Evans case 't':
4606e816d87SKyle Evans *d++ = '\t';
4616e816d87SKyle Evans break;
4626e816d87SKyle Evans }
4639b50d902SRodney W. Grimes p += 2;
4649b50d902SRodney W. Grimes continue;
4653a4d70c5SKyle Evans } else if (*p == '\\' && p[1] == 'x') {
4663a4d70c5SKyle Evans if (dohex(&p[2], d, &hexlen)) {
4673a4d70c5SKyle Evans ++d;
4683a4d70c5SKyle Evans p += hexlen + 2;
4693a4d70c5SKyle Evans continue;
4703a4d70c5SKyle Evans }
47176570d0aSDiomidis Spinellis } else if (*p == '\\' && p[1] == '\\') {
47276570d0aSDiomidis Spinellis if (is_tr)
47376570d0aSDiomidis Spinellis p++;
47476570d0aSDiomidis Spinellis else
4759b50d902SRodney W. Grimes *d++ = *p++;
47676570d0aSDiomidis Spinellis } else if (*p == c) {
4779b50d902SRodney W. Grimes *d = '\0';
4789b50d902SRodney W. Grimes return (p + 1);
4799b50d902SRodney W. Grimes }
4809b50d902SRodney W. Grimes *d++ = *p++;
4819b50d902SRodney W. Grimes }
4829b50d902SRodney W. Grimes return (NULL);
4839b50d902SRodney W. Grimes }
4849b50d902SRodney W. Grimes
485ce19262dSJordan K. Hubbard
/*
 * compile_ccl: copy a bracket expression.
 *
 * *sp points to the opening '['; t is where the copy goes.  A ']' that comes
 * first (optionally after '^') is taken literally.  The inner forms [. .],
 * [: :] and [= =] are copied up to their matching close, and \n, \r, \t and
 * \x with hex digits are replaced by the character they denote.
 *
 * On success *sp is moved past the closing ']' and the return value points
 * just past the copied text in t.  NULL is returned, with *sp unchanged,
 * when the expression is not terminated.
 */
static char *
compile_ccl(char **sp, char *t)
{
	int prev, kind, hexlen;
	char *s = *sp;

	*t++ = *s++;				/* the opening '[' */
	if (*s == '^')
		*t++ = *s++;
	if (*s == ']')
		*t++ = *s++;

	while (*s != '\0' && (*t = *s) != ']') {
		if (*s == '[' &&
		    ((kind = s[1]) == '.' || kind == ':' || kind == '=')) {
			/* Copy the kind marker, then scan for "<kind>]". */
			t[1] = s[1];
			t += 2;
			s += 2;
			prev = *s;
			while ((*t = *s) != ']' || prev != kind) {
				prev = *s;
				if (prev == '\0')
					return (NULL);
				s++;
				t++;
			}
		} else if (*s == '\\') {
			/* The replacement overwrites the copied backslash. */
			switch (s[1]) {
			case 'n':
				*t = '\n';
				s++;
				break;
			case 'r':
				*t = '\r';
				s++;
				break;
			case 't':
				*t = '\t';
				s++;
				break;
			case 'x':
				if (dohex(&s[2], t, &hexlen))
					s += hexlen + 1;
				break;
			}
		}
		s++;
		t++;
	}

	if (*s != ']')
		return (NULL);
	*sp = s + 1;
	return (t + 1);
}
527ce19262dSJordan K. Hubbard
5289b50d902SRodney W. Grimes /*
529bdd72b70SSuleiman Souhlal * Compiles the regular expression in RE and returns a pointer to the compiled
530bdd72b70SSuleiman Souhlal * regular expression.
5319b50d902SRodney W. Grimes * Cflags are passed to regcomp.
5329b50d902SRodney W. Grimes */
533249a8a80SPedro F. Giffuni static regex_t *
compile_re(char * re,int case_insensitive)534249a8a80SPedro F. Giffuni compile_re(char *re, int case_insensitive)
5359b50d902SRodney W. Grimes {
536bdd72b70SSuleiman Souhlal regex_t *rep;
537bdd72b70SSuleiman Souhlal int eval, flags;
5389b50d902SRodney W. Grimes
539bdd72b70SSuleiman Souhlal
540bdd72b70SSuleiman Souhlal flags = rflags;
541bdd72b70SSuleiman Souhlal if (case_insensitive)
542bdd72b70SSuleiman Souhlal flags |= REG_ICASE;
543bdd72b70SSuleiman Souhlal if ((rep = malloc(sizeof(regex_t))) == NULL)
5448e33c0a0SDavid E. O'Brien err(1, "malloc");
545d3e5e11cSDavid Malone if ((eval = regcomp(rep, re, flags)) != 0)
54673a08bb2SPhilippe Charnier errx(1, "%lu: %s: RE error: %s",
547bdd72b70SSuleiman Souhlal linenum, fname, strregerror(eval, rep));
548bdd72b70SSuleiman Souhlal if (maxnsub < rep->re_nsub)
549bdd72b70SSuleiman Souhlal maxnsub = rep->re_nsub;
550bdd72b70SSuleiman Souhlal return (rep);
5519b50d902SRodney W. Grimes }
5529b50d902SRodney W. Grimes
5539b50d902SRodney W. Grimes /*
5549b50d902SRodney W. Grimes * Compile the substitution string of a regular expression and set res to
5559b50d902SRodney W. Grimes * point to a saved copy of it. Nsub is the number of parenthesized regular
5569b50d902SRodney W. Grimes * expressions.
5579b50d902SRodney W. Grimes */
558249a8a80SPedro F. Giffuni static char *
compile_subst(char * p,struct s_subst * s)559249a8a80SPedro F. Giffuni compile_subst(char *p, struct s_subst *s)
5609b50d902SRodney W. Grimes {
561249a8a80SPedro F. Giffuni static char lbuf[_POSIX2_LINE_MAX + 1];
5623a4d70c5SKyle Evans int asize, hexlen, size;
563e74bf75fSMark Murray u_char ref;
5649b50d902SRodney W. Grimes char c, *text, *op, *sp;
565249a8a80SPedro F. Giffuni int more = 1, sawesc = 0;
5669b50d902SRodney W. Grimes
5679b50d902SRodney W. Grimes c = *p++; /* Terminator character */
5689b50d902SRodney W. Grimes if (c == '\0')
5699b50d902SRodney W. Grimes return (NULL);
5709b50d902SRodney W. Grimes
5719b50d902SRodney W. Grimes s->maxbref = 0;
5729b50d902SRodney W. Grimes s->linenum = linenum;
5739b50d902SRodney W. Grimes asize = 2 * _POSIX2_LINE_MAX + 1;
5748e33c0a0SDavid E. O'Brien if ((text = malloc(asize)) == NULL)
5758e33c0a0SDavid E. O'Brien err(1, "malloc");
5769b50d902SRodney W. Grimes size = 0;
5779b50d902SRodney W. Grimes do {
5789b50d902SRodney W. Grimes op = sp = text + size;
579249a8a80SPedro F. Giffuni for (; *p; p++) {
580f020c7faSBrian Feldman if (*p == '\\' || sawesc) {
581f020c7faSBrian Feldman /*
582f020c7faSBrian Feldman * If this is a continuation from the last
583f020c7faSBrian Feldman * buffer, we won't have a character to
584f020c7faSBrian Feldman * skip over.
585f020c7faSBrian Feldman */
586f020c7faSBrian Feldman if (sawesc)
587f020c7faSBrian Feldman sawesc = 0;
588f020c7faSBrian Feldman else
5899b50d902SRodney W. Grimes p++;
590f020c7faSBrian Feldman
591f020c7faSBrian Feldman if (*p == '\0') {
592f020c7faSBrian Feldman /*
593f020c7faSBrian Feldman * This escaped character is continued
594f020c7faSBrian Feldman * in the next part of the line. Note
595f020c7faSBrian Feldman * this fact, then cause the loop to
596f020c7faSBrian Feldman * exit w/ normal EOL case and reenter
597f020c7faSBrian Feldman * above with the new buffer.
598f020c7faSBrian Feldman */
599f020c7faSBrian Feldman sawesc = 1;
600f020c7faSBrian Feldman p--;
601249a8a80SPedro F. Giffuni continue;
602f020c7faSBrian Feldman } else if (strchr("123456789", *p) != NULL) {
6039b50d902SRodney W. Grimes *sp++ = '\\';
6049b50d902SRodney W. Grimes ref = *p - '0';
6059b50d902SRodney W. Grimes if (s->re != NULL &&
6069b50d902SRodney W. Grimes ref > s->re->re_nsub)
60773a08bb2SPhilippe Charnier errx(1, "%lu: %s: \\%c not defined in the RE",
60873a08bb2SPhilippe Charnier linenum, fname, *p);
6099b50d902SRodney W. Grimes if (s->maxbref < ref)
6109b50d902SRodney W. Grimes s->maxbref = ref;
6116e816d87SKyle Evans } else {
6126e816d87SKyle Evans switch (*p) {
6136e816d87SKyle Evans case '&':
6146e816d87SKyle Evans case '\\':
6159b50d902SRodney W. Grimes *sp++ = '\\';
6166e816d87SKyle Evans break;
6176e816d87SKyle Evans case 'n':
6186e816d87SKyle Evans *p = '\n';
6196e816d87SKyle Evans break;
6206e816d87SKyle Evans case 'r':
6216e816d87SKyle Evans *p = '\r';
6226e816d87SKyle Evans break;
6236e816d87SKyle Evans case 't':
6246e816d87SKyle Evans *p = '\t';
6256e816d87SKyle Evans break;
6263a4d70c5SKyle Evans case 'x':
6273a4d70c5SKyle Evans #define ADVANCE_N(s, n) \
6283a4d70c5SKyle Evans do { \
6293a4d70c5SKyle Evans char *adv = (s); \
6303a4d70c5SKyle Evans while (*(adv + (n) - 1) != '\0') { \
6313a4d70c5SKyle Evans *adv = *(adv + (n)); \
6323a4d70c5SKyle Evans ++adv; \
6333a4d70c5SKyle Evans } \
6343a4d70c5SKyle Evans *adv = '\0'; \
6353a4d70c5SKyle Evans } while (0);
6363a4d70c5SKyle Evans if (dohex(&p[1], p, &hexlen)) {
6373a4d70c5SKyle Evans ADVANCE_N(p + 1,
6383a4d70c5SKyle Evans hexlen);
6393a4d70c5SKyle Evans }
6403a4d70c5SKyle Evans break;
6416e816d87SKyle Evans }
6426e816d87SKyle Evans }
6439b50d902SRodney W. Grimes } else if (*p == c) {
644c56690efSArchie Cobbs if (*++p == '\0' && more) {
645249a8a80SPedro F. Giffuni if (cu_fgets(lbuf, sizeof(lbuf), &more))
646249a8a80SPedro F. Giffuni p = lbuf;
6471a6583daSArchie Cobbs }
6489b50d902SRodney W. Grimes *sp++ = '\0';
6499b50d902SRodney W. Grimes size += sp - op;
6508e33c0a0SDavid E. O'Brien if ((s->new = realloc(text, size)) == NULL)
6518e33c0a0SDavid E. O'Brien err(1, "realloc");
6529b50d902SRodney W. Grimes return (p);
6539b50d902SRodney W. Grimes } else if (*p == '\n') {
65473a08bb2SPhilippe Charnier errx(1,
65573a08bb2SPhilippe Charnier "%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
6569b50d902SRodney W. Grimes /* NOTREACHED */
6579b50d902SRodney W. Grimes }
6589b50d902SRodney W. Grimes *sp++ = *p;
6599b50d902SRodney W. Grimes }
6609b50d902SRodney W. Grimes size += sp - op;
6619b50d902SRodney W. Grimes if (asize - size < _POSIX2_LINE_MAX + 1) {
6629b50d902SRodney W. Grimes asize *= 2;
6638e33c0a0SDavid E. O'Brien if ((text = realloc(text, asize)) == NULL)
6648e33c0a0SDavid E. O'Brien err(1, "realloc");
6659b50d902SRodney W. Grimes }
666249a8a80SPedro F. Giffuni } while (cu_fgets(p = lbuf, sizeof(lbuf), &more) != NULL);
66773a08bb2SPhilippe Charnier errx(1, "%lu: %s: unterminated substitute in regular expression",
66873a08bb2SPhilippe Charnier linenum, fname);
6699b50d902SRodney W. Grimes /* NOTREACHED */
6709b50d902SRodney W. Grimes }
6719b50d902SRodney W. Grimes
6729b50d902SRodney W. Grimes /*
6739b50d902SRodney W. Grimes * Compile the flags of the s command
6749b50d902SRodney W. Grimes */
675249a8a80SPedro F. Giffuni static char *
compile_flags(char * p,struct s_subst * s)676249a8a80SPedro F. Giffuni compile_flags(char *p, struct s_subst *s)
6779b50d902SRodney W. Grimes {
6789b50d902SRodney W. Grimes int gn; /* True if we have seen g or n */
679d432588eSDiomidis Spinellis unsigned long nval;
680249a8a80SPedro F. Giffuni char wfile[_POSIX2_LINE_MAX + 1], *q, *eq;
6819b50d902SRodney W. Grimes
6829b50d902SRodney W. Grimes s->n = 1; /* Default */
6839b50d902SRodney W. Grimes s->p = 0;
6849b50d902SRodney W. Grimes s->wfile = NULL;
6859b50d902SRodney W. Grimes s->wfd = -1;
686bdd72b70SSuleiman Souhlal s->icase = 0;
6879b50d902SRodney W. Grimes for (gn = 0;;) {
688249a8a80SPedro F. Giffuni EATSPACE(); /* EXTENSION */
6899b50d902SRodney W. Grimes switch (*p) {
6909b50d902SRodney W. Grimes case 'g':
6919b50d902SRodney W. Grimes if (gn)
69273a08bb2SPhilippe Charnier errx(1,
69373a08bb2SPhilippe Charnier "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
6949b50d902SRodney W. Grimes gn = 1;
6959b50d902SRodney W. Grimes s->n = 0;
6969b50d902SRodney W. Grimes break;
6979b50d902SRodney W. Grimes case '\0':
6989b50d902SRodney W. Grimes case '\n':
6999b50d902SRodney W. Grimes case ';':
7009b50d902SRodney W. Grimes return (p);
7019b50d902SRodney W. Grimes case 'p':
7029b50d902SRodney W. Grimes s->p = 1;
7039b50d902SRodney W. Grimes break;
70449e89014SEitan Adler case 'i':
705bdd72b70SSuleiman Souhlal case 'I':
706bdd72b70SSuleiman Souhlal s->icase = 1;
707bdd72b70SSuleiman Souhlal break;
7089b50d902SRodney W. Grimes case '1': case '2': case '3':
7099b50d902SRodney W. Grimes case '4': case '5': case '6':
7109b50d902SRodney W. Grimes case '7': case '8': case '9':
7119b50d902SRodney W. Grimes if (gn)
71273a08bb2SPhilippe Charnier errx(1,
71373a08bb2SPhilippe Charnier "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
7149b50d902SRodney W. Grimes gn = 1;
715d432588eSDiomidis Spinellis errno = 0;
716249a8a80SPedro F. Giffuni nval = strtol(p, &p, 10);
717d432588eSDiomidis Spinellis if (errno == ERANGE || nval > INT_MAX)
718d432588eSDiomidis Spinellis errx(1,
719d432588eSDiomidis Spinellis "%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
720d432588eSDiomidis Spinellis s->n = nval;
721249a8a80SPedro F. Giffuni p--;
722249a8a80SPedro F. Giffuni break;
7239b50d902SRodney W. Grimes case 'w':
7249b50d902SRodney W. Grimes p++;
7259b50d902SRodney W. Grimes #ifdef HISTORIC_PRACTICE
7269b50d902SRodney W. Grimes if (*p != ' ') {
72773a08bb2SPhilippe Charnier warnx("%lu: %s: space missing before w wfile", linenum, fname);
7289b50d902SRodney W. Grimes return (p);
7299b50d902SRodney W. Grimes }
7309b50d902SRodney W. Grimes #endif
7319b50d902SRodney W. Grimes EATSPACE();
732249a8a80SPedro F. Giffuni q = wfile;
733249a8a80SPedro F. Giffuni eq = wfile + sizeof(wfile) - 1;
734249a8a80SPedro F. Giffuni while (*p) {
735249a8a80SPedro F. Giffuni if (*p == '\n')
736249a8a80SPedro F. Giffuni break;
737249a8a80SPedro F. Giffuni if (q >= eq)
738249a8a80SPedro F. Giffuni err(1, "wfile too long");
739249a8a80SPedro F. Giffuni *q++ = *p++;
740249a8a80SPedro F. Giffuni }
741249a8a80SPedro F. Giffuni *q = '\0';
742249a8a80SPedro F. Giffuni if (q == wfile)
743249a8a80SPedro F. Giffuni errx(1, "%lu: %s: no wfile specified", linenum, fname);
744249a8a80SPedro F. Giffuni s->wfile = strdup(wfile);
745249a8a80SPedro F. Giffuni if (!aflag && (s->wfd = open(wfile,
7469b50d902SRodney W. Grimes O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
7479b50d902SRodney W. Grimes DEFFILEMODE)) == -1)
748249a8a80SPedro F. Giffuni err(1, "%s", wfile);
7499b50d902SRodney W. Grimes return (p);
7509b50d902SRodney W. Grimes default:
751249a8a80SPedro F. Giffuni errx(1, "%lu: %s: bad flag in substitute command: '%c'",
752249a8a80SPedro F. Giffuni linenum, fname, *p);
7539b50d902SRodney W. Grimes break;
7549b50d902SRodney W. Grimes }
7559b50d902SRodney W. Grimes p++;
7569b50d902SRodney W. Grimes }
7579b50d902SRodney W. Grimes }
7589b50d902SRodney W. Grimes
7599b50d902SRodney W. Grimes /*
7609b50d902SRodney W. Grimes * Compile a translation set of strings into a lookup table.
7619b50d902SRodney W. Grimes */
762249a8a80SPedro F. Giffuni static char *
compile_tr(char * p,struct s_tr ** py)763249a8a80SPedro F. Giffuni compile_tr(char *p, struct s_tr **py)
7649b50d902SRodney W. Grimes {
76581a8648aSTim J. Robbins struct s_tr *y;
7669b50d902SRodney W. Grimes int i;
76781a8648aSTim J. Robbins const char *op, *np;
7689b50d902SRodney W. Grimes char old[_POSIX2_LINE_MAX + 1];
7699b50d902SRodney W. Grimes char new[_POSIX2_LINE_MAX + 1];
77081a8648aSTim J. Robbins size_t oclen, oldlen, nclen, newlen;
77181a8648aSTim J. Robbins mbstate_t mbs1, mbs2;
77281a8648aSTim J. Robbins
77381a8648aSTim J. Robbins if ((*py = y = malloc(sizeof(*y))) == NULL)
774249a8a80SPedro F. Giffuni err(1, NULL);
77581a8648aSTim J. Robbins y->multis = NULL;
77681a8648aSTim J. Robbins y->nmultis = 0;
7779b50d902SRodney W. Grimes
7789b50d902SRodney W. Grimes if (*p == '\0' || *p == '\\')
77973a08bb2SPhilippe Charnier errx(1,
78073a08bb2SPhilippe Charnier "%lu: %s: transform pattern can not be delimited by newline or backslash",
78173a08bb2SPhilippe Charnier linenum, fname);
78276570d0aSDiomidis Spinellis p = compile_delimited(p, old, 1);
78373a08bb2SPhilippe Charnier if (p == NULL)
78473a08bb2SPhilippe Charnier errx(1, "%lu: %s: unterminated transform source string",
78573a08bb2SPhilippe Charnier linenum, fname);
78676570d0aSDiomidis Spinellis p = compile_delimited(p - 1, new, 1);
78773a08bb2SPhilippe Charnier if (p == NULL)
78873a08bb2SPhilippe Charnier errx(1, "%lu: %s: unterminated transform target string",
78973a08bb2SPhilippe Charnier linenum, fname);
7909b50d902SRodney W. Grimes EATSPACE();
79181a8648aSTim J. Robbins op = old;
79281a8648aSTim J. Robbins oldlen = mbsrtowcs(NULL, &op, 0, NULL);
79381a8648aSTim J. Robbins if (oldlen == (size_t)-1)
794249a8a80SPedro F. Giffuni err(1, NULL);
79581a8648aSTim J. Robbins np = new;
79681a8648aSTim J. Robbins newlen = mbsrtowcs(NULL, &np, 0, NULL);
79781a8648aSTim J. Robbins if (newlen == (size_t)-1)
798249a8a80SPedro F. Giffuni err(1, NULL);
79981a8648aSTim J. Robbins if (newlen != oldlen)
80073a08bb2SPhilippe Charnier errx(1, "%lu: %s: transform strings are not the same length",
80173a08bb2SPhilippe Charnier linenum, fname);
80281a8648aSTim J. Robbins if (MB_CUR_MAX == 1) {
80381a8648aSTim J. Robbins /*
80481a8648aSTim J. Robbins * The single-byte encoding case is easy: generate a
80581a8648aSTim J. Robbins * lookup table.
80681a8648aSTim J. Robbins */
8079b50d902SRodney W. Grimes for (i = 0; i <= UCHAR_MAX; i++)
80881a8648aSTim J. Robbins y->bytetab[i] = (char)i;
80981a8648aSTim J. Robbins for (; *op; op++, np++)
81081a8648aSTim J. Robbins y->bytetab[(u_char)*op] = *np;
81181a8648aSTim J. Robbins } else {
81281a8648aSTim J. Robbins /*
81381a8648aSTim J. Robbins * Multi-byte encoding case: generate a lookup table as
81481a8648aSTim J. Robbins * above, but only for single-byte characters. The first
81581a8648aSTim J. Robbins * bytes of multi-byte characters have their lookup table
81681a8648aSTim J. Robbins * entries set to 0, which causes do_tr() to search through
81781a8648aSTim J. Robbins * an auxiliary vector of multi-byte mappings.
81881a8648aSTim J. Robbins */
81981a8648aSTim J. Robbins memset(&mbs1, 0, sizeof(mbs1));
82081a8648aSTim J. Robbins memset(&mbs2, 0, sizeof(mbs2));
82181a8648aSTim J. Robbins for (i = 0; i <= UCHAR_MAX; i++)
82281a8648aSTim J. Robbins y->bytetab[i] = (btowc(i) != WEOF) ? i : 0;
82381a8648aSTim J. Robbins while (*op != '\0') {
82481a8648aSTim J. Robbins oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
82581a8648aSTim J. Robbins if (oclen == (size_t)-1 || oclen == (size_t)-2)
82681a8648aSTim J. Robbins errc(1, EILSEQ, NULL);
82781a8648aSTim J. Robbins nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
82881a8648aSTim J. Robbins if (nclen == (size_t)-1 || nclen == (size_t)-2)
82981a8648aSTim J. Robbins errc(1, EILSEQ, NULL);
83081a8648aSTim J. Robbins if (oclen == 1 && nclen == 1)
83181a8648aSTim J. Robbins y->bytetab[(u_char)*op] = *np;
83281a8648aSTim J. Robbins else {
83381a8648aSTim J. Robbins y->bytetab[(u_char)*op] = 0;
83481a8648aSTim J. Robbins y->multis = realloc(y->multis,
83581a8648aSTim J. Robbins (y->nmultis + 1) * sizeof(*y->multis));
83681a8648aSTim J. Robbins if (y->multis == NULL)
837249a8a80SPedro F. Giffuni err(1, NULL);
83881a8648aSTim J. Robbins i = y->nmultis++;
83981a8648aSTim J. Robbins y->multis[i].fromlen = oclen;
84081a8648aSTim J. Robbins memcpy(y->multis[i].from, op, oclen);
84181a8648aSTim J. Robbins y->multis[i].tolen = nclen;
84281a8648aSTim J. Robbins memcpy(y->multis[i].to, np, nclen);
84381a8648aSTim J. Robbins }
84481a8648aSTim J. Robbins op += oclen;
84581a8648aSTim J. Robbins np += nclen;
84681a8648aSTim J. Robbins }
84781a8648aSTim J. Robbins }
8489b50d902SRodney W. Grimes return (p);
8499b50d902SRodney W. Grimes }
8509b50d902SRodney W. Grimes
8519b50d902SRodney W. Grimes /*
852678fec50SPedro F. Giffuni * Compile the text following an a, c, or i command.
8539b50d902SRodney W. Grimes */
8549b50d902SRodney W. Grimes static char *
compile_text(void)855249a8a80SPedro F. Giffuni compile_text(void)
8569b50d902SRodney W. Grimes {
85749e65599SBruce Evans int asize, esc_nl, size;
858249a8a80SPedro F. Giffuni char *text, *p, *op, *s;
859249a8a80SPedro F. Giffuni char lbuf[_POSIX2_LINE_MAX + 1];
8609b50d902SRodney W. Grimes
8619b50d902SRodney W. Grimes asize = 2 * _POSIX2_LINE_MAX + 1;
8628e33c0a0SDavid E. O'Brien if ((text = malloc(asize)) == NULL)
8638e33c0a0SDavid E. O'Brien err(1, "malloc");
8649b50d902SRodney W. Grimes size = 0;
865249a8a80SPedro F. Giffuni while (cu_fgets(lbuf, sizeof(lbuf), NULL) != NULL) {
8669b50d902SRodney W. Grimes op = s = text + size;
867249a8a80SPedro F. Giffuni p = lbuf;
868b780b03cSPedro F. Giffuni #ifdef LEGACY_BSDSED_COMPAT
869b780b03cSPedro F. Giffuni EATSPACE();
870b780b03cSPedro F. Giffuni #endif
87149e65599SBruce Evans for (esc_nl = 0; *p != '\0'; p++) {
87249e65599SBruce Evans if (*p == '\\' && p[1] != '\0' && *++p == '\n')
87349e65599SBruce Evans esc_nl = 1;
8749b50d902SRodney W. Grimes *s++ = *p;
8759b50d902SRodney W. Grimes }
8769b50d902SRodney W. Grimes size += s - op;
87749e65599SBruce Evans if (!esc_nl) {
8789b50d902SRodney W. Grimes *s = '\0';
8799b50d902SRodney W. Grimes break;
8809b50d902SRodney W. Grimes }
8819b50d902SRodney W. Grimes if (asize - size < _POSIX2_LINE_MAX + 1) {
8829b50d902SRodney W. Grimes asize *= 2;
8838e33c0a0SDavid E. O'Brien if ((text = realloc(text, asize)) == NULL)
8848e33c0a0SDavid E. O'Brien err(1, "realloc");
8859b50d902SRodney W. Grimes }
8869b50d902SRodney W. Grimes }
88713ede3c0SBrian Somers text[size] = '\0';
888249a8a80SPedro F. Giffuni if ((p = realloc(text, size + 1)) == NULL)
8898e33c0a0SDavid E. O'Brien err(1, "realloc");
890249a8a80SPedro F. Giffuni return (p);
8919b50d902SRodney W. Grimes }
8929b50d902SRodney W. Grimes
8939b50d902SRodney W. Grimes /*
8949b50d902SRodney W. Grimes * Get an address and return a pointer to the first character after
8959b50d902SRodney W. Grimes * it. Fill the structure pointed to according to the address.
8969b50d902SRodney W. Grimes */
897249a8a80SPedro F. Giffuni static char *
compile_addr(char * p,struct s_addr * a)898249a8a80SPedro F. Giffuni compile_addr(char *p, struct s_addr *a)
8999b50d902SRodney W. Grimes {
900bdd72b70SSuleiman Souhlal char *end, re[_POSIX2_LINE_MAX + 1];
901bdd72b70SSuleiman Souhlal int icase;
902bdd72b70SSuleiman Souhlal
903bdd72b70SSuleiman Souhlal icase = 0;
9049b50d902SRodney W. Grimes
905f879e8d9SBrian Somers a->type = 0;
9069b50d902SRodney W. Grimes switch (*p) {
9079b50d902SRodney W. Grimes case '\\': /* Context address */
9089b50d902SRodney W. Grimes ++p;
9099b50d902SRodney W. Grimes /* FALLTHROUGH */
9109b50d902SRodney W. Grimes case '/': /* Context address */
91176570d0aSDiomidis Spinellis p = compile_delimited(p, re, 0);
9129b50d902SRodney W. Grimes if (p == NULL)
91373a08bb2SPhilippe Charnier errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
914bdd72b70SSuleiman Souhlal /* Check for case insensitive regexp flag */
915bdd72b70SSuleiman Souhlal if (*p == 'I') {
916bdd72b70SSuleiman Souhlal icase = 1;
917bdd72b70SSuleiman Souhlal p++;
918bdd72b70SSuleiman Souhlal }
919bdd72b70SSuleiman Souhlal if (*re == '\0')
920bdd72b70SSuleiman Souhlal a->u.r = NULL;
921bdd72b70SSuleiman Souhlal else
922bdd72b70SSuleiman Souhlal a->u.r = compile_re(re, icase);
9239b50d902SRodney W. Grimes a->type = AT_RE;
9249b50d902SRodney W. Grimes return (p);
9259b50d902SRodney W. Grimes
9269b50d902SRodney W. Grimes case '$': /* Last line */
9279b50d902SRodney W. Grimes a->type = AT_LAST;
9289b50d902SRodney W. Grimes return (p + 1);
929f879e8d9SBrian Somers
930f879e8d9SBrian Somers case '+': /* Relative line number */
931f879e8d9SBrian Somers a->type = AT_RELLINE;
932f879e8d9SBrian Somers p++;
933f879e8d9SBrian Somers /* FALLTHROUGH */
9349b50d902SRodney W. Grimes /* Line number */
9359b50d902SRodney W. Grimes case '0': case '1': case '2': case '3': case '4':
9369b50d902SRodney W. Grimes case '5': case '6': case '7': case '8': case '9':
937f879e8d9SBrian Somers if (a->type == 0)
9389b50d902SRodney W. Grimes a->type = AT_LINE;
9399b50d902SRodney W. Grimes a->u.l = strtol(p, &end, 10);
9409b50d902SRodney W. Grimes return (end);
9419b50d902SRodney W. Grimes default:
94273a08bb2SPhilippe Charnier errx(1, "%lu: %s: expected context address", linenum, fname);
9439b50d902SRodney W. Grimes return (NULL);
9449b50d902SRodney W. Grimes }
9459b50d902SRodney W. Grimes }
9469b50d902SRodney W. Grimes
9479b50d902SRodney W. Grimes /*
9489b50d902SRodney W. Grimes * duptoeol --
9499b50d902SRodney W. Grimes * Return a copy of all the characters up to \n or \0.
9509b50d902SRodney W. Grimes */
9519b50d902SRodney W. Grimes static char *
duptoeol(char * s,const char * ctype)952249a8a80SPedro F. Giffuni duptoeol(char *s, const char *ctype)
9539b50d902SRodney W. Grimes {
9549b50d902SRodney W. Grimes size_t len;
9559b50d902SRodney W. Grimes int ws;
956249a8a80SPedro F. Giffuni char *p, *start;
9579b50d902SRodney W. Grimes
9589b50d902SRodney W. Grimes ws = 0;
9599b50d902SRodney W. Grimes for (start = s; *s != '\0' && *s != '\n'; ++s)
960726aebe5SAndrey A. Chernov ws = isspace((unsigned char)*s);
961249a8a80SPedro F. Giffuni *s = '\0';
9629b50d902SRodney W. Grimes if (ws)
96373a08bb2SPhilippe Charnier warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
964249a8a80SPedro F. Giffuni len = s - start + 1;
965249a8a80SPedro F. Giffuni if ((p = malloc(len)) == NULL)
9668e33c0a0SDavid E. O'Brien err(1, "malloc");
967249a8a80SPedro F. Giffuni return (memmove(p, start, len));
9689b50d902SRodney W. Grimes }
9699b50d902SRodney W. Grimes
9709b50d902SRodney W. Grimes /*
9719b50d902SRodney W. Grimes * Convert goto label names to addresses, and count a and r commands, in
9729b50d902SRodney W. Grimes * the given subset of the script. Free the memory used by labels in b
9739b50d902SRodney W. Grimes * and t commands (but not by :).
9749b50d902SRodney W. Grimes *
9759b50d902SRodney W. Grimes * TODO: Remove } nodes
9769b50d902SRodney W. Grimes */
9779b50d902SRodney W. Grimes static void
fixuplabel(struct s_command * cp,struct s_command * end)978249a8a80SPedro F. Giffuni fixuplabel(struct s_command *cp, struct s_command *end)
9799b50d902SRodney W. Grimes {
9809b50d902SRodney W. Grimes
9819b50d902SRodney W. Grimes for (; cp != end; cp = cp->next)
9829b50d902SRodney W. Grimes switch (cp->code) {
9839b50d902SRodney W. Grimes case 'a':
9849b50d902SRodney W. Grimes case 'r':
9859b50d902SRodney W. Grimes appendnum++;
9869b50d902SRodney W. Grimes break;
9879b50d902SRodney W. Grimes case 'b':
9889b50d902SRodney W. Grimes case 't':
9899b50d902SRodney W. Grimes /* Resolve branch target. */
9909b50d902SRodney W. Grimes if (cp->t == NULL) {
9919b50d902SRodney W. Grimes cp->u.c = NULL;
9929b50d902SRodney W. Grimes break;
9939b50d902SRodney W. Grimes }
9949b50d902SRodney W. Grimes if ((cp->u.c = findlabel(cp->t)) == NULL)
995249a8a80SPedro F. Giffuni errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
9969b50d902SRodney W. Grimes free(cp->t);
9979b50d902SRodney W. Grimes break;
9989b50d902SRodney W. Grimes case '{':
9999b50d902SRodney W. Grimes /* Do interior commands. */
10009b50d902SRodney W. Grimes fixuplabel(cp->u.c, cp->next);
10019b50d902SRodney W. Grimes break;
10029b50d902SRodney W. Grimes }
10039b50d902SRodney W. Grimes }
10049b50d902SRodney W. Grimes
10059b50d902SRodney W. Grimes /*
10069b50d902SRodney W. Grimes * Associate the given command label for later lookup.
10079b50d902SRodney W. Grimes */
10089b50d902SRodney W. Grimes static void
enterlabel(struct s_command * cp)1009e6478125SDag-Erling Smørgrav enterlabel(struct s_command *cp)
10109b50d902SRodney W. Grimes {
1011e74bf75fSMark Murray struct labhash **lhp, *lh;
1012e74bf75fSMark Murray u_char *p;
1013e74bf75fSMark Murray u_int h, c;
10149b50d902SRodney W. Grimes
10159b50d902SRodney W. Grimes for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
10169b50d902SRodney W. Grimes h = (h << 5) + h + c;
10179b50d902SRodney W. Grimes lhp = &labels[h & LHMASK];
10189b50d902SRodney W. Grimes for (lh = *lhp; lh != NULL; lh = lh->lh_next)
10199b50d902SRodney W. Grimes if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
102073a08bb2SPhilippe Charnier errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
10218e33c0a0SDavid E. O'Brien if ((lh = malloc(sizeof *lh)) == NULL)
10228e33c0a0SDavid E. O'Brien err(1, "malloc");
10239b50d902SRodney W. Grimes lh->lh_next = *lhp;
10249b50d902SRodney W. Grimes lh->lh_hash = h;
10259b50d902SRodney W. Grimes lh->lh_cmd = cp;
10269b50d902SRodney W. Grimes lh->lh_ref = 0;
10279b50d902SRodney W. Grimes *lhp = lh;
10289b50d902SRodney W. Grimes }
10299b50d902SRodney W. Grimes
10309b50d902SRodney W. Grimes /*
10319b50d902SRodney W. Grimes * Find the label contained in the command l in the command linked
10329b50d902SRodney W. Grimes * list cp. L is excluded from the search. Return NULL if not found.
10339b50d902SRodney W. Grimes */
10349b50d902SRodney W. Grimes static struct s_command *
findlabel(char * name)1035249a8a80SPedro F. Giffuni findlabel(char *name)
10369b50d902SRodney W. Grimes {
1037e74bf75fSMark Murray struct labhash *lh;
1038249a8a80SPedro F. Giffuni u_char *p;
1039e74bf75fSMark Murray u_int h, c;
10409b50d902SRodney W. Grimes
1041249a8a80SPedro F. Giffuni for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
10429b50d902SRodney W. Grimes h = (h << 5) + h + c;
10439b50d902SRodney W. Grimes for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
10449b50d902SRodney W. Grimes if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
10459b50d902SRodney W. Grimes lh->lh_ref = 1;
10469b50d902SRodney W. Grimes return (lh->lh_cmd);
10479b50d902SRodney W. Grimes }
10489b50d902SRodney W. Grimes }
10499b50d902SRodney W. Grimes return (NULL);
10509b50d902SRodney W. Grimes }
10519b50d902SRodney W. Grimes
10529b50d902SRodney W. Grimes /*
10539b50d902SRodney W. Grimes * Warn about any unused labels. As a side effect, release the label hash
10549b50d902SRodney W. Grimes * table space.
10559b50d902SRodney W. Grimes */
10569b50d902SRodney W. Grimes static void
uselabel(void)1057e6478125SDag-Erling Smørgrav uselabel(void)
10589b50d902SRodney W. Grimes {
1059e74bf75fSMark Murray struct labhash *lh, *next;
1060e74bf75fSMark Murray int i;
10619b50d902SRodney W. Grimes
10629b50d902SRodney W. Grimes for (i = 0; i < LHSZ; i++) {
10639b50d902SRodney W. Grimes for (lh = labels[i]; lh != NULL; lh = next) {
10649b50d902SRodney W. Grimes next = lh->lh_next;
10659b50d902SRodney W. Grimes if (!lh->lh_ref)
106673a08bb2SPhilippe Charnier warnx("%lu: %s: unused label '%s'",
106773a08bb2SPhilippe Charnier linenum, fname, lh->lh_cmd->t);
10689b50d902SRodney W. Grimes free(lh);
10699b50d902SRodney W. Grimes }
10709b50d902SRodney W. Grimes }
10719b50d902SRodney W. Grimes }
1072