1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #ifndef lint 42 static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; 43 #endif 44 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <limits.h> 53 #include <regex.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <wchar.h> 58 59 #include "defs.h" 60 #include "extern.h" 61 62 #define LHSZ 128 63 #define LHMASK (LHSZ - 1) 64 static struct labhash { 65 struct labhash *lh_next; 66 u_int lh_hash; 67 struct s_command *lh_cmd; 68 int lh_ref; 69 } *labels[LHSZ]; 70 71 static char *compile_addr(char *, struct s_addr *); 72 static char *compile_ccl(char **, char *); 73 static char *compile_delimited(char *, char *); 74 static char *compile_flags(char *, struct s_subst *); 75 static char *compile_re(char *, regex_t **); 76 static char *compile_subst(char *, struct s_subst *); 77 static char *compile_text(void); 78 static char *compile_tr(char *, struct s_tr **); 79 static struct s_command 80 **compile_stream(struct s_command **); 81 static char *duptoeol(char *, const char *); 82 static void enterlabel(struct s_command *); 83 static struct s_command 84 *findlabel(char *); 85 static void fixuplabel(struct s_command *, struct s_command *); 86 static void uselabel(void); 87 88 /* 89 * Command specification. This is used to drive the command parser. 90 */ 91 struct s_format { 92 char code; /* Command code */ 93 int naddr; /* Number of address args */ 94 enum e_args args; /* Argument type */ 95 }; 96 97 static struct s_format cmd_fmts[] = { 98 {'{', 2, GROUP}, 99 {'}', 0, ENDGROUP}, 100 {'a', 1, TEXT}, 101 {'b', 2, BRANCH}, 102 {'c', 2, TEXT}, 103 {'d', 2, EMPTY}, 104 {'D', 2, EMPTY}, 105 {'g', 2, EMPTY}, 106 {'G', 2, EMPTY}, 107 {'h', 2, EMPTY}, 108 {'H', 2, EMPTY}, 109 {'i', 1, TEXT}, 110 {'l', 2, EMPTY}, 111 {'n', 2, EMPTY}, 112 {'N', 2, EMPTY}, 113 {'p', 2, EMPTY}, 114 {'P', 2, EMPTY}, 115 {'q', 1, EMPTY}, 116 {'r', 1, RFILE}, 117 {'s', 2, SUBST}, 118 {'t', 2, BRANCH}, 119 {'w', 2, WFILE}, 120 {'x', 2, EMPTY}, 121 {'y', 2, TR}, 122 {'!', 2, NONSEL}, 123 {':', 0, LABEL}, 124 {'#', 0, COMMENT}, 125 {'=', 1, EMPTY}, 126 {'\0', 0, COMMENT}, 127 }; 128 129 /* The compiled program. */ 130 struct s_command *prog; 131 132 /* 133 * Compile the program into prog. 134 * Initialise appends. 135 */ 136 void 137 compile(void) 138 { 139 *compile_stream(&prog) = NULL; 140 fixuplabel(prog, NULL); 141 uselabel(); 142 if (appendnum == 0) 143 appends = NULL; 144 else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) == 145 NULL) 146 err(1, "malloc"); 147 if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL) 148 err(1, "malloc"); 149 } 150 151 #define EATSPACE() do { \ 152 if (p) \ 153 while (*p && isspace((unsigned char)*p)) \ 154 p++; \ 155 } while (0) 156 157 static struct s_command ** 158 compile_stream(struct s_command **link) 159 { 160 char *p; 161 static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 162 struct s_command *cmd, *cmd2, *stack; 163 struct s_format *fp; 164 int naddr; /* Number of addresses */ 165 166 stack = 0; 167 for (;;) { 168 if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) { 169 if (stack != 0) 170 errx(1, "%lu: %s: unexpected EOF (pending }'s)", 171 linenum, fname); 172 return (link); 173 } 174 175 semicolon: EATSPACE(); 176 if (p) { 177 if (*p == '#' || *p == '\0') 178 continue; 179 else if (*p == ';') { 180 p++; 181 goto semicolon; 182 } 183 } 184 if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL) 185 err(1, "malloc"); 186 link = &cmd->next; 187 cmd->nonsel = cmd->inrange = 0; 188 /* First parse the addresses */ 189 naddr = 0; 190 191 /* Valid characters to start an address */ 192 #define addrchar(c) (strchr("0123456789/\\$", (c))) 193 if (addrchar(*p)) { 194 naddr++; 195 if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL) 196 err(1, "malloc"); 197 p = compile_addr(p, cmd->a1); 198 EATSPACE(); /* EXTENSION */ 199 if (*p == ',') { 200 p++; 201 EATSPACE(); /* EXTENSION */ 202 naddr++; 203 if ((cmd->a2 = malloc(sizeof(struct s_addr))) 204 == NULL) 205 err(1, "malloc"); 206 p = compile_addr(p, cmd->a2); 207 EATSPACE(); 208 } else 209 cmd->a2 = 0; 210 } else 211 cmd->a1 = cmd->a2 = 0; 212 213 nonsel: /* Now parse the command */ 214 if (!*p) 215 errx(1, "%lu: %s: command expected", linenum, fname); 216 cmd->code = *p; 217 for (fp = cmd_fmts; fp->code; fp++) 218 if (fp->code == *p) 219 break; 220 if (!fp->code) 221 errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p); 222 if (naddr > fp->naddr) 223 errx(1, 224 "%lu: %s: command %c expects up to %d address(es), found %d", 225 linenum, fname, *p, fp->naddr, naddr); 226 switch (fp->args) { 227 case NONSEL: /* ! */ 228 p++; 229 EATSPACE(); 230 cmd->nonsel = ! cmd->nonsel; 231 goto nonsel; 232 case GROUP: /* { */ 233 p++; 234 EATSPACE(); 235 cmd->next = stack; 236 stack = cmd; 237 link = &cmd->u.c; 238 if (*p) 239 goto semicolon; 240 break; 241 case ENDGROUP: 242 /* 243 * Short-circuit command processing, since end of 244 * group is really just a noop. 245 */ 246 cmd->nonsel = 1; 247 if (stack == 0) 248 errx(1, "%lu: %s: unexpected }", linenum, fname); 249 cmd2 = stack; 250 stack = cmd2->next; 251 cmd2->next = cmd; 252 /*FALLTHROUGH*/ 253 case EMPTY: /* d D g G h H l n N p P q x = \0 */ 254 p++; 255 EATSPACE(); 256 if (*p == ';') { 257 p++; 258 link = &cmd->next; 259 goto semicolon; 260 } 261 if (*p) 262 errx(1, "%lu: %s: extra characters at the end of %c command", 263 linenum, fname, cmd->code); 264 break; 265 case TEXT: /* a c i */ 266 p++; 267 EATSPACE(); 268 if (*p != '\\') 269 errx(1, 270 "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); 271 p++; 272 EATSPACE(); 273 if (*p) 274 errx(1, 275 "%lu: %s: extra characters after \\ at the end of %c command", 276 linenum, fname, cmd->code); 277 cmd->t = compile_text(); 278 break; 279 case COMMENT: /* \0 # */ 280 break; 281 case WFILE: /* w */ 282 p++; 283 EATSPACE(); 284 if (*p == '\0') 285 errx(1, "%lu: %s: filename expected", linenum, fname); 286 cmd->t = duptoeol(p, "w command"); 287 if (aflag) 288 cmd->u.fd = -1; 289 else if ((cmd->u.fd = open(p, 290 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 291 DEFFILEMODE)) == -1) 292 err(1, "%s", p); 293 break; 294 case RFILE: /* r */ 295 p++; 296 EATSPACE(); 297 if (*p == '\0') 298 errx(1, "%lu: %s: filename expected", linenum, fname); 299 else 300 cmd->t = duptoeol(p, "read command"); 301 break; 302 case BRANCH: /* b t */ 303 p++; 304 EATSPACE(); 305 if (*p == '\0') 306 cmd->t = NULL; 307 else 308 cmd->t = duptoeol(p, "branch"); 309 break; 310 case LABEL: /* : */ 311 p++; 312 EATSPACE(); 313 cmd->t = duptoeol(p, "label"); 314 if (strlen(p) == 0) 315 errx(1, "%lu: %s: empty label", linenum, fname); 316 enterlabel(cmd); 317 break; 318 case SUBST: /* s */ 319 p++; 320 if (*p == '\0' || *p == '\\') 321 errx(1, 322 "%lu: %s: substitute pattern can not be delimited by newline or backslash", 323 linenum, fname); 324 if ((cmd->u.s = malloc(sizeof(struct s_subst))) == NULL) 325 err(1, "malloc"); 326 p = compile_re(p, &cmd->u.s->re); 327 if (p == NULL) 328 errx(1, 329 "%lu: %s: unterminated substitute pattern", linenum, fname); 330 --p; 331 p = compile_subst(p, cmd->u.s); 332 p = compile_flags(p, cmd->u.s); 333 EATSPACE(); 334 if (*p == ';') { 335 p++; 336 link = &cmd->next; 337 goto semicolon; 338 } 339 break; 340 case TR: /* y */ 341 p++; 342 p = compile_tr(p, &cmd->u.y); 343 EATSPACE(); 344 if (*p == ';') { 345 p++; 346 link = &cmd->next; 347 goto semicolon; 348 } 349 if (*p) 350 errx(1, 351 "%lu: %s: extra text at the end of a transform command", linenum, fname); 352 break; 353 } 354 } 355 } 356 357 /* 358 * Get a delimited string. P points to the delimeter of the string; d points 359 * to a buffer area. Newline and delimiter escapes are processed; other 360 * escapes are ignored. 361 * 362 * Returns a pointer to the first character after the final delimiter or NULL 363 * in the case of a non-terminated string. The character array d is filled 364 * with the processed string. 365 */ 366 static char * 367 compile_delimited(char *p, char *d) 368 { 369 char c; 370 371 c = *p++; 372 if (c == '\0') 373 return (NULL); 374 else if (c == '\\') 375 errx(1, "%lu: %s: \\ can not be used as a string delimiter", 376 linenum, fname); 377 else if (c == '\n') 378 errx(1, "%lu: %s: newline can not be used as a string delimiter", 379 linenum, fname); 380 while (*p) { 381 if (*p == '[') { 382 if ((d = compile_ccl(&p, d)) == NULL) 383 errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); 384 continue; 385 } else if (*p == '\\' && p[1] == '[') { 386 *d++ = *p++; 387 } else if (*p == '\\' && p[1] == c) 388 p++; 389 else if (*p == '\\' && p[1] == 'n') { 390 *d++ = '\n'; 391 p += 2; 392 continue; 393 } else if (*p == '\\' && p[1] == '\\') 394 *d++ = *p++; 395 else if (*p == c) { 396 *d = '\0'; 397 return (p + 1); 398 } 399 *d++ = *p++; 400 } 401 return (NULL); 402 } 403 404 405 /* compile_ccl: expand a POSIX character class */ 406 static char * 407 compile_ccl(char **sp, char *t) 408 { 409 int c, d; 410 char *s = *sp; 411 412 *t++ = *s++; 413 if (*s == '^') 414 *t++ = *s++; 415 if (*s == ']') 416 *t++ = *s++; 417 for (; *s && (*t = *s) != ']'; s++, t++) 418 if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { 419 *++t = *++s, t++, s++; 420 for (c = *s; (*t = *s) != ']' || c != d; s++, t++) 421 if ((c = *s) == '\0') 422 return NULL; 423 } else if (*s == '\\' && s[1] == 'n') 424 *t = '\n', s++; 425 return (*s == ']') ? *sp = ++s, ++t : NULL; 426 } 427 428 /* 429 * Get a regular expression. P points to the delimiter of the regular 430 * expression; repp points to the address of a regexp pointer. Newline 431 * and delimiter escapes are processed; other escapes are ignored. 432 * Returns a pointer to the first character after the final delimiter 433 * or NULL in the case of a non terminated regular expression. The regexp 434 * pointer is set to the compiled regular expression. 435 * Cflags are passed to regcomp. 436 */ 437 static char * 438 compile_re(char *p, regex_t **repp) 439 { 440 int eval; 441 char re[_POSIX2_LINE_MAX + 1]; 442 443 p = compile_delimited(p, re); 444 if (p && strlen(re) == 0) { 445 *repp = NULL; 446 return (p); 447 } 448 if ((*repp = malloc(sizeof(regex_t))) == NULL) 449 err(1, "malloc"); 450 if (p && (eval = regcomp(*repp, re, rflags)) != 0) 451 errx(1, "%lu: %s: RE error: %s", 452 linenum, fname, strregerror(eval, *repp)); 453 if (maxnsub < (*repp)->re_nsub) 454 maxnsub = (*repp)->re_nsub; 455 return (p); 456 } 457 458 /* 459 * Compile the substitution string of a regular expression and set res to 460 * point to a saved copy of it. Nsub is the number of parenthesized regular 461 * expressions. 462 */ 463 static char * 464 compile_subst(char *p, struct s_subst *s) 465 { 466 static char lbuf[_POSIX2_LINE_MAX + 1]; 467 int asize, size; 468 u_char ref; 469 char c, *text, *op, *sp; 470 int more = 1, sawesc = 0; 471 472 c = *p++; /* Terminator character */ 473 if (c == '\0') 474 return (NULL); 475 476 s->maxbref = 0; 477 s->linenum = linenum; 478 asize = 2 * _POSIX2_LINE_MAX + 1; 479 if ((text = malloc(asize)) == NULL) 480 err(1, "malloc"); 481 size = 0; 482 do { 483 op = sp = text + size; 484 for (; *p; p++) { 485 if (*p == '\\' || sawesc) { 486 /* 487 * If this is a continuation from the last 488 * buffer, we won't have a character to 489 * skip over. 490 */ 491 if (sawesc) 492 sawesc = 0; 493 else 494 p++; 495 496 if (*p == '\0') { 497 /* 498 * This escaped character is continued 499 * in the next part of the line. Note 500 * this fact, then cause the loop to 501 * exit w/ normal EOL case and reenter 502 * above with the new buffer. 503 */ 504 sawesc = 1; 505 p--; 506 continue; 507 } else if (strchr("123456789", *p) != NULL) { 508 *sp++ = '\\'; 509 ref = *p - '0'; 510 if (s->re != NULL && 511 ref > s->re->re_nsub) 512 errx(1, "%lu: %s: \\%c not defined in the RE", 513 linenum, fname, *p); 514 if (s->maxbref < ref) 515 s->maxbref = ref; 516 } else if (*p == '&' || *p == '\\') 517 *sp++ = '\\'; 518 } else if (*p == c) { 519 if (*++p == '\0' && more) { 520 if (cu_fgets(lbuf, sizeof(lbuf), &more)) 521 p = lbuf; 522 } 523 *sp++ = '\0'; 524 size += sp - op; 525 if ((s->new = realloc(text, size)) == NULL) 526 err(1, "realloc"); 527 return (p); 528 } else if (*p == '\n') { 529 errx(1, 530 "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); 531 /* NOTREACHED */ 532 } 533 *sp++ = *p; 534 } 535 size += sp - op; 536 if (asize - size < _POSIX2_LINE_MAX + 1) { 537 asize *= 2; 538 if ((text = realloc(text, asize)) == NULL) 539 err(1, "realloc"); 540 } 541 } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); 542 errx(1, "%lu: %s: unterminated substitute in regular expression", 543 linenum, fname); 544 /* NOTREACHED */ 545 } 546 547 /* 548 * Compile the flags of the s command 549 */ 550 static char * 551 compile_flags(char *p, struct s_subst *s) 552 { 553 int gn; /* True if we have seen g or n */ 554 char wfile[_POSIX2_LINE_MAX + 1], *q; 555 556 s->n = 1; /* Default */ 557 s->p = 0; 558 s->wfile = NULL; 559 s->wfd = -1; 560 for (gn = 0;;) { 561 EATSPACE(); /* EXTENSION */ 562 switch (*p) { 563 case 'g': 564 if (gn) 565 errx(1, 566 "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 567 gn = 1; 568 s->n = 0; 569 break; 570 case '\0': 571 case '\n': 572 case ';': 573 return (p); 574 case 'p': 575 s->p = 1; 576 break; 577 case '1': case '2': case '3': 578 case '4': case '5': case '6': 579 case '7': case '8': case '9': 580 if (gn) 581 errx(1, 582 "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 583 gn = 1; 584 /* XXX Check for overflow */ 585 s->n = (int)strtol(p, &p, 10); 586 break; 587 case 'w': 588 p++; 589 #ifdef HISTORIC_PRACTICE 590 if (*p != ' ') { 591 warnx("%lu: %s: space missing before w wfile", linenum, fname); 592 return (p); 593 } 594 #endif 595 EATSPACE(); 596 q = wfile; 597 while (*p) { 598 if (*p == '\n') 599 break; 600 *q++ = *p++; 601 } 602 *q = '\0'; 603 if (q == wfile) 604 errx(1, "%lu: %s: no wfile specified", linenum, fname); 605 s->wfile = strdup(wfile); 606 if (!aflag && (s->wfd = open(wfile, 607 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 608 DEFFILEMODE)) == -1) 609 err(1, "%s", wfile); 610 return (p); 611 default: 612 errx(1, "%lu: %s: bad flag in substitute command: '%c'", 613 linenum, fname, *p); 614 break; 615 } 616 p++; 617 } 618 } 619 620 /* 621 * Compile a translation set of strings into a lookup table. 622 */ 623 static char * 624 compile_tr(char *p, struct s_tr **py) 625 { 626 struct s_tr *y; 627 int i; 628 const char *op, *np; 629 char old[_POSIX2_LINE_MAX + 1]; 630 char new[_POSIX2_LINE_MAX + 1]; 631 size_t oclen, oldlen, nclen, newlen; 632 mbstate_t mbs1, mbs2; 633 634 if ((*py = y = malloc(sizeof(*y))) == NULL) 635 err(1, NULL); 636 y->multis = NULL; 637 y->nmultis = 0; 638 639 if (*p == '\0' || *p == '\\') 640 errx(1, 641 "%lu: %s: transform pattern can not be delimited by newline or backslash", 642 linenum, fname); 643 p = compile_delimited(p, old); 644 if (p == NULL) 645 errx(1, "%lu: %s: unterminated transform source string", 646 linenum, fname); 647 p = compile_delimited(--p, new); 648 if (p == NULL) 649 errx(1, "%lu: %s: unterminated transform target string", 650 linenum, fname); 651 EATSPACE(); 652 op = old; 653 oldlen = mbsrtowcs(NULL, &op, 0, NULL); 654 if (oldlen == (size_t)-1) 655 err(1, NULL); 656 np = new; 657 newlen = mbsrtowcs(NULL, &np, 0, NULL); 658 if (newlen == (size_t)-1) 659 err(1, NULL); 660 if (newlen != oldlen) 661 errx(1, "%lu: %s: transform strings are not the same length", 662 linenum, fname); 663 if (MB_CUR_MAX == 1) { 664 /* 665 * The single-byte encoding case is easy: generate a 666 * lookup table. 667 */ 668 for (i = 0; i <= UCHAR_MAX; i++) 669 y->bytetab[i] = (char)i; 670 for (; *op; op++, np++) 671 y->bytetab[(u_char)*op] = *np; 672 } else { 673 /* 674 * Multi-byte encoding case: generate a lookup table as 675 * above, but only for single-byte characters. The first 676 * bytes of multi-byte characters have their lookup table 677 * entries set to 0, which causes do_tr() to search through 678 * an auxiliary vector of multi-byte mappings. 679 */ 680 memset(&mbs1, 0, sizeof(mbs1)); 681 memset(&mbs2, 0, sizeof(mbs2)); 682 for (i = 0; i <= UCHAR_MAX; i++) 683 y->bytetab[i] = (btowc(i) != WEOF) ? i : 0; 684 while (*op != '\0') { 685 oclen = mbrlen(op, MB_LEN_MAX, &mbs1); 686 if (oclen == (size_t)-1 || oclen == (size_t)-2) 687 errc(1, EILSEQ, NULL); 688 nclen = mbrlen(np, MB_LEN_MAX, &mbs2); 689 if (nclen == (size_t)-1 || nclen == (size_t)-2) 690 errc(1, EILSEQ, NULL); 691 if (oclen == 1 && nclen == 1) 692 y->bytetab[(u_char)*op] = *np; 693 else { 694 y->bytetab[(u_char)*op] = 0; 695 y->multis = realloc(y->multis, 696 (y->nmultis + 1) * sizeof(*y->multis)); 697 if (y->multis == NULL) 698 err(1, NULL); 699 i = y->nmultis++; 700 y->multis[i].fromlen = oclen; 701 memcpy(y->multis[i].from, op, oclen); 702 y->multis[i].tolen = nclen; 703 memcpy(y->multis[i].to, np, nclen); 704 } 705 op += oclen; 706 np += nclen; 707 } 708 } 709 return (p); 710 } 711 712 /* 713 * Compile the text following an a or i command. 714 */ 715 static char * 716 compile_text(void) 717 { 718 int asize, esc_nl, size; 719 char *text, *p, *op, *s; 720 char lbuf[_POSIX2_LINE_MAX + 1]; 721 722 asize = 2 * _POSIX2_LINE_MAX + 1; 723 if ((text = malloc(asize)) == NULL) 724 err(1, "malloc"); 725 size = 0; 726 while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { 727 op = s = text + size; 728 p = lbuf; 729 EATSPACE(); 730 for (esc_nl = 0; *p != '\0'; p++) { 731 if (*p == '\\' && p[1] != '\0' && *++p == '\n') 732 esc_nl = 1; 733 *s++ = *p; 734 } 735 size += s - op; 736 if (!esc_nl) { 737 *s = '\0'; 738 break; 739 } 740 if (asize - size < _POSIX2_LINE_MAX + 1) { 741 asize *= 2; 742 if ((text = realloc(text, asize)) == NULL) 743 err(1, "realloc"); 744 } 745 } 746 text[size] = '\0'; 747 if ((p = realloc(text, size + 1)) == NULL) 748 err(1, "realloc"); 749 return (p); 750 } 751 752 /* 753 * Get an address and return a pointer to the first character after 754 * it. Fill the structure pointed to according to the address. 755 */ 756 static char * 757 compile_addr(char *p, struct s_addr *a) 758 { 759 char *end; 760 761 switch (*p) { 762 case '\\': /* Context address */ 763 ++p; 764 /* FALLTHROUGH */ 765 case '/': /* Context address */ 766 p = compile_re(p, &a->u.r); 767 if (p == NULL) 768 errx(1, "%lu: %s: unterminated regular expression", linenum, fname); 769 a->type = AT_RE; 770 return (p); 771 772 case '$': /* Last line */ 773 a->type = AT_LAST; 774 return (p + 1); 775 /* Line number */ 776 case '0': case '1': case '2': case '3': case '4': 777 case '5': case '6': case '7': case '8': case '9': 778 a->type = AT_LINE; 779 a->u.l = strtol(p, &end, 10); 780 return (end); 781 default: 782 errx(1, "%lu: %s: expected context address", linenum, fname); 783 return (NULL); 784 } 785 } 786 787 /* 788 * duptoeol -- 789 * Return a copy of all the characters up to \n or \0. 790 */ 791 static char * 792 duptoeol(char *s, const char *ctype) 793 { 794 size_t len; 795 int ws; 796 char *p, *start; 797 798 ws = 0; 799 for (start = s; *s != '\0' && *s != '\n'; ++s) 800 ws = isspace((unsigned char)*s); 801 *s = '\0'; 802 if (ws) 803 warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); 804 len = s - start + 1; 805 if ((p = malloc(len)) == NULL) 806 err(1, "malloc"); 807 return (memmove(p, start, len)); 808 } 809 810 /* 811 * Convert goto label names to addresses, and count a and r commands, in 812 * the given subset of the script. Free the memory used by labels in b 813 * and t commands (but not by :). 814 * 815 * TODO: Remove } nodes 816 */ 817 static void 818 fixuplabel(struct s_command *cp, struct s_command *end) 819 { 820 821 for (; cp != end; cp = cp->next) 822 switch (cp->code) { 823 case 'a': 824 case 'r': 825 appendnum++; 826 break; 827 case 'b': 828 case 't': 829 /* Resolve branch target. */ 830 if (cp->t == NULL) { 831 cp->u.c = NULL; 832 break; 833 } 834 if ((cp->u.c = findlabel(cp->t)) == NULL) 835 errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); 836 free(cp->t); 837 break; 838 case '{': 839 /* Do interior commands. */ 840 fixuplabel(cp->u.c, cp->next); 841 break; 842 } 843 } 844 845 /* 846 * Associate the given command label for later lookup. 847 */ 848 static void 849 enterlabel(struct s_command *cp) 850 { 851 struct labhash **lhp, *lh; 852 u_char *p; 853 u_int h, c; 854 855 for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) 856 h = (h << 5) + h + c; 857 lhp = &labels[h & LHMASK]; 858 for (lh = *lhp; lh != NULL; lh = lh->lh_next) 859 if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) 860 errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); 861 if ((lh = malloc(sizeof *lh)) == NULL) 862 err(1, "malloc"); 863 lh->lh_next = *lhp; 864 lh->lh_hash = h; 865 lh->lh_cmd = cp; 866 lh->lh_ref = 0; 867 *lhp = lh; 868 } 869 870 /* 871 * Find the label contained in the command l in the command linked 872 * list cp. L is excluded from the search. Return NULL if not found. 873 */ 874 static struct s_command * 875 findlabel(char *name) 876 { 877 struct labhash *lh; 878 u_char *p; 879 u_int h, c; 880 881 for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) 882 h = (h << 5) + h + c; 883 for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { 884 if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { 885 lh->lh_ref = 1; 886 return (lh->lh_cmd); 887 } 888 } 889 return (NULL); 890 } 891 892 /* 893 * Warn about any unused labels. As a side effect, release the label hash 894 * table space. 895 */ 896 static void 897 uselabel(void) 898 { 899 struct labhash *lh, *next; 900 int i; 901 902 for (i = 0; i < LHSZ; i++) { 903 for (lh = labels[i]; lh != NULL; lh = next) { 904 next = lh->lh_next; 905 if (!lh->lh_ref) 906 warnx("%lu: %s: unused label '%s'", 907 linenum, fname, lh->lh_cmd->t); 908 free(lh); 909 } 910 } 911 } 912