1 /* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 1999 Marc Espie 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 /* 31 * functions needed to support gnu-m4 extensions, including a fake freezing 32 */ 33 34 #include <sys/types.h> 35 #include <sys/wait.h> 36 #include <ctype.h> 37 #include <err.h> 38 #include <paths.h> 39 #include <regex.h> 40 #include <stdarg.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <stdint.h> 44 #include <stdio.h> 45 #include <string.h> 46 #include <errno.h> 47 #include <unistd.h> 48 #include <limits.h> 49 #include "mdef.h" 50 #include "stdd.h" 51 #include "extern.h" 52 53 54 int mimic_gnu = 0; 55 56 /* 57 * Support for include path search 58 * First search in the current directory. 59 * If not found, and the path is not absolute, include path kicks in. 60 * First, -I options, in the order found on the command line. 61 * Then M4PATH env variable 62 */ 63 64 static struct path_entry { 65 char *name; 66 struct path_entry *next; 67 } *first, *last; 68 69 static struct path_entry *new_path_entry(const char *); 70 static void ensure_m4path(void); 71 static struct input_file *dopath(struct input_file *, const char *); 72 73 static struct path_entry * 74 new_path_entry(const char *dirname) 75 { 76 struct path_entry *n; 77 78 n = malloc(sizeof(struct path_entry)); 79 if (!n) 80 errx(1, "out of memory"); 81 n->name = xstrdup(dirname); 82 n->next = 0; 83 return n; 84 } 85 86 void 87 addtoincludepath(const char *dirname) 88 { 89 struct path_entry *n; 90 91 n = new_path_entry(dirname); 92 93 if (last) { 94 last->next = n; 95 last = n; 96 } 97 else 98 last = first = n; 99 } 100 101 static void 102 ensure_m4path(void) 103 { 104 static int envpathdone = 0; 105 char *envpath; 106 char *sweep; 107 char *path; 108 109 if (envpathdone) 110 return; 111 envpathdone = TRUE; 112 envpath = getenv("M4PATH"); 113 if (!envpath) 114 return; 115 /* for portability: getenv result is read-only */ 116 envpath = xstrdup(envpath); 117 for (sweep = envpath; 118 (path = strsep(&sweep, ":")) != NULL;) 119 addtoincludepath(path); 120 free(envpath); 121 } 122 123 static 124 struct input_file * 125 dopath(struct input_file *i, const char *filename) 126 { 127 char path[PATH_MAX]; 128 struct path_entry *pe; 129 FILE *f; 130 131 for (pe = first; pe; pe = pe->next) { 132 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 133 if ((f = fopen(path, "r")) != NULL) { 134 set_input(i, f, path); 135 return i; 136 } 137 } 138 return NULL; 139 } 140 141 struct input_file * 142 fopen_trypath(struct input_file *i, const char *filename) 143 { 144 FILE *f; 145 146 f = fopen(filename, "r"); 147 if (f != NULL) { 148 set_input(i, f, filename); 149 return i; 150 } 151 if (filename[0] == '/') 152 return NULL; 153 154 ensure_m4path(); 155 156 return dopath(i, filename); 157 } 158 159 void 160 doindir(const char *argv[], int argc) 161 { 162 ndptr n; 163 struct macro_definition *p = NULL; 164 165 n = lookup(argv[2]); 166 if (n == NULL || (p = macro_getdef(n)) == NULL) 167 m4errx(1, "indir: undefined macro %s.", argv[2]); 168 argv[1] = p->defn; 169 170 eval(argv+1, argc-1, p->type, is_traced(n)); 171 } 172 173 void 174 dobuiltin(const char *argv[], int argc) 175 { 176 ndptr p; 177 178 argv[1] = NULL; 179 p = macro_getbuiltin(argv[2]); 180 if (p != NULL) 181 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 182 else 183 m4errx(1, "unknown builtin %s.", argv[2]); 184 } 185 186 187 /* We need some temporary buffer space, as pb pushes BACK and substitution 188 * proceeds forward... */ 189 static char *buffer; 190 static size_t bufsize = 0; 191 static size_t current = 0; 192 193 static void addchars(const char *, size_t); 194 static void addchar(int); 195 static char *twiddle(const char *); 196 static char *getstring(void); 197 static void exit_regerror(int, regex_t *, const char *); 198 static void do_subst(const char *, regex_t *, const char *, const char *, 199 regmatch_t *); 200 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 201 static void do_regexp(const char *, regex_t *, const char *, const char *, 202 regmatch_t *); 203 static void add_sub(int, const char *, regex_t *, regmatch_t *); 204 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 205 #define addconstantstring(s) addchars((s), sizeof(s)-1) 206 207 static void 208 addchars(const char *c, size_t n) 209 { 210 if (n == 0) 211 return; 212 while (current + n > bufsize) { 213 if (bufsize == 0) 214 bufsize = 1024; 215 else if (bufsize <= SIZE_MAX/2) { 216 bufsize *= 2; 217 } else { 218 errx(1, "size overflow"); 219 } 220 buffer = xrealloc(buffer, bufsize, NULL); 221 } 222 memcpy(buffer+current, c, n); 223 current += n; 224 } 225 226 static void 227 addchar(int c) 228 { 229 if (current +1 > bufsize) { 230 if (bufsize == 0) 231 bufsize = 1024; 232 else 233 bufsize *= 2; 234 buffer = xrealloc(buffer, bufsize, NULL); 235 } 236 buffer[current++] = c; 237 } 238 239 static char * 240 getstring(void) 241 { 242 addchar('\0'); 243 current = 0; 244 return buffer; 245 } 246 247 248 static void 249 exit_regerror(int er, regex_t *re, const char *source) 250 { 251 size_t errlen; 252 char *errbuf; 253 254 errlen = regerror(er, re, NULL, 0); 255 errbuf = xalloc(errlen, 256 "malloc in regerror: %lu", (unsigned long)errlen); 257 regerror(er, re, errbuf, errlen); 258 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 259 } 260 261 /* warnx() plus check to see if we need to change exit code or exit . 262 * -E flag functionality. 263 */ 264 void 265 m4_warnx(const char *fmt, ...) 266 { 267 va_list ap; 268 269 va_start(ap, fmt); 270 warnx(fmt, ap); 271 va_end(ap); 272 273 if (fatal_warns) 274 exit(1); 275 if (error_warns) 276 exit_code = 1; 277 } 278 279 static void 280 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 281 { 282 if (n > (int)re->re_nsub) 283 m4_warnx("No subexpression %d", n); 284 /* Subexpressions that did not match are 285 * not an error. */ 286 else if (pm[n].rm_so != -1 && 287 pm[n].rm_eo != -1) { 288 addchars(string + pm[n].rm_so, 289 pm[n].rm_eo - pm[n].rm_so); 290 } 291 } 292 293 /* Add replacement string to the output buffer, recognizing special 294 * constructs and replacing them with substrings of the original string. 295 */ 296 static void 297 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 298 { 299 const char *p; 300 301 for (p = replace; *p != '\0'; p++) { 302 if (*p == '&' && !mimic_gnu) { 303 add_sub(0, string, re, pm); 304 continue; 305 } 306 if (*p == '\\') { 307 if (p[1] == '\\') { 308 addchar(p[1]); 309 p++; 310 continue; 311 } 312 if (p[1] == '&') { 313 if (mimic_gnu) 314 add_sub(0, string, re, pm); 315 else 316 addchar(p[1]); 317 p++; 318 continue; 319 } 320 if (isdigit((unsigned char)p[1])) { 321 add_sub(*(++p) - '0', string, re, pm); 322 continue; 323 } 324 } 325 addchar(*p); 326 } 327 } 328 329 static void 330 do_subst(const char *string, regex_t *re, const char *source, 331 const char *replace, regmatch_t *pm) 332 { 333 int error; 334 int flags = 0; 335 const char *last_match = NULL; 336 337 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 338 if (pm[0].rm_eo != 0) { 339 if (string[pm[0].rm_eo-1] == '\n') 340 flags = 0; 341 else 342 flags = REG_NOTBOL; 343 } 344 345 /* NULL length matches are special... We use the `vi-mode' 346 * rule: don't allow a NULL-match at the last match 347 * position. 348 */ 349 if (pm[0].rm_so == pm[0].rm_eo && 350 string + pm[0].rm_so == last_match) { 351 if (*string == '\0') 352 return; 353 addchar(*string); 354 if (*string++ == '\n') 355 flags = 0; 356 else 357 flags = REG_NOTBOL; 358 continue; 359 } 360 last_match = string + pm[0].rm_so; 361 addchars(string, pm[0].rm_so); 362 add_replace(string, re, replace, pm); 363 string += pm[0].rm_eo; 364 } 365 if (error != REG_NOMATCH) 366 exit_regerror(error, re, source); 367 pbstr(string); 368 } 369 370 static void 371 do_regexp(const char *string, regex_t *re, const char *source, 372 const char *replace, regmatch_t *pm) 373 { 374 int error; 375 376 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 377 case 0: 378 add_replace(string, re, replace, pm); 379 pbstr(getstring()); 380 break; 381 case REG_NOMATCH: 382 break; 383 default: 384 exit_regerror(error, re, source); 385 } 386 } 387 388 static void 389 do_regexpindex(const char *string, regex_t *re, const char *source, 390 regmatch_t *pm) 391 { 392 int error; 393 394 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 395 case 0: 396 pbunsigned(pm[0].rm_so); 397 break; 398 case REG_NOMATCH: 399 pbnum(-1); 400 break; 401 default: 402 exit_regerror(error, re, source); 403 } 404 } 405 406 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 407 * says. So we twiddle with the regexp before passing it to regcomp. 408 */ 409 static char * 410 twiddle(const char *p) 411 { 412 /* + at start of regexp is a normal character for Gnu m4 */ 413 if (*p == '^') { 414 addchar(*p); 415 p++; 416 } 417 if (*p == '+') { 418 addchar('\\'); 419 } 420 /* This could use strcspn for speed... */ 421 while (*p != '\0') { 422 if (*p == '\\') { 423 switch(p[1]) { 424 case '(': 425 case ')': 426 case '|': 427 addchar(p[1]); 428 break; 429 case 'w': 430 addconstantstring("[_a-zA-Z0-9]"); 431 break; 432 case 'W': 433 addconstantstring("[^_a-zA-Z0-9]"); 434 break; 435 case '<': 436 addconstantstring("[[:<:]]"); 437 break; 438 case '>': 439 addconstantstring("[[:>:]]"); 440 break; 441 default: 442 addchars(p, 2); 443 break; 444 } 445 p+=2; 446 continue; 447 } 448 if (*p == '(' || *p == ')' || *p == '|') 449 addchar('\\'); 450 451 addchar(*p); 452 p++; 453 } 454 return getstring(); 455 } 456 457 /* patsubst(string, regexp, opt replacement) */ 458 /* argv[2]: string 459 * argv[3]: regexp 460 * argv[4]: opt rep 461 */ 462 void 463 dopatsubst(const char *argv[], int argc) 464 { 465 if (argc <= 3) { 466 m4_warnx("Too few arguments to patsubst"); 467 return; 468 } 469 /* special case: empty regexp */ 470 if (argv[3][0] == '\0') { 471 const char *s; 472 size_t len; 473 if (argc > 4 && argv[4]) 474 len = strlen(argv[4]); 475 else 476 len = 0; 477 for (s = argv[2]; *s != '\0'; s++) { 478 addchars(argv[4], len); 479 addchar(*s); 480 } 481 } else { 482 int error; 483 regex_t re; 484 regmatch_t *pmatch; 485 int mode = REG_EXTENDED; 486 const char *source; 487 size_t l = strlen(argv[3]); 488 489 if (!mimic_gnu || 490 (argv[3][0] == '^') || 491 (l > 0 && argv[3][l-1] == '$')) 492 mode |= REG_NEWLINE; 493 494 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 495 error = regcomp(&re, source, mode); 496 if (error != 0) 497 exit_regerror(error, &re, source); 498 499 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 500 NULL); 501 do_subst(argv[2], &re, source, 502 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 503 free(pmatch); 504 regfree(&re); 505 } 506 pbstr(getstring()); 507 } 508 509 void 510 doregexp(const char *argv[], int argc) 511 { 512 int error; 513 regex_t re; 514 regmatch_t *pmatch; 515 const char *source; 516 517 if (argc <= 3) { 518 m4_warnx("Too few arguments to regexp"); 519 return; 520 } 521 /* special gnu case */ 522 if (argv[3][0] == '\0' && mimic_gnu) { 523 if (argc == 4 || argv[4] == NULL) 524 return; 525 else 526 pbstr(argv[4]); 527 } 528 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 529 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 530 if (error != 0) 531 exit_regerror(error, &re, source); 532 533 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 534 if (argc == 4 || argv[4] == NULL) 535 do_regexpindex(argv[2], &re, source, pmatch); 536 else 537 do_regexp(argv[2], &re, source, argv[4], pmatch); 538 free(pmatch); 539 regfree(&re); 540 } 541 542 void 543 doformat(const char *argv[], int argc) 544 { 545 const char *format = argv[2]; 546 int pos = 3; 547 int left_padded; 548 long width; 549 size_t l; 550 const char *thisarg = NULL; 551 char temp[2]; 552 long extra; 553 554 while (*format != 0) { 555 if (*format != '%') { 556 addchar(*format++); 557 continue; 558 } 559 560 format++; 561 if (*format == '%') { 562 addchar(*format++); 563 continue; 564 } 565 if (*format == 0) { 566 addchar('%'); 567 break; 568 } 569 570 if (*format == '*') { 571 format++; 572 if (pos >= argc) 573 m4errx(1, 574 "Format with too many format specifiers."); 575 width = strtol(argv[pos++], NULL, 10); 576 } else { 577 width = strtol(format, __DECONST(char **,&format), 10); 578 } 579 if (width < 0) { 580 left_padded = 1; 581 width = -width; 582 } else { 583 left_padded = 0; 584 } 585 if (*format == '.') { 586 format++; 587 if (*format == '*') { 588 format++; 589 if (pos >= argc) 590 m4errx(1, 591 "Format with too many format specifiers."); 592 extra = strtol(argv[pos++], NULL, 10); 593 } else { 594 extra = strtol(format, __DECONST(char **, &format), 10); 595 } 596 } else { 597 extra = LONG_MAX; 598 } 599 if (pos >= argc) 600 m4errx(1, "Format with too many format specifiers."); 601 switch(*format) { 602 case 's': 603 thisarg = argv[pos++]; 604 break; 605 case 'c': 606 temp[0] = strtoul(argv[pos++], NULL, 10); 607 temp[1] = 0; 608 thisarg = temp; 609 break; 610 default: 611 m4errx(1, "Unsupported format specification: %s.", 612 argv[2]); 613 } 614 format++; 615 l = strlen(thisarg); 616 if ((long)l > extra) 617 l = extra; 618 if (!left_padded) { 619 while ((long)l < width--) 620 addchar(' '); 621 } 622 addchars(thisarg, l); 623 if (left_padded) { 624 while ((long)l < width--) 625 addchar(' '); 626 } 627 } 628 pbstr(getstring()); 629 } 630 631 void 632 doesyscmd(const char *cmd) 633 { 634 int p[2]; 635 pid_t cpid; 636 char *argv[4]; 637 int cc; 638 int status; 639 640 /* Follow gnu m4 documentation: first flush buffers. */ 641 fflush(NULL); 642 643 argv[0] = __DECONST(char *, "sh"); 644 argv[1] = __DECONST(char *, "-c"); 645 argv[2] = __DECONST(char *, cmd); 646 argv[3] = NULL; 647 648 /* Just set up standard output, share stderr and stdin with m4 */ 649 if (pipe(p) == -1) 650 err(1, "bad pipe"); 651 switch(cpid = fork()) { 652 case -1: 653 err(1, "bad fork"); 654 /* NOTREACHED */ 655 case 0: 656 (void) close(p[0]); 657 (void) dup2(p[1], 1); 658 (void) close(p[1]); 659 execv(_PATH_BSHELL, argv); 660 exit(1); 661 default: 662 /* Read result in two stages, since m4's buffer is 663 * pushback-only. */ 664 (void) close(p[1]); 665 do { 666 char result[BUFSIZE]; 667 cc = read(p[0], result, sizeof result); 668 if (cc > 0) 669 addchars(result, cc); 670 } while (cc > 0 || (cc == -1 && errno == EINTR)); 671 672 (void) close(p[0]); 673 while (waitpid(cpid, &status, 0) == -1) { 674 if (errno != EINTR) 675 break; 676 } 677 pbstr(getstring()); 678 } 679 } 680 681 void 682 getdivfile(const char *name) 683 { 684 FILE *f; 685 int c; 686 687 f = fopen(name, "r"); 688 if (!f) 689 return; 690 691 while ((c = getc(f))!= EOF) 692 putc(c, active); 693 (void) fclose(f); 694 } 695