1 /* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 1999 Marc Espie 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * functions needed to support gnu-m4 extensions, including a fake freezing 34 */ 35 36 #include <sys/types.h> 37 #include <sys/wait.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <paths.h> 41 #include <regex.h> 42 #include <stdarg.h> 43 #include <stddef.h> 44 #include <stdlib.h> 45 #include <stdint.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include <errno.h> 49 #include <unistd.h> 50 #include <limits.h> 51 #include "mdef.h" 52 #include "stdd.h" 53 #include "extern.h" 54 55 56 int mimic_gnu = 0; 57 58 /* 59 * Support for include path search 60 * First search in the current directory. 61 * If not found, and the path is not absolute, include path kicks in. 62 * First, -I options, in the order found on the command line. 63 * Then M4PATH env variable 64 */ 65 66 static struct path_entry { 67 char *name; 68 struct path_entry *next; 69 } *first, *last; 70 71 static struct path_entry *new_path_entry(const char *); 72 static void ensure_m4path(void); 73 static struct input_file *dopath(struct input_file *, const char *); 74 75 static struct path_entry * 76 new_path_entry(const char *dirname) 77 { 78 struct path_entry *n; 79 80 n = malloc(sizeof(struct path_entry)); 81 if (!n) 82 errx(1, "out of memory"); 83 n->name = xstrdup(dirname); 84 n->next = 0; 85 return n; 86 } 87 88 void 89 addtoincludepath(const char *dirname) 90 { 91 struct path_entry *n; 92 93 n = new_path_entry(dirname); 94 95 if (last) { 96 last->next = n; 97 last = n; 98 } 99 else 100 last = first = n; 101 } 102 103 static void 104 ensure_m4path(void) 105 { 106 static int envpathdone = 0; 107 char *envpath; 108 char *sweep; 109 char *path; 110 111 if (envpathdone) 112 return; 113 envpathdone = TRUE; 114 envpath = getenv("M4PATH"); 115 if (!envpath) 116 return; 117 /* for portability: getenv result is read-only */ 118 envpath = xstrdup(envpath); 119 for (sweep = envpath; 120 (path = strsep(&sweep, ":")) != NULL;) 121 addtoincludepath(path); 122 free(envpath); 123 } 124 125 static 126 struct input_file * 127 dopath(struct input_file *i, const char *filename) 128 { 129 char path[PATH_MAX]; 130 struct path_entry *pe; 131 FILE *f; 132 133 for (pe = first; pe; pe = pe->next) { 134 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 135 if ((f = fopen(path, "r")) != NULL) { 136 set_input(i, f, path); 137 return i; 138 } 139 } 140 return NULL; 141 } 142 143 struct input_file * 144 fopen_trypath(struct input_file *i, const char *filename) 145 { 146 FILE *f; 147 148 f = fopen(filename, "r"); 149 if (f != NULL) { 150 set_input(i, f, filename); 151 return i; 152 } 153 if (filename[0] == '/') 154 return NULL; 155 156 ensure_m4path(); 157 158 return dopath(i, filename); 159 } 160 161 void 162 doindir(const char *argv[], int argc) 163 { 164 ndptr n; 165 struct macro_definition *p = NULL; 166 167 n = lookup(argv[2]); 168 if (n == NULL || (p = macro_getdef(n)) == NULL) 169 m4errx(1, "indir: undefined macro %s.", argv[2]); 170 argv[1] = p->defn; 171 172 eval(argv+1, argc-1, p->type, is_traced(n)); 173 } 174 175 void 176 dobuiltin(const char *argv[], int argc) 177 { 178 ndptr p; 179 180 argv[1] = NULL; 181 p = macro_getbuiltin(argv[2]); 182 if (p != NULL) 183 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 184 else 185 m4errx(1, "unknown builtin %s.", argv[2]); 186 } 187 188 189 /* We need some temporary buffer space, as pb pushes BACK and substitution 190 * proceeds forward... */ 191 static char *buffer; 192 static size_t bufsize = 0; 193 static size_t current = 0; 194 195 static void addchars(const char *, size_t); 196 static void addchar(int); 197 static char *twiddle(const char *); 198 static char *getstring(void); 199 static void exit_regerror(int, regex_t *, const char *); 200 static void do_subst(const char *, regex_t *, const char *, const char *, 201 regmatch_t *); 202 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 203 static void do_regexp(const char *, regex_t *, const char *, const char *, 204 regmatch_t *); 205 static void add_sub(int, const char *, regex_t *, regmatch_t *); 206 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 207 #define addconstantstring(s) addchars((s), sizeof(s)-1) 208 209 static void 210 addchars(const char *c, size_t n) 211 { 212 if (n == 0) 213 return; 214 while (current + n > bufsize) { 215 if (bufsize == 0) 216 bufsize = 1024; 217 else if (bufsize <= SIZE_MAX/2) { 218 bufsize *= 2; 219 } else { 220 errx(1, "size overflow"); 221 } 222 buffer = xrealloc(buffer, bufsize, NULL); 223 } 224 memcpy(buffer+current, c, n); 225 current += n; 226 } 227 228 static void 229 addchar(int c) 230 { 231 if (current +1 > bufsize) { 232 if (bufsize == 0) 233 bufsize = 1024; 234 else 235 bufsize *= 2; 236 buffer = xrealloc(buffer, bufsize, NULL); 237 } 238 buffer[current++] = c; 239 } 240 241 static char * 242 getstring(void) 243 { 244 addchar('\0'); 245 current = 0; 246 return buffer; 247 } 248 249 250 static void 251 exit_regerror(int er, regex_t *re, const char *source) 252 { 253 size_t errlen; 254 char *errbuf; 255 256 errlen = regerror(er, re, NULL, 0); 257 errbuf = xalloc(errlen, 258 "malloc in regerror: %lu", (unsigned long)errlen); 259 regerror(er, re, errbuf, errlen); 260 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 261 } 262 263 /* warnx() plus check to see if we need to change exit code or exit . 264 * -E flag functionality. 265 */ 266 void 267 m4_warnx(const char *fmt, ...) 268 { 269 va_list ap; 270 271 va_start(ap, fmt); 272 warnx(fmt, ap); 273 va_end(ap); 274 275 if (fatal_warns) 276 exit(1); 277 if (error_warns) 278 exit_code = 1; 279 } 280 281 static void 282 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 283 { 284 if (n > (int)re->re_nsub) 285 m4_warnx("No subexpression %d", n); 286 /* Subexpressions that did not match are 287 * not an error. */ 288 else if (pm[n].rm_so != -1 && 289 pm[n].rm_eo != -1) { 290 addchars(string + pm[n].rm_so, 291 pm[n].rm_eo - pm[n].rm_so); 292 } 293 } 294 295 /* Add replacement string to the output buffer, recognizing special 296 * constructs and replacing them with substrings of the original string. 297 */ 298 static void 299 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 300 { 301 const char *p; 302 303 for (p = replace; *p != '\0'; p++) { 304 if (*p == '&' && !mimic_gnu) { 305 add_sub(0, string, re, pm); 306 continue; 307 } 308 if (*p == '\\') { 309 if (p[1] == '\\') { 310 addchar(p[1]); 311 p++; 312 continue; 313 } 314 if (p[1] == '&') { 315 if (mimic_gnu) 316 add_sub(0, string, re, pm); 317 else 318 addchar(p[1]); 319 p++; 320 continue; 321 } 322 if (isdigit((unsigned char)p[1])) { 323 add_sub(*(++p) - '0', string, re, pm); 324 continue; 325 } 326 } 327 addchar(*p); 328 } 329 } 330 331 static void 332 do_subst(const char *string, regex_t *re, const char *source, 333 const char *replace, regmatch_t *pm) 334 { 335 int error; 336 int flags = 0; 337 const char *last_match = NULL; 338 339 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 340 if (pm[0].rm_eo != 0) { 341 if (string[pm[0].rm_eo-1] == '\n') 342 flags = 0; 343 else 344 flags = REG_NOTBOL; 345 } 346 347 /* NULL length matches are special... We use the `vi-mode' 348 * rule: don't allow a NULL-match at the last match 349 * position. 350 */ 351 if (pm[0].rm_so == pm[0].rm_eo && 352 string + pm[0].rm_so == last_match) { 353 if (*string == '\0') 354 return; 355 addchar(*string); 356 if (*string++ == '\n') 357 flags = 0; 358 else 359 flags = REG_NOTBOL; 360 continue; 361 } 362 last_match = string + pm[0].rm_so; 363 addchars(string, pm[0].rm_so); 364 add_replace(string, re, replace, pm); 365 string += pm[0].rm_eo; 366 } 367 if (error != REG_NOMATCH) 368 exit_regerror(error, re, source); 369 pbstr(string); 370 } 371 372 static void 373 do_regexp(const char *string, regex_t *re, const char *source, 374 const char *replace, regmatch_t *pm) 375 { 376 int error; 377 378 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 379 case 0: 380 add_replace(string, re, replace, pm); 381 pbstr(getstring()); 382 break; 383 case REG_NOMATCH: 384 break; 385 default: 386 exit_regerror(error, re, source); 387 } 388 } 389 390 static void 391 do_regexpindex(const char *string, regex_t *re, const char *source, 392 regmatch_t *pm) 393 { 394 int error; 395 396 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 397 case 0: 398 pbunsigned(pm[0].rm_so); 399 break; 400 case REG_NOMATCH: 401 pbnum(-1); 402 break; 403 default: 404 exit_regerror(error, re, source); 405 } 406 } 407 408 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 409 * says. So we twiddle with the regexp before passing it to regcomp. 410 */ 411 static char * 412 twiddle(const char *p) 413 { 414 /* + at start of regexp is a normal character for Gnu m4 */ 415 if (*p == '^') { 416 addchar(*p); 417 p++; 418 } 419 if (*p == '+') { 420 addchar('\\'); 421 } 422 /* This could use strcspn for speed... */ 423 while (*p != '\0') { 424 if (*p == '\\') { 425 switch(p[1]) { 426 case '(': 427 case ')': 428 case '|': 429 addchar(p[1]); 430 break; 431 case 'w': 432 addconstantstring("[_a-zA-Z0-9]"); 433 break; 434 case 'W': 435 addconstantstring("[^_a-zA-Z0-9]"); 436 break; 437 case '<': 438 addconstantstring("[[:<:]]"); 439 break; 440 case '>': 441 addconstantstring("[[:>:]]"); 442 break; 443 default: 444 addchars(p, 2); 445 break; 446 } 447 p+=2; 448 continue; 449 } 450 if (*p == '(' || *p == ')' || *p == '|') 451 addchar('\\'); 452 453 addchar(*p); 454 p++; 455 } 456 return getstring(); 457 } 458 459 /* patsubst(string, regexp, opt replacement) */ 460 /* argv[2]: string 461 * argv[3]: regexp 462 * argv[4]: opt rep 463 */ 464 void 465 dopatsubst(const char *argv[], int argc) 466 { 467 if (argc <= 3) { 468 m4_warnx("Too few arguments to patsubst"); 469 return; 470 } 471 /* special case: empty regexp */ 472 if (argv[3][0] == '\0') { 473 const char *s; 474 size_t len; 475 if (argc > 4 && argv[4]) 476 len = strlen(argv[4]); 477 else 478 len = 0; 479 for (s = argv[2]; *s != '\0'; s++) { 480 addchars(argv[4], len); 481 addchar(*s); 482 } 483 } else { 484 int error; 485 regex_t re; 486 regmatch_t *pmatch; 487 int mode = REG_EXTENDED; 488 const char *source; 489 size_t l = strlen(argv[3]); 490 491 if (!mimic_gnu || 492 (argv[3][0] == '^') || 493 (l > 0 && argv[3][l-1] == '$')) 494 mode |= REG_NEWLINE; 495 496 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 497 error = regcomp(&re, source, mode); 498 if (error != 0) 499 exit_regerror(error, &re, source); 500 501 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 502 NULL); 503 do_subst(argv[2], &re, source, 504 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 505 free(pmatch); 506 regfree(&re); 507 } 508 pbstr(getstring()); 509 } 510 511 void 512 doregexp(const char *argv[], int argc) 513 { 514 int error; 515 regex_t re; 516 regmatch_t *pmatch; 517 const char *source; 518 519 if (argc <= 3) { 520 m4_warnx("Too few arguments to regexp"); 521 return; 522 } 523 /* special gnu case */ 524 if (argv[3][0] == '\0' && mimic_gnu) { 525 if (argc == 4 || argv[4] == NULL) 526 return; 527 else 528 pbstr(argv[4]); 529 } 530 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 531 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 532 if (error != 0) 533 exit_regerror(error, &re, source); 534 535 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 536 if (argc == 4 || argv[4] == NULL) 537 do_regexpindex(argv[2], &re, source, pmatch); 538 else 539 do_regexp(argv[2], &re, source, argv[4], pmatch); 540 free(pmatch); 541 regfree(&re); 542 } 543 544 void 545 doformat(const char *argv[], int argc) 546 { 547 const char *format = argv[2]; 548 int pos = 3; 549 int left_padded; 550 long width; 551 size_t l; 552 const char *thisarg = NULL; 553 char temp[2]; 554 long extra; 555 556 while (*format != 0) { 557 if (*format != '%') { 558 addchar(*format++); 559 continue; 560 } 561 562 format++; 563 if (*format == '%') { 564 addchar(*format++); 565 continue; 566 } 567 if (*format == 0) { 568 addchar('%'); 569 break; 570 } 571 572 if (*format == '*') { 573 format++; 574 if (pos >= argc) 575 m4errx(1, 576 "Format with too many format specifiers."); 577 width = strtol(argv[pos++], NULL, 10); 578 } else { 579 width = strtol(format, __DECONST(char **,&format), 10); 580 } 581 if (width < 0) { 582 left_padded = 1; 583 width = -width; 584 } else { 585 left_padded = 0; 586 } 587 if (*format == '.') { 588 format++; 589 if (*format == '*') { 590 format++; 591 if (pos >= argc) 592 m4errx(1, 593 "Format with too many format specifiers."); 594 extra = strtol(argv[pos++], NULL, 10); 595 } else { 596 extra = strtol(format, __DECONST(char **, &format), 10); 597 } 598 } else { 599 extra = LONG_MAX; 600 } 601 if (pos >= argc) 602 m4errx(1, "Format with too many format specifiers."); 603 switch(*format) { 604 case 's': 605 thisarg = argv[pos++]; 606 break; 607 case 'c': 608 temp[0] = strtoul(argv[pos++], NULL, 10); 609 temp[1] = 0; 610 thisarg = temp; 611 break; 612 default: 613 m4errx(1, "Unsupported format specification: %s.", 614 argv[2]); 615 } 616 format++; 617 l = strlen(thisarg); 618 if ((long)l > extra) 619 l = extra; 620 if (!left_padded) { 621 while ((long)l < width--) 622 addchar(' '); 623 } 624 addchars(thisarg, l); 625 if (left_padded) { 626 while ((long)l < width--) 627 addchar(' '); 628 } 629 } 630 pbstr(getstring()); 631 } 632 633 void 634 doesyscmd(const char *cmd) 635 { 636 int p[2]; 637 pid_t cpid; 638 char *argv[4]; 639 int cc; 640 int status; 641 642 /* Follow gnu m4 documentation: first flush buffers. */ 643 fflush(NULL); 644 645 argv[0] = __DECONST(char *, "sh"); 646 argv[1] = __DECONST(char *, "-c"); 647 argv[2] = __DECONST(char *, cmd); 648 argv[3] = NULL; 649 650 /* Just set up standard output, share stderr and stdin with m4 */ 651 if (pipe(p) == -1) 652 err(1, "bad pipe"); 653 switch(cpid = fork()) { 654 case -1: 655 err(1, "bad fork"); 656 /* NOTREACHED */ 657 case 0: 658 (void) close(p[0]); 659 (void) dup2(p[1], 1); 660 (void) close(p[1]); 661 execv(_PATH_BSHELL, argv); 662 exit(1); 663 default: 664 /* Read result in two stages, since m4's buffer is 665 * pushback-only. */ 666 (void) close(p[1]); 667 do { 668 char result[BUFSIZE]; 669 cc = read(p[0], result, sizeof result); 670 if (cc > 0) 671 addchars(result, cc); 672 } while (cc > 0 || (cc == -1 && errno == EINTR)); 673 674 (void) close(p[0]); 675 while (waitpid(cpid, &status, 0) == -1) { 676 if (errno != EINTR) 677 break; 678 } 679 pbstr(getstring()); 680 } 681 } 682 683 void 684 getdivfile(const char *name) 685 { 686 FILE *f; 687 int c; 688 689 f = fopen(name, "r"); 690 if (!f) 691 return; 692 693 while ((c = getc(f))!= EOF) 694 putc(c, active); 695 (void) fclose(f); 696 } 697