1 /* $OpenBSD: gnum4.c,v 1.50 2015/04/29 00:13:26 millert Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 1999 Marc Espie 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * functions needed to support gnu-m4 extensions, including a fake freezing 34 */ 35 36 #include <sys/types.h> 37 #include <sys/wait.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <paths.h> 41 #include <regex.h> 42 #include <stddef.h> 43 #include <stdlib.h> 44 #include <stdint.h> 45 #include <stdio.h> 46 #include <string.h> 47 #include <errno.h> 48 #include <unistd.h> 49 #include <limits.h> 50 #include "mdef.h" 51 #include "stdd.h" 52 #include "extern.h" 53 54 55 int mimic_gnu = 0; 56 57 /* 58 * Support for include path search 59 * First search in the current directory. 60 * If not found, and the path is not absolute, include path kicks in. 61 * First, -I options, in the order found on the command line. 62 * Then M4PATH env variable 63 */ 64 65 static struct path_entry { 66 char *name; 67 struct path_entry *next; 68 } *first, *last; 69 70 static struct path_entry *new_path_entry(const char *); 71 static void ensure_m4path(void); 72 static struct input_file *dopath(struct input_file *, const char *); 73 74 static struct path_entry * 75 new_path_entry(const char *dirname) 76 { 77 struct path_entry *n; 78 79 n = malloc(sizeof(struct path_entry)); 80 if (!n) 81 errx(1, "out of memory"); 82 n->name = xstrdup(dirname); 83 n->next = 0; 84 return n; 85 } 86 87 void 88 addtoincludepath(const char *dirname) 89 { 90 struct path_entry *n; 91 92 n = new_path_entry(dirname); 93 94 if (last) { 95 last->next = n; 96 last = n; 97 } 98 else 99 last = first = n; 100 } 101 102 static void 103 ensure_m4path(void) 104 { 105 static int envpathdone = 0; 106 char *envpath; 107 char *sweep; 108 char *path; 109 110 if (envpathdone) 111 return; 112 envpathdone = TRUE; 113 envpath = getenv("M4PATH"); 114 if (!envpath) 115 return; 116 /* for portability: getenv result is read-only */ 117 envpath = xstrdup(envpath); 118 for (sweep = envpath; 119 (path = strsep(&sweep, ":")) != NULL;) 120 addtoincludepath(path); 121 free(envpath); 122 } 123 124 static 125 struct input_file * 126 dopath(struct input_file *i, const char *filename) 127 { 128 char path[PATH_MAX]; 129 struct path_entry *pe; 130 FILE *f; 131 132 for (pe = first; pe; pe = pe->next) { 133 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 134 if ((f = fopen(path, "r")) != NULL) { 135 set_input(i, f, path); 136 return i; 137 } 138 } 139 return NULL; 140 } 141 142 struct input_file * 143 fopen_trypath(struct input_file *i, const char *filename) 144 { 145 FILE *f; 146 147 f = fopen(filename, "r"); 148 if (f != NULL) { 149 set_input(i, f, filename); 150 return i; 151 } 152 if (filename[0] == '/') 153 return NULL; 154 155 ensure_m4path(); 156 157 return dopath(i, filename); 158 } 159 160 void 161 doindir(const char *argv[], int argc) 162 { 163 ndptr n; 164 struct macro_definition *p = NULL; 165 166 n = lookup(argv[2]); 167 if (n == NULL || (p = macro_getdef(n)) == NULL) 168 m4errx(1, "indir: undefined macro %s.", argv[2]); 169 argv[1] = p->defn; 170 171 eval(argv+1, argc-1, p->type, is_traced(n)); 172 } 173 174 void 175 dobuiltin(const char *argv[], int argc) 176 { 177 ndptr p; 178 179 argv[1] = NULL; 180 p = macro_getbuiltin(argv[2]); 181 if (p != NULL) 182 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 183 else 184 m4errx(1, "unknown builtin %s.", argv[2]); 185 } 186 187 188 /* We need some temporary buffer space, as pb pushes BACK and substitution 189 * proceeds forward... */ 190 static char *buffer; 191 static size_t bufsize = 0; 192 static size_t current = 0; 193 194 static void addchars(const char *, size_t); 195 static void addchar(int); 196 static char *twiddle(const char *); 197 static char *getstring(void); 198 static void exit_regerror(int, regex_t *, const char *); 199 static void do_subst(const char *, regex_t *, const char *, const char *, 200 regmatch_t *); 201 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 202 static void do_regexp(const char *, regex_t *, const char *, const char *, 203 regmatch_t *); 204 static void add_sub(int, const char *, regex_t *, regmatch_t *); 205 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 206 #define addconstantstring(s) addchars((s), sizeof(s)-1) 207 208 static void 209 addchars(const char *c, size_t n) 210 { 211 if (n == 0) 212 return; 213 while (current + n > bufsize) { 214 if (bufsize == 0) 215 bufsize = 1024; 216 else if (bufsize <= SIZE_MAX/2) { 217 bufsize *= 2; 218 } else { 219 errx(1, "size overflow"); 220 } 221 buffer = xrealloc(buffer, bufsize, NULL); 222 } 223 memcpy(buffer+current, c, n); 224 current += n; 225 } 226 227 static void 228 addchar(int c) 229 { 230 if (current +1 > bufsize) { 231 if (bufsize == 0) 232 bufsize = 1024; 233 else 234 bufsize *= 2; 235 buffer = xrealloc(buffer, bufsize, NULL); 236 } 237 buffer[current++] = c; 238 } 239 240 static char * 241 getstring(void) 242 { 243 addchar('\0'); 244 current = 0; 245 return buffer; 246 } 247 248 249 static void 250 exit_regerror(int er, regex_t *re, const char *source) 251 { 252 size_t errlen; 253 char *errbuf; 254 255 errlen = regerror(er, re, NULL, 0); 256 errbuf = xalloc(errlen, 257 "malloc in regerror: %lu", (unsigned long)errlen); 258 regerror(er, re, errbuf, errlen); 259 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 260 } 261 262 static void 263 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 264 { 265 if (n > (int)re->re_nsub) 266 warnx("No subexpression %d", n); 267 /* Subexpressions that did not match are 268 * not an error. */ 269 else if (pm[n].rm_so != -1 && 270 pm[n].rm_eo != -1) { 271 addchars(string + pm[n].rm_so, 272 pm[n].rm_eo - pm[n].rm_so); 273 } 274 } 275 276 /* Add replacement string to the output buffer, recognizing special 277 * constructs and replacing them with substrings of the original string. 278 */ 279 static void 280 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 281 { 282 const char *p; 283 284 for (p = replace; *p != '\0'; p++) { 285 if (*p == '&' && !mimic_gnu) { 286 add_sub(0, string, re, pm); 287 continue; 288 } 289 if (*p == '\\') { 290 if (p[1] == '\\') { 291 addchar(p[1]); 292 p++; 293 continue; 294 } 295 if (p[1] == '&') { 296 if (mimic_gnu) 297 add_sub(0, string, re, pm); 298 else 299 addchar(p[1]); 300 p++; 301 continue; 302 } 303 if (isdigit((unsigned char)p[1])) { 304 add_sub(*(++p) - '0', string, re, pm); 305 continue; 306 } 307 } 308 addchar(*p); 309 } 310 } 311 312 static void 313 do_subst(const char *string, regex_t *re, const char *source, 314 const char *replace, regmatch_t *pm) 315 { 316 int error; 317 int flags = 0; 318 const char *last_match = NULL; 319 320 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 321 if (pm[0].rm_eo != 0) { 322 if (string[pm[0].rm_eo-1] == '\n') 323 flags = 0; 324 else 325 flags = REG_NOTBOL; 326 } 327 328 /* NULL length matches are special... We use the `vi-mode' 329 * rule: don't allow a NULL-match at the last match 330 * position. 331 */ 332 if (pm[0].rm_so == pm[0].rm_eo && 333 string + pm[0].rm_so == last_match) { 334 if (*string == '\0') 335 return; 336 addchar(*string); 337 if (*string++ == '\n') 338 flags = 0; 339 else 340 flags = REG_NOTBOL; 341 continue; 342 } 343 last_match = string + pm[0].rm_so; 344 addchars(string, pm[0].rm_so); 345 add_replace(string, re, replace, pm); 346 string += pm[0].rm_eo; 347 } 348 if (error != REG_NOMATCH) 349 exit_regerror(error, re, source); 350 pbstr(string); 351 } 352 353 static void 354 do_regexp(const char *string, regex_t *re, const char *source, 355 const char *replace, regmatch_t *pm) 356 { 357 int error; 358 359 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 360 case 0: 361 add_replace(string, re, replace, pm); 362 pbstr(getstring()); 363 break; 364 case REG_NOMATCH: 365 break; 366 default: 367 exit_regerror(error, re, source); 368 } 369 } 370 371 static void 372 do_regexpindex(const char *string, regex_t *re, const char *source, 373 regmatch_t *pm) 374 { 375 int error; 376 377 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 378 case 0: 379 pbunsigned(pm[0].rm_so); 380 break; 381 case REG_NOMATCH: 382 pbnum(-1); 383 break; 384 default: 385 exit_regerror(error, re, source); 386 } 387 } 388 389 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 390 * says. So we twiddle with the regexp before passing it to regcomp. 391 */ 392 static char * 393 twiddle(const char *p) 394 { 395 /* + at start of regexp is a normal character for Gnu m4 */ 396 if (*p == '^') { 397 addchar(*p); 398 p++; 399 } 400 if (*p == '+') { 401 addchar('\\'); 402 } 403 /* This could use strcspn for speed... */ 404 while (*p != '\0') { 405 if (*p == '\\') { 406 switch(p[1]) { 407 case '(': 408 case ')': 409 case '|': 410 addchar(p[1]); 411 break; 412 case 'w': 413 addconstantstring("[_a-zA-Z0-9]"); 414 break; 415 case 'W': 416 addconstantstring("[^_a-zA-Z0-9]"); 417 break; 418 case '<': 419 addconstantstring("[[:<:]]"); 420 break; 421 case '>': 422 addconstantstring("[[:>:]]"); 423 break; 424 default: 425 addchars(p, 2); 426 break; 427 } 428 p+=2; 429 continue; 430 } 431 if (*p == '(' || *p == ')' || *p == '|') 432 addchar('\\'); 433 434 addchar(*p); 435 p++; 436 } 437 return getstring(); 438 } 439 440 /* patsubst(string, regexp, opt replacement) */ 441 /* argv[2]: string 442 * argv[3]: regexp 443 * argv[4]: opt rep 444 */ 445 void 446 dopatsubst(const char *argv[], int argc) 447 { 448 if (argc <= 3) { 449 warnx("Too few arguments to patsubst"); 450 return; 451 } 452 /* special case: empty regexp */ 453 if (argv[3][0] == '\0') { 454 const char *s; 455 size_t len; 456 if (argc > 4 && argv[4]) 457 len = strlen(argv[4]); 458 else 459 len = 0; 460 for (s = argv[2]; *s != '\0'; s++) { 461 addchars(argv[4], len); 462 addchar(*s); 463 } 464 } else { 465 int error; 466 regex_t re; 467 regmatch_t *pmatch; 468 int mode = REG_EXTENDED; 469 const char *source; 470 size_t l = strlen(argv[3]); 471 472 if (!mimic_gnu || 473 (argv[3][0] == '^') || 474 (l > 0 && argv[3][l-1] == '$')) 475 mode |= REG_NEWLINE; 476 477 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 478 error = regcomp(&re, source, mode); 479 if (error != 0) 480 exit_regerror(error, &re, source); 481 482 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 483 NULL); 484 do_subst(argv[2], &re, source, 485 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 486 free(pmatch); 487 regfree(&re); 488 } 489 pbstr(getstring()); 490 } 491 492 void 493 doregexp(const char *argv[], int argc) 494 { 495 int error; 496 regex_t re; 497 regmatch_t *pmatch; 498 const char *source; 499 500 if (argc <= 3) { 501 warnx("Too few arguments to regexp"); 502 return; 503 } 504 /* special gnu case */ 505 if (argv[3][0] == '\0' && mimic_gnu) { 506 if (argc == 4 || argv[4] == NULL) 507 return; 508 else 509 pbstr(argv[4]); 510 } 511 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 512 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 513 if (error != 0) 514 exit_regerror(error, &re, source); 515 516 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 517 if (argc == 4 || argv[4] == NULL) 518 do_regexpindex(argv[2], &re, source, pmatch); 519 else 520 do_regexp(argv[2], &re, source, argv[4], pmatch); 521 free(pmatch); 522 regfree(&re); 523 } 524 525 void 526 doformat(const char *argv[], int argc) 527 { 528 const char *format = argv[2]; 529 int pos = 3; 530 int left_padded; 531 long width; 532 size_t l; 533 const char *thisarg = NULL; 534 char temp[2]; 535 long extra; 536 537 while (*format != 0) { 538 if (*format != '%') { 539 addchar(*format++); 540 continue; 541 } 542 543 format++; 544 if (*format == '%') { 545 addchar(*format++); 546 continue; 547 } 548 if (*format == 0) { 549 addchar('%'); 550 break; 551 } 552 553 if (*format == '*') { 554 format++; 555 if (pos >= argc) 556 m4errx(1, 557 "Format with too many format specifiers."); 558 width = strtol(argv[pos++], NULL, 10); 559 } else { 560 width = strtol(format, __DECONST(char **,&format), 10); 561 } 562 if (width < 0) { 563 left_padded = 1; 564 width = -width; 565 } else { 566 left_padded = 0; 567 } 568 if (*format == '.') { 569 format++; 570 if (*format == '*') { 571 format++; 572 if (pos >= argc) 573 m4errx(1, 574 "Format with too many format specifiers."); 575 extra = strtol(argv[pos++], NULL, 10); 576 } else { 577 extra = strtol(format, __DECONST(char **, &format), 10); 578 } 579 } else { 580 extra = LONG_MAX; 581 } 582 if (pos >= argc) 583 m4errx(1, "Format with too many format specifiers."); 584 switch(*format) { 585 case 's': 586 thisarg = argv[pos++]; 587 break; 588 case 'c': 589 temp[0] = strtoul(argv[pos++], NULL, 10); 590 temp[1] = 0; 591 thisarg = temp; 592 break; 593 default: 594 m4errx(1, "Unsupported format specification: %s.", 595 argv[2]); 596 } 597 format++; 598 l = strlen(thisarg); 599 if ((long)l > extra) 600 l = extra; 601 if (!left_padded) { 602 while ((long)l < width--) 603 addchar(' '); 604 } 605 addchars(thisarg, l); 606 if (left_padded) { 607 while ((long)l < width--) 608 addchar(' '); 609 } 610 } 611 pbstr(getstring()); 612 } 613 614 void 615 doesyscmd(const char *cmd) 616 { 617 int p[2]; 618 pid_t pid, cpid; 619 char *argv[4]; 620 int cc; 621 int status; 622 623 /* Follow gnu m4 documentation: first flush buffers. */ 624 fflush(NULL); 625 626 argv[0] = __DECONST(char *, "sh"); 627 argv[1] = __DECONST(char *, "-c"); 628 argv[2] = __DECONST(char *, cmd); 629 argv[3] = NULL; 630 631 /* Just set up standard output, share stderr and stdin with m4 */ 632 if (pipe(p) == -1) 633 err(1, "bad pipe"); 634 switch(cpid = fork()) { 635 case -1: 636 err(1, "bad fork"); 637 /* NOTREACHED */ 638 case 0: 639 (void) close(p[0]); 640 (void) dup2(p[1], 1); 641 (void) close(p[1]); 642 execv(_PATH_BSHELL, argv); 643 exit(1); 644 default: 645 /* Read result in two stages, since m4's buffer is 646 * pushback-only. */ 647 (void) close(p[1]); 648 do { 649 char result[BUFSIZE]; 650 cc = read(p[0], result, sizeof result); 651 if (cc > 0) 652 addchars(result, cc); 653 } while (cc > 0 || (cc == -1 && errno == EINTR)); 654 655 (void) close(p[0]); 656 while ((pid = wait(&status)) != cpid && pid >= 0) 657 continue; 658 pbstr(getstring()); 659 } 660 } 661 662 void 663 getdivfile(const char *name) 664 { 665 FILE *f; 666 int c; 667 668 f = fopen(name, "r"); 669 if (!f) 670 return; 671 672 while ((c = getc(f))!= EOF) 673 putc(c, active); 674 (void) fclose(f); 675 } 676