1 /* $OpenBSD: gnum4.c,v 1.50 2015/04/29 00:13:26 millert Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 /* 31 * functions needed to support gnu-m4 extensions, including a fake freezing 32 */ 33 34 #include <sys/types.h> 35 #include <sys/wait.h> 36 #include <ctype.h> 37 #include <err.h> 38 #include <paths.h> 39 #include <regex.h> 40 #include <stddef.h> 41 #include <stdlib.h> 42 #include <stdint.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <errno.h> 46 #include <unistd.h> 47 #include <limits.h> 48 #include "mdef.h" 49 #include "stdd.h" 50 #include "extern.h" 51 52 53 int mimic_gnu = 0; 54 55 /* 56 * Support for include path search 57 * First search in the current directory. 58 * If not found, and the path is not absolute, include path kicks in. 59 * First, -I options, in the order found on the command line. 60 * Then M4PATH env variable 61 */ 62 63 static struct path_entry { 64 char *name; 65 struct path_entry *next; 66 } *first, *last; 67 68 static struct path_entry *new_path_entry(const char *); 69 static void ensure_m4path(void); 70 static struct input_file *dopath(struct input_file *, const char *); 71 72 static struct path_entry * 73 new_path_entry(const char *dirname) 74 { 75 struct path_entry *n; 76 77 n = malloc(sizeof(struct path_entry)); 78 if (!n) 79 errx(1, "out of memory"); 80 n->name = xstrdup(dirname); 81 n->next = 0; 82 return n; 83 } 84 85 void 86 addtoincludepath(const char *dirname) 87 { 88 struct path_entry *n; 89 90 n = new_path_entry(dirname); 91 92 if (last) { 93 last->next = n; 94 last = n; 95 } 96 else 97 last = first = n; 98 } 99 100 static void 101 ensure_m4path(void) 102 { 103 static int envpathdone = 0; 104 char *envpath; 105 char *sweep; 106 char *path; 107 108 if (envpathdone) 109 return; 110 envpathdone = TRUE; 111 envpath = getenv("M4PATH"); 112 if (!envpath) 113 return; 114 /* for portability: getenv result is read-only */ 115 envpath = xstrdup(envpath); 116 for (sweep = envpath; 117 (path = strsep(&sweep, ":")) != NULL;) 118 addtoincludepath(path); 119 free(envpath); 120 } 121 122 static 123 struct input_file * 124 dopath(struct input_file *i, const char *filename) 125 { 126 char path[PATH_MAX]; 127 struct path_entry *pe; 128 FILE *f; 129 130 for (pe = first; pe; pe = pe->next) { 131 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 132 if ((f = fopen(path, "r")) != NULL) { 133 set_input(i, f, path); 134 return i; 135 } 136 } 137 return NULL; 138 } 139 140 struct input_file * 141 fopen_trypath(struct input_file *i, const char *filename) 142 { 143 FILE *f; 144 145 f = fopen(filename, "r"); 146 if (f != NULL) { 147 set_input(i, f, filename); 148 return i; 149 } 150 if (filename[0] == '/') 151 return NULL; 152 153 ensure_m4path(); 154 155 return dopath(i, filename); 156 } 157 158 void 159 doindir(const char *argv[], int argc) 160 { 161 ndptr n; 162 struct macro_definition *p = NULL; 163 164 n = lookup(argv[2]); 165 if (n == NULL || (p = macro_getdef(n)) == NULL) 166 m4errx(1, "indir: undefined macro %s.", argv[2]); 167 argv[1] = p->defn; 168 169 eval(argv+1, argc-1, p->type, is_traced(n)); 170 } 171 172 void 173 dobuiltin(const char *argv[], int argc) 174 { 175 ndptr p; 176 177 argv[1] = NULL; 178 p = macro_getbuiltin(argv[2]); 179 if (p != NULL) 180 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 181 else 182 m4errx(1, "unknown builtin %s.", argv[2]); 183 } 184 185 186 /* We need some temporary buffer space, as pb pushes BACK and substitution 187 * proceeds forward... */ 188 static char *buffer; 189 static size_t bufsize = 0; 190 static size_t current = 0; 191 192 static void addchars(const char *, size_t); 193 static void addchar(int); 194 static char *twiddle(const char *); 195 static char *getstring(void); 196 static void exit_regerror(int, regex_t *, const char *); 197 static void do_subst(const char *, regex_t *, const char *, const char *, 198 regmatch_t *); 199 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 200 static void do_regexp(const char *, regex_t *, const char *, const char *, 201 regmatch_t *); 202 static void add_sub(int, const char *, regex_t *, regmatch_t *); 203 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 204 #define addconstantstring(s) addchars((s), sizeof(s)-1) 205 206 static void 207 addchars(const char *c, size_t n) 208 { 209 if (n == 0) 210 return; 211 while (current + n > bufsize) { 212 if (bufsize == 0) 213 bufsize = 1024; 214 else if (bufsize <= SIZE_MAX/2) { 215 bufsize *= 2; 216 } else { 217 errx(1, "size overflow"); 218 } 219 buffer = xrealloc(buffer, bufsize, NULL); 220 } 221 memcpy(buffer+current, c, n); 222 current += n; 223 } 224 225 static void 226 addchar(int c) 227 { 228 if (current +1 > bufsize) { 229 if (bufsize == 0) 230 bufsize = 1024; 231 else 232 bufsize *= 2; 233 buffer = xrealloc(buffer, bufsize, NULL); 234 } 235 buffer[current++] = c; 236 } 237 238 static char * 239 getstring(void) 240 { 241 addchar('\0'); 242 current = 0; 243 return buffer; 244 } 245 246 247 static void 248 exit_regerror(int er, regex_t *re, const char *source) 249 { 250 size_t errlen; 251 char *errbuf; 252 253 errlen = regerror(er, re, NULL, 0); 254 errbuf = xalloc(errlen, 255 "malloc in regerror: %lu", (unsigned long)errlen); 256 regerror(er, re, errbuf, errlen); 257 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 258 } 259 260 static void 261 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 262 { 263 if (n > (int)re->re_nsub) 264 warnx("No subexpression %d", n); 265 /* Subexpressions that did not match are 266 * not an error. */ 267 else if (pm[n].rm_so != -1 && 268 pm[n].rm_eo != -1) { 269 addchars(string + pm[n].rm_so, 270 pm[n].rm_eo - pm[n].rm_so); 271 } 272 } 273 274 /* Add replacement string to the output buffer, recognizing special 275 * constructs and replacing them with substrings of the original string. 276 */ 277 static void 278 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 279 { 280 const char *p; 281 282 for (p = replace; *p != '\0'; p++) { 283 if (*p == '&' && !mimic_gnu) { 284 add_sub(0, string, re, pm); 285 continue; 286 } 287 if (*p == '\\') { 288 if (p[1] == '\\') { 289 addchar(p[1]); 290 p++; 291 continue; 292 } 293 if (p[1] == '&') { 294 if (mimic_gnu) 295 add_sub(0, string, re, pm); 296 else 297 addchar(p[1]); 298 p++; 299 continue; 300 } 301 if (isdigit((unsigned char)p[1])) { 302 add_sub(*(++p) - '0', string, re, pm); 303 continue; 304 } 305 } 306 addchar(*p); 307 } 308 } 309 310 static void 311 do_subst(const char *string, regex_t *re, const char *source, 312 const char *replace, regmatch_t *pm) 313 { 314 int error; 315 int flags = 0; 316 const char *last_match = NULL; 317 318 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 319 if (pm[0].rm_eo != 0) { 320 if (string[pm[0].rm_eo-1] == '\n') 321 flags = 0; 322 else 323 flags = REG_NOTBOL; 324 } 325 326 /* NULL length matches are special... We use the `vi-mode' 327 * rule: don't allow a NULL-match at the last match 328 * position. 329 */ 330 if (pm[0].rm_so == pm[0].rm_eo && 331 string + pm[0].rm_so == last_match) { 332 if (*string == '\0') 333 return; 334 addchar(*string); 335 if (*string++ == '\n') 336 flags = 0; 337 else 338 flags = REG_NOTBOL; 339 continue; 340 } 341 last_match = string + pm[0].rm_so; 342 addchars(string, pm[0].rm_so); 343 add_replace(string, re, replace, pm); 344 string += pm[0].rm_eo; 345 } 346 if (error != REG_NOMATCH) 347 exit_regerror(error, re, source); 348 pbstr(string); 349 } 350 351 static void 352 do_regexp(const char *string, regex_t *re, const char *source, 353 const char *replace, regmatch_t *pm) 354 { 355 int error; 356 357 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 358 case 0: 359 add_replace(string, re, replace, pm); 360 pbstr(getstring()); 361 break; 362 case REG_NOMATCH: 363 break; 364 default: 365 exit_regerror(error, re, source); 366 } 367 } 368 369 static void 370 do_regexpindex(const char *string, regex_t *re, const char *source, 371 regmatch_t *pm) 372 { 373 int error; 374 375 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 376 case 0: 377 pbunsigned(pm[0].rm_so); 378 break; 379 case REG_NOMATCH: 380 pbnum(-1); 381 break; 382 default: 383 exit_regerror(error, re, source); 384 } 385 } 386 387 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 388 * says. So we twiddle with the regexp before passing it to regcomp. 389 */ 390 static char * 391 twiddle(const char *p) 392 { 393 /* + at start of regexp is a normal character for Gnu m4 */ 394 if (*p == '^') { 395 addchar(*p); 396 p++; 397 } 398 if (*p == '+') { 399 addchar('\\'); 400 } 401 /* This could use strcspn for speed... */ 402 while (*p != '\0') { 403 if (*p == '\\') { 404 switch(p[1]) { 405 case '(': 406 case ')': 407 case '|': 408 addchar(p[1]); 409 break; 410 case 'w': 411 addconstantstring("[_a-zA-Z0-9]"); 412 break; 413 case 'W': 414 addconstantstring("[^_a-zA-Z0-9]"); 415 break; 416 case '<': 417 addconstantstring("[[:<:]]"); 418 break; 419 case '>': 420 addconstantstring("[[:>:]]"); 421 break; 422 default: 423 addchars(p, 2); 424 break; 425 } 426 p+=2; 427 continue; 428 } 429 if (*p == '(' || *p == ')' || *p == '|') 430 addchar('\\'); 431 432 addchar(*p); 433 p++; 434 } 435 return getstring(); 436 } 437 438 /* patsubst(string, regexp, opt replacement) */ 439 /* argv[2]: string 440 * argv[3]: regexp 441 * argv[4]: opt rep 442 */ 443 void 444 dopatsubst(const char *argv[], int argc) 445 { 446 if (argc <= 3) { 447 warnx("Too few arguments to patsubst"); 448 return; 449 } 450 /* special case: empty regexp */ 451 if (argv[3][0] == '\0') { 452 const char *s; 453 size_t len; 454 if (argc > 4 && argv[4]) 455 len = strlen(argv[4]); 456 else 457 len = 0; 458 for (s = argv[2]; *s != '\0'; s++) { 459 addchars(argv[4], len); 460 addchar(*s); 461 } 462 } else { 463 int error; 464 regex_t re; 465 regmatch_t *pmatch; 466 int mode = REG_EXTENDED; 467 const char *source; 468 size_t l = strlen(argv[3]); 469 470 if (!mimic_gnu || 471 (argv[3][0] == '^') || 472 (l > 0 && argv[3][l-1] == '$')) 473 mode |= REG_NEWLINE; 474 475 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 476 error = regcomp(&re, source, mode); 477 if (error != 0) 478 exit_regerror(error, &re, source); 479 480 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 481 NULL); 482 do_subst(argv[2], &re, source, 483 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 484 free(pmatch); 485 regfree(&re); 486 } 487 pbstr(getstring()); 488 } 489 490 void 491 doregexp(const char *argv[], int argc) 492 { 493 int error; 494 regex_t re; 495 regmatch_t *pmatch; 496 const char *source; 497 498 if (argc <= 3) { 499 warnx("Too few arguments to regexp"); 500 return; 501 } 502 /* special gnu case */ 503 if (argv[3][0] == '\0' && mimic_gnu) { 504 if (argc == 4 || argv[4] == NULL) 505 return; 506 else 507 pbstr(argv[4]); 508 } 509 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 510 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 511 if (error != 0) 512 exit_regerror(error, &re, source); 513 514 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 515 if (argc == 4 || argv[4] == NULL) 516 do_regexpindex(argv[2], &re, source, pmatch); 517 else 518 do_regexp(argv[2], &re, source, argv[4], pmatch); 519 free(pmatch); 520 regfree(&re); 521 } 522 523 void 524 doformat(const char *argv[], int argc) 525 { 526 const char *format = argv[2]; 527 int pos = 3; 528 int left_padded; 529 long width; 530 size_t l; 531 const char *thisarg = NULL; 532 char temp[2]; 533 long extra; 534 535 while (*format != 0) { 536 if (*format != '%') { 537 addchar(*format++); 538 continue; 539 } 540 541 format++; 542 if (*format == '%') { 543 addchar(*format++); 544 continue; 545 } 546 if (*format == 0) { 547 addchar('%'); 548 break; 549 } 550 551 if (*format == '*') { 552 format++; 553 if (pos >= argc) 554 m4errx(1, 555 "Format with too many format specifiers."); 556 width = strtol(argv[pos++], NULL, 10); 557 } else { 558 width = strtol(format, __DECONST(char **,&format), 10); 559 } 560 if (width < 0) { 561 left_padded = 1; 562 width = -width; 563 } else { 564 left_padded = 0; 565 } 566 if (*format == '.') { 567 format++; 568 if (*format == '*') { 569 format++; 570 if (pos >= argc) 571 m4errx(1, 572 "Format with too many format specifiers."); 573 extra = strtol(argv[pos++], NULL, 10); 574 } else { 575 extra = strtol(format, __DECONST(char **, &format), 10); 576 } 577 } else { 578 extra = LONG_MAX; 579 } 580 if (pos >= argc) 581 m4errx(1, "Format with too many format specifiers."); 582 switch(*format) { 583 case 's': 584 thisarg = argv[pos++]; 585 break; 586 case 'c': 587 temp[0] = strtoul(argv[pos++], NULL, 10); 588 temp[1] = 0; 589 thisarg = temp; 590 break; 591 default: 592 m4errx(1, "Unsupported format specification: %s.", 593 argv[2]); 594 } 595 format++; 596 l = strlen(thisarg); 597 if ((long)l > extra) 598 l = extra; 599 if (!left_padded) { 600 while ((long)l < width--) 601 addchar(' '); 602 } 603 addchars(thisarg, l); 604 if (left_padded) { 605 while ((long)l < width--) 606 addchar(' '); 607 } 608 } 609 pbstr(getstring()); 610 } 611 612 void 613 doesyscmd(const char *cmd) 614 { 615 int p[2]; 616 pid_t pid, cpid; 617 char *argv[4]; 618 int cc; 619 int status; 620 621 /* Follow gnu m4 documentation: first flush buffers. */ 622 fflush(NULL); 623 624 argv[0] = __DECONST(char *, "sh"); 625 argv[1] = __DECONST(char *, "-c"); 626 argv[2] = __DECONST(char *, cmd); 627 argv[3] = NULL; 628 629 /* Just set up standard output, share stderr and stdin with m4 */ 630 if (pipe(p) == -1) 631 err(1, "bad pipe"); 632 switch(cpid = fork()) { 633 case -1: 634 err(1, "bad fork"); 635 /* NOTREACHED */ 636 case 0: 637 (void) close(p[0]); 638 (void) dup2(p[1], 1); 639 (void) close(p[1]); 640 execv(_PATH_BSHELL, argv); 641 exit(1); 642 default: 643 /* Read result in two stages, since m4's buffer is 644 * pushback-only. */ 645 (void) close(p[1]); 646 do { 647 char result[BUFSIZE]; 648 cc = read(p[0], result, sizeof result); 649 if (cc > 0) 650 addchars(result, cc); 651 } while (cc > 0 || (cc == -1 && errno == EINTR)); 652 653 (void) close(p[0]); 654 while ((pid = wait(&status)) != cpid && pid >= 0) 655 continue; 656 pbstr(getstring()); 657 } 658 } 659 660 void 661 getdivfile(const char *name) 662 { 663 FILE *f; 664 int c; 665 666 f = fopen(name, "r"); 667 if (!f) 668 return; 669 670 while ((c = getc(f))!= EOF) 671 putc(c, active); 672 (void) fclose(f); 673 } 674