/* $OpenBSD: gnum4.c,v 1.42 2011/11/06 12:25:43 espie Exp $ */

/*
 * Copyright (c) 1999 Marc Espie
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26 */ 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 /* 31 * functions needed to support gnu-m4 extensions, including a fake freezing 32 */ 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/wait.h> 37 #include <ctype.h> 38 #include <err.h> 39 #include <paths.h> 40 #include <regex.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <errno.h> 46 #include <unistd.h> 47 #include "mdef.h" 48 #include "stdd.h" 49 #include "extern.h" 50 51 52 int mimic_gnu = 0; 53 54 /* 55 * Support for include path search 56 * First search in the current directory. 57 * If not found, and the path is not absolute, include path kicks in. 58 * First, -I options, in the order found on the command line. 59 * Then M4PATH env variable 60 */ 61 62 struct path_entry { 63 char *name; 64 struct path_entry *next; 65 } *first, *last; 66 67 static struct path_entry *new_path_entry(const char *); 68 static void ensure_m4path(void); 69 static struct input_file *dopath(struct input_file *, const char *); 70 71 static struct path_entry * 72 new_path_entry(const char *dirname) 73 { 74 struct path_entry *n; 75 76 n = malloc(sizeof(struct path_entry)); 77 if (!n) 78 errx(1, "out of memory"); 79 n->name = strdup(dirname); 80 if (!n->name) 81 errx(1, "out of memory"); 82 n->next = 0; 83 return n; 84 } 85 86 void 87 addtoincludepath(const char *dirname) 88 { 89 struct path_entry *n; 90 91 n = new_path_entry(dirname); 92 93 if (last) { 94 last->next = n; 95 last = n; 96 } 97 else 98 last = first = n; 99 } 100 101 static void 102 ensure_m4path(void) 103 { 104 static int envpathdone = 0; 105 char *envpath; 106 char *sweep; 107 char *path; 108 109 if (envpathdone) 110 return; 111 envpathdone = TRUE; 112 envpath = getenv("M4PATH"); 113 if (!envpath) 114 return; 115 /* for portability: getenv result is read-only */ 116 envpath = strdup(envpath); 117 if (!envpath) 118 errx(1, "out of memory"); 119 for (sweep = envpath; 120 (path = 
strsep(&sweep, ":")) != NULL;) 121 addtoincludepath(path); 122 free(envpath); 123 } 124 125 static 126 struct input_file * 127 dopath(struct input_file *i, const char *filename) 128 { 129 char path[MAXPATHLEN]; 130 struct path_entry *pe; 131 FILE *f; 132 133 for (pe = first; pe; pe = pe->next) { 134 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 135 if ((f = fopen(path, "r")) != 0) { 136 set_input(i, f, path); 137 return i; 138 } 139 } 140 return NULL; 141 } 142 143 struct input_file * 144 fopen_trypath(struct input_file *i, const char *filename) 145 { 146 FILE *f; 147 148 f = fopen(filename, "r"); 149 if (f != NULL) { 150 set_input(i, f, filename); 151 return i; 152 } 153 if (filename[0] == '/') 154 return NULL; 155 156 ensure_m4path(); 157 158 return dopath(i, filename); 159 } 160 161 void 162 doindir(const char *argv[], int argc) 163 { 164 ndptr n; 165 struct macro_definition *p = NULL; 166 167 n = lookup(argv[2]); 168 if (n == NULL || (p = macro_getdef(n)) == NULL) 169 m4errx(1, "indir: undefined macro %s.", argv[2]); 170 argv[1] = p->defn; 171 172 eval(argv+1, argc-1, p->type, is_traced(n)); 173 } 174 175 void 176 dobuiltin(const char *argv[], int argc) 177 { 178 ndptr p; 179 180 argv[1] = NULL; 181 p = macro_getbuiltin(argv[2]); 182 if (p != NULL) 183 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 184 else 185 m4errx(1, "unknown builtin %s.", argv[2]); 186 } 187 188 189 /* We need some temporary buffer space, as pb pushes BACK and substitution 190 * proceeds forward... 
*/ 191 static char *buffer; 192 static size_t bufsize = 0; 193 static size_t current = 0; 194 195 static void addchars(const char *, size_t); 196 static void addchar(int); 197 static char *twiddle(const char *); 198 static char *getstring(void); 199 static void exit_regerror(int, regex_t *); 200 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 201 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 202 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 203 static void add_sub(int, const char *, regex_t *, regmatch_t *); 204 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 205 #define addconstantstring(s) addchars((s), sizeof(s)-1) 206 207 static void 208 addchars(const char *c, size_t n) 209 { 210 if (n == 0) 211 return; 212 while (current + n > bufsize) { 213 if (bufsize == 0) 214 bufsize = 1024; 215 else 216 bufsize *= 2; 217 buffer = xrealloc(buffer, bufsize, NULL); 218 } 219 memcpy(buffer+current, c, n); 220 current += n; 221 } 222 223 static void 224 addchar(int c) 225 { 226 if (current +1 > bufsize) { 227 if (bufsize == 0) 228 bufsize = 1024; 229 else 230 bufsize *= 2; 231 buffer = xrealloc(buffer, bufsize, NULL); 232 } 233 buffer[current++] = c; 234 } 235 236 static char * 237 getstring(void) 238 { 239 addchar('\0'); 240 current = 0; 241 return buffer; 242 } 243 244 245 static void 246 exit_regerror(int er, regex_t *re) 247 { 248 size_t errlen; 249 char *errbuf; 250 251 errlen = regerror(er, re, NULL, 0); 252 errbuf = xalloc(errlen, 253 "malloc in regerror: %lu", (unsigned long)errlen); 254 regerror(er, re, errbuf, errlen); 255 m4errx(1, "regular expression error: %s.", errbuf); 256 } 257 258 static void 259 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 260 { 261 if (n > (int)re->re_nsub) 262 warnx("No subexpression %d", n); 263 /* Subexpressions that did not match are 264 * not an error. 
*/ 265 else if (pm[n].rm_so != -1 && 266 pm[n].rm_eo != -1) { 267 addchars(string + pm[n].rm_so, 268 pm[n].rm_eo - pm[n].rm_so); 269 } 270 } 271 272 /* Add replacement string to the output buffer, recognizing special 273 * constructs and replacing them with substrings of the original string. 274 */ 275 static void 276 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 277 { 278 const char *p; 279 280 for (p = replace; *p != '\0'; p++) { 281 if (*p == '&' && !mimic_gnu) { 282 add_sub(0, string, re, pm); 283 continue; 284 } 285 if (*p == '\\') { 286 if (p[1] == '\\') { 287 addchar(p[1]); 288 p++; 289 continue; 290 } 291 if (p[1] == '&') { 292 if (mimic_gnu) 293 add_sub(0, string, re, pm); 294 else 295 addchar(p[1]); 296 p++; 297 continue; 298 } 299 if (isdigit(p[1])) { 300 add_sub(*(++p) - '0', string, re, pm); 301 continue; 302 } 303 } 304 addchar(*p); 305 } 306 } 307 308 static void 309 do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 310 { 311 int error; 312 int flags = 0; 313 const char *last_match = NULL; 314 315 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 316 if (pm[0].rm_eo != 0) { 317 if (string[pm[0].rm_eo-1] == '\n') 318 flags = 0; 319 else 320 flags = REG_NOTBOL; 321 } 322 323 /* NULL length matches are special... We use the `vi-mode' 324 * rule: don't allow a NULL-match at the last match 325 * position. 
326 */ 327 if (pm[0].rm_so == pm[0].rm_eo && 328 string + pm[0].rm_so == last_match) { 329 if (*string == '\0') 330 return; 331 addchar(*string); 332 if (*string++ == '\n') 333 flags = 0; 334 else 335 flags = REG_NOTBOL; 336 continue; 337 } 338 last_match = string + pm[0].rm_so; 339 addchars(string, pm[0].rm_so); 340 add_replace(string, re, replace, pm); 341 string += pm[0].rm_eo; 342 } 343 if (error != REG_NOMATCH) 344 exit_regerror(error, re); 345 pbstr(string); 346 } 347 348 static void 349 do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 350 { 351 int error; 352 353 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 354 case 0: 355 add_replace(string, re, replace, pm); 356 pbstr(getstring()); 357 break; 358 case REG_NOMATCH: 359 break; 360 default: 361 exit_regerror(error, re); 362 } 363 } 364 365 static void 366 do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 367 { 368 int error; 369 370 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 371 case 0: 372 pbunsigned(pm[0].rm_so); 373 break; 374 case REG_NOMATCH: 375 pbnum(-1); 376 break; 377 default: 378 exit_regerror(error, re); 379 } 380 } 381 382 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 383 * says. So we twiddle with the regexp before passing it to regcomp. 384 */ 385 static char * 386 twiddle(const char *p) 387 { 388 /* + at start of regexp is a normal character for Gnu m4 */ 389 if (*p == '^') { 390 addchar(*p); 391 p++; 392 } 393 if (*p == '+') { 394 addchar('\\'); 395 } 396 /* This could use strcspn for speed... 
*/ 397 while (*p != '\0') { 398 if (*p == '\\') { 399 switch(p[1]) { 400 case '(': 401 case ')': 402 case '|': 403 addchar(p[1]); 404 break; 405 case 'w': 406 addconstantstring("[_a-zA-Z0-9]"); 407 break; 408 case 'W': 409 addconstantstring("[^_a-zA-Z0-9]"); 410 break; 411 case '<': 412 addconstantstring("[[:<:]]"); 413 break; 414 case '>': 415 addconstantstring("[[:>:]]"); 416 break; 417 default: 418 addchars(p, 2); 419 break; 420 } 421 p+=2; 422 continue; 423 } 424 if (*p == '(' || *p == ')' || *p == '|') 425 addchar('\\'); 426 427 addchar(*p); 428 p++; 429 } 430 return getstring(); 431 } 432 433 /* patsubst(string, regexp, opt replacement) */ 434 /* argv[2]: string 435 * argv[3]: regexp 436 * argv[4]: opt rep 437 */ 438 void 439 dopatsubst(const char *argv[], int argc) 440 { 441 if (argc <= 3) { 442 warnx("Too few arguments to patsubst"); 443 return; 444 } 445 /* special case: empty regexp */ 446 if (argv[3][0] == '\0') { 447 const char *s; 448 size_t len; 449 if (argc > 4 && argv[4]) 450 len = strlen(argv[4]); 451 else 452 len = 0; 453 for (s = argv[2]; *s != '\0'; s++) { 454 addchars(argv[4], len); 455 addchar(*s); 456 } 457 } else { 458 int error; 459 regex_t re; 460 regmatch_t *pmatch; 461 int mode = REG_EXTENDED; 462 size_t l = strlen(argv[3]); 463 464 if (!mimic_gnu || 465 (argv[3][0] == '^') || 466 (l > 0 && argv[3][l-1] == '$')) 467 mode |= REG_NEWLINE; 468 469 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 470 mode); 471 if (error != 0) 472 exit_regerror(error, &re); 473 474 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 475 do_subst(argv[2], &re, 476 argc > 4 && argv[4] != NULL ? 
argv[4] : "", pmatch); 477 free(pmatch); 478 regfree(&re); 479 } 480 pbstr(getstring()); 481 } 482 483 void 484 doregexp(const char *argv[], int argc) 485 { 486 int error; 487 regex_t re; 488 regmatch_t *pmatch; 489 490 if (argc <= 3) { 491 warnx("Too few arguments to regexp"); 492 return; 493 } 494 /* special gnu case */ 495 if (argv[3][0] == '\0' && mimic_gnu) { 496 if (argc == 4 || argv[4] == NULL) 497 return; 498 else 499 pbstr(argv[4]); 500 } 501 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 502 REG_EXTENDED|REG_NEWLINE); 503 if (error != 0) 504 exit_regerror(error, &re); 505 506 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 507 if (argc == 4 || argv[4] == NULL) 508 do_regexpindex(argv[2], &re, pmatch); 509 else 510 do_regexp(argv[2], &re, argv[4], pmatch); 511 free(pmatch); 512 regfree(&re); 513 } 514 515 void 516 doformat(const char *argv[], int argc) 517 { 518 const char *format = argv[2]; 519 int pos = 3; 520 int left_padded; 521 long width; 522 size_t l; 523 const char *thisarg = NULL; 524 char temp[2]; 525 long extra; 526 527 while (*format != 0) { 528 if (*format != '%') { 529 addchar(*format++); 530 continue; 531 } 532 533 format++; 534 if (*format == '%') { 535 addchar(*format++); 536 continue; 537 } 538 if (*format == 0) { 539 addchar('%'); 540 break; 541 } 542 543 if (*format == '*') { 544 format++; 545 if (pos >= argc) 546 m4errx(1, 547 "Format with too many format specifiers."); 548 width = strtol(argv[pos++], NULL, 10); 549 } else { 550 width = strtol(format, __DECONST(char **,&format), 10); 551 } 552 if (width < 0) { 553 left_padded = 1; 554 width = -width; 555 } else { 556 left_padded = 0; 557 } 558 if (*format == '.') { 559 format++; 560 if (*format == '*') { 561 format++; 562 if (pos >= argc) 563 m4errx(1, 564 "Format with too many format specifiers."); 565 extra = strtol(argv[pos++], NULL, 10); 566 } else { 567 extra = strtol(format, __DECONST(char **, &format), 10); 568 } 569 } else { 570 extra = LONG_MAX; 571 } 
572 if (pos >= argc) 573 m4errx(1, "Format with too many format specifiers."); 574 switch(*format) { 575 case 's': 576 thisarg = argv[pos++]; 577 break; 578 case 'c': 579 temp[0] = strtoul(argv[pos++], NULL, 10); 580 temp[1] = 0; 581 thisarg = temp; 582 break; 583 default: 584 m4errx(1, "Unsupported format specification: %s.", 585 argv[2]); 586 } 587 format++; 588 l = strlen(thisarg); 589 if ((long)l > extra) 590 l = extra; 591 if (!left_padded) { 592 while ((long)l < width--) 593 addchar(' '); 594 } 595 addchars(thisarg, l); 596 if (left_padded) { 597 while ((long)l < width--) 598 addchar(' '); 599 } 600 } 601 pbstr(getstring()); 602 } 603 604 void 605 doesyscmd(const char *cmd) 606 { 607 int p[2]; 608 pid_t pid, cpid; 609 char *argv[4]; 610 int cc; 611 int status; 612 613 /* Follow gnu m4 documentation: first flush buffers. */ 614 fflush(NULL); 615 616 argv[0] = __DECONST(char *, "sh"); 617 argv[1] = __DECONST(char *, "-c"); 618 argv[2] = __DECONST(char *, cmd); 619 argv[3] = NULL; 620 621 /* Just set up standard output, share stderr and stdin with m4 */ 622 if (pipe(p) == -1) 623 err(1, "bad pipe"); 624 switch(cpid = fork()) { 625 case -1: 626 err(1, "bad fork"); 627 /* NOTREACHED */ 628 case 0: 629 (void) close(p[0]); 630 (void) dup2(p[1], 1); 631 (void) close(p[1]); 632 execv(_PATH_BSHELL, argv); 633 exit(1); 634 default: 635 /* Read result in two stages, since m4's buffer is 636 * pushback-only. */ 637 (void) close(p[1]); 638 do { 639 char result[BUFSIZE]; 640 cc = read(p[0], result, sizeof result); 641 if (cc > 0) 642 addchars(result, cc); 643 } while (cc > 0 || (cc == -1 && errno == EINTR)); 644 645 (void) close(p[0]); 646 while ((pid = wait(&status)) != cpid && pid >= 0) 647 continue; 648 pbstr(getstring()); 649 } 650 } 651 652 void 653 getdivfile(const char *name) 654 { 655 FILE *f; 656 int c; 657 658 f = fopen(name, "r"); 659 if (!f) 660 return; 661 662 while ((c = getc(f))!= EOF) 663 putc(c, active); 664 (void) fclose(f); 665 } 666