1 /* $OpenBSD: gnum4.c,v 1.16 2002/02/16 21:27:48 millert Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * functions needed to support gnu-m4 extensions, including a fake freezing 30 */ 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/wait.h> 35 #include <ctype.h> 36 #include <paths.h> 37 #include <regex.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <stdio.h> 41 #include <string.h> 42 #include <err.h> 43 #include <errno.h> 44 #include <unistd.h> 45 #include "mdef.h" 46 #include "stdd.h" 47 #include "extern.h" 48 49 50 int mimic_gnu = 0; 51 52 /* 53 * Support for include path search 54 * First search in the the current directory. 55 * If not found, and the path is not absolute, include path kicks in. 56 * First, -I options, in the order found on the command line. 57 * Then M4PATH env variable 58 */ 59 60 struct path_entry { 61 char *name; 62 struct path_entry *next; 63 } *first, *last; 64 65 static struct path_entry *new_path_entry(const char *); 66 static void ensure_m4path(void); 67 static struct input_file *dopath(struct input_file *, const char *); 68 69 static struct path_entry * 70 new_path_entry(dirname) 71 const char *dirname; 72 { 73 struct path_entry *n; 74 75 n = malloc(sizeof(struct path_entry)); 76 if (!n) 77 errx(1, "out of memory"); 78 n->name = strdup(dirname); 79 if (!n->name) 80 errx(1, "out of memory"); 81 n->next = 0; 82 return n; 83 } 84 85 void 86 addtoincludepath(dirname) 87 const char *dirname; 88 { 89 struct path_entry *n; 90 91 n = new_path_entry(dirname); 92 93 if (last) { 94 last->next = n; 95 last = n; 96 } 97 else 98 last = first = n; 99 } 100 101 static void 102 ensure_m4path() 103 { 104 static int envpathdone = 0; 105 char *envpath; 106 char *sweep; 107 char *path; 108 109 if (envpathdone) 110 return; 111 envpathdone = TRUE; 112 envpath = getenv("M4PATH"); 113 if (!envpath) 114 return; 115 /* for portability: getenv result is read-only */ 116 envpath = strdup(envpath); 117 if (!envpath) 118 errx(1, "out of memory"); 119 for (sweep = envpath; 120 (path = strsep(&sweep, ":")) != NULL;) 121 addtoincludepath(path); 122 free(envpath); 123 } 124 125 static 126 struct input_file * 127 dopath(i, filename) 128 struct input_file *i; 129 const char *filename; 130 { 131 char path[MAXPATHLEN]; 132 struct path_entry *pe; 133 FILE *f; 134 135 for (pe = first; pe; pe = pe->next) { 136 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 137 if ((f = fopen(path, "r")) != 0) { 138 set_input(i, f, path); 139 return i; 140 } 141 } 142 return NULL; 143 } 144 145 struct input_file * 146 fopen_trypath(i, filename) 147 struct input_file *i; 148 const char *filename; 149 { 150 FILE *f; 151 152 f = fopen(filename, "r"); 153 if (f != NULL) { 154 set_input(i, f, filename); 155 return i; 156 } 157 if (filename[0] == '/') 158 return NULL; 159 160 ensure_m4path(); 161 162 return dopath(i, filename); 163 } 164 165 void 166 doindir(argv, argc) 167 const char *argv[]; 168 int argc; 169 { 170 ndptr p; 171 172 p = lookup(argv[2]); 173 if (p == NULL) 174 errx(1, "undefined macro %s", argv[2]); 175 argv[1] = p->defn; 176 eval(argv+1, argc-1, p->type); 177 } 178 179 void 180 dobuiltin(argv, argc) 181 const char *argv[]; 182 int argc; 183 { 184 int n; 185 argv[1] = NULL; 186 n = builtin_type(argv[2]); 187 if (n != -1) 188 eval(argv+1, argc-1, n); 189 else 190 errx(1, "unknown builtin %s", argv[2]); 191 } 192 193 194 /* We need some temporary buffer space, as pb pushes BACK and substitution 195 * proceeds forward... */ 196 static char *buffer; 197 static size_t bufsize = 0; 198 static size_t current = 0; 199 200 static void addchars(const char *, size_t); 201 static void addchar(char); 202 static char *twiddle(const char *); 203 static char *getstring(void); 204 static void exit_regerror(int, regex_t *); 205 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 206 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 207 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 208 static void add_sub(int, const char *, regex_t *, regmatch_t *); 209 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 210 #define addconstantstring(s) addchars((s), sizeof(s)-1) 211 212 static void 213 addchars(c, n) 214 const char *c; 215 size_t n; 216 { 217 if (n == 0) 218 return; 219 while (current + n > bufsize) { 220 if (bufsize == 0) 221 bufsize = 1024; 222 else 223 bufsize *= 2; 224 buffer = realloc(buffer, bufsize); 225 if (buffer == NULL) 226 errx(1, "out of memory"); 227 } 228 memcpy(buffer+current, c, n); 229 current += n; 230 } 231 232 static void 233 addchar(c) 234 char c; 235 { 236 if (current +1 > bufsize) { 237 if (bufsize == 0) 238 bufsize = 1024; 239 else 240 bufsize *= 2; 241 buffer = realloc(buffer, bufsize); 242 if (buffer == NULL) 243 errx(1, "out of memory"); 244 } 245 buffer[current++] = c; 246 } 247 248 static char * 249 getstring() 250 { 251 addchar('\0'); 252 current = 0; 253 return buffer; 254 } 255 256 257 static void 258 exit_regerror(er, re) 259 int er; 260 regex_t *re; 261 { 262 size_t errlen; 263 char *errbuf; 264 265 errlen = regerror(er, re, NULL, 0); 266 errbuf = xalloc(errlen); 267 regerror(er, re, errbuf, errlen); 268 errx(1, "regular expression error: %s", errbuf); 269 } 270 271 static void 272 add_sub(n, string, re, pm) 273 int n; 274 const char *string; 275 regex_t *re; 276 regmatch_t *pm; 277 { 278 if (n > re->re_nsub) 279 warnx("No subexpression %d", n); 280 /* Subexpressions that did not match are 281 * not an error. */ 282 else if (pm[n].rm_so != -1 && 283 pm[n].rm_eo != -1) { 284 addchars(string + pm[n].rm_so, 285 pm[n].rm_eo - pm[n].rm_so); 286 } 287 } 288 289 /* Add replacement string to the output buffer, recognizing special 290 * constructs and replacing them with substrings of the original string. 291 */ 292 static void 293 add_replace(string, re, replace, pm) 294 const char *string; 295 regex_t *re; 296 const char *replace; 297 regmatch_t *pm; 298 { 299 const char *p; 300 301 for (p = replace; *p != '\0'; p++) { 302 if (*p == '&' && !mimic_gnu) { 303 add_sub(0, string, re, pm); 304 continue; 305 } 306 if (*p == '\\') { 307 if (p[1] == '\\') { 308 addchar(p[1]); 309 p++; 310 continue; 311 } 312 if (p[1] == '&') { 313 if (mimic_gnu) 314 add_sub(0, string, re, pm); 315 else 316 addchar(p[1]); 317 p++; 318 continue; 319 } 320 if (isdigit(p[1])) { 321 add_sub(*(++p) - '0', string, re, pm); 322 continue; 323 } 324 } 325 addchar(*p); 326 } 327 } 328 329 static void 330 do_subst(string, re, replace, pm) 331 const char *string; 332 regex_t *re; 333 const char *replace; 334 regmatch_t *pm; 335 { 336 int error; 337 int flags = 0; 338 const char *last_match = NULL; 339 340 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 341 if (pm[0].rm_eo != 0) { 342 if (string[pm[0].rm_eo-1] == '\n') 343 flags = 0; 344 else 345 flags = REG_NOTBOL; 346 } 347 348 /* NULL length matches are special... We use the `vi-mode' 349 * rule: don't allow a NULL-match at the last match 350 * position. 351 */ 352 if (pm[0].rm_so == pm[0].rm_eo && 353 string + pm[0].rm_so == last_match) { 354 if (*string == '\0') 355 return; 356 addchar(*string); 357 if (*string++ == '\n') 358 flags = 0; 359 else 360 flags = REG_NOTBOL; 361 continue; 362 } 363 last_match = string + pm[0].rm_so; 364 addchars(string, pm[0].rm_so); 365 add_replace(string, re, replace, pm); 366 string += pm[0].rm_eo; 367 } 368 if (error != REG_NOMATCH) 369 exit_regerror(error, re); 370 pbstr(string); 371 } 372 373 static void 374 do_regexp(string, re, replace, pm) 375 const char *string; 376 regex_t *re; 377 const char *replace; 378 regmatch_t *pm; 379 { 380 int error; 381 382 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 383 case 0: 384 add_replace(string, re, replace, pm); 385 pbstr(getstring()); 386 break; 387 case REG_NOMATCH: 388 break; 389 default: 390 exit_regerror(error, re); 391 } 392 } 393 394 static void 395 do_regexpindex(string, re, pm) 396 const char *string; 397 regex_t *re; 398 regmatch_t *pm; 399 { 400 int error; 401 402 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 403 case 0: 404 pbunsigned(pm[0].rm_so); 405 break; 406 case REG_NOMATCH: 407 pbnum(-1); 408 break; 409 default: 410 exit_regerror(error, re); 411 } 412 } 413 414 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 415 * says. So we twiddle with the regexp before passing it to regcomp. 416 */ 417 static char * 418 twiddle(p) 419 const char *p; 420 { 421 /* This could use strcspn for speed... */ 422 while (*p != '\0') { 423 if (*p == '\\') { 424 switch(p[1]) { 425 case '(': 426 case ')': 427 case '|': 428 addchar(p[1]); 429 break; 430 case 'w': 431 addconstantstring("[_a-zA-Z0-9]"); 432 break; 433 case 'W': 434 addconstantstring("[^_a-zA-Z0-9]"); 435 break; 436 case '<': 437 addconstantstring("[[:<:]]"); 438 break; 439 case '>': 440 addconstantstring("[[:>:]]"); 441 break; 442 default: 443 addchars(p, 2); 444 break; 445 } 446 p+=2; 447 continue; 448 } 449 if (*p == '(' || *p == ')' || *p == '|') 450 addchar('\\'); 451 452 addchar(*p); 453 p++; 454 } 455 return getstring(); 456 } 457 458 /* patsubst(string, regexp, opt replacement) */ 459 /* argv[2]: string 460 * argv[3]: regexp 461 * argv[4]: opt rep 462 */ 463 void 464 dopatsubst(argv, argc) 465 const char *argv[]; 466 int argc; 467 { 468 int error; 469 regex_t re; 470 regmatch_t *pmatch; 471 472 if (argc <= 3) { 473 warnx("Too few arguments to patsubst"); 474 return; 475 } 476 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 477 REG_NEWLINE | REG_EXTENDED); 478 if (error != 0) 479 exit_regerror(error, &re); 480 481 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 482 do_subst(argv[2], &re, 483 argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch); 484 pbstr(getstring()); 485 free(pmatch); 486 regfree(&re); 487 } 488 489 void 490 doregexp(argv, argc) 491 const char *argv[]; 492 int argc; 493 { 494 int error; 495 regex_t re; 496 regmatch_t *pmatch; 497 498 if (argc <= 3) { 499 warnx("Too few arguments to regexp"); 500 return; 501 } 502 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 503 REG_EXTENDED); 504 if (error != 0) 505 exit_regerror(error, &re); 506 507 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 508 if (argv[4] == NULL || argc == 4) 509 do_regexpindex(argv[2], &re, pmatch); 510 else 511 do_regexp(argv[2], &re, argv[4], pmatch); 512 free(pmatch); 513 regfree(&re); 514 } 515 516 void 517 doesyscmd(cmd) 518 const char *cmd; 519 { 520 int p[2]; 521 pid_t pid, cpid; 522 char *argv[4]; 523 int cc; 524 int status; 525 526 /* Follow gnu m4 documentation: first flush buffers. */ 527 fflush(NULL); 528 529 argv[0] = "sh"; 530 argv[1] = "-c"; 531 argv[2] = (char *)cmd; 532 argv[3] = NULL; 533 534 /* Just set up standard output, share stderr and stdin with m4 */ 535 if (pipe(p) == -1) 536 err(1, "bad pipe"); 537 switch(cpid = fork()) { 538 case -1: 539 err(1, "bad fork"); 540 /* NOTREACHED */ 541 case 0: 542 (void) close(p[0]); 543 (void) dup2(p[1], 1); 544 (void) close(p[1]); 545 execv(_PATH_BSHELL, argv); 546 exit(1); 547 default: 548 /* Read result in two stages, since m4's buffer is 549 * pushback-only. */ 550 (void) close(p[1]); 551 do { 552 char result[BUFSIZE]; 553 cc = read(p[0], result, sizeof result); 554 if (cc > 0) 555 addchars(result, cc); 556 } while (cc > 0 || (cc == -1 && errno == EINTR)); 557 558 (void) close(p[0]); 559 while ((pid = wait(&status)) != cpid && pid >= 0) 560 continue; 561 pbstr(getstring()); 562 } 563 } 564