1 /* $Id: read.c,v 1.131 2015/03/11 13:05:20 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include "config.h" 20 21 #include <sys/types.h> 22 #if HAVE_MMAP 23 #include <sys/mman.h> 24 #include <sys/stat.h> 25 #endif 26 #include <sys/wait.h> 27 28 #include <assert.h> 29 #include <ctype.h> 30 #include <errno.h> 31 #include <fcntl.h> 32 #include <stdarg.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 39 #include "mandoc.h" 40 #include "mandoc_aux.h" 41 #include "libmandoc.h" 42 #include "mdoc.h" 43 #include "man.h" 44 45 #define REPARSE_LIMIT 1000 46 47 struct mparse { 48 struct man *pman; /* persistent man parser */ 49 struct mdoc *pmdoc; /* persistent mdoc parser */ 50 struct man *man; /* man parser */ 51 struct mdoc *mdoc; /* mdoc parser */ 52 struct roff *roff; /* roff parser (!NULL) */ 53 const struct mchars *mchars; /* character table */ 54 char *sodest; /* filename pointed to by .so */ 55 const char *file; /* filename of current input file */ 56 struct buf *primary; /* buffer currently being parsed */ 57 struct buf *secondary; /* preprocessed copy of input */ 58 const char *defos; /* default operating system */ 59 mandocmsg mmsg; /* warning/error message handler */ 60 enum mandoclevel file_status; /* status of current parse */ 61 enum mandoclevel wlevel; /* ignore messages below this */ 62 int options; /* parser options */ 63 int filenc; /* encoding of the current file */ 64 int reparse_count; /* finite interp. stack */ 65 int line; /* line number in the file */ 66 pid_t child; /* the gunzip(1) process */ 67 }; 68 69 static void choose_parser(struct mparse *); 70 static void resize_buf(struct buf *, size_t); 71 static void mparse_buf_r(struct mparse *, struct buf, size_t, int); 72 static int read_whole_file(struct mparse *, const char *, int, 73 struct buf *, int *); 74 static void mparse_end(struct mparse *); 75 static void mparse_parse_buffer(struct mparse *, struct buf, 76 const char *); 77 78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 79 MANDOCERR_OK, 80 MANDOCERR_WARNING, 81 MANDOCERR_WARNING, 82 MANDOCERR_ERROR, 83 MANDOCERR_UNSUPP, 84 MANDOCERR_MAX, 85 MANDOCERR_MAX 86 }; 87 88 static const char * const mandocerrs[MANDOCERR_MAX] = { 89 "ok", 90 91 "generic warning", 92 93 /* related to the prologue */ 94 "missing manual title, using UNTITLED", 95 "missing manual title, using \"\"", 96 "lower case character in document title", 97 "missing manual section, using \"\"", 98 "unknown manual section", 99 "missing date, using today's date", 100 "cannot parse date, using it verbatim", 101 "missing Os macro, using \"\"", 102 "duplicate prologue macro", 103 "late prologue macro", 104 "skipping late title macro", 105 "prologue macros out of order", 106 107 /* related to document structure */ 108 ".so is fragile, better use ln(1)", 109 "no document body", 110 "content before first section header", 111 "first section is not \"NAME\"", 112 "NAME section without name", 113 "NAME section without description", 114 "description not at the end of NAME", 115 "bad NAME section content", 116 "missing description line, using \"\"", 117 "sections out of conventional order", 118 "duplicate section title", 119 "unexpected section", 120 "unusual Xr order", 121 "unusual Xr punctuation", 122 "AUTHORS section without An macro", 123 124 /* related to macros and nesting */ 125 "obsolete macro", 126 "macro neither callable nor escaped", 127 "skipping paragraph macro", 128 "moving paragraph macro out of list", 129 "skipping no-space macro", 130 "blocks badly nested", 131 "nested displays are not portable", 132 "moving content out of list", 133 ".Vt block has child macro", 134 "fill mode already enabled, skipping", 135 "fill mode already disabled, skipping", 136 "line scope broken", 137 138 /* related to missing macro arguments */ 139 "skipping empty request", 140 "conditional request controls empty scope", 141 "skipping empty macro", 142 "empty block", 143 "empty argument, using 0n", 144 "missing display type, using -ragged", 145 "list type is not the first argument", 146 "missing -width in -tag list, using 8n", 147 "missing utility name, using \"\"", 148 "missing function name, using \"\"", 149 "empty head in list item", 150 "empty list item", 151 "missing font type, using \\fR", 152 "unknown font type, using \\fR", 153 "nothing follows prefix", 154 "empty reference block", 155 "missing -std argument, adding it", 156 "missing option string, using \"\"", 157 "missing resource identifier, using \"\"", 158 "missing eqn box, using \"\"", 159 160 /* related to bad macro arguments */ 161 "unterminated quoted argument", 162 "duplicate argument", 163 "skipping duplicate argument", 164 "skipping duplicate display type", 165 "skipping duplicate list type", 166 "skipping -width argument", 167 "wrong number of cells", 168 "unknown AT&T UNIX version", 169 "comma in function argument", 170 "parenthesis in function name", 171 "invalid content in Rs block", 172 "invalid Boolean argument", 173 "unknown font, skipping request", 174 "odd number of characters in request", 175 176 /* related to plain text */ 177 "blank line in fill mode, using .sp", 178 "tab in filled text", 179 "whitespace at end of input line", 180 "bad comment style", 181 "invalid escape sequence", 182 "undefined string, using \"\"", 183 184 /* related to tables */ 185 "tbl line starts with span", 186 "tbl column starts with span", 187 "skipping vertical bar in tbl layout", 188 189 "generic error", 190 191 /* related to tables */ 192 "non-alphabetic character in tbl options", 193 "skipping unknown tbl option", 194 "missing tbl option argument", 195 "wrong tbl option argument size", 196 "empty tbl layout", 197 "invalid character in tbl layout", 198 "unmatched parenthesis in tbl layout", 199 "tbl without any data cells", 200 "ignoring data in spanned tbl cell", 201 "ignoring extra tbl data cells", 202 "data block open at end of tbl", 203 204 /* related to document structure and macros */ 205 NULL, 206 "input stack limit exceeded, infinite loop?", 207 "skipping bad character", 208 "skipping unknown macro", 209 "skipping insecure request", 210 "skipping item outside list", 211 "skipping column outside column list", 212 "skipping end of block that is not open", 213 "fewer RS blocks open, skipping", 214 "inserting missing end of block", 215 "appending missing end of block", 216 217 /* related to request and macro arguments */ 218 "escaped character not allowed in a name", 219 "NOT IMPLEMENTED: Bd -file", 220 "missing list type, using -item", 221 "missing manual name, using \"\"", 222 "uname(3) system call failed, using UNKNOWN", 223 "unknown standard specifier", 224 "skipping request without numeric argument", 225 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 226 ".so request failed", 227 "skipping all arguments", 228 "skipping excess arguments", 229 "divide by zero", 230 231 "unsupported feature", 232 "input too large", 233 "unsupported control character", 234 "unsupported roff request", 235 "eqn delim option in tbl", 236 "unsupported tbl layout modifier", 237 "ignoring macro in table", 238 }; 239 240 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 241 "SUCCESS", 242 "RESERVED", 243 "WARNING", 244 "ERROR", 245 "UNSUPP", 246 "BADARG", 247 "SYSERR" 248 }; 249 250 251 static void 252 resize_buf(struct buf *buf, size_t initial) 253 { 254 255 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 256 buf->buf = mandoc_realloc(buf->buf, buf->sz); 257 } 258 259 static void 260 choose_parser(struct mparse *curp) 261 { 262 char *cp, *ep; 263 int format; 264 265 /* 266 * If neither command line arguments -mdoc or -man select 267 * a parser nor the roff parser found a .Dd or .TH macro 268 * yet, look ahead in the main input buffer. 269 */ 270 271 if ((format = roff_getformat(curp->roff)) == 0) { 272 cp = curp->primary->buf; 273 ep = cp + curp->primary->sz; 274 while (cp < ep) { 275 if (*cp == '.' || *cp == '\'') { 276 cp++; 277 if (cp[0] == 'D' && cp[1] == 'd') { 278 format = MPARSE_MDOC; 279 break; 280 } 281 if (cp[0] == 'T' && cp[1] == 'H') { 282 format = MPARSE_MAN; 283 break; 284 } 285 } 286 cp = memchr(cp, '\n', ep - cp); 287 if (cp == NULL) 288 break; 289 cp++; 290 } 291 } 292 293 if (format == MPARSE_MDOC) { 294 if (NULL == curp->pmdoc) 295 curp->pmdoc = mdoc_alloc( 296 curp->roff, curp, curp->defos, 297 MPARSE_QUICK & curp->options ? 1 : 0); 298 assert(curp->pmdoc); 299 curp->mdoc = curp->pmdoc; 300 return; 301 } 302 303 /* Fall back to man(7) as a last resort. */ 304 305 if (NULL == curp->pman) 306 curp->pman = man_alloc( 307 curp->roff, curp, curp->defos, 308 MPARSE_QUICK & curp->options ? 1 : 0); 309 assert(curp->pman); 310 curp->man = curp->pman; 311 } 312 313 /* 314 * Main parse routine for a buffer. 315 * It assumes encoding and line numbering are already set up. 316 * It can recurse directly (for invocations of user-defined 317 * macros, inline equations, and input line traps) 318 * and indirectly (for .so file inclusion). 319 */ 320 static void 321 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 322 { 323 const struct tbl_span *span; 324 struct buf ln; 325 const char *save_file; 326 char *cp; 327 size_t pos; /* byte number in the ln buffer */ 328 enum rofferr rr; 329 int of; 330 int lnn; /* line number in the real file */ 331 int fd; 332 pid_t save_child; 333 unsigned char c; 334 335 memset(&ln, 0, sizeof(ln)); 336 337 lnn = curp->line; 338 pos = 0; 339 340 while (i < blk.sz) { 341 if (0 == pos && '\0' == blk.buf[i]) 342 break; 343 344 if (start) { 345 curp->line = lnn; 346 curp->reparse_count = 0; 347 348 if (lnn < 3 && 349 curp->filenc & MPARSE_UTF8 && 350 curp->filenc & MPARSE_LATIN1) 351 curp->filenc = preconv_cue(&blk, i); 352 } 353 354 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 355 356 /* 357 * When finding an unescaped newline character, 358 * leave the character loop to process the line. 359 * Skip a preceding carriage return, if any. 360 */ 361 362 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 363 '\n' == blk.buf[i + 1]) 364 ++i; 365 if ('\n' == blk.buf[i]) { 366 ++i; 367 ++lnn; 368 break; 369 } 370 371 /* 372 * Make sure we have space for the worst 373 * case of 11 bytes: "\\[u10ffff]\0" 374 */ 375 376 if (pos + 11 > ln.sz) 377 resize_buf(&ln, 256); 378 379 /* 380 * Encode 8-bit input. 381 */ 382 383 c = blk.buf[i]; 384 if (c & 0x80) { 385 if ( ! (curp->filenc && preconv_encode( 386 &blk, &i, &ln, &pos, &curp->filenc))) { 387 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 388 curp->line, pos, "0x%x", c); 389 ln.buf[pos++] = '?'; 390 i++; 391 } 392 continue; 393 } 394 395 /* 396 * Exclude control characters. 397 */ 398 399 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 400 mandoc_vmsg(c == 0x00 || c == 0x04 || 401 c > 0x0a ? MANDOCERR_CHAR_BAD : 402 MANDOCERR_CHAR_UNSUPP, 403 curp, curp->line, pos, "0x%x", c); 404 i++; 405 if (c != '\r') 406 ln.buf[pos++] = '?'; 407 continue; 408 } 409 410 /* Trailing backslash = a plain char. */ 411 412 if (blk.buf[i] != '\\' || i + 1 == blk.sz) { 413 ln.buf[pos++] = blk.buf[i++]; 414 continue; 415 } 416 417 /* 418 * Found escape and at least one other character. 419 * When it's a newline character, skip it. 420 * When there is a carriage return in between, 421 * skip that one as well. 422 */ 423 424 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && 425 '\n' == blk.buf[i + 2]) 426 ++i; 427 if ('\n' == blk.buf[i + 1]) { 428 i += 2; 429 ++lnn; 430 continue; 431 } 432 433 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { 434 i += 2; 435 /* Comment, skip to end of line */ 436 for (; i < blk.sz; ++i) { 437 if ('\n' == blk.buf[i]) { 438 ++i; 439 ++lnn; 440 break; 441 } 442 } 443 444 /* Backout trailing whitespaces */ 445 for (; pos > 0; --pos) { 446 if (ln.buf[pos - 1] != ' ') 447 break; 448 if (pos > 2 && ln.buf[pos - 2] == '\\') 449 break; 450 } 451 break; 452 } 453 454 /* Catch escaped bogus characters. */ 455 456 c = (unsigned char) blk.buf[i+1]; 457 458 if ( ! (isascii(c) && 459 (isgraph(c) || isblank(c)))) { 460 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 461 curp->line, pos, "0x%x", c); 462 i += 2; 463 ln.buf[pos++] = '?'; 464 continue; 465 } 466 467 /* Some other escape sequence, copy & cont. */ 468 469 ln.buf[pos++] = blk.buf[i++]; 470 ln.buf[pos++] = blk.buf[i++]; 471 } 472 473 if (pos >= ln.sz) 474 resize_buf(&ln, 256); 475 476 ln.buf[pos] = '\0'; 477 478 /* 479 * A significant amount of complexity is contained by 480 * the roff preprocessor. It's line-oriented but can be 481 * expressed on one line, so we need at times to 482 * readjust our starting point and re-run it. The roff 483 * preprocessor can also readjust the buffers with new 484 * data, so we pass them in wholesale. 485 */ 486 487 of = 0; 488 489 /* 490 * Maintain a lookaside buffer of all parsed lines. We 491 * only do this if mparse_keep() has been invoked (the 492 * buffer may be accessed with mparse_getkeep()). 493 */ 494 495 if (curp->secondary) { 496 curp->secondary->buf = mandoc_realloc( 497 curp->secondary->buf, 498 curp->secondary->sz + pos + 2); 499 memcpy(curp->secondary->buf + 500 curp->secondary->sz, 501 ln.buf, pos); 502 curp->secondary->sz += pos; 503 curp->secondary->buf 504 [curp->secondary->sz] = '\n'; 505 curp->secondary->sz++; 506 curp->secondary->buf 507 [curp->secondary->sz] = '\0'; 508 } 509 rerun: 510 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 511 512 switch (rr) { 513 case ROFF_REPARSE: 514 if (REPARSE_LIMIT >= ++curp->reparse_count) 515 mparse_buf_r(curp, ln, of, 0); 516 else 517 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 518 curp->line, pos, NULL); 519 pos = 0; 520 continue; 521 case ROFF_APPEND: 522 pos = strlen(ln.buf); 523 continue; 524 case ROFF_RERUN: 525 goto rerun; 526 case ROFF_IGN: 527 pos = 0; 528 continue; 529 case ROFF_SO: 530 if ( ! (curp->options & MPARSE_SO) && 531 (i >= blk.sz || blk.buf[i] == '\0')) { 532 curp->sodest = mandoc_strdup(ln.buf + of); 533 free(ln.buf); 534 return; 535 } 536 /* 537 * We remove `so' clauses from our lookaside 538 * buffer because we're going to descend into 539 * the file recursively. 540 */ 541 if (curp->secondary) 542 curp->secondary->sz -= pos + 1; 543 save_file = curp->file; 544 save_child = curp->child; 545 if (mparse_open(curp, &fd, ln.buf + of) == 546 MANDOCLEVEL_OK) { 547 mparse_readfd(curp, fd, ln.buf + of); 548 curp->file = save_file; 549 } else { 550 curp->file = save_file; 551 mandoc_vmsg(MANDOCERR_SO_FAIL, 552 curp, curp->line, pos, 553 ".so %s", ln.buf + of); 554 ln.sz = mandoc_asprintf(&cp, 555 ".sp\nSee the file %s.\n.sp", 556 ln.buf + of); 557 free(ln.buf); 558 ln.buf = cp; 559 of = 0; 560 mparse_buf_r(curp, ln, of, 0); 561 } 562 curp->child = save_child; 563 pos = 0; 564 continue; 565 default: 566 break; 567 } 568 569 /* 570 * If input parsers have not been allocated, do so now. 571 * We keep these instanced between parsers, but set them 572 * locally per parse routine since we can use different 573 * parsers with each one. 574 */ 575 576 if ( ! (curp->man || curp->mdoc)) 577 choose_parser(curp); 578 579 /* 580 * Lastly, push down into the parsers themselves. 581 * If libroff returns ROFF_TBL, then add it to the 582 * currently open parse. Since we only get here if 583 * there does exist data (see tbl_data.c), we're 584 * guaranteed that something's been allocated. 585 * Do the same for ROFF_EQN. 586 */ 587 588 if (rr == ROFF_TBL) { 589 while ((span = roff_span(curp->roff)) != NULL) 590 if (curp->man == NULL) 591 mdoc_addspan(curp->mdoc, span); 592 else 593 man_addspan(curp->man, span); 594 } else if (rr == ROFF_EQN) { 595 if (curp->man == NULL) 596 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff)); 597 else 598 man_addeqn(curp->man, roff_eqn(curp->roff)); 599 } else if ((curp->man == NULL ? 600 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) : 601 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 602 break; 603 604 /* Temporary buffers typically are not full. */ 605 606 if (0 == start && '\0' == blk.buf[i]) 607 break; 608 609 /* Start the next input line. */ 610 611 pos = 0; 612 } 613 614 free(ln.buf); 615 } 616 617 static int 618 read_whole_file(struct mparse *curp, const char *file, int fd, 619 struct buf *fb, int *with_mmap) 620 { 621 size_t off; 622 ssize_t ssz; 623 624 #if HAVE_MMAP 625 struct stat st; 626 if (-1 == fstat(fd, &st)) { 627 perror(file); 628 exit((int)MANDOCLEVEL_SYSERR); 629 } 630 631 /* 632 * If we're a regular file, try just reading in the whole entry 633 * via mmap(). This is faster than reading it into blocks, and 634 * since each file is only a few bytes to begin with, I'm not 635 * concerned that this is going to tank any machines. 636 */ 637 638 if (S_ISREG(st.st_mode)) { 639 if (st.st_size > 0x7fffffff) { 640 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 641 return(0); 642 } 643 *with_mmap = 1; 644 fb->sz = (size_t)st.st_size; 645 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 646 if (fb->buf != MAP_FAILED) 647 return(1); 648 } 649 #endif 650 651 /* 652 * If this isn't a regular file (like, say, stdin), then we must 653 * go the old way and just read things in bit by bit. 654 */ 655 656 *with_mmap = 0; 657 off = 0; 658 fb->sz = 0; 659 fb->buf = NULL; 660 for (;;) { 661 if (off == fb->sz) { 662 if (fb->sz == (1U << 31)) { 663 mandoc_msg(MANDOCERR_TOOLARGE, curp, 664 0, 0, NULL); 665 break; 666 } 667 resize_buf(fb, 65536); 668 } 669 ssz = read(fd, fb->buf + (int)off, fb->sz - off); 670 if (ssz == 0) { 671 fb->sz = off; 672 return(1); 673 } 674 if (ssz == -1) { 675 perror(file); 676 exit((int)MANDOCLEVEL_SYSERR); 677 } 678 off += (size_t)ssz; 679 } 680 681 free(fb->buf); 682 fb->buf = NULL; 683 return(0); 684 } 685 686 static void 687 mparse_end(struct mparse *curp) 688 { 689 690 if (curp->mdoc == NULL && 691 curp->man == NULL && 692 curp->sodest == NULL) { 693 if (curp->options & MPARSE_MDOC) 694 curp->mdoc = curp->pmdoc; 695 else { 696 if (curp->pman == NULL) 697 curp->pman = man_alloc( 698 curp->roff, curp, curp->defos, 699 curp->options & MPARSE_QUICK ? 1 : 0); 700 curp->man = curp->pman; 701 } 702 } 703 if (curp->mdoc) 704 mdoc_endparse(curp->mdoc); 705 if (curp->man) 706 man_endparse(curp->man); 707 roff_endparse(curp->roff); 708 } 709 710 static void 711 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 712 { 713 struct buf *svprimary; 714 const char *svfile; 715 size_t offset; 716 static int recursion_depth; 717 718 if (64 < recursion_depth) { 719 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 720 return; 721 } 722 723 /* Line number is per-file. */ 724 svfile = curp->file; 725 curp->file = file; 726 svprimary = curp->primary; 727 curp->primary = &blk; 728 curp->line = 1; 729 recursion_depth++; 730 731 /* Skip an UTF-8 byte order mark. */ 732 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 733 (unsigned char)blk.buf[0] == 0xef && 734 (unsigned char)blk.buf[1] == 0xbb && 735 (unsigned char)blk.buf[2] == 0xbf) { 736 offset = 3; 737 curp->filenc &= ~MPARSE_LATIN1; 738 } else 739 offset = 0; 740 741 mparse_buf_r(curp, blk, offset, 1); 742 743 if (--recursion_depth == 0) 744 mparse_end(curp); 745 746 curp->primary = svprimary; 747 curp->file = svfile; 748 } 749 750 enum mandoclevel 751 mparse_readmem(struct mparse *curp, void *buf, size_t len, 752 const char *file) 753 { 754 struct buf blk; 755 756 blk.buf = buf; 757 blk.sz = len; 758 759 mparse_parse_buffer(curp, blk, file); 760 return(curp->file_status); 761 } 762 763 /* 764 * Read the whole file into memory and call the parsers. 765 * Called recursively when an .so request is encountered. 766 */ 767 enum mandoclevel 768 mparse_readfd(struct mparse *curp, int fd, const char *file) 769 { 770 struct buf blk; 771 int with_mmap; 772 int save_filenc; 773 774 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 775 save_filenc = curp->filenc; 776 curp->filenc = curp->options & 777 (MPARSE_UTF8 | MPARSE_LATIN1); 778 mparse_parse_buffer(curp, blk, file); 779 curp->filenc = save_filenc; 780 #if HAVE_MMAP 781 if (with_mmap) 782 munmap(blk.buf, blk.sz); 783 else 784 #endif 785 free(blk.buf); 786 } 787 788 if (fd != STDIN_FILENO && close(fd) == -1) 789 perror(file); 790 791 mparse_wait(curp); 792 return(curp->file_status); 793 } 794 795 enum mandoclevel 796 mparse_open(struct mparse *curp, int *fd, const char *file) 797 { 798 int pfd[2]; 799 int save_errno; 800 char *cp; 801 802 curp->file = file; 803 804 /* Unless zipped, try to just open the file. */ 805 806 if ((cp = strrchr(file, '.')) == NULL || 807 strcmp(cp + 1, "gz")) { 808 curp->child = 0; 809 if ((*fd = open(file, O_RDONLY)) != -1) 810 return(MANDOCLEVEL_OK); 811 812 /* Open failed; try to append ".gz". */ 813 814 mandoc_asprintf(&cp, "%s.gz", file); 815 file = cp; 816 } else 817 cp = NULL; 818 819 /* Before forking, make sure the file can be read. */ 820 821 save_errno = errno; 822 if (access(file, R_OK) == -1) { 823 if (cp != NULL) 824 errno = save_errno; 825 free(cp); 826 *fd = -1; 827 curp->child = 0; 828 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 829 return(MANDOCLEVEL_ERROR); 830 } 831 832 /* Run gunzip(1). */ 833 834 if (pipe(pfd) == -1) { 835 perror("pipe"); 836 exit((int)MANDOCLEVEL_SYSERR); 837 } 838 839 switch (curp->child = fork()) { 840 case -1: 841 perror("fork"); 842 exit((int)MANDOCLEVEL_SYSERR); 843 case 0: 844 close(pfd[0]); 845 if (dup2(pfd[1], STDOUT_FILENO) == -1) { 846 perror("dup"); 847 exit((int)MANDOCLEVEL_SYSERR); 848 } 849 execlp("gunzip", "gunzip", "-c", file, NULL); 850 perror("exec"); 851 exit((int)MANDOCLEVEL_SYSERR); 852 default: 853 close(pfd[1]); 854 *fd = pfd[0]; 855 return(MANDOCLEVEL_OK); 856 } 857 } 858 859 enum mandoclevel 860 mparse_wait(struct mparse *curp) 861 { 862 int status; 863 864 if (curp->child == 0) 865 return(MANDOCLEVEL_OK); 866 867 if (waitpid(curp->child, &status, 0) == -1) { 868 perror("wait"); 869 exit((int)MANDOCLEVEL_SYSERR); 870 } 871 curp->child = 0; 872 if (WIFSIGNALED(status)) { 873 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 874 "gunzip died from signal %d", WTERMSIG(status)); 875 return(MANDOCLEVEL_ERROR); 876 } 877 if (WEXITSTATUS(status)) { 878 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 879 "gunzip failed with code %d", WEXITSTATUS(status)); 880 return(MANDOCLEVEL_ERROR); 881 } 882 return(MANDOCLEVEL_OK); 883 } 884 885 struct mparse * 886 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, 887 const struct mchars *mchars, const char *defos) 888 { 889 struct mparse *curp; 890 891 curp = mandoc_calloc(1, sizeof(struct mparse)); 892 893 curp->options = options; 894 curp->wlevel = wlevel; 895 curp->mmsg = mmsg; 896 curp->defos = defos; 897 898 curp->mchars = mchars; 899 curp->roff = roff_alloc(curp, curp->mchars, options); 900 if (curp->options & MPARSE_MDOC) 901 curp->pmdoc = mdoc_alloc( 902 curp->roff, curp, curp->defos, 903 curp->options & MPARSE_QUICK ? 1 : 0); 904 if (curp->options & MPARSE_MAN) 905 curp->pman = man_alloc( 906 curp->roff, curp, curp->defos, 907 curp->options & MPARSE_QUICK ? 1 : 0); 908 909 return(curp); 910 } 911 912 void 913 mparse_reset(struct mparse *curp) 914 { 915 916 roff_reset(curp->roff); 917 918 if (curp->mdoc) 919 mdoc_reset(curp->mdoc); 920 if (curp->man) 921 man_reset(curp->man); 922 if (curp->secondary) 923 curp->secondary->sz = 0; 924 925 curp->file_status = MANDOCLEVEL_OK; 926 curp->mdoc = NULL; 927 curp->man = NULL; 928 929 free(curp->sodest); 930 curp->sodest = NULL; 931 } 932 933 void 934 mparse_free(struct mparse *curp) 935 { 936 937 if (curp->pmdoc) 938 mdoc_free(curp->pmdoc); 939 if (curp->pman) 940 man_free(curp->pman); 941 if (curp->roff) 942 roff_free(curp->roff); 943 if (curp->secondary) 944 free(curp->secondary->buf); 945 946 free(curp->secondary); 947 free(curp->sodest); 948 free(curp); 949 } 950 951 void 952 mparse_result(struct mparse *curp, 953 struct mdoc **mdoc, struct man **man, char **sodest) 954 { 955 956 if (sodest && NULL != (*sodest = curp->sodest)) { 957 *mdoc = NULL; 958 *man = NULL; 959 return; 960 } 961 if (mdoc) 962 *mdoc = curp->mdoc; 963 if (man) 964 *man = curp->man; 965 } 966 967 void 968 mandoc_vmsg(enum mandocerr t, struct mparse *m, 969 int ln, int pos, const char *fmt, ...) 970 { 971 char buf[256]; 972 va_list ap; 973 974 va_start(ap, fmt); 975 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 976 va_end(ap); 977 978 mandoc_msg(t, m, ln, pos, buf); 979 } 980 981 void 982 mandoc_msg(enum mandocerr er, struct mparse *m, 983 int ln, int col, const char *msg) 984 { 985 enum mandoclevel level; 986 987 level = MANDOCLEVEL_UNSUPP; 988 while (er < mandoclimits[level]) 989 level--; 990 991 if (level < m->wlevel && er != MANDOCERR_FILE) 992 return; 993 994 if (m->mmsg) 995 (*m->mmsg)(er, level, m->file, ln, col, msg); 996 997 if (m->file_status < level) 998 m->file_status = level; 999 } 1000 1001 const char * 1002 mparse_strerror(enum mandocerr er) 1003 { 1004 1005 return(mandocerrs[er]); 1006 } 1007 1008 const char * 1009 mparse_strlevel(enum mandoclevel lvl) 1010 { 1011 return(mandoclevels[lvl]); 1012 } 1013 1014 void 1015 mparse_keep(struct mparse *p) 1016 { 1017 1018 assert(NULL == p->secondary); 1019 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 1020 } 1021 1022 const char * 1023 mparse_getkeep(const struct mparse *p) 1024 { 1025 1026 assert(p->secondary); 1027 return(p->secondary->sz ? p->secondary->buf : NULL); 1028 } 1029