/*	$Id: read.c,v 1.150.2.5 2017/01/09 02:25:53 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */ 19 #include "config.h" 20 21 #include <sys/types.h> 22 #if HAVE_MMAP 23 #include <sys/mman.h> 24 #include <sys/stat.h> 25 #endif 26 27 #include <assert.h> 28 #include <ctype.h> 29 #if HAVE_ERR 30 #include <err.h> 31 #endif 32 #include <errno.h> 33 #include <fcntl.h> 34 #include <stdarg.h> 35 #include <stdint.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <unistd.h> 40 #include <zlib.h> 41 42 #include "mandoc_aux.h" 43 #include "mandoc.h" 44 #include "roff.h" 45 #include "mdoc.h" 46 #include "man.h" 47 #include "libmandoc.h" 48 #include "roff_int.h" 49 50 #define REPARSE_LIMIT 1000 51 52 struct mparse { 53 struct roff_man *man; /* man parser */ 54 struct roff *roff; /* roff parser (!NULL) */ 55 char *sodest; /* filename pointed to by .so */ 56 const char *file; /* filename of current input file */ 57 struct buf *primary; /* buffer currently being parsed */ 58 struct buf *secondary; /* preprocessed copy of input */ 59 const char *defos; /* default operating system */ 60 mandocmsg mmsg; /* warning/error message handler */ 61 enum mandoclevel file_status; /* status of current parse */ 62 enum mandoclevel wlevel; /* ignore messages below this */ 63 int options; /* parser options */ 64 int gzip; /* current input file is gzipped */ 65 int filenc; /* encoding of the current file */ 66 int reparse_count; /* finite interp. 
stack */ 67 int line; /* line number in the file */ 68 }; 69 70 static void choose_parser(struct mparse *); 71 static void resize_buf(struct buf *, size_t); 72 static void mparse_buf_r(struct mparse *, struct buf, size_t, int); 73 static int read_whole_file(struct mparse *, const char *, int, 74 struct buf *, int *); 75 static void mparse_end(struct mparse *); 76 static void mparse_parse_buffer(struct mparse *, struct buf, 77 const char *); 78 79 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 80 MANDOCERR_OK, 81 MANDOCERR_WARNING, 82 MANDOCERR_WARNING, 83 MANDOCERR_ERROR, 84 MANDOCERR_UNSUPP, 85 MANDOCERR_MAX, 86 MANDOCERR_MAX 87 }; 88 89 static const char * const mandocerrs[MANDOCERR_MAX] = { 90 "ok", 91 92 "generic warning", 93 94 /* related to the prologue */ 95 "missing manual title, using UNTITLED", 96 "missing manual title, using \"\"", 97 "lower case character in document title", 98 "missing manual section, using \"\"", 99 "unknown manual section", 100 "missing date, using today's date", 101 "cannot parse date, using it verbatim", 102 "missing Os macro, using \"\"", 103 "duplicate prologue macro", 104 "late prologue macro", 105 "skipping late title macro", 106 "prologue macros out of order", 107 108 /* related to document structure */ 109 ".so is fragile, better use ln(1)", 110 "no document body", 111 "content before first section header", 112 "first section is not \"NAME\"", 113 "NAME section without Nm before Nd", 114 "NAME section without description", 115 "description not at the end of NAME", 116 "bad NAME section content", 117 "missing comma before name", 118 "missing description line, using \"\"", 119 "sections out of conventional order", 120 "duplicate section title", 121 "unexpected section", 122 "unusual Xr order", 123 "unusual Xr punctuation", 124 "AUTHORS section without An macro", 125 126 /* related to macros and nesting */ 127 "obsolete macro", 128 "macro neither callable nor escaped", 129 "skipping paragraph macro", 130 "moving 
paragraph macro out of list", 131 "skipping no-space macro", 132 "blocks badly nested", 133 "nested displays are not portable", 134 "moving content out of list", 135 "fill mode already enabled, skipping", 136 "fill mode already disabled, skipping", 137 "line scope broken", 138 139 /* related to missing macro arguments */ 140 "skipping empty request", 141 "conditional request controls empty scope", 142 "skipping empty macro", 143 "empty block", 144 "empty argument, using 0n", 145 "missing display type, using -ragged", 146 "list type is not the first argument", 147 "missing -width in -tag list, using 6n", 148 "missing utility name, using \"\"", 149 "missing function name, using \"\"", 150 "empty head in list item", 151 "empty list item", 152 "missing font type, using \\fR", 153 "unknown font type, using \\fR", 154 "nothing follows prefix", 155 "empty reference block", 156 "missing section argument", 157 "missing -std argument, adding it", 158 "missing option string, using \"\"", 159 "missing resource identifier, using \"\"", 160 "missing eqn box, using \"\"", 161 162 /* related to bad macro arguments */ 163 "unterminated quoted argument", 164 "duplicate argument", 165 "skipping duplicate argument", 166 "skipping duplicate display type", 167 "skipping duplicate list type", 168 "skipping -width argument", 169 "wrong number of cells", 170 "unknown AT&T UNIX version", 171 "comma in function argument", 172 "parenthesis in function name", 173 "invalid content in Rs block", 174 "invalid Boolean argument", 175 "unknown font, skipping request", 176 "odd number of characters in request", 177 178 /* related to plain text */ 179 "blank line in fill mode, using .sp", 180 "tab in filled text", 181 "whitespace at end of input line", 182 "bad comment style", 183 "invalid escape sequence", 184 "undefined string, using \"\"", 185 186 /* related to tables */ 187 "tbl line starts with span", 188 "tbl column starts with span", 189 "skipping vertical bar in tbl layout", 190 191 "generic 
error", 192 193 /* related to tables */ 194 "non-alphabetic character in tbl options", 195 "skipping unknown tbl option", 196 "missing tbl option argument", 197 "wrong tbl option argument size", 198 "empty tbl layout", 199 "invalid character in tbl layout", 200 "unmatched parenthesis in tbl layout", 201 "tbl without any data cells", 202 "ignoring data in spanned tbl cell", 203 "ignoring extra tbl data cells", 204 "data block open at end of tbl", 205 206 /* related to document structure and macros */ 207 NULL, 208 "input stack limit exceeded, infinite loop?", 209 "skipping bad character", 210 "skipping unknown macro", 211 "skipping insecure request", 212 "skipping item outside list", 213 "skipping column outside column list", 214 "skipping end of block that is not open", 215 "fewer RS blocks open, skipping", 216 "inserting missing end of block", 217 "appending missing end of block", 218 219 /* related to request and macro arguments */ 220 "escaped character not allowed in a name", 221 "NOT IMPLEMENTED: Bd -file", 222 "skipping display without arguments", 223 "missing list type, using -item", 224 "missing manual name, using \"\"", 225 "uname(3) system call failed, using UNKNOWN", 226 "unknown standard specifier", 227 "skipping request without numeric argument", 228 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 229 ".so request failed", 230 "skipping all arguments", 231 "skipping excess arguments", 232 "divide by zero", 233 234 "unsupported feature", 235 "input too large", 236 "unsupported control character", 237 "unsupported roff request", 238 "eqn delim option in tbl", 239 "unsupported tbl layout modifier", 240 "ignoring macro in table", 241 }; 242 243 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 244 "SUCCESS", 245 "RESERVED", 246 "WARNING", 247 "ERROR", 248 "UNSUPP", 249 "BADARG", 250 "SYSERR" 251 }; 252 253 254 static void 255 resize_buf(struct buf *buf, size_t initial) 256 { 257 258 buf->sz = buf->sz > initial/2 ? 
2 * buf->sz : initial; 259 buf->buf = mandoc_realloc(buf->buf, buf->sz); 260 } 261 262 static void 263 choose_parser(struct mparse *curp) 264 { 265 char *cp, *ep; 266 int format; 267 268 /* 269 * If neither command line arguments -mdoc or -man select 270 * a parser nor the roff parser found a .Dd or .TH macro 271 * yet, look ahead in the main input buffer. 272 */ 273 274 if ((format = roff_getformat(curp->roff)) == 0) { 275 cp = curp->primary->buf; 276 ep = cp + curp->primary->sz; 277 while (cp < ep) { 278 if (*cp == '.' || *cp == '\'') { 279 cp++; 280 if (cp[0] == 'D' && cp[1] == 'd') { 281 format = MPARSE_MDOC; 282 break; 283 } 284 if (cp[0] == 'T' && cp[1] == 'H') { 285 format = MPARSE_MAN; 286 break; 287 } 288 } 289 cp = memchr(cp, '\n', ep - cp); 290 if (cp == NULL) 291 break; 292 cp++; 293 } 294 } 295 296 if (format == MPARSE_MDOC) { 297 mdoc_hash_init(); 298 curp->man->macroset = MACROSET_MDOC; 299 curp->man->first->tok = TOKEN_NONE; 300 } else { 301 man_hash_init(); 302 curp->man->macroset = MACROSET_MAN; 303 curp->man->first->tok = TOKEN_NONE; 304 } 305 } 306 307 /* 308 * Main parse routine for a buffer. 309 * It assumes encoding and line numbering are already set up. 310 * It can recurse directly (for invocations of user-defined 311 * macros, inline equations, and input line traps) 312 * and indirectly (for .so file inclusion). 
313 */ 314 static void 315 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 316 { 317 const struct tbl_span *span; 318 struct buf ln; 319 const char *save_file; 320 char *cp; 321 size_t pos; /* byte number in the ln buffer */ 322 size_t j; /* auxiliary byte number in the blk buffer */ 323 enum rofferr rr; 324 int of; 325 int lnn; /* line number in the real file */ 326 int fd; 327 unsigned char c; 328 329 memset(&ln, 0, sizeof(ln)); 330 331 lnn = curp->line; 332 pos = 0; 333 334 while (i < blk.sz) { 335 if (0 == pos && '\0' == blk.buf[i]) 336 break; 337 338 if (start) { 339 curp->line = lnn; 340 curp->reparse_count = 0; 341 342 if (lnn < 3 && 343 curp->filenc & MPARSE_UTF8 && 344 curp->filenc & MPARSE_LATIN1) 345 curp->filenc = preconv_cue(&blk, i); 346 } 347 348 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 349 350 /* 351 * When finding an unescaped newline character, 352 * leave the character loop to process the line. 353 * Skip a preceding carriage return, if any. 354 */ 355 356 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 357 '\n' == blk.buf[i + 1]) 358 ++i; 359 if ('\n' == blk.buf[i]) { 360 ++i; 361 ++lnn; 362 break; 363 } 364 365 /* 366 * Make sure we have space for the worst 367 * case of 11 bytes: "\\[u10ffff]\0" 368 */ 369 370 if (pos + 11 > ln.sz) 371 resize_buf(&ln, 256); 372 373 /* 374 * Encode 8-bit input. 375 */ 376 377 c = blk.buf[i]; 378 if (c & 0x80) { 379 if ( ! (curp->filenc && preconv_encode( 380 &blk, &i, &ln, &pos, &curp->filenc))) { 381 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 382 curp->line, pos, "0x%x", c); 383 ln.buf[pos++] = '?'; 384 i++; 385 } 386 continue; 387 } 388 389 /* 390 * Exclude control characters. 391 */ 392 393 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 394 mandoc_vmsg(c == 0x00 || c == 0x04 || 395 c > 0x0a ? 
MANDOCERR_CHAR_BAD : 396 MANDOCERR_CHAR_UNSUPP, 397 curp, curp->line, pos, "0x%x", c); 398 i++; 399 if (c != '\r') 400 ln.buf[pos++] = '?'; 401 continue; 402 } 403 404 /* Trailing backslash = a plain char. */ 405 406 if (blk.buf[i] != '\\' || i + 1 == blk.sz) { 407 ln.buf[pos++] = blk.buf[i++]; 408 continue; 409 } 410 411 /* 412 * Found escape and at least one other character. 413 * When it's a newline character, skip it. 414 * When there is a carriage return in between, 415 * skip that one as well. 416 */ 417 418 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && 419 '\n' == blk.buf[i + 2]) 420 ++i; 421 if ('\n' == blk.buf[i + 1]) { 422 i += 2; 423 ++lnn; 424 continue; 425 } 426 427 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { 428 j = i; 429 i += 2; 430 /* Comment, skip to end of line */ 431 for (; i < blk.sz; ++i) { 432 if (blk.buf[i] != '\n') 433 continue; 434 if (blk.buf[i - 1] == ' ' || 435 blk.buf[i - 1] == '\t') 436 mandoc_msg( 437 MANDOCERR_SPACE_EOL, 438 curp, curp->line, 439 pos + i-1 - j, NULL); 440 ++i; 441 ++lnn; 442 break; 443 } 444 445 /* Backout trailing whitespaces */ 446 for (; pos > 0; --pos) { 447 if (ln.buf[pos - 1] != ' ') 448 break; 449 if (pos > 2 && ln.buf[pos - 2] == '\\') 450 break; 451 } 452 break; 453 } 454 455 /* Catch escaped bogus characters. */ 456 457 c = (unsigned char) blk.buf[i+1]; 458 459 if ( ! (isascii(c) && 460 (isgraph(c) || isblank(c)))) { 461 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 462 curp->line, pos, "0x%x", c); 463 i += 2; 464 ln.buf[pos++] = '?'; 465 continue; 466 } 467 468 /* Some other escape sequence, copy & cont. */ 469 470 ln.buf[pos++] = blk.buf[i++]; 471 ln.buf[pos++] = blk.buf[i++]; 472 } 473 474 if (pos >= ln.sz) 475 resize_buf(&ln, 256); 476 477 ln.buf[pos] = '\0'; 478 479 /* 480 * A significant amount of complexity is contained by 481 * the roff preprocessor. It's line-oriented but can be 482 * expressed on one line, so we need at times to 483 * readjust our starting point and re-run it. 
The roff 484 * preprocessor can also readjust the buffers with new 485 * data, so we pass them in wholesale. 486 */ 487 488 of = 0; 489 490 /* 491 * Maintain a lookaside buffer of all parsed lines. We 492 * only do this if mparse_keep() has been invoked (the 493 * buffer may be accessed with mparse_getkeep()). 494 */ 495 496 if (curp->secondary) { 497 curp->secondary->buf = mandoc_realloc( 498 curp->secondary->buf, 499 curp->secondary->sz + pos + 2); 500 memcpy(curp->secondary->buf + 501 curp->secondary->sz, 502 ln.buf, pos); 503 curp->secondary->sz += pos; 504 curp->secondary->buf 505 [curp->secondary->sz] = '\n'; 506 curp->secondary->sz++; 507 curp->secondary->buf 508 [curp->secondary->sz] = '\0'; 509 } 510 rerun: 511 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 512 513 switch (rr) { 514 case ROFF_REPARSE: 515 if (REPARSE_LIMIT >= ++curp->reparse_count) 516 mparse_buf_r(curp, ln, of, 0); 517 else 518 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 519 curp->line, pos, NULL); 520 pos = 0; 521 continue; 522 case ROFF_APPEND: 523 pos = strlen(ln.buf); 524 continue; 525 case ROFF_RERUN: 526 goto rerun; 527 case ROFF_IGN: 528 pos = 0; 529 continue; 530 case ROFF_SO: 531 if ( ! (curp->options & MPARSE_SO) && 532 (i >= blk.sz || blk.buf[i] == '\0')) { 533 curp->sodest = mandoc_strdup(ln.buf + of); 534 free(ln.buf); 535 return; 536 } 537 /* 538 * We remove `so' clauses from our lookaside 539 * buffer because we're going to descend into 540 * the file recursively. 
541 */ 542 if (curp->secondary) 543 curp->secondary->sz -= pos + 1; 544 save_file = curp->file; 545 if ((fd = mparse_open(curp, ln.buf + of)) != -1) { 546 mparse_readfd(curp, fd, ln.buf + of); 547 close(fd); 548 curp->file = save_file; 549 } else { 550 curp->file = save_file; 551 mandoc_vmsg(MANDOCERR_SO_FAIL, 552 curp, curp->line, pos, 553 ".so %s", ln.buf + of); 554 ln.sz = mandoc_asprintf(&cp, 555 ".sp\nSee the file %s.\n.sp", 556 ln.buf + of); 557 free(ln.buf); 558 ln.buf = cp; 559 of = 0; 560 mparse_buf_r(curp, ln, of, 0); 561 } 562 pos = 0; 563 continue; 564 default: 565 break; 566 } 567 568 if (curp->man->macroset == MACROSET_NONE) 569 choose_parser(curp); 570 571 /* 572 * Lastly, push down into the parsers themselves. 573 * If libroff returns ROFF_TBL, then add it to the 574 * currently open parse. Since we only get here if 575 * there does exist data (see tbl_data.c), we're 576 * guaranteed that something's been allocated. 577 * Do the same for ROFF_EQN. 578 */ 579 580 if (rr == ROFF_TBL) 581 while ((span = roff_span(curp->roff)) != NULL) 582 roff_addtbl(curp->man, span); 583 else if (rr == ROFF_EQN) 584 roff_addeqn(curp->man, roff_eqn(curp->roff)); 585 else if ((curp->man->macroset == MACROSET_MDOC ? 586 mdoc_parseln(curp->man, curp->line, ln.buf, of) : 587 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 588 break; 589 590 /* Temporary buffers typically are not full. */ 591 592 if (0 == start && '\0' == blk.buf[i]) 593 break; 594 595 /* Start the next input line. */ 596 597 pos = 0; 598 } 599 600 free(ln.buf); 601 } 602 603 static int 604 read_whole_file(struct mparse *curp, const char *file, int fd, 605 struct buf *fb, int *with_mmap) 606 { 607 gzFile gz; 608 size_t off; 609 ssize_t ssz; 610 611 #if HAVE_MMAP 612 struct stat st; 613 614 if (fstat(fd, &st) == -1) 615 err((int)MANDOCLEVEL_SYSERR, "%s", file); 616 617 /* 618 * If we're a regular file, try just reading in the whole entry 619 * via mmap(). 
This is faster than reading it into blocks, and 620 * since each file is only a few bytes to begin with, I'm not 621 * concerned that this is going to tank any machines. 622 */ 623 624 if (curp->gzip == 0 && S_ISREG(st.st_mode)) { 625 if (st.st_size > 0x7fffffff) { 626 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 627 return 0; 628 } 629 *with_mmap = 1; 630 fb->sz = (size_t)st.st_size; 631 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 632 if (fb->buf != MAP_FAILED) 633 return 1; 634 } 635 #endif 636 637 if (curp->gzip) { 638 if ((gz = gzdopen(fd, "rb")) == NULL) 639 err((int)MANDOCLEVEL_SYSERR, "%s", file); 640 } else 641 gz = NULL; 642 643 /* 644 * If this isn't a regular file (like, say, stdin), then we must 645 * go the old way and just read things in bit by bit. 646 */ 647 648 *with_mmap = 0; 649 off = 0; 650 fb->sz = 0; 651 fb->buf = NULL; 652 for (;;) { 653 if (off == fb->sz) { 654 if (fb->sz == (1U << 31)) { 655 mandoc_msg(MANDOCERR_TOOLARGE, curp, 656 0, 0, NULL); 657 break; 658 } 659 resize_buf(fb, 65536); 660 } 661 ssz = curp->gzip ? 
662 gzread(gz, fb->buf + (int)off, fb->sz - off) : 663 read(fd, fb->buf + (int)off, fb->sz - off); 664 if (ssz == 0) { 665 fb->sz = off; 666 return 1; 667 } 668 if (ssz == -1) 669 err((int)MANDOCLEVEL_SYSERR, "%s", file); 670 off += (size_t)ssz; 671 } 672 673 free(fb->buf); 674 fb->buf = NULL; 675 return 0; 676 } 677 678 static void 679 mparse_end(struct mparse *curp) 680 { 681 if (curp->man->macroset == MACROSET_NONE) 682 curp->man->macroset = MACROSET_MAN; 683 if (curp->man->macroset == MACROSET_MDOC) 684 mdoc_endparse(curp->man); 685 else 686 man_endparse(curp->man); 687 roff_endparse(curp->roff); 688 } 689 690 static void 691 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 692 { 693 struct buf *svprimary; 694 const char *svfile; 695 size_t offset; 696 static int recursion_depth; 697 698 if (64 < recursion_depth) { 699 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 700 return; 701 } 702 703 /* Line number is per-file. */ 704 svfile = curp->file; 705 curp->file = file; 706 svprimary = curp->primary; 707 curp->primary = &blk; 708 curp->line = 1; 709 recursion_depth++; 710 711 /* Skip an UTF-8 byte order mark. */ 712 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 713 (unsigned char)blk.buf[0] == 0xef && 714 (unsigned char)blk.buf[1] == 0xbb && 715 (unsigned char)blk.buf[2] == 0xbf) { 716 offset = 3; 717 curp->filenc &= ~MPARSE_LATIN1; 718 } else 719 offset = 0; 720 721 mparse_buf_r(curp, blk, offset, 1); 722 723 if (--recursion_depth == 0) 724 mparse_end(curp); 725 726 curp->primary = svprimary; 727 curp->file = svfile; 728 } 729 730 enum mandoclevel 731 mparse_readmem(struct mparse *curp, void *buf, size_t len, 732 const char *file) 733 { 734 struct buf blk; 735 736 blk.buf = buf; 737 blk.sz = len; 738 739 mparse_parse_buffer(curp, blk, file); 740 return curp->file_status; 741 } 742 743 /* 744 * Read the whole file into memory and call the parsers. 745 * Called recursively when an .so request is encountered. 
746 */ 747 enum mandoclevel 748 mparse_readfd(struct mparse *curp, int fd, const char *file) 749 { 750 struct buf blk; 751 int with_mmap; 752 int save_filenc; 753 754 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 755 save_filenc = curp->filenc; 756 curp->filenc = curp->options & 757 (MPARSE_UTF8 | MPARSE_LATIN1); 758 mparse_parse_buffer(curp, blk, file); 759 curp->filenc = save_filenc; 760 #if HAVE_MMAP 761 if (with_mmap) 762 munmap(blk.buf, blk.sz); 763 else 764 #endif 765 free(blk.buf); 766 } 767 return curp->file_status; 768 } 769 770 int 771 mparse_open(struct mparse *curp, const char *file) 772 { 773 char *cp; 774 int fd; 775 776 curp->file = file; 777 cp = strrchr(file, '.'); 778 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); 779 780 /* First try to use the filename as it is. */ 781 782 if ((fd = open(file, O_RDONLY)) != -1) 783 return fd; 784 785 /* 786 * If that doesn't work and the filename doesn't 787 * already end in .gz, try appending .gz. 788 */ 789 790 if ( ! curp->gzip) { 791 mandoc_asprintf(&cp, "%s.gz", file); 792 fd = open(cp, O_RDONLY); 793 free(cp); 794 if (fd != -1) { 795 curp->gzip = 1; 796 return fd; 797 } 798 } 799 800 /* Neither worked, give up. */ 801 802 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 803 return -1; 804 } 805 806 struct mparse * 807 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, 808 const char *defos) 809 { 810 struct mparse *curp; 811 812 curp = mandoc_calloc(1, sizeof(struct mparse)); 813 814 curp->options = options; 815 curp->wlevel = wlevel; 816 curp->mmsg = mmsg; 817 curp->defos = defos; 818 819 curp->roff = roff_alloc(curp, options); 820 curp->man = roff_man_alloc( curp->roff, curp, curp->defos, 821 curp->options & MPARSE_QUICK ? 
1 : 0); 822 if (curp->options & MPARSE_MDOC) { 823 mdoc_hash_init(); 824 curp->man->macroset = MACROSET_MDOC; 825 } else if (curp->options & MPARSE_MAN) { 826 man_hash_init(); 827 curp->man->macroset = MACROSET_MAN; 828 } 829 curp->man->first->tok = TOKEN_NONE; 830 return curp; 831 } 832 833 void 834 mparse_reset(struct mparse *curp) 835 { 836 roff_reset(curp->roff); 837 roff_man_reset(curp->man); 838 if (curp->secondary) 839 curp->secondary->sz = 0; 840 841 curp->file_status = MANDOCLEVEL_OK; 842 843 free(curp->sodest); 844 curp->sodest = NULL; 845 } 846 847 void 848 mparse_free(struct mparse *curp) 849 { 850 851 roff_man_free(curp->man); 852 if (curp->roff) 853 roff_free(curp->roff); 854 if (curp->secondary) 855 free(curp->secondary->buf); 856 857 free(curp->secondary); 858 free(curp->sodest); 859 free(curp); 860 } 861 862 void 863 mparse_result(struct mparse *curp, struct roff_man **man, 864 char **sodest) 865 { 866 867 if (sodest && NULL != (*sodest = curp->sodest)) { 868 *man = NULL; 869 return; 870 } 871 if (man) 872 *man = curp->man; 873 } 874 875 void 876 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) 877 { 878 if (curp->file_status > *rc) 879 *rc = curp->file_status; 880 } 881 882 void 883 mandoc_vmsg(enum mandocerr t, struct mparse *m, 884 int ln, int pos, const char *fmt, ...) 
885 { 886 char buf[256]; 887 va_list ap; 888 889 va_start(ap, fmt); 890 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 891 va_end(ap); 892 893 mandoc_msg(t, m, ln, pos, buf); 894 } 895 896 void 897 mandoc_msg(enum mandocerr er, struct mparse *m, 898 int ln, int col, const char *msg) 899 { 900 enum mandoclevel level; 901 902 level = MANDOCLEVEL_UNSUPP; 903 while (er < mandoclimits[level]) 904 level--; 905 906 if (level < m->wlevel && er != MANDOCERR_FILE) 907 return; 908 909 if (m->mmsg) 910 (*m->mmsg)(er, level, m->file, ln, col, msg); 911 912 if (m->file_status < level) 913 m->file_status = level; 914 } 915 916 const char * 917 mparse_strerror(enum mandocerr er) 918 { 919 920 return mandocerrs[er]; 921 } 922 923 const char * 924 mparse_strlevel(enum mandoclevel lvl) 925 { 926 return mandoclevels[lvl]; 927 } 928 929 void 930 mparse_keep(struct mparse *p) 931 { 932 933 assert(NULL == p->secondary); 934 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 935 } 936 937 const char * 938 mparse_getkeep(const struct mparse *p) 939 { 940 941 assert(p->secondary); 942 return p->secondary->sz ? p->secondary->buf : NULL; 943 } 944