1*260e9a87SYuri Pankov /* $Id: read.c,v 1.131 2015/03/11 13:05:20 schwarze Exp $ */
295c635efSGarrett D'Amore /*
395c635efSGarrett D'Amore * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*260e9a87SYuri Pankov * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5*260e9a87SYuri Pankov * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
695c635efSGarrett D'Amore *
795c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any
895c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above
995c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies.
1095c635efSGarrett D'Amore *
1195c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1295c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1395c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1495c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1595c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1695c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1795c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1895c635efSGarrett D'Amore */
1995c635efSGarrett D'Amore #include "config.h"
2095c635efSGarrett D'Amore
21*260e9a87SYuri Pankov #include <sys/types.h>
22*260e9a87SYuri Pankov #if HAVE_MMAP
2395c635efSGarrett D'Amore #include <sys/mman.h>
24*260e9a87SYuri Pankov #include <sys/stat.h>
2595c635efSGarrett D'Amore #endif
26*260e9a87SYuri Pankov #include <sys/wait.h>
2795c635efSGarrett D'Amore
2895c635efSGarrett D'Amore #include <assert.h>
2995c635efSGarrett D'Amore #include <ctype.h>
30*260e9a87SYuri Pankov #include <errno.h>
3195c635efSGarrett D'Amore #include <fcntl.h>
3295c635efSGarrett D'Amore #include <stdarg.h>
3395c635efSGarrett D'Amore #include <stdint.h>
3495c635efSGarrett D'Amore #include <stdio.h>
3595c635efSGarrett D'Amore #include <stdlib.h>
3695c635efSGarrett D'Amore #include <string.h>
3795c635efSGarrett D'Amore #include <unistd.h>
3895c635efSGarrett D'Amore
3995c635efSGarrett D'Amore #include "mandoc.h"
40*260e9a87SYuri Pankov #include "mandoc_aux.h"
4195c635efSGarrett D'Amore #include "libmandoc.h"
4295c635efSGarrett D'Amore #include "mdoc.h"
4395c635efSGarrett D'Amore #include "man.h"
4495c635efSGarrett D'Amore
4595c635efSGarrett D'Amore #define REPARSE_LIMIT 1000
4695c635efSGarrett D'Amore
/*
 * State of one parse session.
 * The pman and pmdoc members keep the parser instances once
 * choose_parser() has allocated them; man and mdoc designate the
 * parser selected for the input that is currently being parsed.
 */
struct	mparse {
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	const struct mchars *mchars; /* character table */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* reparse depth, capped at REPARSE_LIMIT */
	int		  line; /* line number in the file */
	pid_t		  child; /* the gunzip(1) process */
};
6895c635efSGarrett D'Amore
/* Prototypes of the file-local (static) helper functions. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
7795c635efSGarrett D'Amore
/*
 * One enum mandocerr value per message level; the array is sized
 * MANDOCLEVEL_MAX.
 * NOTE(review): presumably each entry marks where the messages of
 * that level begin within enum mandocerr - confirm against the code
 * that consults this table.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
8795c635efSGarrett D'Amore
/*
 * Human-readable message texts; the array is sized MANDOCERR_MAX.
 * NOTE(review): the entries presumably have to stay in the same order
 * as the constants of enum mandocerr - confirm against mandoc.h
 * before inserting, removing, or moving a string.
 * The "generic warning", "generic error", and "unsupported feature"
 * entries separate the groups of messages.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without name",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing description line, using \"\"",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	/* NOTE(review): this slot has no fixed text; confirm how it is reported. */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
23995c635efSGarrett D'Amore
/*
 * Printable names of the message levels; the array is sized
 * MANDOCLEVEL_MAX, one name per level.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
24995c635efSGarrett D'Amore
250*260e9a87SYuri Pankov
25195c635efSGarrett D'Amore static void
resize_buf(struct buf * buf,size_t initial)25295c635efSGarrett D'Amore resize_buf(struct buf *buf, size_t initial)
25395c635efSGarrett D'Amore {
25495c635efSGarrett D'Amore
25595c635efSGarrett D'Amore buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
25695c635efSGarrett D'Amore buf->buf = mandoc_realloc(buf->buf, buf->sz);
25795c635efSGarrett D'Amore }
25895c635efSGarrett D'Amore
25995c635efSGarrett D'Amore static void
choose_parser(struct mparse * curp)260*260e9a87SYuri Pankov choose_parser(struct mparse *curp)
26195c635efSGarrett D'Amore {
262*260e9a87SYuri Pankov char *cp, *ep;
263*260e9a87SYuri Pankov int format;
26495c635efSGarrett D'Amore
26595c635efSGarrett D'Amore /*
266*260e9a87SYuri Pankov * If neither command line arguments -mdoc or -man select
267*260e9a87SYuri Pankov * a parser nor the roff parser found a .Dd or .TH macro
268*260e9a87SYuri Pankov * yet, look ahead in the main input buffer.
26995c635efSGarrett D'Amore */
27095c635efSGarrett D'Amore
271*260e9a87SYuri Pankov if ((format = roff_getformat(curp->roff)) == 0) {
272*260e9a87SYuri Pankov cp = curp->primary->buf;
273*260e9a87SYuri Pankov ep = cp + curp->primary->sz;
274*260e9a87SYuri Pankov while (cp < ep) {
275*260e9a87SYuri Pankov if (*cp == '.' || *cp == '\'') {
276*260e9a87SYuri Pankov cp++;
277*260e9a87SYuri Pankov if (cp[0] == 'D' && cp[1] == 'd') {
278*260e9a87SYuri Pankov format = MPARSE_MDOC;
27995c635efSGarrett D'Amore break;
28095c635efSGarrett D'Amore }
281*260e9a87SYuri Pankov if (cp[0] == 'T' && cp[1] == 'H') {
282*260e9a87SYuri Pankov format = MPARSE_MAN;
283*260e9a87SYuri Pankov break;
284*260e9a87SYuri Pankov }
285*260e9a87SYuri Pankov }
286*260e9a87SYuri Pankov cp = memchr(cp, '\n', ep - cp);
287*260e9a87SYuri Pankov if (cp == NULL)
288*260e9a87SYuri Pankov break;
289*260e9a87SYuri Pankov cp++;
290*260e9a87SYuri Pankov }
291*260e9a87SYuri Pankov }
29295c635efSGarrett D'Amore
293*260e9a87SYuri Pankov if (format == MPARSE_MDOC) {
29495c635efSGarrett D'Amore if (NULL == curp->pmdoc)
295*260e9a87SYuri Pankov curp->pmdoc = mdoc_alloc(
296*260e9a87SYuri Pankov curp->roff, curp, curp->defos,
297*260e9a87SYuri Pankov MPARSE_QUICK & curp->options ? 1 : 0);
29895c635efSGarrett D'Amore assert(curp->pmdoc);
29995c635efSGarrett D'Amore curp->mdoc = curp->pmdoc;
30095c635efSGarrett D'Amore return;
30195c635efSGarrett D'Amore }
30295c635efSGarrett D'Amore
303*260e9a87SYuri Pankov /* Fall back to man(7) as a last resort. */
304*260e9a87SYuri Pankov
30595c635efSGarrett D'Amore if (NULL == curp->pman)
306*260e9a87SYuri Pankov curp->pman = man_alloc(
307*260e9a87SYuri Pankov curp->roff, curp, curp->defos,
308*260e9a87SYuri Pankov MPARSE_QUICK & curp->options ? 1 : 0);
30995c635efSGarrett D'Amore assert(curp->pman);
31095c635efSGarrett D'Amore curp->man = curp->pman;
31195c635efSGarrett D'Amore }
31295c635efSGarrett D'Amore
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  the parse session
 * blk:   the input buffer, passed by value
 * i:     byte offset in blk where reading starts
 * start: if non-zero, every new input line updates curp->line and
 *        resets curp->reparse_count; the recursive calls in this
 *        function pass zero for temporary line buffers, in which
 *        a NUL byte terminates the input
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	pid_t		 save_child;
	unsigned char	 c;

	/* The line buffer starts out empty; resize_buf() allocates it. */

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	/* Outer loop: one iteration per logical input line. */

	while (i < blk.sz) {
		/* A NUL byte at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both UTF-8 and Latin-1 are still
			 * possible, let preconv_cue() decide based on
			 * the first lines of the file.
			 * NOTE(review): presumably it looks for an
			 * encoding declaration - confirm in preconv.c.
			 */

			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Inner loop: copy one line from blk into ln. */

		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is active or preconv_encode()
			 * reports failure, complain and substitute '?'.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * Each one is reported and replaced by '?',
			 * except that a carriage return is dropped
			 * without a replacement character.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces,
				 * but keep a blank that directly
				 * follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for and append the terminating NUL byte. */

		if (pos >= ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * Each line is appended followed by a newline, and the
		 * buffer is kept NUL-terminated.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the line buffer itself as a temporary
			 * input buffer, starting at offset "of", unless
			 * the reparse limit has been exceeded.
			 */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, of, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep the line; the next input is appended to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			/* Hand the same line to the roff parser again. */
			goto rerun;
		case ROFF_IGN:
			/* Discard the line. */
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Without the MPARSE_SO option, and if no
			 * further input follows, only record the
			 * target file name and stop parsing here.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/*
			 * Remember the current file name and child
			 * process so that they can be restored after
			 * handling the included file.
			 */
			save_file = curp->file;
			save_child = curp->child;
			if (mparse_open(curp, &fd, ln.buf + of) ==
			    MANDOCLEVEL_OK) {
				mparse_readfd(curp, fd, ln.buf + of);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report
				 * that, then replace the line with a
				 * short text naming the file and
				 * parse that text instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			curp->child = save_child;
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * NOTE(review): a return value of 2 from the macro
		 * parsers ends processing of this buffer; confirm
		 * its exact meaning in mdoc_parseln()/man_parseln().
		 */

		if (rr == ROFF_TBL) {
			while ((span = roff_span(curp->roff)) != NULL)
				if (curp->man == NULL)
					mdoc_addspan(curp->mdoc, span);
				else
					man_addspan(curp->man, span);
		} else if (rr == ROFF_EQN) {
			if (curp->man == NULL)
				mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
			else
				man_addeqn(curp->man, roff_eqn(curp->roff));
		} else if ((curp->man == NULL ?
		    mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
61695c635efSGarrett D'Amore
61795c635efSGarrett D'Amore static int
read_whole_file(struct mparse * curp,const char * file,int fd,struct buf * fb,int * with_mmap)618*260e9a87SYuri Pankov read_whole_file(struct mparse *curp, const char *file, int fd,
619*260e9a87SYuri Pankov struct buf *fb, int *with_mmap)
62095c635efSGarrett D'Amore {
62195c635efSGarrett D'Amore size_t off;
62295c635efSGarrett D'Amore ssize_t ssz;
62395c635efSGarrett D'Amore
624*260e9a87SYuri Pankov #if HAVE_MMAP
62595c635efSGarrett D'Amore struct stat st;
62695c635efSGarrett D'Amore if (-1 == fstat(fd, &st)) {
62795c635efSGarrett D'Amore perror(file);
628*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
62995c635efSGarrett D'Amore }
63095c635efSGarrett D'Amore
63195c635efSGarrett D'Amore /*
63295c635efSGarrett D'Amore * If we're a regular file, try just reading in the whole entry
63395c635efSGarrett D'Amore * via mmap(). This is faster than reading it into blocks, and
63495c635efSGarrett D'Amore * since each file is only a few bytes to begin with, I'm not
63595c635efSGarrett D'Amore * concerned that this is going to tank any machines.
63695c635efSGarrett D'Amore */
63795c635efSGarrett D'Amore
63895c635efSGarrett D'Amore if (S_ISREG(st.st_mode)) {
639*260e9a87SYuri Pankov if (st.st_size > 0x7fffffff) {
640*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
64195c635efSGarrett D'Amore return(0);
64295c635efSGarrett D'Amore }
64395c635efSGarrett D'Amore *with_mmap = 1;
64495c635efSGarrett D'Amore fb->sz = (size_t)st.st_size;
645698f87a4SGarrett D'Amore fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
64695c635efSGarrett D'Amore if (fb->buf != MAP_FAILED)
64795c635efSGarrett D'Amore return(1);
64895c635efSGarrett D'Amore }
64995c635efSGarrett D'Amore #endif
65095c635efSGarrett D'Amore
65195c635efSGarrett D'Amore /*
65295c635efSGarrett D'Amore * If this isn't a regular file (like, say, stdin), then we must
65395c635efSGarrett D'Amore * go the old way and just read things in bit by bit.
65495c635efSGarrett D'Amore */
65595c635efSGarrett D'Amore
65695c635efSGarrett D'Amore *with_mmap = 0;
65795c635efSGarrett D'Amore off = 0;
65895c635efSGarrett D'Amore fb->sz = 0;
65995c635efSGarrett D'Amore fb->buf = NULL;
66095c635efSGarrett D'Amore for (;;) {
66195c635efSGarrett D'Amore if (off == fb->sz) {
66295c635efSGarrett D'Amore if (fb->sz == (1U << 31)) {
663*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TOOLARGE, curp,
664*260e9a87SYuri Pankov 0, 0, NULL);
66595c635efSGarrett D'Amore break;
66695c635efSGarrett D'Amore }
66795c635efSGarrett D'Amore resize_buf(fb, 65536);
66895c635efSGarrett D'Amore }
66995c635efSGarrett D'Amore ssz = read(fd, fb->buf + (int)off, fb->sz - off);
67095c635efSGarrett D'Amore if (ssz == 0) {
67195c635efSGarrett D'Amore fb->sz = off;
67295c635efSGarrett D'Amore return(1);
67395c635efSGarrett D'Amore }
67495c635efSGarrett D'Amore if (ssz == -1) {
67595c635efSGarrett D'Amore perror(file);
676*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
67795c635efSGarrett D'Amore }
67895c635efSGarrett D'Amore off += (size_t)ssz;
67995c635efSGarrett D'Amore }
68095c635efSGarrett D'Amore
68195c635efSGarrett D'Amore free(fb->buf);
68295c635efSGarrett D'Amore fb->buf = NULL;
68395c635efSGarrett D'Amore return(0);
68495c635efSGarrett D'Amore }
68595c635efSGarrett D'Amore
68695c635efSGarrett D'Amore static void
mparse_end(struct mparse * curp)68795c635efSGarrett D'Amore mparse_end(struct mparse *curp)
68895c635efSGarrett D'Amore {
68995c635efSGarrett D'Amore
690*260e9a87SYuri Pankov if (curp->mdoc == NULL &&
691*260e9a87SYuri Pankov curp->man == NULL &&
692*260e9a87SYuri Pankov curp->sodest == NULL) {
693*260e9a87SYuri Pankov if (curp->options & MPARSE_MDOC)
694*260e9a87SYuri Pankov curp->mdoc = curp->pmdoc;
695*260e9a87SYuri Pankov else {
696*260e9a87SYuri Pankov if (curp->pman == NULL)
697*260e9a87SYuri Pankov curp->pman = man_alloc(
698*260e9a87SYuri Pankov curp->roff, curp, curp->defos,
699*260e9a87SYuri Pankov curp->options & MPARSE_QUICK ? 1 : 0);
700*260e9a87SYuri Pankov curp->man = curp->pman;
70195c635efSGarrett D'Amore }
70295c635efSGarrett D'Amore }
703*260e9a87SYuri Pankov if (curp->mdoc)
704*260e9a87SYuri Pankov mdoc_endparse(curp->mdoc);
705*260e9a87SYuri Pankov if (curp->man)
706*260e9a87SYuri Pankov man_endparse(curp->man);
70795c635efSGarrett D'Amore roff_endparse(curp->roff);
70895c635efSGarrett D'Amore }
70995c635efSGarrett D'Amore
71095c635efSGarrett D'Amore static void
mparse_parse_buffer(struct mparse * curp,struct buf blk,const char * file)711698f87a4SGarrett D'Amore mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
71295c635efSGarrett D'Amore {
713*260e9a87SYuri Pankov struct buf *svprimary;
71495c635efSGarrett D'Amore const char *svfile;
715*260e9a87SYuri Pankov size_t offset;
716698f87a4SGarrett D'Amore static int recursion_depth;
717698f87a4SGarrett D'Amore
718698f87a4SGarrett D'Amore if (64 < recursion_depth) {
719698f87a4SGarrett D'Amore mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720698f87a4SGarrett D'Amore return;
721698f87a4SGarrett D'Amore }
72295c635efSGarrett D'Amore
72395c635efSGarrett D'Amore /* Line number is per-file. */
72495c635efSGarrett D'Amore svfile = curp->file;
72595c635efSGarrett D'Amore curp->file = file;
726*260e9a87SYuri Pankov svprimary = curp->primary;
727*260e9a87SYuri Pankov curp->primary = &blk;
72895c635efSGarrett D'Amore curp->line = 1;
729698f87a4SGarrett D'Amore recursion_depth++;
73095c635efSGarrett D'Amore
731*260e9a87SYuri Pankov /* Skip an UTF-8 byte order mark. */
732*260e9a87SYuri Pankov if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
733*260e9a87SYuri Pankov (unsigned char)blk.buf[0] == 0xef &&
734*260e9a87SYuri Pankov (unsigned char)blk.buf[1] == 0xbb &&
735*260e9a87SYuri Pankov (unsigned char)blk.buf[2] == 0xbf) {
736*260e9a87SYuri Pankov offset = 3;
737*260e9a87SYuri Pankov curp->filenc &= ~MPARSE_LATIN1;
738*260e9a87SYuri Pankov } else
739*260e9a87SYuri Pankov offset = 0;
74095c635efSGarrett D'Amore
741*260e9a87SYuri Pankov mparse_buf_r(curp, blk, offset, 1);
742*260e9a87SYuri Pankov
743*260e9a87SYuri Pankov if (--recursion_depth == 0)
74495c635efSGarrett D'Amore mparse_end(curp);
74595c635efSGarrett D'Amore
746*260e9a87SYuri Pankov curp->primary = svprimary;
74795c635efSGarrett D'Amore curp->file = svfile;
74895c635efSGarrett D'Amore }
74995c635efSGarrett D'Amore
75095c635efSGarrett D'Amore enum mandoclevel
mparse_readmem(struct mparse * curp,void * buf,size_t len,const char * file)751*260e9a87SYuri Pankov mparse_readmem(struct mparse *curp, void *buf, size_t len,
75295c635efSGarrett D'Amore const char *file)
75395c635efSGarrett D'Amore {
75495c635efSGarrett D'Amore struct buf blk;
75595c635efSGarrett D'Amore
756*260e9a87SYuri Pankov blk.buf = buf;
75795c635efSGarrett D'Amore blk.sz = len;
75895c635efSGarrett D'Amore
759698f87a4SGarrett D'Amore mparse_parse_buffer(curp, blk, file);
76095c635efSGarrett D'Amore return(curp->file_status);
76195c635efSGarrett D'Amore }
76295c635efSGarrett D'Amore
763*260e9a87SYuri Pankov /*
764*260e9a87SYuri Pankov * Read the whole file into memory and call the parsers.
765*260e9a87SYuri Pankov * Called recursively when an .so request is encountered.
766*260e9a87SYuri Pankov */
767698f87a4SGarrett D'Amore enum mandoclevel
mparse_readfd(struct mparse * curp,int fd,const char * file)768698f87a4SGarrett D'Amore mparse_readfd(struct mparse *curp, int fd, const char *file)
76995c635efSGarrett D'Amore {
77095c635efSGarrett D'Amore struct buf blk;
77195c635efSGarrett D'Amore int with_mmap;
772*260e9a87SYuri Pankov int save_filenc;
77395c635efSGarrett D'Amore
774*260e9a87SYuri Pankov if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
775*260e9a87SYuri Pankov save_filenc = curp->filenc;
776*260e9a87SYuri Pankov curp->filenc = curp->options &
777*260e9a87SYuri Pankov (MPARSE_UTF8 | MPARSE_LATIN1);
778698f87a4SGarrett D'Amore mparse_parse_buffer(curp, blk, file);
779*260e9a87SYuri Pankov curp->filenc = save_filenc;
780*260e9a87SYuri Pankov #if HAVE_MMAP
78195c635efSGarrett D'Amore if (with_mmap)
78295c635efSGarrett D'Amore munmap(blk.buf, blk.sz);
78395c635efSGarrett D'Amore else
78495c635efSGarrett D'Amore #endif
78595c635efSGarrett D'Amore free(blk.buf);
786*260e9a87SYuri Pankov }
78795c635efSGarrett D'Amore
788*260e9a87SYuri Pankov if (fd != STDIN_FILENO && close(fd) == -1)
78995c635efSGarrett D'Amore perror(file);
790*260e9a87SYuri Pankov
791*260e9a87SYuri Pankov mparse_wait(curp);
79295c635efSGarrett D'Amore return(curp->file_status);
79395c635efSGarrett D'Amore }
79495c635efSGarrett D'Amore
795*260e9a87SYuri Pankov enum mandoclevel
mparse_open(struct mparse * curp,int * fd,const char * file)796*260e9a87SYuri Pankov mparse_open(struct mparse *curp, int *fd, const char *file)
797*260e9a87SYuri Pankov {
798*260e9a87SYuri Pankov int pfd[2];
799*260e9a87SYuri Pankov int save_errno;
800*260e9a87SYuri Pankov char *cp;
801*260e9a87SYuri Pankov
802*260e9a87SYuri Pankov curp->file = file;
803*260e9a87SYuri Pankov
804*260e9a87SYuri Pankov /* Unless zipped, try to just open the file. */
805*260e9a87SYuri Pankov
806*260e9a87SYuri Pankov if ((cp = strrchr(file, '.')) == NULL ||
807*260e9a87SYuri Pankov strcmp(cp + 1, "gz")) {
808*260e9a87SYuri Pankov curp->child = 0;
809*260e9a87SYuri Pankov if ((*fd = open(file, O_RDONLY)) != -1)
810*260e9a87SYuri Pankov return(MANDOCLEVEL_OK);
811*260e9a87SYuri Pankov
812*260e9a87SYuri Pankov /* Open failed; try to append ".gz". */
813*260e9a87SYuri Pankov
814*260e9a87SYuri Pankov mandoc_asprintf(&cp, "%s.gz", file);
815*260e9a87SYuri Pankov file = cp;
816*260e9a87SYuri Pankov } else
817*260e9a87SYuri Pankov cp = NULL;
818*260e9a87SYuri Pankov
819*260e9a87SYuri Pankov /* Before forking, make sure the file can be read. */
820*260e9a87SYuri Pankov
821*260e9a87SYuri Pankov save_errno = errno;
822*260e9a87SYuri Pankov if (access(file, R_OK) == -1) {
823*260e9a87SYuri Pankov if (cp != NULL)
824*260e9a87SYuri Pankov errno = save_errno;
825*260e9a87SYuri Pankov free(cp);
826*260e9a87SYuri Pankov *fd = -1;
827*260e9a87SYuri Pankov curp->child = 0;
828*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
829*260e9a87SYuri Pankov return(MANDOCLEVEL_ERROR);
830*260e9a87SYuri Pankov }
831*260e9a87SYuri Pankov
832*260e9a87SYuri Pankov /* Run gunzip(1). */
833*260e9a87SYuri Pankov
834*260e9a87SYuri Pankov if (pipe(pfd) == -1) {
835*260e9a87SYuri Pankov perror("pipe");
836*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
837*260e9a87SYuri Pankov }
838*260e9a87SYuri Pankov
839*260e9a87SYuri Pankov switch (curp->child = fork()) {
840*260e9a87SYuri Pankov case -1:
841*260e9a87SYuri Pankov perror("fork");
842*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
843*260e9a87SYuri Pankov case 0:
844*260e9a87SYuri Pankov close(pfd[0]);
845*260e9a87SYuri Pankov if (dup2(pfd[1], STDOUT_FILENO) == -1) {
846*260e9a87SYuri Pankov perror("dup");
847*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
848*260e9a87SYuri Pankov }
849*260e9a87SYuri Pankov execlp("gunzip", "gunzip", "-c", file, NULL);
850*260e9a87SYuri Pankov perror("exec");
851*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
852*260e9a87SYuri Pankov default:
853*260e9a87SYuri Pankov close(pfd[1]);
854*260e9a87SYuri Pankov *fd = pfd[0];
855*260e9a87SYuri Pankov return(MANDOCLEVEL_OK);
856*260e9a87SYuri Pankov }
857*260e9a87SYuri Pankov }
858*260e9a87SYuri Pankov
859*260e9a87SYuri Pankov enum mandoclevel
mparse_wait(struct mparse * curp)860*260e9a87SYuri Pankov mparse_wait(struct mparse *curp)
861*260e9a87SYuri Pankov {
862*260e9a87SYuri Pankov int status;
863*260e9a87SYuri Pankov
864*260e9a87SYuri Pankov if (curp->child == 0)
865*260e9a87SYuri Pankov return(MANDOCLEVEL_OK);
866*260e9a87SYuri Pankov
867*260e9a87SYuri Pankov if (waitpid(curp->child, &status, 0) == -1) {
868*260e9a87SYuri Pankov perror("wait");
869*260e9a87SYuri Pankov exit((int)MANDOCLEVEL_SYSERR);
870*260e9a87SYuri Pankov }
871*260e9a87SYuri Pankov curp->child = 0;
872*260e9a87SYuri Pankov if (WIFSIGNALED(status)) {
873*260e9a87SYuri Pankov mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874*260e9a87SYuri Pankov "gunzip died from signal %d", WTERMSIG(status));
875*260e9a87SYuri Pankov return(MANDOCLEVEL_ERROR);
876*260e9a87SYuri Pankov }
877*260e9a87SYuri Pankov if (WEXITSTATUS(status)) {
878*260e9a87SYuri Pankov mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
879*260e9a87SYuri Pankov "gunzip failed with code %d", WEXITSTATUS(status));
880*260e9a87SYuri Pankov return(MANDOCLEVEL_ERROR);
881*260e9a87SYuri Pankov }
882*260e9a87SYuri Pankov return(MANDOCLEVEL_OK);
883*260e9a87SYuri Pankov }
884*260e9a87SYuri Pankov
88595c635efSGarrett D'Amore struct mparse *
mparse_alloc(int options,enum mandoclevel wlevel,mandocmsg mmsg,const struct mchars * mchars,const char * defos)886*260e9a87SYuri Pankov mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
887*260e9a87SYuri Pankov const struct mchars *mchars, const char *defos)
88895c635efSGarrett D'Amore {
88995c635efSGarrett D'Amore struct mparse *curp;
89095c635efSGarrett D'Amore
89195c635efSGarrett D'Amore curp = mandoc_calloc(1, sizeof(struct mparse));
89295c635efSGarrett D'Amore
893*260e9a87SYuri Pankov curp->options = options;
89495c635efSGarrett D'Amore curp->wlevel = wlevel;
89595c635efSGarrett D'Amore curp->mmsg = mmsg;
896698f87a4SGarrett D'Amore curp->defos = defos;
89795c635efSGarrett D'Amore
898*260e9a87SYuri Pankov curp->mchars = mchars;
899*260e9a87SYuri Pankov curp->roff = roff_alloc(curp, curp->mchars, options);
900*260e9a87SYuri Pankov if (curp->options & MPARSE_MDOC)
901*260e9a87SYuri Pankov curp->pmdoc = mdoc_alloc(
902*260e9a87SYuri Pankov curp->roff, curp, curp->defos,
903*260e9a87SYuri Pankov curp->options & MPARSE_QUICK ? 1 : 0);
904*260e9a87SYuri Pankov if (curp->options & MPARSE_MAN)
905*260e9a87SYuri Pankov curp->pman = man_alloc(
906*260e9a87SYuri Pankov curp->roff, curp, curp->defos,
907*260e9a87SYuri Pankov curp->options & MPARSE_QUICK ? 1 : 0);
908*260e9a87SYuri Pankov
90995c635efSGarrett D'Amore return(curp);
91095c635efSGarrett D'Amore }
91195c635efSGarrett D'Amore
91295c635efSGarrett D'Amore void
mparse_reset(struct mparse * curp)91395c635efSGarrett D'Amore mparse_reset(struct mparse *curp)
91495c635efSGarrett D'Amore {
91595c635efSGarrett D'Amore
91695c635efSGarrett D'Amore roff_reset(curp->roff);
91795c635efSGarrett D'Amore
91895c635efSGarrett D'Amore if (curp->mdoc)
91995c635efSGarrett D'Amore mdoc_reset(curp->mdoc);
92095c635efSGarrett D'Amore if (curp->man)
92195c635efSGarrett D'Amore man_reset(curp->man);
92295c635efSGarrett D'Amore if (curp->secondary)
92395c635efSGarrett D'Amore curp->secondary->sz = 0;
92495c635efSGarrett D'Amore
92595c635efSGarrett D'Amore curp->file_status = MANDOCLEVEL_OK;
92695c635efSGarrett D'Amore curp->mdoc = NULL;
92795c635efSGarrett D'Amore curp->man = NULL;
928*260e9a87SYuri Pankov
929*260e9a87SYuri Pankov free(curp->sodest);
930*260e9a87SYuri Pankov curp->sodest = NULL;
93195c635efSGarrett D'Amore }
93295c635efSGarrett D'Amore
93395c635efSGarrett D'Amore void
mparse_free(struct mparse * curp)93495c635efSGarrett D'Amore mparse_free(struct mparse *curp)
93595c635efSGarrett D'Amore {
93695c635efSGarrett D'Amore
93795c635efSGarrett D'Amore if (curp->pmdoc)
93895c635efSGarrett D'Amore mdoc_free(curp->pmdoc);
93995c635efSGarrett D'Amore if (curp->pman)
94095c635efSGarrett D'Amore man_free(curp->pman);
94195c635efSGarrett D'Amore if (curp->roff)
94295c635efSGarrett D'Amore roff_free(curp->roff);
94395c635efSGarrett D'Amore if (curp->secondary)
94495c635efSGarrett D'Amore free(curp->secondary->buf);
94595c635efSGarrett D'Amore
94695c635efSGarrett D'Amore free(curp->secondary);
947*260e9a87SYuri Pankov free(curp->sodest);
94895c635efSGarrett D'Amore free(curp);
94995c635efSGarrett D'Amore }
95095c635efSGarrett D'Amore
95195c635efSGarrett D'Amore void
mparse_result(struct mparse * curp,struct mdoc ** mdoc,struct man ** man,char ** sodest)952*260e9a87SYuri Pankov mparse_result(struct mparse *curp,
953*260e9a87SYuri Pankov struct mdoc **mdoc, struct man **man, char **sodest)
95495c635efSGarrett D'Amore {
95595c635efSGarrett D'Amore
956*260e9a87SYuri Pankov if (sodest && NULL != (*sodest = curp->sodest)) {
957*260e9a87SYuri Pankov *mdoc = NULL;
958*260e9a87SYuri Pankov *man = NULL;
959*260e9a87SYuri Pankov return;
960*260e9a87SYuri Pankov }
96195c635efSGarrett D'Amore if (mdoc)
96295c635efSGarrett D'Amore *mdoc = curp->mdoc;
96395c635efSGarrett D'Amore if (man)
96495c635efSGarrett D'Amore *man = curp->man;
96595c635efSGarrett D'Amore }
96695c635efSGarrett D'Amore
96795c635efSGarrett D'Amore void
mandoc_vmsg(enum mandocerr t,struct mparse * m,int ln,int pos,const char * fmt,...)96895c635efSGarrett D'Amore mandoc_vmsg(enum mandocerr t, struct mparse *m,
96995c635efSGarrett D'Amore int ln, int pos, const char *fmt, ...)
97095c635efSGarrett D'Amore {
97195c635efSGarrett D'Amore char buf[256];
97295c635efSGarrett D'Amore va_list ap;
97395c635efSGarrett D'Amore
97495c635efSGarrett D'Amore va_start(ap, fmt);
975*260e9a87SYuri Pankov (void)vsnprintf(buf, sizeof(buf), fmt, ap);
97695c635efSGarrett D'Amore va_end(ap);
97795c635efSGarrett D'Amore
97895c635efSGarrett D'Amore mandoc_msg(t, m, ln, pos, buf);
97995c635efSGarrett D'Amore }
98095c635efSGarrett D'Amore
98195c635efSGarrett D'Amore void
mandoc_msg(enum mandocerr er,struct mparse * m,int ln,int col,const char * msg)98295c635efSGarrett D'Amore mandoc_msg(enum mandocerr er, struct mparse *m,
98395c635efSGarrett D'Amore int ln, int col, const char *msg)
98495c635efSGarrett D'Amore {
98595c635efSGarrett D'Amore enum mandoclevel level;
98695c635efSGarrett D'Amore
987*260e9a87SYuri Pankov level = MANDOCLEVEL_UNSUPP;
98895c635efSGarrett D'Amore while (er < mandoclimits[level])
98995c635efSGarrett D'Amore level--;
99095c635efSGarrett D'Amore
991*260e9a87SYuri Pankov if (level < m->wlevel && er != MANDOCERR_FILE)
99295c635efSGarrett D'Amore return;
99395c635efSGarrett D'Amore
99495c635efSGarrett D'Amore if (m->mmsg)
99595c635efSGarrett D'Amore (*m->mmsg)(er, level, m->file, ln, col, msg);
99695c635efSGarrett D'Amore
99795c635efSGarrett D'Amore if (m->file_status < level)
99895c635efSGarrett D'Amore m->file_status = level;
99995c635efSGarrett D'Amore }
100095c635efSGarrett D'Amore
100195c635efSGarrett D'Amore const char *
mparse_strerror(enum mandocerr er)100295c635efSGarrett D'Amore mparse_strerror(enum mandocerr er)
100395c635efSGarrett D'Amore {
100495c635efSGarrett D'Amore
100595c635efSGarrett D'Amore return(mandocerrs[er]);
100695c635efSGarrett D'Amore }
100795c635efSGarrett D'Amore
100895c635efSGarrett D'Amore const char *
mparse_strlevel(enum mandoclevel lvl)100995c635efSGarrett D'Amore mparse_strlevel(enum mandoclevel lvl)
101095c635efSGarrett D'Amore {
101195c635efSGarrett D'Amore return(mandoclevels[lvl]);
101295c635efSGarrett D'Amore }
101395c635efSGarrett D'Amore
101495c635efSGarrett D'Amore void
mparse_keep(struct mparse * p)101595c635efSGarrett D'Amore mparse_keep(struct mparse *p)
101695c635efSGarrett D'Amore {
101795c635efSGarrett D'Amore
101895c635efSGarrett D'Amore assert(NULL == p->secondary);
101995c635efSGarrett D'Amore p->secondary = mandoc_calloc(1, sizeof(struct buf));
102095c635efSGarrett D'Amore }
102195c635efSGarrett D'Amore
102295c635efSGarrett D'Amore const char *
mparse_getkeep(const struct mparse * p)102395c635efSGarrett D'Amore mparse_getkeep(const struct mparse *p)
102495c635efSGarrett D'Amore {
102595c635efSGarrett D'Amore
102695c635efSGarrett D'Amore assert(p->secondary);
102795c635efSGarrett D'Amore return(p->secondary->sz ? p->secondary->buf : NULL);
102895c635efSGarrett D'Amore }
1029