xref: /titanic_50/usr/src/cmd/mandoc/read.c (revision 260e9a87725c090ba5835b1f9f0b62fa2f96036f)
1*260e9a87SYuri Pankov /*	$Id: read.c,v 1.131 2015/03/11 13:05:20 schwarze Exp $ */
295c635efSGarrett D'Amore /*
395c635efSGarrett D'Amore  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*260e9a87SYuri Pankov  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5*260e9a87SYuri Pankov  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
695c635efSGarrett D'Amore  *
795c635efSGarrett D'Amore  * Permission to use, copy, modify, and distribute this software for any
895c635efSGarrett D'Amore  * purpose with or without fee is hereby granted, provided that the above
995c635efSGarrett D'Amore  * copyright notice and this permission notice appear in all copies.
1095c635efSGarrett D'Amore  *
1195c635efSGarrett D'Amore  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1295c635efSGarrett D'Amore  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1395c635efSGarrett D'Amore  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1495c635efSGarrett D'Amore  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1595c635efSGarrett D'Amore  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1695c635efSGarrett D'Amore  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1795c635efSGarrett D'Amore  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1895c635efSGarrett D'Amore  */
1995c635efSGarrett D'Amore #include "config.h"
2095c635efSGarrett D'Amore 
21*260e9a87SYuri Pankov #include <sys/types.h>
22*260e9a87SYuri Pankov #if HAVE_MMAP
2395c635efSGarrett D'Amore #include <sys/mman.h>
24*260e9a87SYuri Pankov #include <sys/stat.h>
2595c635efSGarrett D'Amore #endif
26*260e9a87SYuri Pankov #include <sys/wait.h>
2795c635efSGarrett D'Amore 
2895c635efSGarrett D'Amore #include <assert.h>
2995c635efSGarrett D'Amore #include <ctype.h>
30*260e9a87SYuri Pankov #include <errno.h>
3195c635efSGarrett D'Amore #include <fcntl.h>
3295c635efSGarrett D'Amore #include <stdarg.h>
3395c635efSGarrett D'Amore #include <stdint.h>
3495c635efSGarrett D'Amore #include <stdio.h>
3595c635efSGarrett D'Amore #include <stdlib.h>
3695c635efSGarrett D'Amore #include <string.h>
3795c635efSGarrett D'Amore #include <unistd.h>
3895c635efSGarrett D'Amore 
3995c635efSGarrett D'Amore #include "mandoc.h"
40*260e9a87SYuri Pankov #include "mandoc_aux.h"
4195c635efSGarrett D'Amore #include "libmandoc.h"
4295c635efSGarrett D'Amore #include "mdoc.h"
4395c635efSGarrett D'Amore #include "man.h"
4495c635efSGarrett D'Amore 
4595c635efSGarrett D'Amore #define	REPARSE_LIMIT	1000
4695c635efSGarrett D'Amore 
4795c635efSGarrett D'Amore struct	mparse {
4895c635efSGarrett D'Amore 	struct man	 *pman; /* persistent man parser */
4995c635efSGarrett D'Amore 	struct mdoc	 *pmdoc; /* persistent mdoc parser */
5095c635efSGarrett D'Amore 	struct man	 *man; /* man parser */
5195c635efSGarrett D'Amore 	struct mdoc	 *mdoc; /* mdoc parser */
5295c635efSGarrett D'Amore 	struct roff	 *roff; /* roff parser (!NULL) */
53*260e9a87SYuri Pankov 	const struct mchars *mchars; /* character table */
54*260e9a87SYuri Pankov 	char		 *sodest; /* filename pointed to by .so */
55*260e9a87SYuri Pankov 	const char	 *file; /* filename of current input file */
56*260e9a87SYuri Pankov 	struct buf	 *primary; /* buffer currently being parsed */
57*260e9a87SYuri Pankov 	struct buf	 *secondary; /* preprocessed copy of input */
58*260e9a87SYuri Pankov 	const char	 *defos; /* default operating system */
5995c635efSGarrett D'Amore 	mandocmsg	  mmsg; /* warning/error message handler */
60*260e9a87SYuri Pankov 	enum mandoclevel  file_status; /* status of current parse */
61*260e9a87SYuri Pankov 	enum mandoclevel  wlevel; /* ignore messages below this */
62*260e9a87SYuri Pankov 	int		  options; /* parser options */
63*260e9a87SYuri Pankov 	int		  filenc; /* encoding of the current file */
64*260e9a87SYuri Pankov 	int		  reparse_count; /* finite interp. stack */
65*260e9a87SYuri Pankov 	int		  line; /* line number in the file */
66*260e9a87SYuri Pankov 	pid_t		  child; /* the gunzip(1) process */
6795c635efSGarrett D'Amore };
6895c635efSGarrett D'Amore 
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
7795c635efSGarrett D'Amore 
7895c635efSGarrett D'Amore static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
7995c635efSGarrett D'Amore 	MANDOCERR_OK,
8095c635efSGarrett D'Amore 	MANDOCERR_WARNING,
8195c635efSGarrett D'Amore 	MANDOCERR_WARNING,
8295c635efSGarrett D'Amore 	MANDOCERR_ERROR,
83*260e9a87SYuri Pankov 	MANDOCERR_UNSUPP,
8495c635efSGarrett D'Amore 	MANDOCERR_MAX,
8595c635efSGarrett D'Amore 	MANDOCERR_MAX
8695c635efSGarrett D'Amore };
8795c635efSGarrett D'Amore 
8895c635efSGarrett D'Amore static	const char * const	mandocerrs[MANDOCERR_MAX] = {
8995c635efSGarrett D'Amore 	"ok",
9095c635efSGarrett D'Amore 
9195c635efSGarrett D'Amore 	"generic warning",
9295c635efSGarrett D'Amore 
9395c635efSGarrett D'Amore 	/* related to the prologue */
94*260e9a87SYuri Pankov 	"missing manual title, using UNTITLED",
95*260e9a87SYuri Pankov 	"missing manual title, using \"\"",
96*260e9a87SYuri Pankov 	"lower case character in document title",
97*260e9a87SYuri Pankov 	"missing manual section, using \"\"",
9895c635efSGarrett D'Amore 	"unknown manual section",
99*260e9a87SYuri Pankov 	"missing date, using today's date",
10095c635efSGarrett D'Amore 	"cannot parse date, using it verbatim",
101*260e9a87SYuri Pankov 	"missing Os macro, using \"\"",
10295c635efSGarrett D'Amore 	"duplicate prologue macro",
103*260e9a87SYuri Pankov 	"late prologue macro",
104*260e9a87SYuri Pankov 	"skipping late title macro",
105*260e9a87SYuri Pankov 	"prologue macros out of order",
10695c635efSGarrett D'Amore 
10795c635efSGarrett D'Amore 	/* related to document structure */
10895c635efSGarrett D'Amore 	".so is fragile, better use ln(1)",
109*260e9a87SYuri Pankov 	"no document body",
110*260e9a87SYuri Pankov 	"content before first section header",
111*260e9a87SYuri Pankov 	"first section is not \"NAME\"",
112*260e9a87SYuri Pankov 	"NAME section without name",
113*260e9a87SYuri Pankov 	"NAME section without description",
114*260e9a87SYuri Pankov 	"description not at the end of NAME",
115*260e9a87SYuri Pankov 	"bad NAME section content",
116*260e9a87SYuri Pankov 	"missing description line, using \"\"",
11795c635efSGarrett D'Amore 	"sections out of conventional order",
118*260e9a87SYuri Pankov 	"duplicate section title",
119*260e9a87SYuri Pankov 	"unexpected section",
120*260e9a87SYuri Pankov 	"unusual Xr order",
121*260e9a87SYuri Pankov 	"unusual Xr punctuation",
122*260e9a87SYuri Pankov 	"AUTHORS section without An macro",
12395c635efSGarrett D'Amore 
12495c635efSGarrett D'Amore 	/* related to macros and nesting */
125*260e9a87SYuri Pankov 	"obsolete macro",
126*260e9a87SYuri Pankov 	"macro neither callable nor escaped",
12795c635efSGarrett D'Amore 	"skipping paragraph macro",
128698f87a4SGarrett D'Amore 	"moving paragraph macro out of list",
12995c635efSGarrett D'Amore 	"skipping no-space macro",
13095c635efSGarrett D'Amore 	"blocks badly nested",
13195c635efSGarrett D'Amore 	"nested displays are not portable",
132*260e9a87SYuri Pankov 	"moving content out of list",
133*260e9a87SYuri Pankov 	".Vt block has child macro",
134*260e9a87SYuri Pankov 	"fill mode already enabled, skipping",
135*260e9a87SYuri Pankov 	"fill mode already disabled, skipping",
13695c635efSGarrett D'Amore 	"line scope broken",
13795c635efSGarrett D'Amore 
13895c635efSGarrett D'Amore 	/* related to missing macro arguments */
139*260e9a87SYuri Pankov 	"skipping empty request",
140*260e9a87SYuri Pankov 	"conditional request controls empty scope",
14195c635efSGarrett D'Amore 	"skipping empty macro",
142*260e9a87SYuri Pankov 	"empty block",
143*260e9a87SYuri Pankov 	"empty argument, using 0n",
144*260e9a87SYuri Pankov 	"missing display type, using -ragged",
145*260e9a87SYuri Pankov 	"list type is not the first argument",
146*260e9a87SYuri Pankov 	"missing -width in -tag list, using 8n",
147*260e9a87SYuri Pankov 	"missing utility name, using \"\"",
148*260e9a87SYuri Pankov 	"missing function name, using \"\"",
149*260e9a87SYuri Pankov 	"empty head in list item",
150*260e9a87SYuri Pankov 	"empty list item",
151*260e9a87SYuri Pankov 	"missing font type, using \\fR",
152*260e9a87SYuri Pankov 	"unknown font type, using \\fR",
153*260e9a87SYuri Pankov 	"nothing follows prefix",
154*260e9a87SYuri Pankov 	"empty reference block",
155*260e9a87SYuri Pankov 	"missing -std argument, adding it",
156*260e9a87SYuri Pankov 	"missing option string, using \"\"",
157*260e9a87SYuri Pankov 	"missing resource identifier, using \"\"",
158*260e9a87SYuri Pankov 	"missing eqn box, using \"\"",
15995c635efSGarrett D'Amore 
16095c635efSGarrett D'Amore 	/* related to bad macro arguments */
161*260e9a87SYuri Pankov 	"unterminated quoted argument",
16295c635efSGarrett D'Amore 	"duplicate argument",
163*260e9a87SYuri Pankov 	"skipping duplicate argument",
164*260e9a87SYuri Pankov 	"skipping duplicate display type",
165*260e9a87SYuri Pankov 	"skipping duplicate list type",
166*260e9a87SYuri Pankov 	"skipping -width argument",
167*260e9a87SYuri Pankov 	"wrong number of cells",
16895c635efSGarrett D'Amore 	"unknown AT&T UNIX version",
169*260e9a87SYuri Pankov 	"comma in function argument",
170*260e9a87SYuri Pankov 	"parenthesis in function name",
171*260e9a87SYuri Pankov 	"invalid content in Rs block",
172*260e9a87SYuri Pankov 	"invalid Boolean argument",
173*260e9a87SYuri Pankov 	"unknown font, skipping request",
174*260e9a87SYuri Pankov 	"odd number of characters in request",
17595c635efSGarrett D'Amore 
17695c635efSGarrett D'Amore 	/* related to plain text */
177*260e9a87SYuri Pankov 	"blank line in fill mode, using .sp",
178*260e9a87SYuri Pankov 	"tab in filled text",
179*260e9a87SYuri Pankov 	"whitespace at end of input line",
18095c635efSGarrett D'Amore 	"bad comment style",
181*260e9a87SYuri Pankov 	"invalid escape sequence",
182*260e9a87SYuri Pankov 	"undefined string, using \"\"",
18395c635efSGarrett D'Amore 
184*260e9a87SYuri Pankov 	/* related to tables */
185*260e9a87SYuri Pankov 	"tbl line starts with span",
186*260e9a87SYuri Pankov 	"tbl column starts with span",
187*260e9a87SYuri Pankov 	"skipping vertical bar in tbl layout",
18895c635efSGarrett D'Amore 
18995c635efSGarrett D'Amore 	"generic error",
19095c635efSGarrett D'Amore 
19195c635efSGarrett D'Amore 	/* related to tables */
192*260e9a87SYuri Pankov 	"non-alphabetic character in tbl options",
193*260e9a87SYuri Pankov 	"skipping unknown tbl option",
194*260e9a87SYuri Pankov 	"missing tbl option argument",
195*260e9a87SYuri Pankov 	"wrong tbl option argument size",
196*260e9a87SYuri Pankov 	"empty tbl layout",
197*260e9a87SYuri Pankov 	"invalid character in tbl layout",
198*260e9a87SYuri Pankov 	"unmatched parenthesis in tbl layout",
199*260e9a87SYuri Pankov 	"tbl without any data cells",
200*260e9a87SYuri Pankov 	"ignoring data in spanned tbl cell",
201*260e9a87SYuri Pankov 	"ignoring extra tbl data cells",
202*260e9a87SYuri Pankov 	"data block open at end of tbl",
20395c635efSGarrett D'Amore 
204*260e9a87SYuri Pankov 	/* related to document structure and macros */
205*260e9a87SYuri Pankov 	NULL,
20695c635efSGarrett D'Amore 	"input stack limit exceeded, infinite loop?",
20795c635efSGarrett D'Amore 	"skipping bad character",
20895c635efSGarrett D'Amore 	"skipping unknown macro",
209*260e9a87SYuri Pankov 	"skipping insecure request",
210*260e9a87SYuri Pankov 	"skipping item outside list",
211698f87a4SGarrett D'Amore 	"skipping column outside column list",
21295c635efSGarrett D'Amore 	"skipping end of block that is not open",
213*260e9a87SYuri Pankov 	"fewer RS blocks open, skipping",
214*260e9a87SYuri Pankov 	"inserting missing end of block",
215*260e9a87SYuri Pankov 	"appending missing end of block",
21695c635efSGarrett D'Amore 
217*260e9a87SYuri Pankov 	/* related to request and macro arguments */
218*260e9a87SYuri Pankov 	"escaped character not allowed in a name",
219*260e9a87SYuri Pankov 	"NOT IMPLEMENTED: Bd -file",
220*260e9a87SYuri Pankov 	"missing list type, using -item",
221*260e9a87SYuri Pankov 	"missing manual name, using \"\"",
222*260e9a87SYuri Pankov 	"uname(3) system call failed, using UNKNOWN",
223*260e9a87SYuri Pankov 	"unknown standard specifier",
224*260e9a87SYuri Pankov 	"skipping request without numeric argument",
22595c635efSGarrett D'Amore 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
226*260e9a87SYuri Pankov 	".so request failed",
227*260e9a87SYuri Pankov 	"skipping all arguments",
228*260e9a87SYuri Pankov 	"skipping excess arguments",
229*260e9a87SYuri Pankov 	"divide by zero",
230*260e9a87SYuri Pankov 
231*260e9a87SYuri Pankov 	"unsupported feature",
232*260e9a87SYuri Pankov 	"input too large",
233*260e9a87SYuri Pankov 	"unsupported control character",
234*260e9a87SYuri Pankov 	"unsupported roff request",
235*260e9a87SYuri Pankov 	"eqn delim option in tbl",
236*260e9a87SYuri Pankov 	"unsupported tbl layout modifier",
237*260e9a87SYuri Pankov 	"ignoring macro in table",
23895c635efSGarrett D'Amore };
23995c635efSGarrett D'Amore 
24095c635efSGarrett D'Amore static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
24195c635efSGarrett D'Amore 	"SUCCESS",
24295c635efSGarrett D'Amore 	"RESERVED",
24395c635efSGarrett D'Amore 	"WARNING",
24495c635efSGarrett D'Amore 	"ERROR",
245*260e9a87SYuri Pankov 	"UNSUPP",
24695c635efSGarrett D'Amore 	"BADARG",
24795c635efSGarrett D'Amore 	"SYSERR"
24895c635efSGarrett D'Amore };
24995c635efSGarrett D'Amore 
250*260e9a87SYuri Pankov 
25195c635efSGarrett D'Amore static void
resize_buf(struct buf * buf,size_t initial)25295c635efSGarrett D'Amore resize_buf(struct buf *buf, size_t initial)
25395c635efSGarrett D'Amore {
25495c635efSGarrett D'Amore 
25595c635efSGarrett D'Amore 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
25695c635efSGarrett D'Amore 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
25795c635efSGarrett D'Amore }
25895c635efSGarrett D'Amore 
25995c635efSGarrett D'Amore static void
choose_parser(struct mparse * curp)260*260e9a87SYuri Pankov choose_parser(struct mparse *curp)
26195c635efSGarrett D'Amore {
262*260e9a87SYuri Pankov 	char		*cp, *ep;
263*260e9a87SYuri Pankov 	int		 format;
26495c635efSGarrett D'Amore 
26595c635efSGarrett D'Amore 	/*
266*260e9a87SYuri Pankov 	 * If neither command line arguments -mdoc or -man select
267*260e9a87SYuri Pankov 	 * a parser nor the roff parser found a .Dd or .TH macro
268*260e9a87SYuri Pankov 	 * yet, look ahead in the main input buffer.
26995c635efSGarrett D'Amore 	 */
27095c635efSGarrett D'Amore 
271*260e9a87SYuri Pankov 	if ((format = roff_getformat(curp->roff)) == 0) {
272*260e9a87SYuri Pankov 		cp = curp->primary->buf;
273*260e9a87SYuri Pankov 		ep = cp + curp->primary->sz;
274*260e9a87SYuri Pankov 		while (cp < ep) {
275*260e9a87SYuri Pankov 			if (*cp == '.' || *cp == '\'') {
276*260e9a87SYuri Pankov 				cp++;
277*260e9a87SYuri Pankov 				if (cp[0] == 'D' && cp[1] == 'd') {
278*260e9a87SYuri Pankov 					format = MPARSE_MDOC;
27995c635efSGarrett D'Amore 					break;
28095c635efSGarrett D'Amore 				}
281*260e9a87SYuri Pankov 				if (cp[0] == 'T' && cp[1] == 'H') {
282*260e9a87SYuri Pankov 					format = MPARSE_MAN;
283*260e9a87SYuri Pankov 					break;
284*260e9a87SYuri Pankov 				}
285*260e9a87SYuri Pankov 			}
286*260e9a87SYuri Pankov 			cp = memchr(cp, '\n', ep - cp);
287*260e9a87SYuri Pankov 			if (cp == NULL)
288*260e9a87SYuri Pankov 				break;
289*260e9a87SYuri Pankov 			cp++;
290*260e9a87SYuri Pankov 		}
291*260e9a87SYuri Pankov 	}
29295c635efSGarrett D'Amore 
293*260e9a87SYuri Pankov 	if (format == MPARSE_MDOC) {
29495c635efSGarrett D'Amore 		if (NULL == curp->pmdoc)
295*260e9a87SYuri Pankov 			curp->pmdoc = mdoc_alloc(
296*260e9a87SYuri Pankov 			    curp->roff, curp, curp->defos,
297*260e9a87SYuri Pankov 			    MPARSE_QUICK & curp->options ? 1 : 0);
29895c635efSGarrett D'Amore 		assert(curp->pmdoc);
29995c635efSGarrett D'Amore 		curp->mdoc = curp->pmdoc;
30095c635efSGarrett D'Amore 		return;
30195c635efSGarrett D'Amore 	}
30295c635efSGarrett D'Amore 
303*260e9a87SYuri Pankov 	/* Fall back to man(7) as a last resort. */
304*260e9a87SYuri Pankov 
30595c635efSGarrett D'Amore 	if (NULL == curp->pman)
306*260e9a87SYuri Pankov 		curp->pman = man_alloc(
307*260e9a87SYuri Pankov 		    curp->roff, curp, curp->defos,
308*260e9a87SYuri Pankov 		    MPARSE_QUICK & curp->options ? 1 : 0);
30995c635efSGarrett D'Amore 	assert(curp->pman);
31095c635efSGarrett D'Amore 	curp->man = curp->pman;
31195c635efSGarrett D'Amore }
31295c635efSGarrett D'Amore 
31395c635efSGarrett D'Amore /*
314*260e9a87SYuri Pankov  * Main parse routine for a buffer.
315*260e9a87SYuri Pankov  * It assumes encoding and line numbering are already set up.
316*260e9a87SYuri Pankov  * It can recurse directly (for invocations of user-defined
317*260e9a87SYuri Pankov  * macros, inline equations, and input line traps)
318*260e9a87SYuri Pankov  * and indirectly (for .so file inclusion).
31995c635efSGarrett D'Amore  */
32095c635efSGarrett D'Amore static void
mparse_buf_r(struct mparse * curp,struct buf blk,size_t i,int start)321*260e9a87SYuri Pankov mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
32295c635efSGarrett D'Amore {
32395c635efSGarrett D'Amore 	const struct tbl_span	*span;
32495c635efSGarrett D'Amore 	struct buf	 ln;
325*260e9a87SYuri Pankov 	const char	*save_file;
326*260e9a87SYuri Pankov 	char		*cp;
327*260e9a87SYuri Pankov 	size_t		 pos; /* byte number in the ln buffer */
32895c635efSGarrett D'Amore 	enum rofferr	 rr;
329*260e9a87SYuri Pankov 	int		 of;
33095c635efSGarrett D'Amore 	int		 lnn; /* line number in the real file */
331*260e9a87SYuri Pankov 	int		 fd;
332*260e9a87SYuri Pankov 	pid_t		 save_child;
33395c635efSGarrett D'Amore 	unsigned char	 c;
33495c635efSGarrett D'Amore 
335*260e9a87SYuri Pankov 	memset(&ln, 0, sizeof(ln));
33695c635efSGarrett D'Amore 
33795c635efSGarrett D'Amore 	lnn = curp->line;
33895c635efSGarrett D'Amore 	pos = 0;
33995c635efSGarrett D'Amore 
340*260e9a87SYuri Pankov 	while (i < blk.sz) {
34195c635efSGarrett D'Amore 		if (0 == pos && '\0' == blk.buf[i])
34295c635efSGarrett D'Amore 			break;
34395c635efSGarrett D'Amore 
34495c635efSGarrett D'Amore 		if (start) {
34595c635efSGarrett D'Amore 			curp->line = lnn;
34695c635efSGarrett D'Amore 			curp->reparse_count = 0;
347*260e9a87SYuri Pankov 
348*260e9a87SYuri Pankov 			if (lnn < 3 &&
349*260e9a87SYuri Pankov 			    curp->filenc & MPARSE_UTF8 &&
350*260e9a87SYuri Pankov 			    curp->filenc & MPARSE_LATIN1)
351*260e9a87SYuri Pankov 				curp->filenc = preconv_cue(&blk, i);
35295c635efSGarrett D'Amore 		}
35395c635efSGarrett D'Amore 
354*260e9a87SYuri Pankov 		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
35595c635efSGarrett D'Amore 
35695c635efSGarrett D'Amore 			/*
35795c635efSGarrett D'Amore 			 * When finding an unescaped newline character,
35895c635efSGarrett D'Amore 			 * leave the character loop to process the line.
35995c635efSGarrett D'Amore 			 * Skip a preceding carriage return, if any.
36095c635efSGarrett D'Amore 			 */
36195c635efSGarrett D'Amore 
362*260e9a87SYuri Pankov 			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
36395c635efSGarrett D'Amore 			    '\n' == blk.buf[i + 1])
36495c635efSGarrett D'Amore 				++i;
36595c635efSGarrett D'Amore 			if ('\n' == blk.buf[i]) {
36695c635efSGarrett D'Amore 				++i;
36795c635efSGarrett D'Amore 				++lnn;
36895c635efSGarrett D'Amore 				break;
36995c635efSGarrett D'Amore 			}
37095c635efSGarrett D'Amore 
37195c635efSGarrett D'Amore 			/*
372*260e9a87SYuri Pankov 			 * Make sure we have space for the worst
373*260e9a87SYuri Pankov 			 * case of 11 bytes: "\\[u10ffff]\0"
374698f87a4SGarrett D'Amore 			 */
375698f87a4SGarrett D'Amore 
376*260e9a87SYuri Pankov 			if (pos + 11 > ln.sz)
377698f87a4SGarrett D'Amore 				resize_buf(&ln, 256);
378698f87a4SGarrett D'Amore 
379698f87a4SGarrett D'Amore 			/*
380*260e9a87SYuri Pankov 			 * Encode 8-bit input.
38195c635efSGarrett D'Amore 			 */
38295c635efSGarrett D'Amore 
383*260e9a87SYuri Pankov 			c = blk.buf[i];
384*260e9a87SYuri Pankov 			if (c & 0x80) {
385*260e9a87SYuri Pankov 				if ( ! (curp->filenc && preconv_encode(
386*260e9a87SYuri Pankov 				    &blk, &i, &ln, &pos, &curp->filenc))) {
387*260e9a87SYuri Pankov 					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
388*260e9a87SYuri Pankov 					    curp->line, pos, "0x%x", c);
389*260e9a87SYuri Pankov 					ln.buf[pos++] = '?';
39095c635efSGarrett D'Amore 					i++;
391*260e9a87SYuri Pankov 				}
392*260e9a87SYuri Pankov 				continue;
393*260e9a87SYuri Pankov 			}
394*260e9a87SYuri Pankov 
395*260e9a87SYuri Pankov 			/*
396*260e9a87SYuri Pankov 			 * Exclude control characters.
397*260e9a87SYuri Pankov 			 */
398*260e9a87SYuri Pankov 
399*260e9a87SYuri Pankov 			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
400*260e9a87SYuri Pankov 				mandoc_vmsg(c == 0x00 || c == 0x04 ||
401*260e9a87SYuri Pankov 				    c > 0x0a ? MANDOCERR_CHAR_BAD :
402*260e9a87SYuri Pankov 				    MANDOCERR_CHAR_UNSUPP,
403*260e9a87SYuri Pankov 				    curp, curp->line, pos, "0x%x", c);
404*260e9a87SYuri Pankov 				i++;
405*260e9a87SYuri Pankov 				if (c != '\r')
40695c635efSGarrett D'Amore 					ln.buf[pos++] = '?';
40795c635efSGarrett D'Amore 				continue;
40895c635efSGarrett D'Amore 			}
40995c635efSGarrett D'Amore 
41095c635efSGarrett D'Amore 			/* Trailing backslash = a plain char. */
41195c635efSGarrett D'Amore 
412*260e9a87SYuri Pankov 			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
41395c635efSGarrett D'Amore 				ln.buf[pos++] = blk.buf[i++];
41495c635efSGarrett D'Amore 				continue;
41595c635efSGarrett D'Amore 			}
41695c635efSGarrett D'Amore 
41795c635efSGarrett D'Amore 			/*
41895c635efSGarrett D'Amore 			 * Found escape and at least one other character.
41995c635efSGarrett D'Amore 			 * When it's a newline character, skip it.
42095c635efSGarrett D'Amore 			 * When there is a carriage return in between,
42195c635efSGarrett D'Amore 			 * skip that one as well.
42295c635efSGarrett D'Amore 			 */
42395c635efSGarrett D'Amore 
424*260e9a87SYuri Pankov 			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
42595c635efSGarrett D'Amore 			    '\n' == blk.buf[i + 2])
42695c635efSGarrett D'Amore 				++i;
42795c635efSGarrett D'Amore 			if ('\n' == blk.buf[i + 1]) {
42895c635efSGarrett D'Amore 				i += 2;
42995c635efSGarrett D'Amore 				++lnn;
43095c635efSGarrett D'Amore 				continue;
43195c635efSGarrett D'Amore 			}
43295c635efSGarrett D'Amore 
43395c635efSGarrett D'Amore 			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
43495c635efSGarrett D'Amore 				i += 2;
43595c635efSGarrett D'Amore 				/* Comment, skip to end of line */
436*260e9a87SYuri Pankov 				for (; i < blk.sz; ++i) {
43795c635efSGarrett D'Amore 					if ('\n' == blk.buf[i]) {
43895c635efSGarrett D'Amore 						++i;
43995c635efSGarrett D'Amore 						++lnn;
44095c635efSGarrett D'Amore 						break;
44195c635efSGarrett D'Amore 					}
44295c635efSGarrett D'Amore 				}
44395c635efSGarrett D'Amore 
44495c635efSGarrett D'Amore 				/* Backout trailing whitespaces */
44595c635efSGarrett D'Amore 				for (; pos > 0; --pos) {
44695c635efSGarrett D'Amore 					if (ln.buf[pos - 1] != ' ')
44795c635efSGarrett D'Amore 						break;
44895c635efSGarrett D'Amore 					if (pos > 2 && ln.buf[pos - 2] == '\\')
44995c635efSGarrett D'Amore 						break;
45095c635efSGarrett D'Amore 				}
45195c635efSGarrett D'Amore 				break;
45295c635efSGarrett D'Amore 			}
45395c635efSGarrett D'Amore 
454698f87a4SGarrett D'Amore 			/* Catch escaped bogus characters. */
45595c635efSGarrett D'Amore 
456698f87a4SGarrett D'Amore 			c = (unsigned char) blk.buf[i+1];
457698f87a4SGarrett D'Amore 
458698f87a4SGarrett D'Amore 			if ( ! (isascii(c) &&
459698f87a4SGarrett D'Amore 			    (isgraph(c) || isblank(c)))) {
460*260e9a87SYuri Pankov 				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
461*260e9a87SYuri Pankov 				    curp->line, pos, "0x%x", c);
462698f87a4SGarrett D'Amore 				i += 2;
463698f87a4SGarrett D'Amore 				ln.buf[pos++] = '?';
464698f87a4SGarrett D'Amore 				continue;
465698f87a4SGarrett D'Amore 			}
466698f87a4SGarrett D'Amore 
467698f87a4SGarrett D'Amore 			/* Some other escape sequence, copy & cont. */
46895c635efSGarrett D'Amore 
46995c635efSGarrett D'Amore 			ln.buf[pos++] = blk.buf[i++];
47095c635efSGarrett D'Amore 			ln.buf[pos++] = blk.buf[i++];
47195c635efSGarrett D'Amore 		}
47295c635efSGarrett D'Amore 
473*260e9a87SYuri Pankov 		if (pos >= ln.sz)
47495c635efSGarrett D'Amore 			resize_buf(&ln, 256);
47595c635efSGarrett D'Amore 
47695c635efSGarrett D'Amore 		ln.buf[pos] = '\0';
47795c635efSGarrett D'Amore 
47895c635efSGarrett D'Amore 		/*
47995c635efSGarrett D'Amore 		 * A significant amount of complexity is contained by
48095c635efSGarrett D'Amore 		 * the roff preprocessor.  It's line-oriented but can be
48195c635efSGarrett D'Amore 		 * expressed on one line, so we need at times to
48295c635efSGarrett D'Amore 		 * readjust our starting point and re-run it.  The roff
48395c635efSGarrett D'Amore 		 * preprocessor can also readjust the buffers with new
48495c635efSGarrett D'Amore 		 * data, so we pass them in wholesale.
48595c635efSGarrett D'Amore 		 */
48695c635efSGarrett D'Amore 
48795c635efSGarrett D'Amore 		of = 0;
48895c635efSGarrett D'Amore 
48995c635efSGarrett D'Amore 		/*
49095c635efSGarrett D'Amore 		 * Maintain a lookaside buffer of all parsed lines.  We
49195c635efSGarrett D'Amore 		 * only do this if mparse_keep() has been invoked (the
49295c635efSGarrett D'Amore 		 * buffer may be accessed with mparse_getkeep()).
49395c635efSGarrett D'Amore 		 */
49495c635efSGarrett D'Amore 
49595c635efSGarrett D'Amore 		if (curp->secondary) {
496*260e9a87SYuri Pankov 			curp->secondary->buf = mandoc_realloc(
497*260e9a87SYuri Pankov 			    curp->secondary->buf,
49895c635efSGarrett D'Amore 			    curp->secondary->sz + pos + 2);
49995c635efSGarrett D'Amore 			memcpy(curp->secondary->buf +
50095c635efSGarrett D'Amore 			    curp->secondary->sz,
50195c635efSGarrett D'Amore 			    ln.buf, pos);
50295c635efSGarrett D'Amore 			curp->secondary->sz += pos;
50395c635efSGarrett D'Amore 			curp->secondary->buf
50495c635efSGarrett D'Amore 				[curp->secondary->sz] = '\n';
50595c635efSGarrett D'Amore 			curp->secondary->sz++;
50695c635efSGarrett D'Amore 			curp->secondary->buf
50795c635efSGarrett D'Amore 				[curp->secondary->sz] = '\0';
50895c635efSGarrett D'Amore 		}
50995c635efSGarrett D'Amore rerun:
510*260e9a87SYuri Pankov 		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
51195c635efSGarrett D'Amore 
51295c635efSGarrett D'Amore 		switch (rr) {
513*260e9a87SYuri Pankov 		case ROFF_REPARSE:
51495c635efSGarrett D'Amore 			if (REPARSE_LIMIT >= ++curp->reparse_count)
515*260e9a87SYuri Pankov 				mparse_buf_r(curp, ln, of, 0);
51695c635efSGarrett D'Amore 			else
51795c635efSGarrett D'Amore 				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
51895c635efSGarrett D'Amore 				    curp->line, pos, NULL);
51995c635efSGarrett D'Amore 			pos = 0;
52095c635efSGarrett D'Amore 			continue;
521*260e9a87SYuri Pankov 		case ROFF_APPEND:
522*260e9a87SYuri Pankov 			pos = strlen(ln.buf);
52395c635efSGarrett D'Amore 			continue;
524*260e9a87SYuri Pankov 		case ROFF_RERUN:
52595c635efSGarrett D'Amore 			goto rerun;
526*260e9a87SYuri Pankov 		case ROFF_IGN:
52795c635efSGarrett D'Amore 			pos = 0;
52895c635efSGarrett D'Amore 			continue;
529*260e9a87SYuri Pankov 		case ROFF_SO:
530*260e9a87SYuri Pankov 			if ( ! (curp->options & MPARSE_SO) &&
531*260e9a87SYuri Pankov 			    (i >= blk.sz || blk.buf[i] == '\0')) {
532*260e9a87SYuri Pankov 				curp->sodest = mandoc_strdup(ln.buf + of);
533*260e9a87SYuri Pankov 				free(ln.buf);
534*260e9a87SYuri Pankov 				return;
535*260e9a87SYuri Pankov 			}
53695c635efSGarrett D'Amore 			/*
53795c635efSGarrett D'Amore 			 * We remove `so' clauses from our lookaside
53895c635efSGarrett D'Amore 			 * buffer because we're going to descend into
53995c635efSGarrett D'Amore 			 * the file recursively.
54095c635efSGarrett D'Amore 			 */
54195c635efSGarrett D'Amore 			if (curp->secondary)
54295c635efSGarrett D'Amore 				curp->secondary->sz -= pos + 1;
543*260e9a87SYuri Pankov 			save_file = curp->file;
544*260e9a87SYuri Pankov 			save_child = curp->child;
545*260e9a87SYuri Pankov 			if (mparse_open(curp, &fd, ln.buf + of) ==
546*260e9a87SYuri Pankov 			    MANDOCLEVEL_OK) {
547*260e9a87SYuri Pankov 				mparse_readfd(curp, fd, ln.buf + of);
548*260e9a87SYuri Pankov 				curp->file = save_file;
549*260e9a87SYuri Pankov 			} else {
550*260e9a87SYuri Pankov 				curp->file = save_file;
551*260e9a87SYuri Pankov 				mandoc_vmsg(MANDOCERR_SO_FAIL,
552*260e9a87SYuri Pankov 				    curp, curp->line, pos,
553*260e9a87SYuri Pankov 				    ".so %s", ln.buf + of);
554*260e9a87SYuri Pankov 				ln.sz = mandoc_asprintf(&cp,
555*260e9a87SYuri Pankov 				    ".sp\nSee the file %s.\n.sp",
556*260e9a87SYuri Pankov 				    ln.buf + of);
557*260e9a87SYuri Pankov 				free(ln.buf);
558*260e9a87SYuri Pankov 				ln.buf = cp;
559*260e9a87SYuri Pankov 				of = 0;
560*260e9a87SYuri Pankov 				mparse_buf_r(curp, ln, of, 0);
561*260e9a87SYuri Pankov 			}
562*260e9a87SYuri Pankov 			curp->child = save_child;
56395c635efSGarrett D'Amore 			pos = 0;
56495c635efSGarrett D'Amore 			continue;
56595c635efSGarrett D'Amore 		default:
56695c635efSGarrett D'Amore 			break;
56795c635efSGarrett D'Amore 		}
56895c635efSGarrett D'Amore 
56995c635efSGarrett D'Amore 		/*
57095c635efSGarrett D'Amore 		 * If input parsers have not been allocated, do so now.
57195c635efSGarrett D'Amore 		 * We keep these instanced between parsers, but set them
57295c635efSGarrett D'Amore 		 * locally per parse routine since we can use different
57395c635efSGarrett D'Amore 		 * parsers with each one.
57495c635efSGarrett D'Amore 		 */
57595c635efSGarrett D'Amore 
57695c635efSGarrett D'Amore 		if ( ! (curp->man || curp->mdoc))
577*260e9a87SYuri Pankov 			choose_parser(curp);
57895c635efSGarrett D'Amore 
57995c635efSGarrett D'Amore 		/*
580*260e9a87SYuri Pankov 		 * Lastly, push down into the parsers themselves.
58195c635efSGarrett D'Amore 		 * If libroff returns ROFF_TBL, then add it to the
58295c635efSGarrett D'Amore 		 * currently open parse.  Since we only get here if
58395c635efSGarrett D'Amore 		 * there does exist data (see tbl_data.c), we're
58495c635efSGarrett D'Amore 		 * guaranteed that something's been allocated.
58595c635efSGarrett D'Amore 		 * Do the same for ROFF_EQN.
58695c635efSGarrett D'Amore 		 */
58795c635efSGarrett D'Amore 
588*260e9a87SYuri Pankov 		if (rr == ROFF_TBL) {
589*260e9a87SYuri Pankov 			while ((span = roff_span(curp->roff)) != NULL)
590*260e9a87SYuri Pankov 				if (curp->man == NULL)
59195c635efSGarrett D'Amore 					mdoc_addspan(curp->mdoc, span);
592*260e9a87SYuri Pankov 				else
593*260e9a87SYuri Pankov 					man_addspan(curp->man, span);
594*260e9a87SYuri Pankov 		} else if (rr == ROFF_EQN) {
595*260e9a87SYuri Pankov 			if (curp->man == NULL)
596*260e9a87SYuri Pankov 				mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
597*260e9a87SYuri Pankov 			else
598*260e9a87SYuri Pankov 				man_addeqn(curp->man, roff_eqn(curp->roff));
599*260e9a87SYuri Pankov 		} else if ((curp->man == NULL ?
600*260e9a87SYuri Pankov 		    mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
601*260e9a87SYuri Pankov 		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
60295c635efSGarrett D'Amore 				break;
60395c635efSGarrett D'Amore 
60495c635efSGarrett D'Amore 		/* Temporary buffers typically are not full. */
60595c635efSGarrett D'Amore 
60695c635efSGarrett D'Amore 		if (0 == start && '\0' == blk.buf[i])
60795c635efSGarrett D'Amore 			break;
60895c635efSGarrett D'Amore 
60995c635efSGarrett D'Amore 		/* Start the next input line. */
61095c635efSGarrett D'Amore 
61195c635efSGarrett D'Amore 		pos = 0;
61295c635efSGarrett D'Amore 	}
61395c635efSGarrett D'Amore 
61495c635efSGarrett D'Amore 	free(ln.buf);
61595c635efSGarrett D'Amore }
61695c635efSGarrett D'Amore 
61795c635efSGarrett D'Amore static int
read_whole_file(struct mparse * curp,const char * file,int fd,struct buf * fb,int * with_mmap)618*260e9a87SYuri Pankov read_whole_file(struct mparse *curp, const char *file, int fd,
619*260e9a87SYuri Pankov 		struct buf *fb, int *with_mmap)
62095c635efSGarrett D'Amore {
62195c635efSGarrett D'Amore 	size_t		 off;
62295c635efSGarrett D'Amore 	ssize_t		 ssz;
62395c635efSGarrett D'Amore 
624*260e9a87SYuri Pankov #if HAVE_MMAP
62595c635efSGarrett D'Amore 	struct stat	 st;
62695c635efSGarrett D'Amore 	if (-1 == fstat(fd, &st)) {
62795c635efSGarrett D'Amore 		perror(file);
628*260e9a87SYuri Pankov 		exit((int)MANDOCLEVEL_SYSERR);
62995c635efSGarrett D'Amore 	}
63095c635efSGarrett D'Amore 
63195c635efSGarrett D'Amore 	/*
63295c635efSGarrett D'Amore 	 * If we're a regular file, try just reading in the whole entry
63395c635efSGarrett D'Amore 	 * via mmap().  This is faster than reading it into blocks, and
63495c635efSGarrett D'Amore 	 * since each file is only a few bytes to begin with, I'm not
63595c635efSGarrett D'Amore 	 * concerned that this is going to tank any machines.
63695c635efSGarrett D'Amore 	 */
63795c635efSGarrett D'Amore 
63895c635efSGarrett D'Amore 	if (S_ISREG(st.st_mode)) {
639*260e9a87SYuri Pankov 		if (st.st_size > 0x7fffffff) {
640*260e9a87SYuri Pankov 			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
64195c635efSGarrett D'Amore 			return(0);
64295c635efSGarrett D'Amore 		}
64395c635efSGarrett D'Amore 		*with_mmap = 1;
64495c635efSGarrett D'Amore 		fb->sz = (size_t)st.st_size;
645698f87a4SGarrett D'Amore 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
64695c635efSGarrett D'Amore 		if (fb->buf != MAP_FAILED)
64795c635efSGarrett D'Amore 			return(1);
64895c635efSGarrett D'Amore 	}
64995c635efSGarrett D'Amore #endif
65095c635efSGarrett D'Amore 
65195c635efSGarrett D'Amore 	/*
65295c635efSGarrett D'Amore 	 * If this isn't a regular file (like, say, stdin), then we must
65395c635efSGarrett D'Amore 	 * go the old way and just read things in bit by bit.
65495c635efSGarrett D'Amore 	 */
65595c635efSGarrett D'Amore 
65695c635efSGarrett D'Amore 	*with_mmap = 0;
65795c635efSGarrett D'Amore 	off = 0;
65895c635efSGarrett D'Amore 	fb->sz = 0;
65995c635efSGarrett D'Amore 	fb->buf = NULL;
66095c635efSGarrett D'Amore 	for (;;) {
66195c635efSGarrett D'Amore 		if (off == fb->sz) {
66295c635efSGarrett D'Amore 			if (fb->sz == (1U << 31)) {
663*260e9a87SYuri Pankov 				mandoc_msg(MANDOCERR_TOOLARGE, curp,
664*260e9a87SYuri Pankov 				    0, 0, NULL);
66595c635efSGarrett D'Amore 				break;
66695c635efSGarrett D'Amore 			}
66795c635efSGarrett D'Amore 			resize_buf(fb, 65536);
66895c635efSGarrett D'Amore 		}
66995c635efSGarrett D'Amore 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
67095c635efSGarrett D'Amore 		if (ssz == 0) {
67195c635efSGarrett D'Amore 			fb->sz = off;
67295c635efSGarrett D'Amore 			return(1);
67395c635efSGarrett D'Amore 		}
67495c635efSGarrett D'Amore 		if (ssz == -1) {
67595c635efSGarrett D'Amore 			perror(file);
676*260e9a87SYuri Pankov 			exit((int)MANDOCLEVEL_SYSERR);
67795c635efSGarrett D'Amore 		}
67895c635efSGarrett D'Amore 		off += (size_t)ssz;
67995c635efSGarrett D'Amore 	}
68095c635efSGarrett D'Amore 
68195c635efSGarrett D'Amore 	free(fb->buf);
68295c635efSGarrett D'Amore 	fb->buf = NULL;
68395c635efSGarrett D'Amore 	return(0);
68495c635efSGarrett D'Amore }
68595c635efSGarrett D'Amore 
68695c635efSGarrett D'Amore static void
mparse_end(struct mparse * curp)68795c635efSGarrett D'Amore mparse_end(struct mparse *curp)
68895c635efSGarrett D'Amore {
68995c635efSGarrett D'Amore 
690*260e9a87SYuri Pankov 	if (curp->mdoc == NULL &&
691*260e9a87SYuri Pankov 	    curp->man == NULL &&
692*260e9a87SYuri Pankov 	    curp->sodest == NULL) {
693*260e9a87SYuri Pankov 		if (curp->options & MPARSE_MDOC)
694*260e9a87SYuri Pankov 			curp->mdoc = curp->pmdoc;
695*260e9a87SYuri Pankov 		else {
696*260e9a87SYuri Pankov 			if (curp->pman == NULL)
697*260e9a87SYuri Pankov 				curp->pman = man_alloc(
698*260e9a87SYuri Pankov 				    curp->roff, curp, curp->defos,
699*260e9a87SYuri Pankov 				    curp->options & MPARSE_QUICK ? 1 : 0);
700*260e9a87SYuri Pankov 			curp->man = curp->pman;
70195c635efSGarrett D'Amore 		}
70295c635efSGarrett D'Amore 	}
703*260e9a87SYuri Pankov 	if (curp->mdoc)
704*260e9a87SYuri Pankov 		mdoc_endparse(curp->mdoc);
705*260e9a87SYuri Pankov 	if (curp->man)
706*260e9a87SYuri Pankov 		man_endparse(curp->man);
70795c635efSGarrett D'Amore 	roff_endparse(curp->roff);
70895c635efSGarrett D'Amore }
70995c635efSGarrett D'Amore 
71095c635efSGarrett D'Amore static void
mparse_parse_buffer(struct mparse * curp,struct buf blk,const char * file)711698f87a4SGarrett D'Amore mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
71295c635efSGarrett D'Amore {
713*260e9a87SYuri Pankov 	struct buf	*svprimary;
71495c635efSGarrett D'Amore 	const char	*svfile;
715*260e9a87SYuri Pankov 	size_t		 offset;
716698f87a4SGarrett D'Amore 	static int	 recursion_depth;
717698f87a4SGarrett D'Amore 
718698f87a4SGarrett D'Amore 	if (64 < recursion_depth) {
719698f87a4SGarrett D'Amore 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720698f87a4SGarrett D'Amore 		return;
721698f87a4SGarrett D'Amore 	}
72295c635efSGarrett D'Amore 
72395c635efSGarrett D'Amore 	/* Line number is per-file. */
72495c635efSGarrett D'Amore 	svfile = curp->file;
72595c635efSGarrett D'Amore 	curp->file = file;
726*260e9a87SYuri Pankov 	svprimary = curp->primary;
727*260e9a87SYuri Pankov 	curp->primary = &blk;
72895c635efSGarrett D'Amore 	curp->line = 1;
729698f87a4SGarrett D'Amore 	recursion_depth++;
73095c635efSGarrett D'Amore 
731*260e9a87SYuri Pankov 	/* Skip an UTF-8 byte order mark. */
732*260e9a87SYuri Pankov 	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
733*260e9a87SYuri Pankov 	    (unsigned char)blk.buf[0] == 0xef &&
734*260e9a87SYuri Pankov 	    (unsigned char)blk.buf[1] == 0xbb &&
735*260e9a87SYuri Pankov 	    (unsigned char)blk.buf[2] == 0xbf) {
736*260e9a87SYuri Pankov 		offset = 3;
737*260e9a87SYuri Pankov 		curp->filenc &= ~MPARSE_LATIN1;
738*260e9a87SYuri Pankov 	} else
739*260e9a87SYuri Pankov 		offset = 0;
74095c635efSGarrett D'Amore 
741*260e9a87SYuri Pankov 	mparse_buf_r(curp, blk, offset, 1);
742*260e9a87SYuri Pankov 
743*260e9a87SYuri Pankov 	if (--recursion_depth == 0)
74495c635efSGarrett D'Amore 		mparse_end(curp);
74595c635efSGarrett D'Amore 
746*260e9a87SYuri Pankov 	curp->primary = svprimary;
74795c635efSGarrett D'Amore 	curp->file = svfile;
74895c635efSGarrett D'Amore }
74995c635efSGarrett D'Amore 
75095c635efSGarrett D'Amore enum mandoclevel
mparse_readmem(struct mparse * curp,void * buf,size_t len,const char * file)751*260e9a87SYuri Pankov mparse_readmem(struct mparse *curp, void *buf, size_t len,
75295c635efSGarrett D'Amore 		const char *file)
75395c635efSGarrett D'Amore {
75495c635efSGarrett D'Amore 	struct buf blk;
75595c635efSGarrett D'Amore 
756*260e9a87SYuri Pankov 	blk.buf = buf;
75795c635efSGarrett D'Amore 	blk.sz = len;
75895c635efSGarrett D'Amore 
759698f87a4SGarrett D'Amore 	mparse_parse_buffer(curp, blk, file);
76095c635efSGarrett D'Amore 	return(curp->file_status);
76195c635efSGarrett D'Amore }
76295c635efSGarrett D'Amore 
763*260e9a87SYuri Pankov /*
764*260e9a87SYuri Pankov  * Read the whole file into memory and call the parsers.
765*260e9a87SYuri Pankov  * Called recursively when an .so request is encountered.
766*260e9a87SYuri Pankov  */
767698f87a4SGarrett D'Amore enum mandoclevel
mparse_readfd(struct mparse * curp,int fd,const char * file)768698f87a4SGarrett D'Amore mparse_readfd(struct mparse *curp, int fd, const char *file)
76995c635efSGarrett D'Amore {
77095c635efSGarrett D'Amore 	struct buf	 blk;
77195c635efSGarrett D'Amore 	int		 with_mmap;
772*260e9a87SYuri Pankov 	int		 save_filenc;
77395c635efSGarrett D'Amore 
774*260e9a87SYuri Pankov 	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
775*260e9a87SYuri Pankov 		save_filenc = curp->filenc;
776*260e9a87SYuri Pankov 		curp->filenc = curp->options &
777*260e9a87SYuri Pankov 		    (MPARSE_UTF8 | MPARSE_LATIN1);
778698f87a4SGarrett D'Amore 		mparse_parse_buffer(curp, blk, file);
779*260e9a87SYuri Pankov 		curp->filenc = save_filenc;
780*260e9a87SYuri Pankov #if HAVE_MMAP
78195c635efSGarrett D'Amore 		if (with_mmap)
78295c635efSGarrett D'Amore 			munmap(blk.buf, blk.sz);
78395c635efSGarrett D'Amore 		else
78495c635efSGarrett D'Amore #endif
78595c635efSGarrett D'Amore 			free(blk.buf);
786*260e9a87SYuri Pankov 	}
78795c635efSGarrett D'Amore 
788*260e9a87SYuri Pankov 	if (fd != STDIN_FILENO && close(fd) == -1)
78995c635efSGarrett D'Amore 		perror(file);
790*260e9a87SYuri Pankov 
791*260e9a87SYuri Pankov 	mparse_wait(curp);
79295c635efSGarrett D'Amore 	return(curp->file_status);
79395c635efSGarrett D'Amore }
79495c635efSGarrett D'Amore 
795*260e9a87SYuri Pankov enum mandoclevel
mparse_open(struct mparse * curp,int * fd,const char * file)796*260e9a87SYuri Pankov mparse_open(struct mparse *curp, int *fd, const char *file)
797*260e9a87SYuri Pankov {
798*260e9a87SYuri Pankov 	int		  pfd[2];
799*260e9a87SYuri Pankov 	int		  save_errno;
800*260e9a87SYuri Pankov 	char		 *cp;
801*260e9a87SYuri Pankov 
802*260e9a87SYuri Pankov 	curp->file = file;
803*260e9a87SYuri Pankov 
804*260e9a87SYuri Pankov 	/* Unless zipped, try to just open the file. */
805*260e9a87SYuri Pankov 
806*260e9a87SYuri Pankov 	if ((cp = strrchr(file, '.')) == NULL ||
807*260e9a87SYuri Pankov 	    strcmp(cp + 1, "gz")) {
808*260e9a87SYuri Pankov 		curp->child = 0;
809*260e9a87SYuri Pankov 		if ((*fd = open(file, O_RDONLY)) != -1)
810*260e9a87SYuri Pankov 			return(MANDOCLEVEL_OK);
811*260e9a87SYuri Pankov 
812*260e9a87SYuri Pankov 		/* Open failed; try to append ".gz". */
813*260e9a87SYuri Pankov 
814*260e9a87SYuri Pankov 		mandoc_asprintf(&cp, "%s.gz", file);
815*260e9a87SYuri Pankov 		file = cp;
816*260e9a87SYuri Pankov 	} else
817*260e9a87SYuri Pankov 		cp = NULL;
818*260e9a87SYuri Pankov 
819*260e9a87SYuri Pankov 	/* Before forking, make sure the file can be read. */
820*260e9a87SYuri Pankov 
821*260e9a87SYuri Pankov 	save_errno = errno;
822*260e9a87SYuri Pankov 	if (access(file, R_OK) == -1) {
823*260e9a87SYuri Pankov 		if (cp != NULL)
824*260e9a87SYuri Pankov 			errno = save_errno;
825*260e9a87SYuri Pankov 		free(cp);
826*260e9a87SYuri Pankov 		*fd = -1;
827*260e9a87SYuri Pankov 		curp->child = 0;
828*260e9a87SYuri Pankov 		mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
829*260e9a87SYuri Pankov 		return(MANDOCLEVEL_ERROR);
830*260e9a87SYuri Pankov 	}
831*260e9a87SYuri Pankov 
832*260e9a87SYuri Pankov 	/* Run gunzip(1). */
833*260e9a87SYuri Pankov 
834*260e9a87SYuri Pankov 	if (pipe(pfd) == -1) {
835*260e9a87SYuri Pankov 		perror("pipe");
836*260e9a87SYuri Pankov 		exit((int)MANDOCLEVEL_SYSERR);
837*260e9a87SYuri Pankov 	}
838*260e9a87SYuri Pankov 
839*260e9a87SYuri Pankov 	switch (curp->child = fork()) {
840*260e9a87SYuri Pankov 	case -1:
841*260e9a87SYuri Pankov 		perror("fork");
842*260e9a87SYuri Pankov 		exit((int)MANDOCLEVEL_SYSERR);
843*260e9a87SYuri Pankov 	case 0:
844*260e9a87SYuri Pankov 		close(pfd[0]);
845*260e9a87SYuri Pankov 		if (dup2(pfd[1], STDOUT_FILENO) == -1) {
846*260e9a87SYuri Pankov 			perror("dup");
847*260e9a87SYuri Pankov 			exit((int)MANDOCLEVEL_SYSERR);
848*260e9a87SYuri Pankov 		}
849*260e9a87SYuri Pankov 		execlp("gunzip", "gunzip", "-c", file, NULL);
850*260e9a87SYuri Pankov 		perror("exec");
851*260e9a87SYuri Pankov 		exit((int)MANDOCLEVEL_SYSERR);
852*260e9a87SYuri Pankov 	default:
853*260e9a87SYuri Pankov 		close(pfd[1]);
854*260e9a87SYuri Pankov 		*fd = pfd[0];
855*260e9a87SYuri Pankov 		return(MANDOCLEVEL_OK);
856*260e9a87SYuri Pankov 	}
857*260e9a87SYuri Pankov }
858*260e9a87SYuri Pankov 
859*260e9a87SYuri Pankov enum mandoclevel
mparse_wait(struct mparse * curp)860*260e9a87SYuri Pankov mparse_wait(struct mparse *curp)
861*260e9a87SYuri Pankov {
862*260e9a87SYuri Pankov 	int	  status;
863*260e9a87SYuri Pankov 
864*260e9a87SYuri Pankov 	if (curp->child == 0)
865*260e9a87SYuri Pankov 		return(MANDOCLEVEL_OK);
866*260e9a87SYuri Pankov 
867*260e9a87SYuri Pankov 	if (waitpid(curp->child, &status, 0) == -1) {
868*260e9a87SYuri Pankov 		perror("wait");
869*260e9a87SYuri Pankov 		exit((int)MANDOCLEVEL_SYSERR);
870*260e9a87SYuri Pankov 	}
871*260e9a87SYuri Pankov 	curp->child = 0;
872*260e9a87SYuri Pankov 	if (WIFSIGNALED(status)) {
873*260e9a87SYuri Pankov 		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874*260e9a87SYuri Pankov 		    "gunzip died from signal %d", WTERMSIG(status));
875*260e9a87SYuri Pankov 		return(MANDOCLEVEL_ERROR);
876*260e9a87SYuri Pankov 	}
877*260e9a87SYuri Pankov 	if (WEXITSTATUS(status)) {
878*260e9a87SYuri Pankov 		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
879*260e9a87SYuri Pankov 		    "gunzip failed with code %d", WEXITSTATUS(status));
880*260e9a87SYuri Pankov 		return(MANDOCLEVEL_ERROR);
881*260e9a87SYuri Pankov 	}
882*260e9a87SYuri Pankov 	return(MANDOCLEVEL_OK);
883*260e9a87SYuri Pankov }
884*260e9a87SYuri Pankov 
88595c635efSGarrett D'Amore struct mparse *
mparse_alloc(int options,enum mandoclevel wlevel,mandocmsg mmsg,const struct mchars * mchars,const char * defos)886*260e9a87SYuri Pankov mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
887*260e9a87SYuri Pankov     const struct mchars *mchars, const char *defos)
88895c635efSGarrett D'Amore {
88995c635efSGarrett D'Amore 	struct mparse	*curp;
89095c635efSGarrett D'Amore 
89195c635efSGarrett D'Amore 	curp = mandoc_calloc(1, sizeof(struct mparse));
89295c635efSGarrett D'Amore 
893*260e9a87SYuri Pankov 	curp->options = options;
89495c635efSGarrett D'Amore 	curp->wlevel = wlevel;
89595c635efSGarrett D'Amore 	curp->mmsg = mmsg;
896698f87a4SGarrett D'Amore 	curp->defos = defos;
89795c635efSGarrett D'Amore 
898*260e9a87SYuri Pankov 	curp->mchars = mchars;
899*260e9a87SYuri Pankov 	curp->roff = roff_alloc(curp, curp->mchars, options);
900*260e9a87SYuri Pankov 	if (curp->options & MPARSE_MDOC)
901*260e9a87SYuri Pankov 		curp->pmdoc = mdoc_alloc(
902*260e9a87SYuri Pankov 		    curp->roff, curp, curp->defos,
903*260e9a87SYuri Pankov 		    curp->options & MPARSE_QUICK ? 1 : 0);
904*260e9a87SYuri Pankov 	if (curp->options & MPARSE_MAN)
905*260e9a87SYuri Pankov 		curp->pman = man_alloc(
906*260e9a87SYuri Pankov 		    curp->roff, curp, curp->defos,
907*260e9a87SYuri Pankov 		    curp->options & MPARSE_QUICK ? 1 : 0);
908*260e9a87SYuri Pankov 
90995c635efSGarrett D'Amore 	return(curp);
91095c635efSGarrett D'Amore }
91195c635efSGarrett D'Amore 
91295c635efSGarrett D'Amore void
mparse_reset(struct mparse * curp)91395c635efSGarrett D'Amore mparse_reset(struct mparse *curp)
91495c635efSGarrett D'Amore {
91595c635efSGarrett D'Amore 
91695c635efSGarrett D'Amore 	roff_reset(curp->roff);
91795c635efSGarrett D'Amore 
91895c635efSGarrett D'Amore 	if (curp->mdoc)
91995c635efSGarrett D'Amore 		mdoc_reset(curp->mdoc);
92095c635efSGarrett D'Amore 	if (curp->man)
92195c635efSGarrett D'Amore 		man_reset(curp->man);
92295c635efSGarrett D'Amore 	if (curp->secondary)
92395c635efSGarrett D'Amore 		curp->secondary->sz = 0;
92495c635efSGarrett D'Amore 
92595c635efSGarrett D'Amore 	curp->file_status = MANDOCLEVEL_OK;
92695c635efSGarrett D'Amore 	curp->mdoc = NULL;
92795c635efSGarrett D'Amore 	curp->man = NULL;
928*260e9a87SYuri Pankov 
929*260e9a87SYuri Pankov 	free(curp->sodest);
930*260e9a87SYuri Pankov 	curp->sodest = NULL;
93195c635efSGarrett D'Amore }
93295c635efSGarrett D'Amore 
93395c635efSGarrett D'Amore void
mparse_free(struct mparse * curp)93495c635efSGarrett D'Amore mparse_free(struct mparse *curp)
93595c635efSGarrett D'Amore {
93695c635efSGarrett D'Amore 
93795c635efSGarrett D'Amore 	if (curp->pmdoc)
93895c635efSGarrett D'Amore 		mdoc_free(curp->pmdoc);
93995c635efSGarrett D'Amore 	if (curp->pman)
94095c635efSGarrett D'Amore 		man_free(curp->pman);
94195c635efSGarrett D'Amore 	if (curp->roff)
94295c635efSGarrett D'Amore 		roff_free(curp->roff);
94395c635efSGarrett D'Amore 	if (curp->secondary)
94495c635efSGarrett D'Amore 		free(curp->secondary->buf);
94595c635efSGarrett D'Amore 
94695c635efSGarrett D'Amore 	free(curp->secondary);
947*260e9a87SYuri Pankov 	free(curp->sodest);
94895c635efSGarrett D'Amore 	free(curp);
94995c635efSGarrett D'Amore }
95095c635efSGarrett D'Amore 
95195c635efSGarrett D'Amore void
mparse_result(struct mparse * curp,struct mdoc ** mdoc,struct man ** man,char ** sodest)952*260e9a87SYuri Pankov mparse_result(struct mparse *curp,
953*260e9a87SYuri Pankov 	struct mdoc **mdoc, struct man **man, char **sodest)
95495c635efSGarrett D'Amore {
95595c635efSGarrett D'Amore 
956*260e9a87SYuri Pankov 	if (sodest && NULL != (*sodest = curp->sodest)) {
957*260e9a87SYuri Pankov 		*mdoc = NULL;
958*260e9a87SYuri Pankov 		*man = NULL;
959*260e9a87SYuri Pankov 		return;
960*260e9a87SYuri Pankov 	}
96195c635efSGarrett D'Amore 	if (mdoc)
96295c635efSGarrett D'Amore 		*mdoc = curp->mdoc;
96395c635efSGarrett D'Amore 	if (man)
96495c635efSGarrett D'Amore 		*man = curp->man;
96595c635efSGarrett D'Amore }
96695c635efSGarrett D'Amore 
96795c635efSGarrett D'Amore void
mandoc_vmsg(enum mandocerr t,struct mparse * m,int ln,int pos,const char * fmt,...)96895c635efSGarrett D'Amore mandoc_vmsg(enum mandocerr t, struct mparse *m,
96995c635efSGarrett D'Amore 		int ln, int pos, const char *fmt, ...)
97095c635efSGarrett D'Amore {
97195c635efSGarrett D'Amore 	char		 buf[256];
97295c635efSGarrett D'Amore 	va_list		 ap;
97395c635efSGarrett D'Amore 
97495c635efSGarrett D'Amore 	va_start(ap, fmt);
975*260e9a87SYuri Pankov 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
97695c635efSGarrett D'Amore 	va_end(ap);
97795c635efSGarrett D'Amore 
97895c635efSGarrett D'Amore 	mandoc_msg(t, m, ln, pos, buf);
97995c635efSGarrett D'Amore }
98095c635efSGarrett D'Amore 
98195c635efSGarrett D'Amore void
mandoc_msg(enum mandocerr er,struct mparse * m,int ln,int col,const char * msg)98295c635efSGarrett D'Amore mandoc_msg(enum mandocerr er, struct mparse *m,
98395c635efSGarrett D'Amore 		int ln, int col, const char *msg)
98495c635efSGarrett D'Amore {
98595c635efSGarrett D'Amore 	enum mandoclevel level;
98695c635efSGarrett D'Amore 
987*260e9a87SYuri Pankov 	level = MANDOCLEVEL_UNSUPP;
98895c635efSGarrett D'Amore 	while (er < mandoclimits[level])
98995c635efSGarrett D'Amore 		level--;
99095c635efSGarrett D'Amore 
991*260e9a87SYuri Pankov 	if (level < m->wlevel && er != MANDOCERR_FILE)
99295c635efSGarrett D'Amore 		return;
99395c635efSGarrett D'Amore 
99495c635efSGarrett D'Amore 	if (m->mmsg)
99595c635efSGarrett D'Amore 		(*m->mmsg)(er, level, m->file, ln, col, msg);
99695c635efSGarrett D'Amore 
99795c635efSGarrett D'Amore 	if (m->file_status < level)
99895c635efSGarrett D'Amore 		m->file_status = level;
99995c635efSGarrett D'Amore }
100095c635efSGarrett D'Amore 
100195c635efSGarrett D'Amore const char *
mparse_strerror(enum mandocerr er)100295c635efSGarrett D'Amore mparse_strerror(enum mandocerr er)
100395c635efSGarrett D'Amore {
100495c635efSGarrett D'Amore 
100595c635efSGarrett D'Amore 	return(mandocerrs[er]);
100695c635efSGarrett D'Amore }
100795c635efSGarrett D'Amore 
100895c635efSGarrett D'Amore const char *
mparse_strlevel(enum mandoclevel lvl)100995c635efSGarrett D'Amore mparse_strlevel(enum mandoclevel lvl)
101095c635efSGarrett D'Amore {
101195c635efSGarrett D'Amore 	return(mandoclevels[lvl]);
101295c635efSGarrett D'Amore }
101395c635efSGarrett D'Amore 
101495c635efSGarrett D'Amore void
mparse_keep(struct mparse * p)101595c635efSGarrett D'Amore mparse_keep(struct mparse *p)
101695c635efSGarrett D'Amore {
101795c635efSGarrett D'Amore 
101895c635efSGarrett D'Amore 	assert(NULL == p->secondary);
101995c635efSGarrett D'Amore 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
102095c635efSGarrett D'Amore }
102195c635efSGarrett D'Amore 
102295c635efSGarrett D'Amore const char *
mparse_getkeep(const struct mparse * p)102395c635efSGarrett D'Amore mparse_getkeep(const struct mparse *p)
102495c635efSGarrett D'Amore {
102595c635efSGarrett D'Amore 
102695c635efSGarrett D'Amore 	assert(p->secondary);
102795c635efSGarrett D'Amore 	return(p->secondary->sz ? p->secondary->buf : NULL);
102895c635efSGarrett D'Amore }
1029