xref: /freebsd/contrib/mandoc/roff.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /* $Id: roff.c,v 1.400 2023/10/24 20:53:12 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42 
43 /* Maximum number of string expansions per line, to break infinite loops. */
44 #define	EXPAND_LIMIT	1000
45 
46 /*
 * Types of definitions of macros and strings.
 * Each kind is a distinct bit, so several kinds can be combined
 * into one mask.  ROFFDEF_ANY is the union of the four kinds of
 * existing definitions; ROFFDEF_UNDEF is a separate bit that is
 * not contained in ROFFDEF_ANY.
 */
47 #define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
48 #define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
49 #define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
50 #define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
51 #define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
52 			 ROFFDEF_REN | ROFFDEF_STD)
53 #define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
54 
55 /* --- data types --------------------------------------------------------- */
56 
57 /*
58  * An incredibly-simple string buffer:
 * a character pointer paired with the cached length of the string.
 * It is used for the keys and values of struct roffkv, for the
 * register names in struct roffreg, and for the entries of the
 * xtab translation table in struct roff.
59  */
60 struct	roffstr {
61 	char		*p; /* nil-terminated buffer */
62 	size_t		 sz; /* saved strlen(p), so it need not be recomputed */
63 };
64 
65 /*
66  * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
67  */
68 struct	roffkv {
69 	struct roffstr	 key; /* name the entry is found by */
70 	struct roffstr	 val; /* value stored under that name */
71 	struct roffkv	*next; /* next in list */
72 };
73 
74 /*
75  * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
76  */
77 struct	roffreg {
78 	struct roffstr	 key; /* name of the register */
79 	int		 val; /* value of the register */
80 	int		 step; /* presumably the auto-increment step; TODO confirm */
81 	struct roffreg	*next; /* next in list */
82 };
83 
84 /*
85  * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built
 * by roffhash_alloc().  The name is stored inline in a flexible
 * array member, so each entry is allocated with room for its name,
 * and the offset of the name is what the hash table is keyed on.
86  */
87 struct	roffreq {
88 	enum roff_tok	 tok; /* token ID the name maps to */
89 	char		 name[]; /* nil-terminated name, serves as hash key */
90 };
91 
92 /*
93  * A macro processing context.
94  * More than one is needed when macro calls are nested.
 * The contexts are kept in the mstack array of struct roff.
95  */
96 struct	mctx {
97 	char		**argv; /* argument vector; the array is freed by roff_free() */
98 	int		 argc; /* presumably the number of arguments in use; TODO confirm */
99 	int		 argsz; /* presumably the allocated size of argv; TODO confirm */
100 };
101 
/*
 * The complete state of one roff parser.
 * Objects are created by roff_alloc(), returned to a clean state
 * between input files by roff_reset(), and destroyed by roff_free().
 * Both stack positions start out as -1 in roff_alloc().
 */
102 struct	roff {
103 	struct roff_man	*man; /* mdoc or man parser */
104 	struct roffnode	*last; /* leaf of stack */
105 	struct mctx	*mstack; /* stack of macro contexts */
106 	int		*rstack; /* stack of inverted `ie' values */
107 	struct ohash	*reqtab; /* request lookup table */
108 	struct roffreg	*regtab; /* number registers */
109 	struct roffkv	*strtab; /* user-defined strings & macros */
110 	struct roffkv	*rentab; /* renamed strings & macros */
111 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
112 	struct roffstr	*xtab; /* single-byte trans table (`tr') */
113 	const char	*current_string; /* value of last called user macro */
114 	struct tbl_node	*first_tbl; /* first table parsed */
115 	struct tbl_node	*last_tbl; /* last table parsed */
116 	struct tbl_node	*tbl; /* current table being parsed */
117 	struct eqn_node	*last_eqn; /* equation parser */
118 	struct eqn_node	*eqn; /* active equation parser */
119 	int		 eqn_inline; /* current equation is inline */
120 	int		 options; /* parse options */
121 	int		 mstacksz; /* current size of mstack */
122 	int		 mstackpos; /* position in mstack */
123 	int		 rstacksz; /* current size limit of rstack */
124 	int		 rstackpos; /* position in rstack */
125 	int		 format; /* current file in mdoc or man format */
126 	char		 control; /* control character */
127 	char		 escape; /* escape character */
128 };
129 
130 /*
131  * A macro definition, condition, or ignored block.
 * Nodes form a stack that is linked through the parent pointers;
 * the top of the stack is the last member of struct roff.
 * Nodes are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees name and end.
132  */
133 struct	roffnode {
134 	enum roff_tok	 tok; /* type of node */
135 	struct roffnode	*parent; /* up one in stack */
136 	int		 line; /* parse line */
137 	int		 col; /* parse col */
138 	char		*name; /* node name, e.g. macro name */
139 	char		*end; /* custom end macro of the block */
140 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
141 	int		 rule; /* content is: 1=evaluated 0=skipped */
142 };
143 
/*
 * The parameter list shared by all request handlers.
 * Declaring every handler with ROFF_ARGS gives them all the same
 * signature, so that they can be stored as roffproc function
 * pointers in the roffs[] table.
 */
144 #define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
145 			 enum roff_tok tok, /* tok of macro */ \
146 			 struct buf *buf, /* input buffer */ \
147 			 int ln, /* parse line */ \
148 			 int ppos, /* original pos in buffer */ \
149 			 int pos, /* current pos in buffer */ \
150 			 int *offs /* reset offset of buffer data */
151 
/* Pointer to a request handler taking the ROFF_ARGS parameters. */
152 typedef	int (*roffproc)(ROFF_ARGS);
153 
/*
 * The handlers and flags of one request.
 * The roffs[] table contains one object of this type per token.
 * Handlers that do not apply to a request are NULL.
 */
154 struct	roffmac {
155 	roffproc	 proc; /* process new macro */
156 	roffproc	 text; /* process as child text of macro */
157 	roffproc	 sub; /* process as child of macro */
158 	int		 flags;
159 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
160 };
161 
/*
 * A predefined string: the name used in the input
 * and the text it is replaced with.
 */
162 struct	predef {
163 	const char	*name; /* predefined input name */
164 	const char	*str; /* replacement symbol */
165 };
166 
/*
 * Expands to one initializer for a struct predef, including the
 * trailing comma, such that a sequence of PREDEF() invocations can
 * form the body of an array initializer.  The predefs[] array is
 * initialized by including "predefs.in", which presumably consists
 * of such invocations -- confirm against that file.
 */
167 #define	PREDEF(__name, __str) \
168 	{ (__name), (__str) },
169 
170 /* --- function prototypes ------------------------------------------------ */
171 
/*
 * Forward declarations of the functions private to this file.
 * Those declared with ROFF_ARGS share the roffproc signature;
 * they are the handlers referenced from the roffs[] table below.
 * The remaining ones are helper functions with individual signatures.
 */
172 static	int		 roffnode_cleanscope(struct roff *);
173 static	int		 roffnode_pop(struct roff *);
174 static	void		 roffnode_push(struct roff *, enum roff_tok,
175 				const char *, int, int);
176 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
177 static	int		 roff_als(ROFF_ARGS);
178 static	int		 roff_block(ROFF_ARGS);
179 static	int		 roff_block_text(ROFF_ARGS);
180 static	int		 roff_block_sub(ROFF_ARGS);
181 static	int		 roff_break(ROFF_ARGS);
182 static	int		 roff_cblock(ROFF_ARGS);
183 static	int		 roff_cc(ROFF_ARGS);
184 static	int		 roff_ccond(struct roff *, int, int);
185 static	int		 roff_char(ROFF_ARGS);
186 static	int		 roff_cond(ROFF_ARGS);
187 static	int		 roff_cond_checkend(ROFF_ARGS);
188 static	int		 roff_cond_text(ROFF_ARGS);
189 static	int		 roff_cond_sub(ROFF_ARGS);
190 static	int		 roff_ds(ROFF_ARGS);
191 static	int		 roff_ec(ROFF_ARGS);
192 static	int		 roff_eo(ROFF_ARGS);
193 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
194 static	int		 roff_evalcond(struct roff *, int, char *, int *);
195 static	int		 roff_evalnum(struct roff *, int,
196 				const char *, int *, int *, int);
197 static	int		 roff_evalpar(struct roff *, int,
198 				const char *, int *, int *, int);
199 static	int		 roff_evalstrcond(const char *, int *);
200 static	int		 roff_expand(struct roff *, struct buf *,
201 				int, int, char);
202 static	void		 roff_expand_patch(struct buf *, int,
203 				const char *, int);
204 static	void		 roff_free1(struct roff *);
205 static	void		 roff_freereg(struct roffreg *);
206 static	void		 roff_freestr(struct roffkv *);
207 static	size_t		 roff_getname(struct roff *, char **, int, int);
208 static	int		 roff_getnum(const char *, int *, int *, int);
209 static	int		 roff_getop(const char *, int *, char *);
210 static	int		 roff_getregn(struct roff *,
211 				const char *, size_t, char);
212 static	int		 roff_getregro(const struct roff *,
213 				const char *name);
214 static	const char	*roff_getstrn(struct roff *,
215 				const char *, size_t, int *);
216 static	int		 roff_hasregn(const struct roff *,
217 				const char *, size_t);
218 static	int		 roff_insec(ROFF_ARGS);
219 static	int		 roff_it(ROFF_ARGS);
220 static	int		 roff_line_ignore(ROFF_ARGS);
221 static	void		 roff_man_alloc1(struct roff_man *);
222 static	void		 roff_man_free1(struct roff_man *);
223 static	int		 roff_manyarg(ROFF_ARGS);
224 static	int		 roff_mc(ROFF_ARGS);
225 static	int		 roff_noarg(ROFF_ARGS);
226 static	int		 roff_nop(ROFF_ARGS);
227 static	int		 roff_nr(ROFF_ARGS);
228 static	int		 roff_onearg(ROFF_ARGS);
229 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
230 				int, int);
231 static	int		 roff_parse_comment(struct roff *, struct buf *,
232 				int, int, char);
233 static	int		 roff_parsetext(struct roff *, struct buf *,
234 				int, int *);
235 static	int		 roff_renamed(ROFF_ARGS);
236 static	int		 roff_req_or_macro(ROFF_ARGS);
237 static	int		 roff_return(ROFF_ARGS);
238 static	int		 roff_rm(ROFF_ARGS);
239 static	int		 roff_rn(ROFF_ARGS);
240 static	int		 roff_rr(ROFF_ARGS);
241 static	void		 roff_setregn(struct roff *, const char *,
242 				size_t, int, char, int);
243 static	void		 roff_setstr(struct roff *,
244 				const char *, const char *, int);
245 static	void		 roff_setstrn(struct roffkv **, const char *,
246 				size_t, const char *, size_t, int);
247 static	int		 roff_shift(ROFF_ARGS);
248 static	int		 roff_so(ROFF_ARGS);
249 static	int		 roff_tr(ROFF_ARGS);
250 static	int		 roff_Dd(ROFF_ARGS);
251 static	int		 roff_TE(ROFF_ARGS);
252 static	int		 roff_TS(ROFF_ARGS);
253 static	int		 roff_EQ(ROFF_ARGS);
254 static	int		 roff_EN(ROFF_ARGS);
255 static	int		 roff_T_(ROFF_ARGS);
256 static	int		 roff_unsupp(ROFF_ARGS);
257 static	int		 roff_userdef(ROFF_ARGS);
258 
259 /* --- constant data ------------------------------------------------------ */
260 
/* Option bits for the numeric parsing functions named in the comments. */
261 #define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
262 #define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
263 
/*
 * Names of all requests and macros, indexed by token ID.
 * The order of the entries is significant: the first part of this
 * table runs parallel to the roffs[] handler table below, entry by
 * entry, so both tables have to be edited together.
 * Slots without a name contain NULL; roffhash_alloc() skips those.
 * After the roff requests, the names of what appear to be the
 * mdoc(7) macros and then the man(7) macros follow, each section
 * closed by a NULL slot.
 */
264 const char *__roff_name[MAN_MAX + 1] = {
265 	"br",		"ce",		"fi",		"ft",
266 	"ll",		"mc",		"nf",
267 	"po",		"rj",		"sp",
268 	"ta",		"ti",		NULL,	/* NULL: ROFF_MAX slot in roffs[] */
269 	"ab",		"ad",		"af",		"aln",
270 	"als",		"am",		"am1",		"ami",
271 	"ami1",		"as",		"as1",		"asciify",
272 	"backtrace",	"bd",		"bleedat",	"blm",
273         "box",		"boxa",		"bp",		"BP",
274 	"break",	"breakchar",	"brnl",		"brp",
275 	"brpnl",	"c2",		"cc",
276 	"cf",		"cflags",	"ch",		"char",
277 	"chop",		"class",	"close",	"CL",
278 	"color",	"composite",	"continue",	"cp",
279 	"cropat",	"cs",		"cu",		"da",
280 	"dch",		"Dd",		"de",		"de1",
281 	"defcolor",	"dei",		"dei1",		"device",
282 	"devicem",	"di",		"do",		"ds",
283 	"ds1",		"dwh",		"dt",		"ec",
284 	"ecr",		"ecs",		"el",		"em",
285 	"EN",		"eo",		"EP",		"EQ",
286 	"errprint",	"ev",		"evc",		"ex",
287 	"fallback",	"fam",		"fc",		"fchar",
288 	"fcolor",	"fdeferlig",	"feature",	"fkern",
289 	"fl",		"flig",		"fp",		"fps",
290 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
291 	"fzoom",	"gcolor",	"hc",		"hcode",
292 	"hidechar",	"hla",		"hlm",		"hpf",
293 	"hpfa",		"hpfcode",	"hw",		"hy",
294 	"hylang",	"hylen",	"hym",		"hypp",
295 	"hys",		"ie",		"if",		"ig",
296 	"index",	"it",		"itc",		"IX",
297 	"kern",		"kernafter",	"kernbefore",	"kernpair",
298 	"lc",		"lc_ctype",	"lds",		"length",
299 	"letadj",	"lf",		"lg",		"lhang",
300 	"linetabs",	"lnr",		"lnrf",		"lpfx",
301 	"ls",		"lsm",		"lt",
302 	"mediasize",	"minss",	"mk",		"mso",
303 	"na",		"ne",		"nh",		"nhychar",
304 	"nm",		"nn",		"nop",		"nr",
305 	"nrf",		"nroff",	"ns",		"nx",
306 	"open",		"opena",	"os",		"output",
307 	"padj",		"papersize",	"pc",		"pev",
308 	"pi",		"PI",		"pl",		"pm",
309 	"pn",		"pnr",		"ps",
310 	"psbb",		"pshape",	"pso",		"ptr",
311 	"pvs",		"rchar",	"rd",		"recursionlimit",
312 	"return",	"rfschar",	"rhang",
313 	"rm",		"rn",		"rnn",		"rr",
314 	"rs",		"rt",		"schar",	"sentchar",
315 	"shc",		"shift",	"sizes",	"so",
316 	"spacewidth",	"special",	"spreadwarn",	"ss",
317 	"sty",		"substring",	"sv",		"sy",
318 	"T&",		"tc",		"TE",
319 	"TH",		"tkf",		"tl",
320 	"tm",		"tm1",		"tmc",		"tr",
321 	"track",	"transchar",	"trf",		"trimat",
322 	"trin",		"trnt",		"troff",	"TS",
323 	"uf",		"ul",		"unformat",	"unwatch",
324 	"unwatchn",	"vpt",		"vs",		"warn",
325 	"warnscale",	"watch",	"watchlength",	"watchn",
326 	"wh",		"while",	"write",	"writec",
327 	"writem",	"xflag",	".",		NULL,	/* NULL: slot of roff_renamed() */
328 	NULL,		"text",	/* NULL: slot of roff_userdef(); "text" follows the last roffs[] entry */
329 	"Dd",		"Dt",		"Os",		"Sh",
330 	"Ss",		"Pp",		"D1",		"Dl",
331 	"Bd",		"Ed",		"Bl",		"El",
332 	"It",		"Ad",		"An",		"Ap",
333 	"Ar",		"Cd",		"Cm",		"Dv",
334 	"Er",		"Ev",		"Ex",		"Fa",
335 	"Fd",		"Fl",		"Fn",		"Ft",
336 	"Ic",		"In",		"Li",		"Nd",
337 	"Nm",		"Op",		"Ot",		"Pa",
338 	"Rv",		"St",		"Va",		"Vt",
339 	"Xr",		"%A",		"%B",		"%D",
340 	"%I",		"%J",		"%N",		"%O",
341 	"%P",		"%R",		"%T",		"%V",
342 	"Ac",		"Ao",		"Aq",		"At",
343 	"Bc",		"Bf",		"Bo",		"Bq",
344 	"Bsx",		"Bx",		"Db",		"Dc",
345 	"Do",		"Dq",		"Ec",		"Ef",
346 	"Em",		"Eo",		"Fx",		"Ms",
347 	"No",		"Ns",		"Nx",		"Ox",
348 	"Pc",		"Pf",		"Po",		"Pq",
349 	"Qc",		"Ql",		"Qo",		"Qq",
350 	"Re",		"Rs",		"Sc",		"So",
351 	"Sq",		"Sm",		"Sx",		"Sy",
352 	"Tn",		"Ux",		"Xc",		"Xo",
353 	"Fo",		"Fc",		"Oo",		"Oc",
354 	"Bk",		"Ek",		"Bt",		"Hf",
355 	"Fr",		"Ud",		"Lb",		"Lp",
356 	"Lk",		"Mt",		"Brq",		"Bro",
357 	"Brc",		"%C",		"Es",		"En",
358 	"Dx",		"%Q",		"%U",		"Ta",
359 	"Tg",		NULL,	/* NULL: presumably the end of the mdoc(7) names; confirm in roff.h */
360 	"TH",		"SH",		"SS",		"TP",
361 	"TQ",
362 	"LP",		"PP",		"P",		"IP",
363 	"HP",		"SM",		"SB",		"BI",
364 	"IB",		"BR",		"RB",		"R",
365 	"B",		"I",		"IR",		"RI",
366 	"RE",		"RS",		"DT",		"UC",
367 	"PD",		"AT",		"in",
368 	"SY",		"YS",		"OP",
369 	"EX",		"EE",		"UR",
370 	"UE",		"MT",		"ME",		"MR",
371 	NULL	/* final slot of the MAN_MAX + 1 element array */
372 };
/* Read-only view of the table; this is what roffhash_alloc() reads. */
373 const	char *const *roff_name = __roff_name;
374 
/*
 * Handler table for all roff requests, one entry per token.
 * The entries are in the same order as the names at the beginning
 * of __roff_name[]; the trailing comment on each line repeats the
 * name belonging to the entry.  The two tables must be kept in sync.
 * The members of each entry are { proc, text, sub, flags }, see
 * struct roffmac.  Only block and conditional requests provide
 * text and sub handlers.
 */
375 static	struct roffmac	 roffs[TOKEN_NONE] = {
376 	{ roff_noarg, NULL, NULL, 0 },  /* br */
377 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
378 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
379 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
380 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
381 	{ roff_mc, NULL, NULL, 0 },  /* mc */
382 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
383 	{ roff_onearg, NULL, NULL, 0 },  /* po */
384 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
385 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
386 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
387 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
388 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
389 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
390 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
391 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
392 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
393 	{ roff_als, NULL, NULL, 0 },  /* als */
394 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
395 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
396 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
397 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
398 	{ roff_ds, NULL, NULL, 0 },  /* as */
399 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
400 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
401 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
402 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
403 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
404 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
405 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
406 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
407 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
408 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
409 	{ roff_break, NULL, NULL, 0 },  /* break */
410 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
411 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
412 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
413 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
414 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
415 	{ roff_cc, NULL, NULL, 0 },  /* cc */
416 	{ roff_insec, NULL, NULL, 0 },  /* cf */
417 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
418 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
419 	{ roff_char, NULL, NULL, 0 },  /* char */
420 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
421 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
422 	{ roff_insec, NULL, NULL, 0 },  /* close */
423 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
425 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
426 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
427 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
428 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
429 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
430 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
431 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
432 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
433 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
434 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
435 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
436 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
437 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
438 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
439 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
440 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
441 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
442 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
443 	{ roff_ds, NULL, NULL, 0 },  /* ds */
444 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
445 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
446 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
447 	{ roff_ec, NULL, NULL, 0 },  /* ec */
448 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
449 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
450 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
451 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
452 	{ roff_EN, NULL, NULL, 0 },  /* EN */
453 	{ roff_eo, NULL, NULL, 0 },  /* eo */
454 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
455 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
456 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
457 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
458 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
459 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
460 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
462 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
463 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
465 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
472 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
473 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
488 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
489 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
490 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
491 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
492 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
493 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
494 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
495 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
496 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
497 	{ roff_it, NULL, NULL, 0 },  /* it */
498 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
499 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
500 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
501 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
502 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
504 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
505 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
506 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
507 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
508 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
509 	{ roff_insec, NULL, NULL, 0 },  /* lf */
510 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
512 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
513 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
514 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
515 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
516 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
517 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
518 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
522 	{ roff_insec, NULL, NULL, 0 },  /* mso */
523 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
524 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
525 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
526 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
527 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
528 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
529 	{ roff_nop, NULL, NULL, 0 },  /* nop */
530 	{ roff_nr, NULL, NULL, 0 },  /* nr */
531 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
532 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
534 	{ roff_insec, NULL, NULL, 0 },  /* nx */
535 	{ roff_insec, NULL, NULL, 0 },  /* open */
536 	{ roff_insec, NULL, NULL, 0 },  /* opena */
537 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
538 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
539 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
543 	{ roff_insec, NULL, NULL, 0 },  /* pi */
544 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
545 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
546 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
547 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
548 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
550 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
551 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
552 	{ roff_insec, NULL, NULL, 0 },  /* pso */
553 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
554 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
555 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
556 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
557 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
558 	{ roff_return, NULL, NULL, 0 },  /* return */
559 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
560 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
561 	{ roff_rm, NULL, NULL, 0 },  /* rm */
562 	{ roff_rn, NULL, NULL, 0 },  /* rn */
563 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
564 	{ roff_rr, NULL, NULL, 0 },  /* rr */
565 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
567 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
570 	{ roff_shift, NULL, NULL, 0 },  /* shift */
571 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
572 	{ roff_so, NULL, NULL, 0 },  /* so */
573 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
574 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
575 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
576 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
577 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
578 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
579 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
580 	{ roff_insec, NULL, NULL, 0 },  /* sy */
581 	{ roff_T_, NULL, NULL, 0 },  /* T& */
582 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
583 	{ roff_TE, NULL, NULL, 0 },  /* TE */
584 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
585 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
586 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
587 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
588 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
589 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
590 	{ roff_tr, NULL, NULL, 0 },  /* tr */
591 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
592 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
593 	{ roff_insec, NULL, NULL, 0 },  /* trf */
594 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
595 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
596 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
597 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
598 	{ roff_TS, NULL, NULL, 0 },  /* TS */
599 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
601 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
602 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
604 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
605 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
606 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
607 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
608 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
609 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
610 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
611 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
612 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
613 	{ roff_insec, NULL, NULL, 0 },  /* write */
614 	{ roff_insec, NULL, NULL, 0 },  /* writec */
615 	{ roff_insec, NULL, NULL, 0 },  /* writem */
616 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
617 	{ roff_cblock, NULL, NULL, 0 },  /* . */
618 	{ roff_renamed, NULL, NULL, 0 },  /* nameless slot: renamed macros */
619 	{ roff_userdef, NULL, NULL, 0 }  /* nameless slot: user-defined macros */
620 };
621 
622 /*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in".  Because the array is
 * declared with the explicit size PREDEFS_MAX, the constant has to
 * be kept equal to the number of entries in that file: with fewer
 * entries, the remaining elements would be zero-initialized,
 * i.e. have NULL name and str pointers.
 */
623 #define	PREDEFS_MAX	 38
624 static	const struct predef predefs[PREDEFS_MAX] = {
625 #include "predefs.in"
626 };
627 
/*
 * Parser state kept in file-scope variables rather than in
 * struct roff, and consequently shared by all parser objects.
 * All four variables are cleared by roff_reset().
 */
628 static	int	 roffce_lines;	/* number of input lines to center */
629 static	struct roff_node *roffce_node;  /* active request */
630 static	int	 roffit_lines;  /* number of lines to delay */
631 static	char	*roffit_macro;  /* nil-terminated macro line */
632 
633 
634 /* --- request table ------------------------------------------------------ */
635 
636 struct ohash *
637 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
638 {
639 	struct ohash	*htab;
640 	struct roffreq	*req;
641 	enum roff_tok	 tok;
642 	size_t		 sz;
643 	unsigned int	 slot;
644 
645 	htab = mandoc_malloc(sizeof(*htab));
646 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
647 
648 	for (tok = mintok; tok < maxtok; tok++) {
649 		if (roff_name[tok] == NULL)
650 			continue;
651 		sz = strlen(roff_name[tok]);
652 		req = mandoc_malloc(sizeof(*req) + sz + 1);
653 		req->tok = tok;
654 		memcpy(req->name, roff_name[tok], sz + 1);
655 		slot = ohash_qlookup(htab, req->name);
656 		ohash_insert(htab, slot, req);
657 	}
658 	return htab;
659 }
660 
/*
 * Free all entries of a table built by roffhash_alloc(),
 * then the table itself.  A NULL pointer is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
675 
676 enum roff_tok
677 roffhash_find(struct ohash *htab, const char *name, size_t sz)
678 {
679 	struct roffreq	*req;
680 	const char	*end;
681 
682 	if (sz) {
683 		end = name + sz;
684 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
685 	} else
686 		req = ohash_find(htab, ohash_qlookup(htab, name));
687 	return req == NULL ? TOKEN_NONE : req->tok;
688 }
689 
690 /* --- stack of request blocks -------------------------------------------- */
691 
692 /*
693  * Pop the current node off of the stack of roff instructions currently
694  * pending.  Return 1 if it is a loop or 0 otherwise.
695  */
696 static int
697 roffnode_pop(struct roff *r)
698 {
699 	struct roffnode	*p;
700 	int		 inloop;
701 
702 	p = r->last;
703 	inloop = p->tok == ROFF_while;
704 	r->last = p->parent;
705 	free(p->name);
706 	free(p->end);
707 	free(p);
708 	return inloop;
709 }
710 
711 /*
712  * Push a roff node onto the instruction stack.  This must later be
713  * removed with roffnode_pop().
714  */
715 static void
716 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
717 		int line, int col)
718 {
719 	struct roffnode	*p;
720 
721 	p = mandoc_calloc(1, sizeof(struct roffnode));
722 	p->tok = tok;
723 	if (name)
724 		p->name = mandoc_strdup(name);
725 	p->parent = r->last;
726 	p->line = line;
727 	p->col = col;
728 	p->rule = p->parent ? p->parent->rule : 0;
729 
730 	r->last = p;
731 }
732 
733 /* --- roff parser state data management ---------------------------------- */
734 
735 static void
736 roff_free1(struct roff *r)
737 {
738 	int		 i;
739 
740 	tbl_free(r->first_tbl);
741 	r->first_tbl = r->last_tbl = r->tbl = NULL;
742 
743 	eqn_free(r->last_eqn);
744 	r->last_eqn = r->eqn = NULL;
745 
746 	while (r->mstackpos >= 0)
747 		roff_userret(r);
748 
749 	while (r->last)
750 		roffnode_pop(r);
751 
752 	free (r->rstack);
753 	r->rstack = NULL;
754 	r->rstacksz = 0;
755 	r->rstackpos = -1;
756 
757 	roff_freereg(r->regtab);
758 	r->regtab = NULL;
759 
760 	roff_freestr(r->strtab);
761 	roff_freestr(r->rentab);
762 	roff_freestr(r->xmbtab);
763 	r->strtab = r->rentab = r->xmbtab = NULL;
764 
765 	if (r->xtab)
766 		for (i = 0; i < 128; i++)
767 			free(r->xtab[i].p);
768 	free(r->xtab);
769 	r->xtab = NULL;
770 }
771 
772 void
773 roff_reset(struct roff *r)
774 {
775 	roff_free1(r);
776 	r->options |= MPARSE_COMMENT;
777 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
778 	r->control = '\0';
779 	r->escape = '\\';
780 	roffce_lines = 0;
781 	roffce_node = NULL;
782 	roffit_lines = 0;
783 	roffit_macro = NULL;
784 }
785 
786 void
787 roff_free(struct roff *r)
788 {
789 	int		 i;
790 
791 	roff_free1(r);
792 	for (i = 0; i < r->mstacksz; i++)
793 		free(r->mstack[i].argv);
794 	free(r->mstack);
795 	roffhash_free(r->reqtab);
796 	free(r);
797 }
798 
799 struct roff *
800 roff_alloc(int options)
801 {
802 	struct roff	*r;
803 
804 	r = mandoc_calloc(1, sizeof(struct roff));
805 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
806 	r->options = options | MPARSE_COMMENT;
807 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
808 	r->mstackpos = -1;
809 	r->rstackpos = -1;
810 	r->escape = '\\';
811 	return r;
812 }
813 
814 /* --- syntax tree state data management ---------------------------------- */
815 
816 static void
817 roff_man_free1(struct roff_man *man)
818 {
819 	if (man->meta.first != NULL)
820 		roff_node_delete(man, man->meta.first);
821 	free(man->meta.msec);
822 	free(man->meta.vol);
823 	free(man->meta.os);
824 	free(man->meta.arch);
825 	free(man->meta.title);
826 	free(man->meta.name);
827 	free(man->meta.date);
828 	free(man->meta.sodest);
829 }
830 
831 void
832 roff_state_reset(struct roff_man *man)
833 {
834 	man->last = man->meta.first;
835 	man->last_es = NULL;
836 	man->flags = 0;
837 	man->lastsec = man->lastnamed = SEC_NONE;
838 	man->next = ROFF_NEXT_CHILD;
839 	roff_setreg(man->roff, "nS", 0, '=');
840 }
841 
842 static void
843 roff_man_alloc1(struct roff_man *man)
844 {
845 	memset(&man->meta, 0, sizeof(man->meta));
846 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
847 	man->meta.first->type = ROFFT_ROOT;
848 	man->meta.macroset = MACROSET_NONE;
849 	roff_state_reset(man);
850 }
851 
/*
 * Throw away the syntax tree and the metadata,
 * then start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Freeing has to come first: allocation overwrites the metadata. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
858 
859 void
860 roff_man_free(struct roff_man *man)
861 {
862 	roff_man_free1(man);
863 	free(man->os_r);
864 	free(man);
865 }
866 
867 struct roff_man *
868 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869 {
870 	struct roff_man *man;
871 
872 	man = mandoc_calloc(1, sizeof(*man));
873 	man->roff = roff;
874 	man->os_s = os_s;
875 	man->quick = quick;
876 	roff_man_alloc1(man);
877 	roff->man = man;
878 	return man;
879 }
880 
881 /* --- syntax tree handling ----------------------------------------------- */
882 
883 struct roff_node *
884 roff_node_alloc(struct roff_man *man, int line, int pos,
885 	enum roff_type type, int tok)
886 {
887 	struct roff_node	*n;
888 
889 	n = mandoc_calloc(1, sizeof(*n));
890 	n->line = line;
891 	n->pos = pos;
892 	n->tok = tok;
893 	n->type = type;
894 	n->sec = man->lastsec;
895 
896 	if (man->flags & MDOC_SYNOPSIS)
897 		n->flags |= NODE_SYNPRETTY;
898 	else
899 		n->flags &= ~NODE_SYNPRETTY;
900 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901 		n->flags |= NODE_NOFILL;
902 	else
903 		n->flags &= ~NODE_NOFILL;
904 	if (man->flags & MDOC_NEWLINE)
905 		n->flags |= NODE_LINE;
906 	man->flags &= ~MDOC_NEWLINE;
907 
908 	return n;
909 }
910 
911 void
912 roff_node_append(struct roff_man *man, struct roff_node *n)
913 {
914 
915 	switch (man->next) {
916 	case ROFF_NEXT_SIBLING:
917 		if (man->last->next != NULL) {
918 			n->next = man->last->next;
919 			man->last->next->prev = n;
920 		} else
921 			man->last->parent->last = n;
922 		man->last->next = n;
923 		n->prev = man->last;
924 		n->parent = man->last->parent;
925 		break;
926 	case ROFF_NEXT_CHILD:
927 		if (man->last->child != NULL) {
928 			n->next = man->last->child;
929 			man->last->child->prev = n;
930 		} else
931 			man->last->last = n;
932 		man->last->child = n;
933 		n->parent = man->last;
934 		break;
935 	default:
936 		abort();
937 	}
938 	man->last = n;
939 
940 	switch (n->type) {
941 	case ROFFT_HEAD:
942 		n->parent->head = n;
943 		break;
944 	case ROFFT_BODY:
945 		if (n->end != ENDBODY_NOT)
946 			return;
947 		n->parent->body = n;
948 		break;
949 	case ROFFT_TAIL:
950 		n->parent->tail = n;
951 		break;
952 	default:
953 		return;
954 	}
955 
956 	/*
957 	 * Copy over the normalised-data pointer of our parent.  Not
958 	 * everybody has one, but copying a null pointer is fine.
959 	 */
960 
961 	n->norm = n->parent->norm;
962 	assert(n->parent->type == ROFFT_BLOCK);
963 }
964 
965 void
966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967 {
968 	struct roff_node	*n;
969 
970 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971 	n->string = roff_strdup(man->roff, word);
972 	roff_node_append(man, n);
973 	n->flags |= NODE_VALID | NODE_ENDED;
974 	man->next = ROFF_NEXT_SIBLING;
975 }
976 
977 void
978 roff_word_append(struct roff_man *man, const char *word)
979 {
980 	struct roff_node	*n;
981 	char			*addstr, *newstr;
982 
983 	n = man->last;
984 	addstr = roff_strdup(man->roff, word);
985 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986 	free(addstr);
987 	free(n->string);
988 	n->string = newstr;
989 	man->next = ROFF_NEXT_SIBLING;
990 }
991 
992 void
993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 	struct roff_node	*n;
996 
997 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998 	roff_node_append(man, n);
999 	man->next = ROFF_NEXT_CHILD;
1000 }
1001 
1002 struct roff_node *
1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004 {
1005 	struct roff_node	*n;
1006 
1007 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008 	roff_node_append(man, n);
1009 	man->next = ROFF_NEXT_CHILD;
1010 	return n;
1011 }
1012 
1013 struct roff_node *
1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015 {
1016 	struct roff_node	*n;
1017 
1018 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019 	roff_node_append(man, n);
1020 	man->next = ROFF_NEXT_CHILD;
1021 	return n;
1022 }
1023 
1024 struct roff_node *
1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026 {
1027 	struct roff_node	*n;
1028 
1029 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030 	roff_node_append(man, n);
1031 	man->next = ROFF_NEXT_CHILD;
1032 	return n;
1033 }
1034 
1035 static void
1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037 {
1038 	struct roff_node	*n;
1039 	struct tbl_span		*span;
1040 
1041 	if (man->meta.macroset == MACROSET_MAN)
1042 		man_breakscope(man, ROFF_TS);
1043 	while ((span = tbl_span(tbl)) != NULL) {
1044 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045 		n->span = span;
1046 		roff_node_append(man, n);
1047 		n->flags |= NODE_VALID | NODE_ENDED;
1048 		man->next = ROFF_NEXT_SIBLING;
1049 	}
1050 }
1051 
1052 void
1053 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054 {
1055 
1056 	/* Adjust siblings. */
1057 
1058 	if (n->prev)
1059 		n->prev->next = n->next;
1060 	if (n->next)
1061 		n->next->prev = n->prev;
1062 
1063 	/* Adjust parent. */
1064 
1065 	if (n->parent != NULL) {
1066 		if (n->parent->child == n)
1067 			n->parent->child = n->next;
1068 		if (n->parent->last == n)
1069 			n->parent->last = n->prev;
1070 	}
1071 
1072 	/* Adjust parse point. */
1073 
1074 	if (man == NULL)
1075 		return;
1076 	if (man->last == n) {
1077 		if (n->prev == NULL) {
1078 			man->last = n->parent;
1079 			man->next = ROFF_NEXT_CHILD;
1080 		} else {
1081 			man->last = n->prev;
1082 			man->next = ROFF_NEXT_SIBLING;
1083 		}
1084 	}
1085 	if (man->meta.first == n)
1086 		man->meta.first = NULL;
1087 }
1088 
1089 void
1090 roff_node_relink(struct roff_man *man, struct roff_node *n)
1091 {
1092 	roff_node_unlink(man, n);
1093 	n->prev = n->next = NULL;
1094 	roff_node_append(man, n);
1095 }
1096 
1097 void
1098 roff_node_free(struct roff_node *n)
1099 {
1100 
1101 	if (n->args != NULL)
1102 		mdoc_argv_free(n->args);
1103 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104 		free(n->norm);
1105 	eqn_box_free(n->eqn);
1106 	free(n->string);
1107 	free(n->tag);
1108 	free(n);
1109 }
1110 
1111 void
1112 roff_node_delete(struct roff_man *man, struct roff_node *n)
1113 {
1114 
1115 	while (n->child != NULL)
1116 		roff_node_delete(man, n->child);
1117 	roff_node_unlink(man, n);
1118 	roff_node_free(n);
1119 }
1120 
1121 int
1122 roff_node_transparent(struct roff_node *n)
1123 {
1124 	if (n == NULL)
1125 		return 0;
1126 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127 		return 1;
1128 	return roff_tok_transparent(n->tok);
1129 }
1130 
1131 int
1132 roff_tok_transparent(enum roff_tok tok)
1133 {
1134 	switch (tok) {
1135 	case ROFF_ft:
1136 	case ROFF_ll:
1137 	case ROFF_mc:
1138 	case ROFF_po:
1139 	case ROFF_ta:
1140 	case MDOC_Db:
1141 	case MDOC_Es:
1142 	case MDOC_Sm:
1143 	case MDOC_Tg:
1144 	case MAN_DT:
1145 	case MAN_UC:
1146 	case MAN_PD:
1147 	case MAN_AT:
1148 		return 1;
1149 	default:
1150 		return 0;
1151 	}
1152 }
1153 
1154 struct roff_node *
1155 roff_node_child(struct roff_node *n)
1156 {
1157 	for (n = n->child; roff_node_transparent(n); n = n->next)
1158 		continue;
1159 	return n;
1160 }
1161 
1162 struct roff_node *
1163 roff_node_prev(struct roff_node *n)
1164 {
1165 	do {
1166 		n = n->prev;
1167 	} while (roff_node_transparent(n));
1168 	return n;
1169 }
1170 
1171 struct roff_node *
1172 roff_node_next(struct roff_node *n)
1173 {
1174 	do {
1175 		n = n->next;
1176 	} while (roff_node_transparent(n));
1177 	return n;
1178 }
1179 
1180 void
1181 deroff(char **dest, const struct roff_node *n)
1182 {
1183 	char	*cp;
1184 	size_t	 sz;
1185 
1186 	if (n->string == NULL) {
1187 		for (n = n->child; n != NULL; n = n->next)
1188 			deroff(dest, n);
1189 		return;
1190 	}
1191 
1192 	/* Skip leading whitespace. */
1193 
1194 	for (cp = n->string; *cp != '\0'; cp++) {
1195 		if (cp[0] == '\\' && cp[1] != '\0' &&
1196 		    strchr(" %&0^|~", cp[1]) != NULL)
1197 			cp++;
1198 		else if ( ! isspace((unsigned char)*cp))
1199 			break;
1200 	}
1201 
1202 	/* Skip trailing backslash. */
1203 
1204 	sz = strlen(cp);
1205 	if (sz > 0 && cp[sz - 1] == '\\')
1206 		sz--;
1207 
1208 	/* Skip trailing whitespace. */
1209 
1210 	for (; sz; sz--)
1211 		if ( ! isspace((unsigned char)cp[sz-1]))
1212 			break;
1213 
1214 	/* Skip empty strings. */
1215 
1216 	if (sz == 0)
1217 		return;
1218 
1219 	if (*dest == NULL) {
1220 		*dest = mandoc_strndup(cp, sz);
1221 		return;
1222 	}
1223 
1224 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225 	free(*dest);
1226 	*dest = cp;
1227 }
1228 
1229 /* --- main functions of the roff parser ---------------------------------- */
1230 
/*
 * Save comments preceding the title macro, for example in order to
 * preserve Copyright and license headers in HTML output,
 * provide diagnostics about RCS ids and trailing whitespace in comments,
 * then discard comments including preceding whitespace.
 * This function also handles input line continuation.
 *
 * The input line in buf is scanned from offset pos and modified
 * in place; ln is only used for the tree node and for messages;
 * ec is the escape character currently in force.
 *
 * Returns ROFF_CONT if the (possibly truncated) line is to be
 * processed further, or ROFF_IGN | ROFF_APPEND if the line ends
 * with a continuation request.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	const char	*cp;	/* for RCS id parsing */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	int		 rcsid;	/* kind of RCS id seen */

	/*
	 * Look for the first comment or continuation request.
	 * The loop is only left by returning or by finding a comment.
	 */
	for (start = stesc = buf->buf + pos;; stesc++) {
		/*
		 * XXX Ugly hack: Remove the newline character that
		 * mparse_buf_r() appended to mark the end of input
		 * if it is not preceded by an escape character.
		 */
		if (stesc[0] == '\n') {
			assert(stesc[1] == '\0');
			stesc[0] = '\0';
		}

		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != ec)
			continue;

		/*
		 * XXX Ugly hack: Do not attempt to append another line
		 * if the function mparse_buf_r() appended a newline
		 * character to indicate the end of input.
		 */
		if (stesc[1] == '\n') {
			assert(stesc[2] == '\0');
			stesc[0] = '\0';
			return ROFF_CONT;
		}

		/*
		 * An escape character at the end of an input line
		 * requests line continuation.
		 */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == ec)
			stesc++;
	}

	/*
	 * From here on, stesc points to the escape character
	 * introducing the comment, stesc[1] is '"' or '#',
	 * and the text of the comment starts at stesc + 2.
	 */

	/* Look for an RCS id in the comment. */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;	/* Skip the keyword itself. */
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;	/* Skip the keyword itself. */
	}

	/*
	 * Only accept the keyword if it is not merely the prefix of
	 * a longer word and if a closing dollar sign follows.
	 * Seeing the same kind of RCS id again is reported.
	 */
	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/* Warn about trailing whitespace at the end of the comment. */

	/*
	 * ep points to the last byte of the line; for an empty
	 * comment, that is the '"' or '#' at stesc[1].
	 */
	ep = strchr(stesc + 2, '\0') - 1;
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/* Save comments preceding the title macro in the syntax tree. */

	if (r->options & MPARSE_COMMENT) {
		/*
		 * Strip trailing whitespace from the saved copy.
		 * The backward scan stops at stesc[1] at the latest
		 * because that byte is neither a blank nor a tab.
		 */
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/* Discard the comment including preceding whitespace. */

	/*
	 * Back up over blanks in front of the comment, but keep
	 * a blank that is itself preceded by a backslash.
	 */
	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1352 
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The line in buf is scanned from offset pos and rewritten in
 * place; buf->buf may be reallocated and buf->sz is updated.
 * ln is used for messages and ec is the escape character in force.
 *
 * Returns ROFF_CONT on success, or ROFF_IGN if the expansion limit
 * or the size limit is exceeded.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	char		 ubuf[24];	/* buffer to print a number */
	struct mctx	*ctx;		/* current macro call context */
	const char	*res;		/* the string to be pasted */
	const char	*src;		/* source for copying */
	char		*dst;		/* destination for copying */
	enum mandoc_esc	 subtype;	/* return value from roff_escape */
	int		 iesc;		/* index of leading escape char */
	int		 inam;		/* index of the escape name */
	int		 iarg;		/* index beginning the argument */
	int		 iendarg;	/* index right after the argument */
	int		 iend;		/* index right after the sequence */
	int		 isrc, idst;	/* to reduce \\ and \. in names */
	int		 deftype;	/* type of definition to paste */
	int		 argi;		/* macro argument index */
	int		 quote_args;	/* true for \\$@, false for \\$* */
	int		 asz;		/* length of the replacement */
	int		 rsz;		/* length of the rest of the string */
	int		 npos;		/* position in numeric expression */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;
	while (buf->buf[pos] != '\0') {

		/*
		 * Skip plain ASCII characters.
		 * If we have a non-standard escape character,
		 * escape literal backslashes because all processing in
		 * subsequent functions uses the standard escaping rules.
		 */

		if (buf->buf[pos] != ec) {
			if (buf->buf[pos] == '\\') {
				roff_expand_patch(buf, pos, "\\e", pos + 1);
				pos++;
			}
			pos++;
			continue;
		}

		/*
		 * Parse escape sequences,
		 * issue diagnostic messages when appropriate,
		 * and skip sequences that do not need expansion.
		 * If we have a non-standard escape character, translate
		 * it to backslashes and translate backslashes to \e.
		 */

		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
			while (pos < iend) {
				if (buf->buf[pos] == ec) {
					buf->buf[pos] = '\\';
					if (pos + 1 < iend)
						pos++;
				} else if (buf->buf[pos] == '\\') {
					/* The patch makes the line one byte longer. */
					roff_expand_patch(buf,
					    pos, "\\e", pos + 1);
					pos++;
					iend++;
				}
				pos++;
			}
			continue;
		}

		/* Reduce \\ and \. in names. */

		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
			isrc = idst = iarg;
			while (isrc < iendarg) {
				if (isrc + 1 < iendarg &&
				    buf->buf[isrc] == '\\' &&
				    (buf->buf[isrc + 1] == '\\' ||
				     buf->buf[isrc + 1] == '.'))
					isrc++;
				buf->buf[idst++] = buf->buf[isrc++];
			}
			/* The name got shorter by the number of bytes dropped. */
			iendarg -= isrc - idst;
		}

		/* Handle expansion. */

		res = NULL;
		switch (buf->buf[inam]) {
		case '*':
			/* An empty name expands to nothing. */
			if (iendarg == iarg)
				break;
			deftype = ROFFDEF_USER | ROFFDEF_PRE;
			if ((res = roff_getstrn(r, buf->buf + iarg,
			    iendarg - iarg, &deftype)) != NULL)
				break;

			/*
			 * If not overridden,
			 * let \*(.T through to the formatters.
			 */

			if (iendarg - iarg == 2 &&
			    buf->buf[iarg] == '.' &&
			    buf->buf[iarg + 1] == 'T') {
				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
				pos = iend;
				continue;
			}

			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
			    "%.*s", iendarg - iarg, buf->buf + iarg);
			break;

		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/*
			 * The digits 1 to 9 select a single argument;
			 * arguments not supplied expand to nothing.
			 */
			argi = buf->buf[iarg] - '1';
			if (argi >= 0 && argi <= 8) {
				if (argi < ctx->argc)
					res = ctx->argv[argi];
				break;
			}
			if (buf->buf[iarg] == '*')
				quote_args = 0;
			else if (buf->buf[iarg] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}

			/* Measure all arguments joined by single blanks. */
			asz = 0;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[argi]);
			}

			/*
			 * Resize the gap in place.  When shrinking, move
			 * the rest of the line before the reallocation;
			 * when growing, move it afterwards.
			 */
			if (asz != iend - iesc) {
				rsz = buf->sz - iend;
				if (asz < iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
				buf->sz = iesc + asz + rsz;
				buf->buf = mandoc_realloc(buf->buf, buf->sz);
				if (asz > iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
			}

			/* Write the arguments into the gap. */
			dst = buf->buf + iesc;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					*dst++ = ' ';
				if (quote_args)
					*dst++ = '"';
				src = ctx->argv[argi];
				while (*src != '\0')
					*dst++ = *src++;
				if (quote_args)
					*dst++ = '"';
			}
			/*
			 * pos is not advanced here, so the
			 * pasted arguments are scanned next.
			 */
			continue;
		case 'A':
			/* Yields 1 for a non-empty argument, 0 otherwise. */
			ubuf[0] = iendarg > iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'B':
			/*
			 * Yields 1 if the argument is non-empty, is
			 * followed by more bytes belonging to the
			 * sequence, and is accepted by roff_evalnum()
			 * in its entirety; 0 otherwise.
			 */
			npos = 0;
			ubuf[0] = iendarg > iarg && iend > iendarg &&
			    roff_evalnum(r, ln, buf->buf + iarg, &npos,
					 NULL, ROFFNUM_SCALE) &&
			    npos == iendarg - iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'V':
			/*
			 * Unsupported: rewrite the sequence as ${...}.
			 * The tail is patched first such that the
			 * indices in front of it remain valid.
			 */
			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
			    "%.*s", iend - iesc, buf->buf + iesc);
			roff_expand_patch(buf, iendarg, "}", iend);
			roff_expand_patch(buf, iesc, "${", iarg);
			continue;
		case 'g':
			/* Expands to nothing. */
			break;
		case 'n':
			/* An empty register name expands to nothing. */
			if (iendarg > iarg)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, buf->buf + iarg,
				    iendarg - iarg, buf->buf[inam + 1]));
			else
				ubuf[0] = '\0';
			res = ubuf;
			break;
		case 'w':
			/*
			 * Count the argument: one unit per plain byte
			 * and per escape sequence of the subtypes listed
			 * below, eight for ESCAPE_DEVICE, none for all
			 * other subtypes, and none for whatever directly
			 * follows an ESCAPE_SKIPCHAR.
			 */
			rsz = 0;
			subtype = ESCAPE_UNDEF;
			while (iarg < iendarg) {
				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
				if (buf->buf[iarg] != '\\') {
					rsz += asz;
					iarg++;
					continue;
				}
				switch ((subtype = roff_escape(buf->buf, 0,
				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
				case ESCAPE_SPECIAL:
				case ESCAPE_NUMBERED:
				case ESCAPE_UNICODE:
				case ESCAPE_OVERSTRIKE:
				case ESCAPE_UNDEF:
					break;
				case ESCAPE_DEVICE:
					asz *= 8;
					break;
				case ESCAPE_EXPAND:
					abort();
				default:
					continue;
				}
				rsz += asz;
			}
			/* Each unit counted contributes 24 to the result. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
			res = ubuf;
			break;
		default:
			break;
		}
		if (res == NULL)
			res = "";

		/*
		 * The replacement is pasted at iesc while pos remains
		 * unchanged, so the pasted text is scanned again;
		 * the limits below stop runaway expansion.
		 */
		if (++expand_count > EXPAND_LIMIT ||
		    buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
			return ROFF_IGN;
		}
		roff_expand_patch(buf, iesc, res, iend);
	}
	return ROFF_CONT;
}
1600 
1601 /*
1602  * Replace the substring from the start position (inclusive)
1603  * to end position (exclusive) with the repl(acement) string.
1604  */
1605 static void
1606 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1607 {
1608 	char	*nbuf;
1609 
1610 	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1611 	    repl, buf->buf + end) + 1;
1612 	free(buf->buf);
1613 	buf->buf = nbuf;
1614 }
1615 
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument is NUL-terminated in place in the input line, but
 * what is returned is a newly allocated copy of it (made with
 * mandoc_strdup(), possibly reallocated during expansion), not
 * a pointer into the input line; presumably, the caller is
 * expected to free it.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the number of bytes consumed.
 * If a double backslash was reduced to a single one, run
 * roff_expand() on the copy; if that fails, return an empty string.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/*
	 * Meaning of the state variables:
	 * newesc: a double backslash was reduced to a single one
	 * pairs:  number of bytes dropped so far, i.e. the distance
	 *         by which the following text is moved left
	 * quoted: 0 = unquoted, 1 = quoted and still open,
	 *         2 = quoted and properly closed
	 * white:  an unquoted argument was ended by a blank
	 */

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\", "\a", and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t as well as
			 * backslash-a to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				cp[-pairs] = '\\';
				newesc = 1;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* For quoted arguments, also count the opening quote. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn if nothing but blanks followed the last argument. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* From here on, start refers to the heap copy. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/*
	 * Reducing a double backslash produced a new escape
	 * character, so give the copy another round of expansion.
	 */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1726 
1727 
1728 /*
1729  * Process text streams.
1730  */
1731 static int
1732 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1733 {
1734 	size_t		 sz;
1735 	const char	*start;
1736 	char		*p;
1737 	int		 isz;
1738 	enum mandoc_esc	 esc;
1739 
1740 	/* Spring the input line trap. */
1741 
1742 	if (roffit_lines == 1) {
1743 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1744 		free(buf->buf);
1745 		buf->buf = p;
1746 		buf->sz = isz + 1;
1747 		*offs = 0;
1748 		free(roffit_macro);
1749 		roffit_lines = 0;
1750 		return ROFF_REPARSE;
1751 	} else if (roffit_lines > 1)
1752 		--roffit_lines;
1753 
1754 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1755 		if (roffce_lines < 1) {
1756 			r->man->last = roffce_node;
1757 			r->man->next = ROFF_NEXT_SIBLING;
1758 			roffce_lines = 0;
1759 			roffce_node = NULL;
1760 		} else
1761 			roffce_lines--;
1762 	}
1763 
1764 	/* Convert all breakable hyphens into ASCII_HYPH. */
1765 
1766 	start = p = buf->buf + pos;
1767 
1768 	while (*p != '\0') {
1769 		sz = strcspn(p, "-\\");
1770 		p += sz;
1771 
1772 		if (*p == '\0')
1773 			break;
1774 
1775 		if (*p == '\\') {
1776 			/* Skip over escapes. */
1777 			p++;
1778 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1779 			if (esc == ESCAPE_ERROR)
1780 				break;
1781 			while (*p == '-')
1782 				p++;
1783 			continue;
1784 		} else if (p == start) {
1785 			p++;
1786 			continue;
1787 		}
1788 
1789 		if (isalpha((unsigned char)p[-1]) &&
1790 		    isalpha((unsigned char)p[1]))
1791 			*p = ASCII_HYPH;
1792 		p++;
1793 	}
1794 	return ROFF_CONT;
1795 }
1796 
/*
 * Parse one input line.
 *
 * r is the parser, ln the line number used for messages, buf the
 * buffer holding the line, which may be modified or replaced,
 * *offs the offset in the buffer where the line starts, and len
 * a length that is only used for the long text line warning.
 *
 * The line passes through in-line equation delimiter handling,
 * comment and line continuation handling, and escape sequence
 * expansion.  After that, it is handed to the text or child
 * handler of an open roff block, to the equation or table parser,
 * to the text parser, or to the handler of the request or macro
 * it contains.
 *
 * Returns a ROFF_* code, possibly combined with additional bits
 * like ROFF_APPEND, that comes from whichever handler dealt with
 * the line.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but only outside tables and
	 * equations, in fill mode, if the line is not empty and does
	 * not start with a blank, a dot, a backslash, or the control
	 * character, and if there is a blank among the first 80 bytes
	 * of the buffer.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos beyond the control character. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the additional bits for the return value. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, all lines except .EN go to the eqn parser. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, so do text lines and empty request lines. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* Ordinary text ends the saving of header comments. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* So does a request or macro outside any scope. */
	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1895 
1896 /*
1897  * Handle a new request or macro.
1898  * May be called outside any scope or from inside a conditional scope.
1899  */
1900 static int
1901 roff_req_or_macro(ROFF_ARGS) {
1902 
1903 	/* For now, tables ignore most macros and some request. */
1904 
1905 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1906 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1907 	    tok == ROFF_sp)) {
1908 		mandoc_msg(MANDOCERR_TBLMACRO,
1909 		    ln, ppos, "%s", buf->buf + ppos);
1910 		if (tok != TOKEN_NONE)
1911 			return ROFF_IGN;
1912 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1913 			pos++;
1914 		while (buf->buf[pos] == ' ')
1915 			pos++;
1916 		tbl_read(r->tbl, ln, buf->buf, pos);
1917 		roff_addtbl(r->man, ln, r->tbl);
1918 		return ROFF_IGN;
1919 	}
1920 
1921 	/* For now, let high level macros abort .ce mode. */
1922 
1923 	if (roffce_node != NULL &&
1924 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1925 	     tok == ROFF_TH || tok == ROFF_TS)) {
1926 		r->man->last = roffce_node;
1927 		r->man->next = ROFF_NEXT_SIBLING;
1928 		roffce_lines = 0;
1929 		roffce_node = NULL;
1930 	}
1931 
1932 	/*
1933 	 * This is neither a roff request nor a user-defined macro.
1934 	 * Let the standard macro set parsers handle it.
1935 	 */
1936 
1937 	if (tok == TOKEN_NONE)
1938 		return ROFF_CONT;
1939 
1940 	/* Execute a roff request or a user-defined macro. */
1941 
1942 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1943 }
1944 
1945 /*
1946  * Internal interface function to tell the roff parser that execution
1947  * of the current macro ended.  This is required because macro
1948  * definitions usually do not end with a .return request.
1949  */
1950 void
1951 roff_userret(struct roff *r)
1952 {
1953 	struct mctx	*ctx;
1954 	int		 i;
1955 
1956 	assert(r->mstackpos >= 0);
1957 	ctx = r->mstack + r->mstackpos;
1958 	for (i = 0; i < ctx->argc; i++)
1959 		free(ctx->argv[i]);
1960 	ctx->argc = 0;
1961 	r->mstackpos--;
1962 }
1963 
1964 void
1965 roff_endparse(struct roff *r)
1966 {
1967 	if (r->last != NULL)
1968 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1969 		    r->last->col, "%s", roff_name[r->last->tok]);
1970 
1971 	if (r->eqn != NULL) {
1972 		mandoc_msg(MANDOCERR_BLK_NOEND,
1973 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1974 		eqn_parse(r->eqn);
1975 		r->eqn = NULL;
1976 	}
1977 
1978 	if (r->tbl != NULL) {
1979 		tbl_end(r->tbl, 1);
1980 		r->tbl = NULL;
1981 	}
1982 }
1983 
1984 /*
1985  * Parse the request or macro name at buf[*pos].
1986  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1987  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1988  * As a side effect, set r->current_string to the definition or to NULL.
1989  */
1990 static enum roff_tok
1991 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1992 {
1993 	char		*cp;
1994 	const char	*mac;
1995 	size_t		 maclen;
1996 	int		 deftype;
1997 	enum roff_tok	 t;
1998 
1999 	cp = buf + *pos;
2000 
2001 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2002 		return TOKEN_NONE;
2003 
2004 	mac = cp;
2005 	maclen = roff_getname(r, &cp, ln, ppos);
2006 
2007 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2008 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2009 	switch (deftype) {
2010 	case ROFFDEF_USER:
2011 		t = ROFF_USERDEF;
2012 		break;
2013 	case ROFFDEF_REN:
2014 		t = ROFF_RENAMED;
2015 		break;
2016 	default:
2017 		t = roffhash_find(r->reqtab, mac, maclen);
2018 		break;
2019 	}
2020 	if (t != TOKEN_NONE)
2021 		*pos = cp - buf;
2022 	else if (deftype == ROFFDEF_UNDEF) {
2023 		/* Using an undefined macro defines it to be empty. */
2024 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2025 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2026 	}
2027 	return t;
2028 }
2029 
2030 /* --- handling of request blocks ----------------------------------------- */
2031 
2032 /*
2033  * Close a macro definition block or an "ignore" block.
2034  */
2035 static int
2036 roff_cblock(ROFF_ARGS)
2037 {
2038 	int	 rr;
2039 
2040 	if (r->last == NULL) {
2041 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2042 		return ROFF_IGN;
2043 	}
2044 
2045 	switch (r->last->tok) {
2046 	case ROFF_am:
2047 	case ROFF_ami:
2048 	case ROFF_de:
2049 	case ROFF_dei:
2050 	case ROFF_ig:
2051 		break;
2052 	case ROFF_am1:
2053 	case ROFF_de1:
2054 		/* Remapped in roff_block(). */
2055 		abort();
2056 	default:
2057 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2058 		return ROFF_IGN;
2059 	}
2060 
2061 	roffnode_pop(r);
2062 	roffnode_cleanscope(r);
2063 
2064 	/*
2065 	 * If a conditional block with braces is still open,
2066 	 * check for "\}" block end markers.
2067 	 */
2068 
2069 	if (r->last != NULL && r->last->endspan < 0) {
2070 		rr = 1;  /* If arguments follow "\}", warn about them. */
2071 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2072 	}
2073 
2074 	if (buf->buf[pos] != '\0')
2075 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2076 		    ".. %s", buf->buf + pos);
2077 
2078 	return ROFF_IGN;
2079 }
2080 
2081 /*
2082  * Pop all nodes ending at the end of the current input line.
2083  * Return the number of loops ended.
2084  */
2085 static int
2086 roffnode_cleanscope(struct roff *r)
2087 {
2088 	int inloop;
2089 
2090 	inloop = 0;
2091 	while (r->last != NULL && r->last->endspan > 0) {
2092 		if (--r->last->endspan != 0)
2093 			break;
2094 		inloop += roffnode_pop(r);
2095 	}
2096 	return inloop;
2097 }
2098 
2099 /*
2100  * Handle the closing "\}" of a conditional block.
2101  * Apart from generating warnings, this only pops nodes.
2102  * Return the number of loops ended.
2103  */
2104 static int
2105 roff_ccond(struct roff *r, int ln, int ppos)
2106 {
2107 	if (NULL == r->last) {
2108 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2109 		return 0;
2110 	}
2111 
2112 	switch (r->last->tok) {
2113 	case ROFF_el:
2114 	case ROFF_ie:
2115 	case ROFF_if:
2116 	case ROFF_while:
2117 		break;
2118 	default:
2119 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2120 		return 0;
2121 	}
2122 
2123 	if (r->last->endspan > -1) {
2124 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2125 		return 0;
2126 	}
2127 
2128 	return roffnode_pop(r) + roffnode_cleanscope(r);
2129 }
2130 
2131 static int
2132 roff_block(ROFF_ARGS)
2133 {
2134 	const char	*name, *value;
2135 	char		*call, *cp, *iname, *rname;
2136 	size_t		 csz, namesz, rsz;
2137 	int		 deftype;
2138 
2139 	/* Ignore groff compatibility mode for now. */
2140 
2141 	if (tok == ROFF_de1)
2142 		tok = ROFF_de;
2143 	else if (tok == ROFF_dei1)
2144 		tok = ROFF_dei;
2145 	else if (tok == ROFF_am1)
2146 		tok = ROFF_am;
2147 	else if (tok == ROFF_ami1)
2148 		tok = ROFF_ami;
2149 
2150 	/* Parse the macro name argument. */
2151 
2152 	cp = buf->buf + pos;
2153 	if (tok == ROFF_ig) {
2154 		iname = NULL;
2155 		namesz = 0;
2156 	} else {
2157 		iname = cp;
2158 		namesz = roff_getname(r, &cp, ln, ppos);
2159 		iname[namesz] = '\0';
2160 	}
2161 
2162 	/* Resolve the macro name argument if it is indirect. */
2163 
2164 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2165 		deftype = ROFFDEF_USER;
2166 		name = roff_getstrn(r, iname, namesz, &deftype);
2167 		if (name == NULL) {
2168 			mandoc_msg(MANDOCERR_STR_UNDEF,
2169 			    ln, (int)(iname - buf->buf),
2170 			    "%.*s", (int)namesz, iname);
2171 			namesz = 0;
2172 		} else
2173 			namesz = strlen(name);
2174 	} else
2175 		name = iname;
2176 
2177 	if (namesz == 0 && tok != ROFF_ig) {
2178 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2179 		    ln, ppos, "%s", roff_name[tok]);
2180 		return ROFF_IGN;
2181 	}
2182 
2183 	roffnode_push(r, tok, name, ln, ppos);
2184 
2185 	/*
2186 	 * At the beginning of a `de' macro, clear the existing string
2187 	 * with the same name, if there is one.  New content will be
2188 	 * appended from roff_block_text() in multiline mode.
2189 	 */
2190 
2191 	if (tok == ROFF_de || tok == ROFF_dei) {
2192 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2193 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2194 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2195 		deftype = ROFFDEF_ANY;
2196 		value = roff_getstrn(r, iname, namesz, &deftype);
2197 		switch (deftype) {  /* Before appending, ... */
2198 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2199 			roff_setstrn(&r->strtab, name, namesz,
2200 			    value, strlen(value), 0);
2201 			break;
2202 		case ROFFDEF_REN: /* call original standard macro. */
2203 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2204 			    (int)strlen(value), value);
2205 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2206 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2207 			free(call);
2208 			break;
2209 		case ROFFDEF_STD:  /* rename and call standard macro. */
2210 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2211 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2212 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2213 			    (int)rsz, rname);
2214 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2215 			free(call);
2216 			free(rname);
2217 			break;
2218 		default:
2219 			break;
2220 		}
2221 	}
2222 
2223 	if (*cp == '\0')
2224 		return ROFF_IGN;
2225 
2226 	/* Get the custom end marker. */
2227 
2228 	iname = cp;
2229 	namesz = roff_getname(r, &cp, ln, ppos);
2230 
2231 	/* Resolve the end marker if it is indirect. */
2232 
2233 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2234 		deftype = ROFFDEF_USER;
2235 		name = roff_getstrn(r, iname, namesz, &deftype);
2236 		if (name == NULL) {
2237 			mandoc_msg(MANDOCERR_STR_UNDEF,
2238 			    ln, (int)(iname - buf->buf),
2239 			    "%.*s", (int)namesz, iname);
2240 			namesz = 0;
2241 		} else
2242 			namesz = strlen(name);
2243 	} else
2244 		name = iname;
2245 
2246 	if (namesz)
2247 		r->last->end = mandoc_strndup(name, namesz);
2248 
2249 	if (*cp != '\0')
2250 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2251 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2252 
2253 	return ROFF_IGN;
2254 }
2255 
2256 static int
2257 roff_block_sub(ROFF_ARGS)
2258 {
2259 	enum roff_tok	t;
2260 	int		i, j;
2261 
2262 	/*
2263 	 * If a custom end marker is a user-defined or predefined macro
2264 	 * or a request, interpret it.
2265 	 */
2266 
2267 	if (r->last->end) {
2268 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2269 			if (buf->buf[i] != r->last->end[j])
2270 				break;
2271 
2272 		if (r->last->end[j] == '\0' &&
2273 		    (buf->buf[i] == '\0' ||
2274 		     buf->buf[i] == ' ' ||
2275 		     buf->buf[i] == '\t')) {
2276 			roffnode_pop(r);
2277 			roffnode_cleanscope(r);
2278 
2279 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2280 				i++;
2281 
2282 			pos = i;
2283 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2284 			    TOKEN_NONE)
2285 				return ROFF_RERUN;
2286 			return ROFF_IGN;
2287 		}
2288 	}
2289 
2290 	/* Handle the standard end marker. */
2291 
2292 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2293 	if (t == ROFF_cblock)
2294 		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2295 
2296 	/* Not an end marker, so append the line to the block. */
2297 
2298 	if (tok != ROFF_ig)
2299 		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2300 	return ROFF_IGN;
2301 }
2302 
2303 static int
2304 roff_block_text(ROFF_ARGS)
2305 {
2306 
2307 	if (tok != ROFF_ig)
2308 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2309 
2310 	return ROFF_IGN;
2311 }
2312 
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * The input buffer is edited in place: every "\}" found is removed,
 * replaced by "\&", or used to truncate the line.
 * Return ROFF_IGN, with ROFF_LOOPCONT or ROFF_LOOPEXIT ORed in
 * if tok is ROFF_while and at least one loop was ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;
	/* Remember the rule now; the node may be popped below. */
	rr = r->last->rule;
	/* Flag to report if a loop ends: continue if the rule holds. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			if (ep[2] == '\0')
				/* At the end of the line: cut it off. */
				ep[0] = '\0';
			else if (rr)
				/* Line is kept: turn "\}" into "\&". */
				ep[1] = '&';
			else
				/* Line is dropped: delete the "\}". */
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the NUL. */
			++ep;
			break;
		default:
			/* Skip the backslash and the escaped byte. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2372 
/*
 * Parse and process a request or macro line in conditional scope.
 * Closing "\}" markers on the line are handled first; then the
 * request or macro is executed if the conditional is true or if
 * the request is flagged ROFFMAC_STRUCT.
 * Return the ROFF_* flags resulting from both steps.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	int		 irc, rr, spos;
	enum roff_tok	 t;

	rr = 0;  /* If arguments follow "\}", skip them. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	/* Save the start of the name; roff_parse() advances pos. */
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Handle requests and macros if the conditional evaluated
	 * to true or if they are structurally required.
	 * The .break request is always handled specially.
	 */

	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			/* A loop just ended on this line: leave it. */
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Turn off all enclosing blocks
			 * up to and including the innermost .while.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
		/* If ROFF_WHILE got set, drop the loop end flags. */
		if (irc & ROFF_WHILE)
			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
	}
	return irc;
}
2411 
2412 /*
2413  * Parse and process a text line in conditional scope.
2414  */
2415 static int
2416 roff_cond_text(ROFF_ARGS)
2417 {
2418 	int	 irc, rr;
2419 
2420 	rr = 1;  /* If arguments follow "\}", preserve them. */
2421 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2422 	if (rr)
2423 		irc |= ROFF_CONT;
2424 	return irc;
2425 }
2426 
2427 /* --- handling of numeric and conditional expressions -------------------- */
2428 
2429 /*
2430  * Parse a single signed integer number.  Stop at the first non-digit.
2431  * If there is at least one digit, return success and advance the
2432  * parse point, else return failure and let the parse point unchanged.
2433  * Ignore overflows, treat them just like the C language.
2434  */
2435 static int
2436 roff_getnum(const char *v, int *pos, int *res, int flags)
2437 {
2438 	int	 myres, scaled, n, p;
2439 
2440 	if (NULL == res)
2441 		res = &myres;
2442 
2443 	p = *pos;
2444 	n = v[p] == '-';
2445 	if (n || v[p] == '+')
2446 		p++;
2447 
2448 	if (flags & ROFFNUM_WHITE)
2449 		while (isspace((unsigned char)v[p]))
2450 			p++;
2451 
2452 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2453 		*res = 10 * *res + v[p] - '0';
2454 	if (p == *pos + n)
2455 		return 0;
2456 
2457 	if (n)
2458 		*res = -*res;
2459 
2460 	/* Each number may be followed by one optional scaling unit. */
2461 
2462 	switch (v[p]) {
2463 	case 'f':
2464 		scaled = *res * 65536;
2465 		break;
2466 	case 'i':
2467 		scaled = *res * 240;
2468 		break;
2469 	case 'c':
2470 		scaled = *res * 240 / 2.54;
2471 		break;
2472 	case 'v':
2473 	case 'P':
2474 		scaled = *res * 40;
2475 		break;
2476 	case 'm':
2477 	case 'n':
2478 		scaled = *res * 24;
2479 		break;
2480 	case 'p':
2481 		scaled = *res * 10 / 3;
2482 		break;
2483 	case 'u':
2484 		scaled = *res;
2485 		break;
2486 	case 'M':
2487 		scaled = *res * 6 / 25;
2488 		break;
2489 	default:
2490 		scaled = *res;
2491 		p--;
2492 		break;
2493 	}
2494 	if (flags & ROFFNUM_SCALE)
2495 		*res = scaled;
2496 
2497 	*pos = p + 1;
2498 	return 1;
2499 }
2500 
/*
 * Evaluate a string comparison condition.
 * The byte at v[*pos] is the delimiter.  The condition holds if the
 * text between its first and second occurrence is the same as the
 * text between its second and third occurrence.
 * The cursor *pos is moved behind the third delimiter or,
 * if there is none, to the end of the line.
 * Return 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right, *stop;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	left = v + *pos + 1;	/* scans the first string */
	equal = 0;
	stop = NULL;		/* where parsing ends, if known */

	right = strchr(left, delim);
	if (right != NULL) {
		/* Compare byte by byte, starting after the middle. */
		for (right++; ; left++, right++) {
			if (*right == '\0') {
				/* no final delimiter */
				stop = right;
				break;
			}
			if (*left != *right) {
				/* mismatch: look for the final delimiter */
				stop = strchr(right, delim);
				break;
			}
			if (*right == delim) {
				/* both strings ended simultaneously */
				equal = 1;
				stop = right;
				break;
			}
		}
	}

	if (stop == NULL)
		stop = left + strlen(left);
	else if (*stop != '\0')
		stop++;
	*pos = stop - v;
	return equal;
}
2543 
2544 /*
2545  * Evaluate an optionally negated single character, numerical,
2546  * or string condition.
2547  */
2548 static int
2549 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2550 {
2551 	const char	*start, *end;
2552 	char		*cp, *name;
2553 	size_t		 sz;
2554 	int		 deftype, len, number, savepos, istrue, wanttrue;
2555 
2556 	if ('!' == v[*pos]) {
2557 		wanttrue = 0;
2558 		(*pos)++;
2559 	} else
2560 		wanttrue = 1;
2561 
2562 	switch (v[*pos]) {
2563 	case '\0':
2564 		return 0;
2565 	case 'n':
2566 	case 'o':
2567 		(*pos)++;
2568 		return wanttrue;
2569 	case 'e':
2570 	case 't':
2571 	case 'v':
2572 		(*pos)++;
2573 		return !wanttrue;
2574 	case 'c':
2575 		do {
2576 			(*pos)++;
2577 		} while (v[*pos] == ' ');
2578 
2579 		/*
2580 		 * Quirk for groff compatibility:
2581 		 * The horizontal tab is neither available nor unavailable.
2582 		 */
2583 
2584 		if (v[*pos] == '\t') {
2585 			(*pos)++;
2586 			return 0;
2587 		}
2588 
2589 		/* Printable ASCII characters are available. */
2590 
2591 		if (v[*pos] != '\\') {
2592 			(*pos)++;
2593 			return wanttrue;
2594 		}
2595 
2596 		end = v + ++*pos;
2597 		switch (mandoc_escape(&end, &start, &len)) {
2598 		case ESCAPE_SPECIAL:
2599 			istrue = mchars_spec2cp(start, len) != -1;
2600 			break;
2601 		case ESCAPE_UNICODE:
2602 			istrue = 1;
2603 			break;
2604 		case ESCAPE_NUMBERED:
2605 			istrue = mchars_num2char(start, len) != -1;
2606 			break;
2607 		default:
2608 			istrue = !wanttrue;
2609 			break;
2610 		}
2611 		*pos = end - v;
2612 		return istrue == wanttrue;
2613 	case 'd':
2614 	case 'r':
2615 		cp = v + *pos + 1;
2616 		while (*cp == ' ')
2617 			cp++;
2618 		name = cp;
2619 		sz = roff_getname(r, &cp, ln, cp - v);
2620 		if (sz == 0)
2621 			istrue = 0;
2622 		else if (v[*pos] == 'r')
2623 			istrue = roff_hasregn(r, name, sz);
2624 		else {
2625 			deftype = ROFFDEF_ANY;
2626 		        roff_getstrn(r, name, sz, &deftype);
2627 			istrue = !!deftype;
2628 		}
2629 		*pos = (name + sz) - v;
2630 		return istrue == wanttrue;
2631 	default:
2632 		break;
2633 	}
2634 
2635 	savepos = *pos;
2636 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2637 		return (number > 0) == wanttrue;
2638 	else if (*pos == savepos)
2639 		return roff_evalstrcond(v, pos) == wanttrue;
2640 	else
2641 		return 0;
2642 }
2643 
/*
 * Request handler that does nothing:
 * the line is ignored without any message.
 */
static int
roff_line_ignore(ROFF_ARGS)
{

	return ROFF_IGN;
}
2650 
/*
 * Request handler that reports MANDOCERR_REQ_INSEC
 * for the request and otherwise ignores the line.
 */
static int
roff_insec(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2658 
/*
 * Request handler that reports MANDOCERR_REQ_UNSUPP
 * for the request and otherwise ignores the line.
 */
static int
roff_unsupp(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2666 
/*
 * Handle a conditional request: open a new node, determine
 * whether its body is to be processed, and determine the scope
 * of the body (next line, braced multiline, or rest of the line).
 * On return, *offs is the position where the body starts.
 * Return ROFF_RERUN, with ROFF_WHILE ORed in for the .while request.
 */
static int
roff_cond(ROFF_ARGS)
{
	int	 irc;

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = tok == ROFF_el ?
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
	    roff_evalcond(r, ln, buf->buf, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (tok == ROFF_ie) {
		/* Grow the rule stack in steps of 16 entries. */
		if (r->rstackpos + 1 == r->rstacksz) {
			r->rstacksz += 16;
			r->rstack = mandoc_reallocarray(r->rstack,
			    r->rstacksz, sizeof(int));
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * Except that .while does not support next-line scope.
	 */

	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
		r->last->endspan = 2;
		goto out;
	}

	while (buf->buf[pos] == ' ')
		pos++;

	/* An opening brace requests multiline scope. */

	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
		r->last->endspan = -1;
		pos += 2;
		while (buf->buf[pos] == ' ')
			pos++;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if (buf->buf[pos] == '\0')
		mandoc_msg(MANDOCERR_COND_EMPTY,
		    ln, ppos, "%s", roff_name[tok]);

	r->last->endspan = 1;

out:
	/* Have the caller rerun the parser on the body. */
	*offs = pos;
	irc = ROFF_RERUN;
	if (tok == ROFF_while)
		irc |= ROFF_WHILE;
	return irc;
}
2749 
2750 static int
2751 roff_ds(ROFF_ARGS)
2752 {
2753 	char		*string;
2754 	const char	*name;
2755 	size_t		 namesz;
2756 
2757 	/* Ignore groff compatibility mode for now. */
2758 
2759 	if (tok == ROFF_ds1)
2760 		tok = ROFF_ds;
2761 	else if (tok == ROFF_as1)
2762 		tok = ROFF_as;
2763 
2764 	/*
2765 	 * The first word is the name of the string.
2766 	 * If it is empty or terminated by an escape sequence,
2767 	 * abort the `ds' request without defining anything.
2768 	 */
2769 
2770 	name = string = buf->buf + pos;
2771 	if (*name == '\0')
2772 		return ROFF_IGN;
2773 
2774 	namesz = roff_getname(r, &string, ln, pos);
2775 	switch (name[namesz]) {
2776 	case '\\':
2777 		return ROFF_IGN;
2778 	case '\t':
2779 		string = buf->buf + pos + namesz;
2780 		break;
2781 	default:
2782 		break;
2783 	}
2784 
2785 	/* Read past the initial double-quote, if any. */
2786 	if (*string == '"')
2787 		string++;
2788 
2789 	/* The rest is the value. */
2790 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2791 	    ROFF_as == tok);
2792 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2793 	return ROFF_IGN;
2794 }
2795 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code;
 * otherwise, return 0 and leave the parse point unchanged.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i',
 * ">=" as 'g', ">?" as 'a', and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = *cp;
	oplen = 1;

	if (*cp == '<') {
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
	} else if (*cp == '>') {
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
	} else if (*cp == '=') {
		if (cp[1] == '=')
			oplen = 2;
	} else if (*cp == '\0' || strchr("+-*/%&:", *cp) == NULL)
		return 0;

	*pos += oplen;
	return *res;
}
2859 
2860 /*
2861  * Evaluate either a parenthesized numeric expression
2862  * or a single signed integer number.
2863  */
2864 static int
2865 roff_evalpar(struct roff *r, int ln,
2866 	const char *v, int *pos, int *res, int flags)
2867 {
2868 
2869 	if ('(' != v[*pos])
2870 		return roff_getnum(v, pos, res, flags);
2871 
2872 	(*pos)++;
2873 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2874 		return 0;
2875 
2876 	/*
2877 	 * Omission of the closing parenthesis
2878 	 * is an error in validation mode,
2879 	 * but ignored in evaluation mode.
2880 	 */
2881 
2882 	if (')' == v[*pos])
2883 		(*pos)++;
2884 	else if (NULL == res)
2885 		return 0;
2886 
2887 	return 1;
2888 }
2889 
2890 /*
2891  * Evaluate a complete numeric expression.
2892  * Proceed left to right, there is no concept of precedence.
2893  */
2894 static int
2895 roff_evalnum(struct roff *r, int ln, const char *v,
2896 	int *pos, int *res, int flags)
2897 {
2898 	int		 mypos, operand2;
2899 	char		 operator;
2900 
2901 	if (NULL == pos) {
2902 		mypos = 0;
2903 		pos = &mypos;
2904 	}
2905 
2906 	if (flags & ROFFNUM_WHITE)
2907 		while (isspace((unsigned char)v[*pos]))
2908 			(*pos)++;
2909 
2910 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2911 		return 0;
2912 
2913 	while (1) {
2914 		if (flags & ROFFNUM_WHITE)
2915 			while (isspace((unsigned char)v[*pos]))
2916 				(*pos)++;
2917 
2918 		if ( ! roff_getop(v, pos, &operator))
2919 			break;
2920 
2921 		if (flags & ROFFNUM_WHITE)
2922 			while (isspace((unsigned char)v[*pos]))
2923 				(*pos)++;
2924 
2925 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2926 			return 0;
2927 
2928 		if (flags & ROFFNUM_WHITE)
2929 			while (isspace((unsigned char)v[*pos]))
2930 				(*pos)++;
2931 
2932 		if (NULL == res)
2933 			continue;
2934 
2935 		switch (operator) {
2936 		case '+':
2937 			*res += operand2;
2938 			break;
2939 		case '-':
2940 			*res -= operand2;
2941 			break;
2942 		case '*':
2943 			*res *= operand2;
2944 			break;
2945 		case '/':
2946 			if (operand2 == 0) {
2947 				mandoc_msg(MANDOCERR_DIVZERO,
2948 					ln, *pos, "%s", v);
2949 				*res = 0;
2950 				break;
2951 			}
2952 			*res /= operand2;
2953 			break;
2954 		case '%':
2955 			if (operand2 == 0) {
2956 				mandoc_msg(MANDOCERR_DIVZERO,
2957 					ln, *pos, "%s", v);
2958 				*res = 0;
2959 				break;
2960 			}
2961 			*res %= operand2;
2962 			break;
2963 		case '<':
2964 			*res = *res < operand2;
2965 			break;
2966 		case '>':
2967 			*res = *res > operand2;
2968 			break;
2969 		case 'l':
2970 			*res = *res <= operand2;
2971 			break;
2972 		case 'g':
2973 			*res = *res >= operand2;
2974 			break;
2975 		case '=':
2976 			*res = *res == operand2;
2977 			break;
2978 		case '!':
2979 			*res = *res != operand2;
2980 			break;
2981 		case '&':
2982 			*res = *res && operand2;
2983 			break;
2984 		case ':':
2985 			*res = *res || operand2;
2986 			break;
2987 		case 'i':
2988 			if (operand2 < *res)
2989 				*res = operand2;
2990 			break;
2991 		case 'a':
2992 			if (operand2 > *res)
2993 				*res = operand2;
2994 			break;
2995 		default:
2996 			abort();
2997 		}
2998 	}
2999 	return 1;
3000 }
3001 
3002 /* --- register management ------------------------------------------------ */
3003 
/*
 * Set, increment ('+'), or decrement ('-') the number register
 * with the NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3009 
3010 static void
3011 roff_setregn(struct roff *r, const char *name, size_t len,
3012     int val, char sign, int step)
3013 {
3014 	struct roffreg	*reg;
3015 
3016 	/* Search for an existing register with the same name. */
3017 	reg = r->regtab;
3018 
3019 	while (reg != NULL && (reg->key.sz != len ||
3020 	    strncmp(reg->key.p, name, len) != 0))
3021 		reg = reg->next;
3022 
3023 	if (NULL == reg) {
3024 		/* Create a new register. */
3025 		reg = mandoc_malloc(sizeof(struct roffreg));
3026 		reg->key.p = mandoc_strndup(name, len);
3027 		reg->key.sz = len;
3028 		reg->val = 0;
3029 		reg->step = 0;
3030 		reg->next = r->regtab;
3031 		r->regtab = reg;
3032 	}
3033 
3034 	if ('+' == sign)
3035 		reg->val += val;
3036 	else if ('-' == sign)
3037 		reg->val -= val;
3038 	else
3039 		reg->val = val;
3040 	if (step != INT_MIN)
3041 		reg->step = step;
3042 }
3043 
3044 /*
3045  * Handle some predefined read-only number registers.
3046  * For now, return -1 if the requested register is not predefined;
3047  * in case a predefined read-only register having the value -1
3048  * were to turn up, another special value would have to be chosen.
3049  */
3050 static int
3051 roff_getregro(const struct roff *r, const char *name)
3052 {
3053 
3054 	switch (*name) {
3055 	case '$':  /* Number of arguments of the last macro evaluated. */
3056 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3057 	case 'A':  /* ASCII approximation mode is always off. */
3058 		return 0;
3059 	case 'g':  /* Groff compatibility mode is always on. */
3060 		return 1;
3061 	case 'H':  /* Fixed horizontal resolution. */
3062 		return 24;
3063 	case 'j':  /* Always adjust left margin only. */
3064 		return 0;
3065 	case 'T':  /* Some output device is always defined. */
3066 		return 1;
3067 	case 'V':  /* Fixed vertical resolution. */
3068 		return 40;
3069 	default:
3070 		return -1;
3071 	}
3072 }
3073 
/*
 * Return the value of the number register with the
 * NUL-terminated name, without auto-incrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3079 
3080 static int
3081 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3082 {
3083 	struct roffreg	*reg;
3084 	int		 val;
3085 
3086 	if ('.' == name[0] && 2 == len) {
3087 		val = roff_getregro(r, name + 1);
3088 		if (-1 != val)
3089 			return val;
3090 	}
3091 
3092 	for (reg = r->regtab; reg; reg = reg->next) {
3093 		if (len == reg->key.sz &&
3094 		    0 == strncmp(name, reg->key.p, len)) {
3095 			switch (sign) {
3096 			case '+':
3097 				reg->val += reg->step;
3098 				break;
3099 			case '-':
3100 				reg->val -= reg->step;
3101 				break;
3102 			default:
3103 				break;
3104 			}
3105 			return reg->val;
3106 		}
3107 	}
3108 
3109 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3110 	return 0;
3111 }
3112 
3113 static int
3114 roff_hasregn(const struct roff *r, const char *name, size_t len)
3115 {
3116 	struct roffreg	*reg;
3117 	int		 val;
3118 
3119 	if ('.' == name[0] && 2 == len) {
3120 		val = roff_getregro(r, name + 1);
3121 		if (-1 != val)
3122 			return 1;
3123 	}
3124 
3125 	for (reg = r->regtab; reg; reg = reg->next)
3126 		if (len == reg->key.sz &&
3127 		    0 == strncmp(name, reg->key.p, len))
3128 			return 1;
3129 
3130 	return 0;
3131 }
3132 
3133 static void
3134 roff_freereg(struct roffreg *reg)
3135 {
3136 	struct roffreg	*old_reg;
3137 
3138 	while (NULL != reg) {
3139 		free(reg->key.p);
3140 		old_reg = reg;
3141 		reg = reg->next;
3142 		free(old_reg);
3143 	}
3144 }
3145 
3146 static int
3147 roff_nr(ROFF_ARGS)
3148 {
3149 	char		*key, *val, *step;
3150 	size_t		 keysz;
3151 	int		 iv, is, len;
3152 	char		 sign;
3153 
3154 	key = val = buf->buf + pos;
3155 	if (*key == '\0')
3156 		return ROFF_IGN;
3157 
3158 	keysz = roff_getname(r, &val, ln, pos);
3159 	if (key[keysz] == '\\' || key[keysz] == '\t')
3160 		return ROFF_IGN;
3161 
3162 	sign = *val;
3163 	if (sign == '+' || sign == '-')
3164 		val++;
3165 
3166 	len = 0;
3167 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3168 		return ROFF_IGN;
3169 
3170 	step = val + len;
3171 	while (isspace((unsigned char)*step))
3172 		step++;
3173 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3174 		is = INT_MIN;
3175 
3176 	roff_setregn(r, key, keysz, iv, sign, is);
3177 	return ROFF_IGN;
3178 }
3179 
3180 static int
3181 roff_rr(ROFF_ARGS)
3182 {
3183 	struct roffreg	*reg, **prev;
3184 	char		*name, *cp;
3185 	size_t		 namesz;
3186 
3187 	name = cp = buf->buf + pos;
3188 	if (*name == '\0')
3189 		return ROFF_IGN;
3190 	namesz = roff_getname(r, &cp, ln, pos);
3191 	name[namesz] = '\0';
3192 
3193 	prev = &r->regtab;
3194 	while (1) {
3195 		reg = *prev;
3196 		if (reg == NULL || !strcmp(name, reg->key.p))
3197 			break;
3198 		prev = &reg->next;
3199 	}
3200 	if (reg != NULL) {
3201 		*prev = reg->next;
3202 		free(reg->key.p);
3203 		free(reg);
3204 	}
3205 	return ROFF_IGN;
3206 }
3207 
3208 /* --- handler functions for roff requests -------------------------------- */
3209 
3210 static int
3211 roff_rm(ROFF_ARGS)
3212 {
3213 	const char	 *name;
3214 	char		 *cp;
3215 	size_t		  namesz;
3216 
3217 	cp = buf->buf + pos;
3218 	while (*cp != '\0') {
3219 		name = cp;
3220 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3221 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3222 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3223 		if (name[namesz] == '\\' || name[namesz] == '\t')
3224 			break;
3225 	}
3226 	return ROFF_IGN;
3227 }
3228 
3229 static int
3230 roff_it(ROFF_ARGS)
3231 {
3232 	int		 iv;
3233 
3234 	/* Parse the number of lines. */
3235 
3236 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3237 		mandoc_msg(MANDOCERR_IT_NONUM,
3238 		    ln, ppos, "%s", buf->buf + 1);
3239 		return ROFF_IGN;
3240 	}
3241 
3242 	while (isspace((unsigned char)buf->buf[pos]))
3243 		pos++;
3244 
3245 	/*
3246 	 * Arm the input line trap.
3247 	 * Special-casing "an-trap" is an ugly workaround to cope
3248 	 * with DocBook stupidly fiddling with man(7) internals.
3249 	 */
3250 
3251 	roffit_lines = iv;
3252 	roffit_macro = mandoc_strdup(iv != 1 ||
3253 	    strcmp(buf->buf + pos, "an-trap") ?
3254 	    buf->buf + pos : "br");
3255 	return ROFF_IGN;
3256 }
3257 
3258 static int
3259 roff_Dd(ROFF_ARGS)
3260 {
3261 	int		 mask;
3262 	enum roff_tok	 t, te;
3263 
3264 	switch (tok) {
3265 	case ROFF_Dd:
3266 		tok = MDOC_Dd;
3267 		te = MDOC_MAX;
3268 		if (r->format == 0)
3269 			r->format = MPARSE_MDOC;
3270 		mask = MPARSE_MDOC | MPARSE_QUICK;
3271 		break;
3272 	case ROFF_TH:
3273 		tok = MAN_TH;
3274 		te = MAN_MAX;
3275 		if (r->format == 0)
3276 			r->format = MPARSE_MAN;
3277 		mask = MPARSE_QUICK;
3278 		break;
3279 	default:
3280 		abort();
3281 	}
3282 	if ((r->options & mask) == 0)
3283 		for (t = tok; t < te; t++)
3284 			roff_setstr(r, roff_name[t], NULL, 0);
3285 	return ROFF_CONT;
3286 }
3287 
3288 static int
3289 roff_TE(ROFF_ARGS)
3290 {
3291 	r->man->flags &= ~ROFF_NONOFILL;
3292 	if (r->tbl == NULL) {
3293 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3294 		return ROFF_IGN;
3295 	}
3296 	if (tbl_end(r->tbl, 0) == 0) {
3297 		r->tbl = NULL;
3298 		free(buf->buf);
3299 		buf->buf = mandoc_strdup(".sp");
3300 		buf->sz = 4;
3301 		*offs = 0;
3302 		return ROFF_REPARSE;
3303 	}
3304 	r->tbl = NULL;
3305 	return ROFF_IGN;
3306 }
3307 
3308 static int
3309 roff_T_(ROFF_ARGS)
3310 {
3311 
3312 	if (NULL == r->tbl)
3313 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3314 	else
3315 		tbl_restart(ln, ppos, r->tbl);
3316 
3317 	return ROFF_IGN;
3318 }
3319 
3320 /*
3321  * Handle in-line equation delimiters.
3322  */
3323 static int
3324 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3325 {
3326 	char		*cp1, *cp2;
3327 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3328 
3329 	/*
3330 	 * Outside equations, look for an opening delimiter.
3331 	 * If we are inside an equation, we already know it is
3332 	 * in-line, or this function wouldn't have been called;
3333 	 * so look for a closing delimiter.
3334 	 */
3335 
3336 	cp1 = buf->buf + pos;
3337 	cp2 = strchr(cp1, r->eqn == NULL ?
3338 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3339 	if (cp2 == NULL)
3340 		return ROFF_CONT;
3341 
3342 	*cp2++ = '\0';
3343 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3344 
3345 	/* Handle preceding text, protecting whitespace. */
3346 
3347 	if (*buf->buf != '\0') {
3348 		if (r->eqn == NULL)
3349 			bef_pr = "\\&";
3350 		bef_nl = "\n";
3351 	}
3352 
3353 	/*
3354 	 * Prepare replacing the delimiter with an equation macro
3355 	 * and drop leading white space from the equation.
3356 	 */
3357 
3358 	if (r->eqn == NULL) {
3359 		while (*cp2 == ' ')
3360 			cp2++;
3361 		mac = ".EQ";
3362 	} else
3363 		mac = ".EN";
3364 
3365 	/* Handle following text, protecting whitespace. */
3366 
3367 	if (*cp2 != '\0') {
3368 		aft_nl = "\n";
3369 		if (r->eqn != NULL)
3370 			aft_pr = "\\&";
3371 	}
3372 
3373 	/* Do the actual replacement. */
3374 
3375 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3376 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3377 	free(buf->buf);
3378 	buf->buf = cp1;
3379 
3380 	/* Toggle the in-line state of the eqn subsystem. */
3381 
3382 	r->eqn_inline = r->eqn == NULL;
3383 	return ROFF_REPARSE;
3384 }
3385 
3386 static int
3387 roff_EQ(ROFF_ARGS)
3388 {
3389 	struct roff_node	*n;
3390 
3391 	if (r->man->meta.macroset == MACROSET_MAN)
3392 		man_breakscope(r->man, ROFF_EQ);
3393 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3394 	if (ln > r->man->last->line)
3395 		n->flags |= NODE_LINE;
3396 	n->eqn = eqn_box_new();
3397 	roff_node_append(r->man, n);
3398 	r->man->next = ROFF_NEXT_SIBLING;
3399 
3400 	assert(r->eqn == NULL);
3401 	if (r->last_eqn == NULL)
3402 		r->last_eqn = eqn_alloc();
3403 	else
3404 		eqn_reset(r->last_eqn);
3405 	r->eqn = r->last_eqn;
3406 	r->eqn->node = n;
3407 
3408 	if (buf->buf[pos] != '\0')
3409 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3410 		    ".EQ %s", buf->buf + pos);
3411 
3412 	return ROFF_IGN;
3413 }
3414 
3415 static int
3416 roff_EN(ROFF_ARGS)
3417 {
3418 	if (r->eqn != NULL) {
3419 		eqn_parse(r->eqn);
3420 		r->eqn = NULL;
3421 	} else
3422 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3423 	if (buf->buf[pos] != '\0')
3424 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3425 		    "EN %s", buf->buf + pos);
3426 	return ROFF_IGN;
3427 }
3428 
3429 static int
3430 roff_TS(ROFF_ARGS)
3431 {
3432 	if (r->tbl != NULL) {
3433 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3434 		tbl_end(r->tbl, 0);
3435 	}
3436 	r->man->flags |= ROFF_NONOFILL;
3437 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3438 	if (r->last_tbl == NULL)
3439 		r->first_tbl = r->tbl;
3440 	r->last_tbl = r->tbl;
3441 	return ROFF_IGN;
3442 }
3443 
3444 static int
3445 roff_noarg(ROFF_ARGS)
3446 {
3447 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3448 		man_breakscope(r->man, tok);
3449 	if (tok == ROFF_brp)
3450 		tok = ROFF_br;
3451 	roff_elem_alloc(r->man, ln, ppos, tok);
3452 	if (buf->buf[pos] != '\0')
3453 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3454 		   "%s %s", roff_name[tok], buf->buf + pos);
3455 	if (tok == ROFF_nf)
3456 		r->man->flags |= ROFF_NOFILL;
3457 	else if (tok == ROFF_fi)
3458 		r->man->flags &= ~ROFF_NOFILL;
3459 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3460 	r->man->next = ROFF_NEXT_SIBLING;
3461 	return ROFF_IGN;
3462 }
3463 
3464 static int
3465 roff_onearg(ROFF_ARGS)
3466 {
3467 	struct roff_node	*n;
3468 	char			*cp;
3469 	int			 npos;
3470 
3471 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3472 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3473 	     tok == ROFF_ti))
3474 		man_breakscope(r->man, tok);
3475 
3476 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3477 		r->man->last = roffce_node;
3478 		r->man->next = ROFF_NEXT_SIBLING;
3479 	}
3480 
3481 	roff_elem_alloc(r->man, ln, ppos, tok);
3482 	n = r->man->last;
3483 
3484 	cp = buf->buf + pos;
3485 	if (*cp != '\0') {
3486 		while (*cp != '\0' && *cp != ' ')
3487 			cp++;
3488 		while (*cp == ' ')
3489 			*cp++ = '\0';
3490 		if (*cp != '\0')
3491 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3492 			    ln, (int)(cp - buf->buf),
3493 			    "%s ... %s", roff_name[tok], cp);
3494 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3495 	}
3496 
3497 	if (tok == ROFF_ce || tok == ROFF_rj) {
3498 		if (r->man->last->type == ROFFT_ELEM) {
3499 			roff_word_alloc(r->man, ln, pos, "1");
3500 			r->man->last->flags |= NODE_NOSRC;
3501 		}
3502 		npos = 0;
3503 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3504 		    &roffce_lines, 0) == 0) {
3505 			mandoc_msg(MANDOCERR_CE_NONUM,
3506 			    ln, pos, "ce %s", buf->buf + pos);
3507 			roffce_lines = 1;
3508 		}
3509 		if (roffce_lines < 1) {
3510 			r->man->last = r->man->last->parent;
3511 			roffce_node = NULL;
3512 			roffce_lines = 0;
3513 		} else
3514 			roffce_node = r->man->last->parent;
3515 	} else {
3516 		n->flags |= NODE_VALID | NODE_ENDED;
3517 		r->man->last = n;
3518 	}
3519 	n->flags |= NODE_LINE;
3520 	r->man->next = ROFF_NEXT_SIBLING;
3521 	return ROFF_IGN;
3522 }
3523 
3524 static int
3525 roff_manyarg(ROFF_ARGS)
3526 {
3527 	struct roff_node	*n;
3528 	char			*sp, *ep;
3529 
3530 	roff_elem_alloc(r->man, ln, ppos, tok);
3531 	n = r->man->last;
3532 
3533 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3534 		while (*ep != '\0' && *ep != ' ')
3535 			ep++;
3536 		while (*ep == ' ')
3537 			*ep++ = '\0';
3538 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3539 	}
3540 
3541 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3542 	r->man->last = n;
3543 	r->man->next = ROFF_NEXT_SIBLING;
3544 	return ROFF_IGN;
3545 }
3546 
3547 static int
3548 roff_als(ROFF_ARGS)
3549 {
3550 	char		*oldn, *newn, *end, *value;
3551 	size_t		 oldsz, newsz, valsz;
3552 
3553 	newn = oldn = buf->buf + pos;
3554 	if (*newn == '\0')
3555 		return ROFF_IGN;
3556 
3557 	newsz = roff_getname(r, &oldn, ln, pos);
3558 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3559 		return ROFF_IGN;
3560 
3561 	end = oldn;
3562 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3563 	if (oldsz == 0)
3564 		return ROFF_IGN;
3565 
3566 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3567 	    (int)oldsz, oldn);
3568 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3569 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3570 	free(value);
3571 	return ROFF_IGN;
3572 }
3573 
3574 /*
3575  * The .break request only makes sense inside conditionals,
3576  * and that case is already handled in roff_cond_sub().
3577  */
3578 static int
3579 roff_break(ROFF_ARGS)
3580 {
3581 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3582 	return ROFF_IGN;
3583 }
3584 
3585 static int
3586 roff_cc(ROFF_ARGS)
3587 {
3588 	const char	*p;
3589 
3590 	p = buf->buf + pos;
3591 
3592 	if (*p == '\0' || (r->control = *p++) == '.')
3593 		r->control = '\0';
3594 
3595 	if (*p != '\0')
3596 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3597 		    ln, p - buf->buf, "cc ... %s", p);
3598 
3599 	return ROFF_IGN;
3600 }
3601 
3602 static int
3603 roff_char(ROFF_ARGS)
3604 {
3605 	const char	*p, *kp, *vp;
3606 	size_t		 ksz, vsz;
3607 	int		 font;
3608 
3609 	/* Parse the character to be replaced. */
3610 
3611 	kp = buf->buf + pos;
3612 	p = kp + 1;
3613 	if (*kp == '\0' || (*kp == '\\' &&
3614 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3615 	    (*p != ' ' && *p != '\0')) {
3616 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3617 		return ROFF_IGN;
3618 	}
3619 	ksz = p - kp;
3620 	while (*p == ' ')
3621 		p++;
3622 
3623 	/*
3624 	 * If the replacement string contains a font escape sequence,
3625 	 * we have to restore the font at the end.
3626 	 */
3627 
3628 	vp = p;
3629 	vsz = strlen(p);
3630 	font = 0;
3631 	while (*p != '\0') {
3632 		if (*p++ != '\\')
3633 			continue;
3634 		switch (mandoc_escape(&p, NULL, NULL)) {
3635 		case ESCAPE_FONT:
3636 		case ESCAPE_FONTROMAN:
3637 		case ESCAPE_FONTITALIC:
3638 		case ESCAPE_FONTBOLD:
3639 		case ESCAPE_FONTBI:
3640 		case ESCAPE_FONTCR:
3641 		case ESCAPE_FONTCB:
3642 		case ESCAPE_FONTCI:
3643 		case ESCAPE_FONTPREV:
3644 			font++;
3645 			break;
3646 		default:
3647 			break;
3648 		}
3649 	}
3650 	if (font > 1)
3651 		mandoc_msg(MANDOCERR_CHAR_FONT,
3652 		    ln, (int)(vp - buf->buf), "%s", vp);
3653 
3654 	/*
3655 	 * Approximate the effect of .char using the .tr tables.
3656 	 * XXX In groff, .char and .tr interact differently.
3657 	 */
3658 
3659 	if (ksz == 1) {
3660 		if (r->xtab == NULL)
3661 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3662 		assert((unsigned int)*kp < 128);
3663 		free(r->xtab[(int)*kp].p);
3664 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3665 		    "%s%s", vp, font ? "\fP" : "");
3666 	} else {
3667 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3668 		if (font)
3669 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3670 	}
3671 	return ROFF_IGN;
3672 }
3673 
3674 static int
3675 roff_ec(ROFF_ARGS)
3676 {
3677 	const char	*p;
3678 
3679 	p = buf->buf + pos;
3680 	if (*p == '\0')
3681 		r->escape = '\\';
3682 	else {
3683 		r->escape = *p;
3684 		if (*++p != '\0')
3685 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3686 			    (int)(p - buf->buf), "ec ... %s", p);
3687 	}
3688 	return ROFF_IGN;
3689 }
3690 
3691 static int
3692 roff_eo(ROFF_ARGS)
3693 {
3694 	r->escape = '\0';
3695 	if (buf->buf[pos] != '\0')
3696 		mandoc_msg(MANDOCERR_ARG_SKIP,
3697 		    ln, pos, "eo %s", buf->buf + pos);
3698 	return ROFF_IGN;
3699 }
3700 
3701 static int
3702 roff_mc(ROFF_ARGS)
3703 {
3704 	struct roff_node	*n;
3705 	char			*cp;
3706 
3707 	/* Parse the first argument. */
3708 
3709 	cp = buf->buf + pos;
3710 	if (*cp != '\0')
3711 		cp++;
3712 	if (buf->buf[pos] == '\\') {
3713 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3714 		case ESCAPE_SPECIAL:
3715 		case ESCAPE_UNICODE:
3716 		case ESCAPE_NUMBERED:
3717 			break;
3718 		default:
3719 			*cp = '\0';
3720 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3721 			    "mc %s", buf->buf + pos);
3722 			buf->buf[pos] = '\0';
3723 			break;
3724 		}
3725 	}
3726 
3727 	/* Ignore additional arguments. */
3728 
3729 	while (*cp == ' ')
3730 		*cp++ = '\0';
3731 	if (*cp != '\0') {
3732 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3733 		    "mc ... %s", cp);
3734 		*cp = '\0';
3735 	}
3736 
3737 	/* Create the .mc node. */
3738 
3739 	roff_elem_alloc(r->man, ln, ppos, tok);
3740 	n = r->man->last;
3741 	if (buf->buf[pos] != '\0')
3742 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3743 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3744 	r->man->last = n;
3745 	r->man->next = ROFF_NEXT_SIBLING;
3746 	return ROFF_IGN;
3747 }
3748 
3749 static int
3750 roff_nop(ROFF_ARGS)
3751 {
3752 	while (buf->buf[pos] == ' ')
3753 		pos++;
3754 	*offs = pos;
3755 	return ROFF_RERUN;
3756 }
3757 
3758 static int
3759 roff_tr(ROFF_ARGS)
3760 {
3761 	const char	*p, *first, *second;
3762 	size_t		 fsz, ssz;
3763 
3764 	p = buf->buf + pos;
3765 
3766 	if (*p == '\0') {
3767 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3768 		return ROFF_IGN;
3769 	}
3770 
3771 	while (*p != '\0') {
3772 		fsz = ssz = 1;
3773 
3774 		first = p++;
3775 		if (*first == '\\') {
3776 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3777 				return ROFF_IGN;
3778 			fsz = (size_t)(p - first);
3779 		}
3780 
3781 		second = p++;
3782 		if (*second == '\\') {
3783 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3784 				return ROFF_IGN;
3785 			ssz = (size_t)(p - second);
3786 		} else if (*second == '\0') {
3787 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3788 			    (int)(first - buf->buf), "tr %s", first);
3789 			second = " ";
3790 			p--;
3791 		}
3792 
3793 		if (fsz > 1) {
3794 			roff_setstrn(&r->xmbtab, first, fsz,
3795 			    second, ssz, 0);
3796 			continue;
3797 		}
3798 
3799 		if (r->xtab == NULL)
3800 			r->xtab = mandoc_calloc(128,
3801 			    sizeof(struct roffstr));
3802 
3803 		free(r->xtab[(int)*first].p);
3804 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3805 		r->xtab[(int)*first].sz = ssz;
3806 	}
3807 
3808 	return ROFF_IGN;
3809 }
3810 
3811 /*
3812  * Implementation of the .return request.
3813  * There is no need to call roff_userret() from here.
3814  * The read module will call that after rewinding the reader stack
3815  * to the place from where the current macro was called.
3816  */
3817 static int
3818 roff_return(ROFF_ARGS)
3819 {
3820 	if (r->mstackpos >= 0)
3821 		return ROFF_IGN | ROFF_USERRET;
3822 
3823 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3824 	return ROFF_IGN;
3825 }
3826 
3827 static int
3828 roff_rn(ROFF_ARGS)
3829 {
3830 	const char	*value;
3831 	char		*oldn, *newn, *end;
3832 	size_t		 oldsz, newsz;
3833 	int		 deftype;
3834 
3835 	oldn = newn = buf->buf + pos;
3836 	if (*oldn == '\0')
3837 		return ROFF_IGN;
3838 
3839 	oldsz = roff_getname(r, &newn, ln, pos);
3840 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3841 		return ROFF_IGN;
3842 
3843 	end = newn;
3844 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3845 	if (newsz == 0)
3846 		return ROFF_IGN;
3847 
3848 	deftype = ROFFDEF_ANY;
3849 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3850 	switch (deftype) {
3851 	case ROFFDEF_USER:
3852 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3853 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3854 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3855 		break;
3856 	case ROFFDEF_PRE:
3857 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3858 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3859 		break;
3860 	case ROFFDEF_REN:
3861 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3862 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3863 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3864 		break;
3865 	case ROFFDEF_STD:
3866 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3867 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3868 		break;
3869 	default:
3870 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3871 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3872 		break;
3873 	}
3874 	return ROFF_IGN;
3875 }
3876 
3877 static int
3878 roff_shift(ROFF_ARGS)
3879 {
3880 	struct mctx	*ctx;
3881 	int		 argpos, levels, i;
3882 
3883 	argpos = pos;
3884 	levels = 1;
3885 	if (buf->buf[pos] != '\0' &&
3886 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3887 		mandoc_msg(MANDOCERR_CE_NONUM,
3888 		    ln, pos, "shift %s", buf->buf + pos);
3889 		levels = 1;
3890 	}
3891 	if (r->mstackpos < 0) {
3892 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3893 		return ROFF_IGN;
3894 	}
3895 	ctx = r->mstack + r->mstackpos;
3896 	if (levels > ctx->argc) {
3897 		mandoc_msg(MANDOCERR_SHIFT,
3898 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3899 		levels = ctx->argc;
3900 	}
3901 	if (levels < 0) {
3902 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3903 		levels = 0;
3904 	}
3905 	if (levels == 0)
3906 		return ROFF_IGN;
3907 	for (i = 0; i < levels; i++)
3908 		free(ctx->argv[i]);
3909 	ctx->argc -= levels;
3910 	for (i = 0; i < ctx->argc; i++)
3911 		ctx->argv[i] = ctx->argv[i + levels];
3912 	return ROFF_IGN;
3913 }
3914 
3915 static int
3916 roff_so(ROFF_ARGS)
3917 {
3918 	char *name, *cp;
3919 
3920 	name = buf->buf + pos;
3921 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3922 
3923 	/*
3924 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3925 	 * opening anything that's not in our cwd or anything beneath
3926 	 * it.  Thus, explicitly disallow traversing up the file-system
3927 	 * or using absolute paths.
3928 	 */
3929 
3930 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3931 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3932 		buf->sz = mandoc_asprintf(&cp,
3933 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3934 		free(buf->buf);
3935 		buf->buf = cp;
3936 		*offs = 0;
3937 		return ROFF_REPARSE;
3938 	}
3939 
3940 	*offs = pos;
3941 	return ROFF_SO;
3942 }
3943 
3944 /* --- user defined strings and macros ------------------------------------ */
3945 
3946 static int
3947 roff_userdef(ROFF_ARGS)
3948 {
3949 	struct mctx	 *ctx;
3950 	char		 *arg, *ap, *dst, *src;
3951 	size_t		  sz;
3952 
3953 	/* If the macro is empty, ignore it altogether. */
3954 
3955 	if (*r->current_string == '\0')
3956 		return ROFF_IGN;
3957 
3958 	/* Initialize a new macro stack context. */
3959 
3960 	if (++r->mstackpos == r->mstacksz) {
3961 		r->mstack = mandoc_recallocarray(r->mstack,
3962 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3963 		r->mstacksz += 8;
3964 	}
3965 	ctx = r->mstack + r->mstackpos;
3966 	ctx->argc = 0;
3967 
3968 	/*
3969 	 * Collect pointers to macro argument strings,
3970 	 * NUL-terminating them and escaping quotes.
3971 	 */
3972 
3973 	src = buf->buf + pos;
3974 	while (*src != '\0') {
3975 		if (ctx->argc == ctx->argsz) {
3976 			ctx->argsz += 8;
3977 			ctx->argv = mandoc_reallocarray(ctx->argv,
3978 			    ctx->argsz, sizeof(*ctx->argv));
3979 		}
3980 		arg = roff_getarg(r, &src, ln, &pos);
3981 		sz = 1;  /* For the terminating NUL. */
3982 		for (ap = arg; *ap != '\0'; ap++)
3983 			sz += *ap == '"' ? 4 : 1;
3984 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3985 		for (ap = arg; *ap != '\0'; ap++) {
3986 			if (*ap == '"') {
3987 				memcpy(dst, "\\(dq", 4);
3988 				dst += 4;
3989 			} else
3990 				*dst++ = *ap;
3991 		}
3992 		*dst = '\0';
3993 		free(arg);
3994 	}
3995 
3996 	/* Replace the macro invocation by the macro definition. */
3997 
3998 	free(buf->buf);
3999 	buf->buf = mandoc_strdup(r->current_string);
4000 	buf->sz = strlen(buf->buf) + 1;
4001 	*offs = 0;
4002 
4003 	return buf->buf[buf->sz - 2] == '\n' ?
4004 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4005 }
4006 
4007 /*
4008  * Calling a high-level macro that was renamed with .rn.
4009  * r->current_string has already been set up by roff_parse().
4010  */
4011 static int
4012 roff_renamed(ROFF_ARGS)
4013 {
4014 	char	*nbuf;
4015 
4016 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4017 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4018 	free(buf->buf);
4019 	buf->buf = nbuf;
4020 	*offs = 0;
4021 	return ROFF_CONT;
4022 }
4023 
4024 /*
4025  * Measure the length in bytes of the roff identifier at *cpp
4026  * and advance the pointer to the next word.
4027  */
4028 static size_t
4029 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4030 {
4031 	char	 *name, *cp;
4032 	int	  namesz, inam, iend;
4033 
4034 	name = *cpp;
4035 	if (*name == '\0')
4036 		return 0;
4037 
4038 	/* Advance cp to the byte after the end of the name. */
4039 
4040 	cp = name;
4041 	namesz = 0;
4042 	for (;;) {
4043 		if (*cp == '\0')
4044 			break;
4045 		if (*cp == ' ' || *cp == '\t') {
4046 			cp++;
4047 			break;
4048 		}
4049 		if (*cp != '\\') {
4050 			if (name + namesz < cp) {
4051 				name[namesz] = *cp;
4052 				*cp = ' ';
4053 			}
4054 			namesz++;
4055 			cp++;
4056 			continue;
4057 		}
4058 		if (cp[1] == '{' || cp[1] == '}')
4059 			break;
4060 		if (roff_escape(cp, 0, 0, NULL, &inam,
4061 		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4062 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4063 			    "%.*s%.*s", namesz, name, iend, cp);
4064 			cp += iend;
4065 			break;
4066 		}
4067 
4068 		/*
4069 		 * In an identifier, \\, \., \G and so on
4070 		 * are reduced to \, ., G and so on,
4071 		 * vaguely similar to copy mode.
4072 		 */
4073 
4074 		name[namesz++] = cp[inam];
4075 		while (iend--) {
4076 			if (cp >= name + namesz)
4077 				*cp = ' ';
4078 			cp++;
4079 		}
4080 	}
4081 
4082 	/* Read past spaces. */
4083 
4084 	while (*cp == ' ')
4085 		cp++;
4086 
4087 	*cpp = cp;
4088 	return namesz;
4089 }
4090 
4091 /*
4092  * Store *string into the user-defined string called *name.
4093  * To clear an existing entry, call with (*r, *name, NULL, 0).
4094  * append == 0: replace mode
4095  * append == 1: single-line append mode
4096  * append == 2: multiline append mode, append '\n' after each call
4097  */
4098 static void
4099 roff_setstr(struct roff *r, const char *name, const char *string,
4100 	int append)
4101 {
4102 	size_t	 namesz;
4103 
4104 	namesz = strlen(name);
4105 	roff_setstrn(&r->strtab, name, namesz, string,
4106 	    string ? strlen(string) : 0, append);
4107 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4108 }
4109 
4110 static void
4111 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4112 		const char *string, size_t stringsz, int append)
4113 {
4114 	struct roffkv	*n;
4115 	char		*c;
4116 	int		 i;
4117 	size_t		 oldch, newch;
4118 
4119 	/* Search for an existing string with the same name. */
4120 	n = *r;
4121 
4122 	while (n && (namesz != n->key.sz ||
4123 			strncmp(n->key.p, name, namesz)))
4124 		n = n->next;
4125 
4126 	if (NULL == n) {
4127 		/* Create a new string table entry. */
4128 		n = mandoc_malloc(sizeof(struct roffkv));
4129 		n->key.p = mandoc_strndup(name, namesz);
4130 		n->key.sz = namesz;
4131 		n->val.p = NULL;
4132 		n->val.sz = 0;
4133 		n->next = *r;
4134 		*r = n;
4135 	} else if (0 == append) {
4136 		free(n->val.p);
4137 		n->val.p = NULL;
4138 		n->val.sz = 0;
4139 	}
4140 
4141 	if (NULL == string)
4142 		return;
4143 
4144 	/*
4145 	 * One additional byte for the '\n' in multiline mode,
4146 	 * and one for the terminating '\0'.
4147 	 */
4148 	newch = stringsz + (1 < append ? 2u : 1u);
4149 
4150 	if (NULL == n->val.p) {
4151 		n->val.p = mandoc_malloc(newch);
4152 		*n->val.p = '\0';
4153 		oldch = 0;
4154 	} else {
4155 		oldch = n->val.sz;
4156 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4157 	}
4158 
4159 	/* Skip existing content in the destination buffer. */
4160 	c = n->val.p + (int)oldch;
4161 
4162 	/* Append new content to the destination buffer. */
4163 	i = 0;
4164 	while (i < (int)stringsz) {
4165 		/*
4166 		 * Rudimentary roff copy mode:
4167 		 * Handle escaped backslashes.
4168 		 */
4169 		if ('\\' == string[i] && '\\' == string[i + 1])
4170 			i++;
4171 		*c++ = string[i++];
4172 	}
4173 
4174 	/* Append terminating bytes. */
4175 	if (1 < append)
4176 		*c++ = '\n';
4177 
4178 	*c = '\0';
4179 	n->val.sz = (int)(c - n->val.p);
4180 }
4181 
4182 static const char *
4183 roff_getstrn(struct roff *r, const char *name, size_t len,
4184     int *deftype)
4185 {
4186 	const struct roffkv	*n;
4187 	int			 found, i;
4188 	enum roff_tok		 tok;
4189 
4190 	found = 0;
4191 	for (n = r->strtab; n != NULL; n = n->next) {
4192 		if (strncmp(name, n->key.p, len) != 0 ||
4193 		    n->key.p[len] != '\0' || n->val.p == NULL)
4194 			continue;
4195 		if (*deftype & ROFFDEF_USER) {
4196 			*deftype = ROFFDEF_USER;
4197 			return n->val.p;
4198 		} else {
4199 			found = 1;
4200 			break;
4201 		}
4202 	}
4203 	for (n = r->rentab; n != NULL; n = n->next) {
4204 		if (strncmp(name, n->key.p, len) != 0 ||
4205 		    n->key.p[len] != '\0' || n->val.p == NULL)
4206 			continue;
4207 		if (*deftype & ROFFDEF_REN) {
4208 			*deftype = ROFFDEF_REN;
4209 			return n->val.p;
4210 		} else {
4211 			found = 1;
4212 			break;
4213 		}
4214 	}
4215 	for (i = 0; i < PREDEFS_MAX; i++) {
4216 		if (strncmp(name, predefs[i].name, len) != 0 ||
4217 		    predefs[i].name[len] != '\0')
4218 			continue;
4219 		if (*deftype & ROFFDEF_PRE) {
4220 			*deftype = ROFFDEF_PRE;
4221 			return predefs[i].str;
4222 		} else {
4223 			found = 1;
4224 			break;
4225 		}
4226 	}
4227 	if (r->man->meta.macroset != MACROSET_MAN) {
4228 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4229 			if (strncmp(name, roff_name[tok], len) != 0 ||
4230 			    roff_name[tok][len] != '\0')
4231 				continue;
4232 			if (*deftype & ROFFDEF_STD) {
4233 				*deftype = ROFFDEF_STD;
4234 				return NULL;
4235 			} else {
4236 				found = 1;
4237 				break;
4238 			}
4239 		}
4240 	}
4241 	if (r->man->meta.macroset != MACROSET_MDOC) {
4242 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4243 			if (strncmp(name, roff_name[tok], len) != 0 ||
4244 			    roff_name[tok][len] != '\0')
4245 				continue;
4246 			if (*deftype & ROFFDEF_STD) {
4247 				*deftype = ROFFDEF_STD;
4248 				return NULL;
4249 			} else {
4250 				found = 1;
4251 				break;
4252 			}
4253 		}
4254 	}
4255 
4256 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4257 		if (*deftype & ROFFDEF_REN) {
4258 			/*
4259 			 * This might still be a request,
4260 			 * so do not treat it as undefined yet.
4261 			 */
4262 			*deftype = ROFFDEF_UNDEF;
4263 			return NULL;
4264 		}
4265 
4266 		/* Using an undefined string defines it to be empty. */
4267 
4268 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4269 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4270 	}
4271 
4272 	*deftype = 0;
4273 	return NULL;
4274 }
4275 
4276 static void
4277 roff_freestr(struct roffkv *r)
4278 {
4279 	struct roffkv	 *n, *nn;
4280 
4281 	for (n = r; n; n = nn) {
4282 		free(n->key.p);
4283 		free(n->val.p);
4284 		nn = n->next;
4285 		free(n);
4286 	}
4287 }
4288 
4289 /* --- accessors and utility functions ------------------------------------ */
4290 
4291 /*
4292  * Duplicate an input string, making the appropriate character
4293  * conversations (as stipulated by `tr') along the way.
4294  * Returns a heap-allocated string with all the replacements made.
4295  */
4296 char *
4297 roff_strdup(const struct roff *r, const char *p)
4298 {
4299 	const struct roffkv *cp;
4300 	char		*res;
4301 	const char	*pp;
4302 	size_t		 ssz, sz;
4303 	enum mandoc_esc	 esc;
4304 
4305 	if (NULL == r->xmbtab && NULL == r->xtab)
4306 		return mandoc_strdup(p);
4307 	else if ('\0' == *p)
4308 		return mandoc_strdup("");
4309 
4310 	/*
4311 	 * Step through each character looking for term matches
4312 	 * (remember that a `tr' can be invoked with an escape, which is
4313 	 * a glyph but the escape is multi-character).
4314 	 * We only do this if the character hash has been initialised
4315 	 * and the string is >0 length.
4316 	 */
4317 
4318 	res = NULL;
4319 	ssz = 0;
4320 
4321 	while ('\0' != *p) {
4322 		assert((unsigned int)*p < 128);
4323 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4324 			sz = r->xtab[(int)*p].sz;
4325 			res = mandoc_realloc(res, ssz + sz + 1);
4326 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4327 			ssz += sz;
4328 			p++;
4329 			continue;
4330 		} else if ('\\' != *p) {
4331 			res = mandoc_realloc(res, ssz + 2);
4332 			res[ssz++] = *p++;
4333 			continue;
4334 		}
4335 
4336 		/* Search for term matches. */
4337 		for (cp = r->xmbtab; cp; cp = cp->next)
4338 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4339 				break;
4340 
4341 		if (NULL != cp) {
4342 			/*
4343 			 * A match has been found.
4344 			 * Append the match to the array and move
4345 			 * forward by its keysize.
4346 			 */
4347 			res = mandoc_realloc(res,
4348 			    ssz + cp->val.sz + 1);
4349 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4350 			ssz += cp->val.sz;
4351 			p += (int)cp->key.sz;
4352 			continue;
4353 		}
4354 
4355 		/*
4356 		 * Handle escapes carefully: we need to copy
4357 		 * over just the escape itself, or else we might
4358 		 * do replacements within the escape itself.
4359 		 * Make sure to pass along the bogus string.
4360 		 */
4361 		pp = p++;
4362 		esc = mandoc_escape(&p, NULL, NULL);
4363 		if (ESCAPE_ERROR == esc) {
4364 			sz = strlen(pp);
4365 			res = mandoc_realloc(res, ssz + sz + 1);
4366 			memcpy(res + ssz, pp, sz);
4367 			break;
4368 		}
4369 		/*
4370 		 * We bail out on bad escapes.
4371 		 * No need to warn: we already did so when
4372 		 * roff_expand() was called.
4373 		 */
4374 		sz = (int)(p - pp);
4375 		res = mandoc_realloc(res, ssz + sz + 1);
4376 		memcpy(res + ssz, pp, sz);
4377 		ssz += sz;
4378 	}
4379 
4380 	res[(int)ssz] = '\0';
4381 	return res;
4382 }
4383 
4384 int
4385 roff_getformat(const struct roff *r)
4386 {
4387 
4388 	return r->format;
4389 }
4390 
4391 /*
4392  * Find out whether a line is a macro line or not.
4393  * If it is, adjust the current position and return one; if it isn't,
4394  * return zero and don't change the current position.
4395  * If the control character has been set with `.cc', then let that grain
4396  * precedence.
4397  * This is slightly contrary to groff, where using the non-breaking
4398  * control character when `cc' has been invoked will cause the
4399  * non-breaking macro contents to be printed verbatim.
4400  */
4401 int
4402 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4403 {
4404 	int		pos;
4405 
4406 	pos = *ppos;
4407 
4408 	if (r->control != '\0' && cp[pos] == r->control)
4409 		pos++;
4410 	else if (r->control != '\0')
4411 		return 0;
4412 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4413 		pos += 2;
4414 	else if ('.' == cp[pos] || '\'' == cp[pos])
4415 		pos++;
4416 	else
4417 		return 0;
4418 
4419 	while (' ' == cp[pos] || '\t' == cp[pos])
4420 		pos++;
4421 
4422 	*ppos = pos;
4423 	return 1;
4424 }
4425