xref: /freebsd/contrib/mandoc/roff.c (revision 13ea0450a9c8742119d36f3bf8f47accdce46e54)
1 /*	$Id: roff.c,v 1.329 2018/08/01 15:40:17 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38 
39 /*
 * Maximum number of string expansions per line, to break infinite loops.
 * NOTE(review): the code that enforces this limit is not part of this
 * excerpt; confirm where the counter is kept before changing the value.
 */
40 #define	EXPAND_LIMIT	1000
41 
42 /*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several can be OR'ed into one
 * mask; note that the lowest bit used is (1 << 1), not (1 << 0).
 * ROFFDEF_ANY is the union of the four "defined" types and does not
 * include ROFFDEF_UNDEF.
 */
43 #define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
44 #define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
45 #define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
46 #define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
47 #define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
48 			 ROFFDEF_REN | ROFFDEF_STD)
49 #define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
50 
51 /* --- data types --------------------------------------------------------- */
52 
53 /*
54  * An incredibly-simple string buffer: a pointer to the text together
 * with its cached length.
55  */
56 struct	roffstr {
57 	char		*p; /* nil-terminated buffer */
58 	size_t		 sz; /* saved strlen(p), terminator not counted */
59 };
60 
61 /*
62  * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab and xmbtab.
63  */
64 struct	roffkv {
65 	struct roffstr	 key; /* name used for lookup */
66 	struct roffstr	 val; /* value stored under that name */
67 	struct roffkv	*next; /* next in list */
68 };
69 
70 /*
71  * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
72  */
73 struct	roffreg {
74 	struct roffstr	 key; /* register name */
75 	int		 val; /* current value of the register */
76 	int		 step; /* presumably the auto-increment step - confirm */
77 	struct roffreg	*next; /* next in list */
78 };
79 
80 /*
81  * Association of request and macro names with token IDs.
 * These are the entries stored in the hash tables built by
 * roffhash_alloc(); the name is the hash key and is stored inline
 * in a flexible array member, so each entry is allocated with room
 * for the name and its terminating nil byte.
82  */
83 struct	roffreq {
84 	enum roff_tok	 tok; /* token ID the name maps to */
85 	char		 name[]; /* nil-terminated name, allocated inline */
86 };
87 
/*
 * The complete state of one roff parser instance.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
88 struct	roff {
89 	struct mparse	*parse; /* parse point */
90 	struct roff_man	*man; /* mdoc or man parser */
91 	struct roffnode	*last; /* leaf of stack */
92 	int		*rstack; /* stack of inverted `ie' values */
93 	struct ohash	*reqtab; /* request lookup table */
94 	struct roffreg	*regtab; /* number registers */
95 	struct roffkv	*strtab; /* user-defined strings & macros */
96 	struct roffkv	*rentab; /* renamed strings & macros */
97 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
98 	struct roffstr	*xtab; /* single-byte trans table (`tr') */
99 	const char	*current_string; /* value of last called user macro */
100 	struct tbl_node	*first_tbl; /* first table parsed */
101 	struct tbl_node	*last_tbl; /* last table parsed */
102 	struct tbl_node	*tbl; /* current table being parsed */
103 	struct eqn_node	*last_eqn; /* equation parser */
104 	struct eqn_node	*eqn; /* active equation parser */
105 	int		 eqn_inline; /* current equation is inline */
106 	int		 options; /* parse options */
107 	int		 rstacksz; /* current size limit of rstack */
108 	int		 rstackpos; /* position in rstack, -1 after (re)init */
109 	int		 format; /* current file in mdoc or man format */
110 	int		 argc; /* number of args of the last macro */
111 	char		 control; /* control character, '\0' after reset */
112 	char		 escape; /* escape character, '\\' by default */
113 };
114 
/*
 * One entry on the stack of pending roff instructions.
 * The top of the stack is the last member of struct roff; entries
 * are linked towards the bottom through parent.  Entries are created
 * by roffnode_push() and destroyed by roffnode_pop(), which also
 * frees name and end.
 */
115 struct	roffnode {
116 	enum roff_tok	 tok; /* type of node */
117 	struct roffnode	*parent; /* up one in stack */
118 	int		 line; /* parse line */
119 	int		 col; /* parse col */
120 	char		*name; /* node name, e.g. macro name */
121 	char		*end; /* end-rules: custom token */
122 	int		 endspan; /* end-rules: next-line or infty */
123 	int		 rule; /* current evaluation rule */
124 };
125 
/*
 * Common parameter list of all request handlers.  Using a macro keeps
 * the many handler prototypes and definitions in sync with the
 * roffproc function pointer type declared right below.
 */
126 #define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
127 			 enum roff_tok tok, /* tok of macro */ \
128 			 struct buf *buf, /* input buffer */ \
129 			 int ln, /* parse line */ \
130 			 int ppos, /* original pos in buffer */ \
131 			 int pos, /* current pos in buffer */ \
132 			 int *offs /* reset offset of buffer data */
133 
134 typedef	enum rofferr (*roffproc)(ROFF_ARGS);  /* request handler */
135 
/*
 * Handler set for one request; the roffs[] table holds one of these
 * per token.  Handlers that do not apply are NULL.
 */
136 struct	roffmac {
137 	roffproc	 proc; /* process new macro */
138 	roffproc	 text; /* process as child text of macro */
139 	roffproc	 sub; /* process as child of macro */
140 	int		 flags; /* 0 or ROFFMAC_STRUCT */
141 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
142 };
143 
/*
 * One predefined string: the input name and the text it stands for.
 */
144 struct	predef {
145 	const char	*name; /* predefined input name */
146 	const char	*str; /* replacement symbol */
147 };
148 
/*
 * Expands to one struct predef initializer including the trailing
 * comma.  Presumably used by the lines of predefs.in - confirm there.
 */
149 #define	PREDEF(__name, __str) \
150 	{ (__name), (__str) },
151 
152 /* --- function prototypes ------------------------------------------------ */
153 
/*
 * Forward declarations of the file-local functions.
 * Every function declared with ROFF_ARGS matches the roffproc type
 * and returns an enum rofferr, so it can be stored in one of the
 * handler slots of struct roffmac.
 */
154 static	void		 roffnode_cleanscope(struct roff *);
155 static	void		 roffnode_pop(struct roff *);
156 static	void		 roffnode_push(struct roff *, enum roff_tok,
157 				const char *, int, int);
158 static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
159 static	enum rofferr	 roff_als(ROFF_ARGS);
160 static	enum rofferr	 roff_block(ROFF_ARGS);
161 static	enum rofferr	 roff_block_text(ROFF_ARGS);
162 static	enum rofferr	 roff_block_sub(ROFF_ARGS);
163 static	enum rofferr	 roff_br(ROFF_ARGS);
164 static	enum rofferr	 roff_cblock(ROFF_ARGS);
165 static	enum rofferr	 roff_cc(ROFF_ARGS);
166 static	void		 roff_ccond(struct roff *, int, int);
167 static	enum rofferr	 roff_cond(ROFF_ARGS);
168 static	enum rofferr	 roff_cond_text(ROFF_ARGS);
169 static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
170 static	enum rofferr	 roff_ds(ROFF_ARGS);
171 static	enum rofferr	 roff_ec(ROFF_ARGS);
172 static	enum rofferr	 roff_eo(ROFF_ARGS);
173 static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
174 static	int		 roff_evalcond(struct roff *r, int, char *, int *);
175 static	int		 roff_evalnum(struct roff *, int,
176 				const char *, int *, int *, int);
177 static	int		 roff_evalpar(struct roff *, int,
178 				const char *, int *, int *, int);
179 static	int		 roff_evalstrcond(const char *, int *);
180 static	void		 roff_free1(struct roff *);
181 static	void		 roff_freereg(struct roffreg *);
182 static	void		 roff_freestr(struct roffkv *);
183 static	size_t		 roff_getname(struct roff *, char **, int, int);
184 static	int		 roff_getnum(const char *, int *, int *, int);
185 static	int		 roff_getop(const char *, int *, char *);
186 static	int		 roff_getregn(struct roff *,
187 				const char *, size_t, char);
188 static	int		 roff_getregro(const struct roff *,
189 				const char *name);
190 static	const char	*roff_getstrn(struct roff *,
191 				const char *, size_t, int *);
192 static	int		 roff_hasregn(const struct roff *,
193 				const char *, size_t);
194 static	enum rofferr	 roff_insec(ROFF_ARGS);
195 static	enum rofferr	 roff_it(ROFF_ARGS);
196 static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
197 static	void		 roff_man_alloc1(struct roff_man *);
198 static	void		 roff_man_free1(struct roff_man *);
199 static	enum rofferr	 roff_manyarg(ROFF_ARGS);
200 static	enum rofferr	 roff_nr(ROFF_ARGS);
201 static	enum rofferr	 roff_onearg(ROFF_ARGS);
202 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
203 				int, int);
204 static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
205 				int, int *);
206 static	enum rofferr	 roff_renamed(ROFF_ARGS);
207 static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
208 static	enum rofferr	 roff_rm(ROFF_ARGS);
209 static	enum rofferr	 roff_rn(ROFF_ARGS);
210 static	enum rofferr	 roff_rr(ROFF_ARGS);
211 static	void		 roff_setregn(struct roff *, const char *,
212 				size_t, int, char, int);
213 static	void		 roff_setstr(struct roff *,
214 				const char *, const char *, int);
215 static	void		 roff_setstrn(struct roffkv **, const char *,
216 				size_t, const char *, size_t, int);
217 static	enum rofferr	 roff_so(ROFF_ARGS);
218 static	enum rofferr	 roff_tr(ROFF_ARGS);
219 static	enum rofferr	 roff_Dd(ROFF_ARGS);
220 static	enum rofferr	 roff_TE(ROFF_ARGS);
221 static	enum rofferr	 roff_TS(ROFF_ARGS);
222 static	enum rofferr	 roff_EQ(ROFF_ARGS);
223 static	enum rofferr	 roff_EN(ROFF_ARGS);
224 static	enum rofferr	 roff_T_(ROFF_ARGS);
225 static	enum rofferr	 roff_unsupp(ROFF_ARGS);
226 static	enum rofferr	 roff_userdef(ROFF_ARGS);
227 
228 /* --- constant data ------------------------------------------------------ */
229 
/*
 * Flag bits for the numeric parsing helpers named in the comments
 * below; each is a distinct bit so both can be combined in one mask.
 */
230 #define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
231 #define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
232 
/*
 * Names of all requests and macros, indexed by token ID and exported
 * read-only through roff_name.  roffhash_alloc() walks a range of
 * this array and skips the slots that hold NULL.
 * The first part appears to parallel the roffs[] dispatch table
 * entry by entry (compare the trailing comments there); the groups
 * that follow look like the mdoc(7) and then the man(7) macro names.
 * NOTE(review): the table itself is neither static nor const, and an
 * identifier starting with two underscores is reserved for the
 * implementation by the C standard; renaming it would need a check
 * for users outside this file.
 */
233 const char *__roff_name[MAN_MAX + 1] = {
234 	"br",		"ce",		"ft",		"ll",
235 	"mc",		"po",		"rj",		"sp",
236 	"ta",		"ti",		NULL,  /* NULL: cf. ROFF_MAX in roffs[] */
237 	"ab",		"ad",		"af",		"aln",
238 	"als",		"am",		"am1",		"ami",
239 	"ami1",		"as",		"as1",		"asciify",
240 	"backtrace",	"bd",		"bleedat",	"blm",
241         "box",		"boxa",		"bp",		"BP",
242 	"break",	"breakchar",	"brnl",		"brp",
243 	"brpnl",	"c2",		"cc",
244 	"cf",		"cflags",	"ch",		"char",
245 	"chop",		"class",	"close",	"CL",
246 	"color",	"composite",	"continue",	"cp",
247 	"cropat",	"cs",		"cu",		"da",
248 	"dch",		"Dd",		"de",		"de1",
249 	"defcolor",	"dei",		"dei1",		"device",
250 	"devicem",	"di",		"do",		"ds",
251 	"ds1",		"dwh",		"dt",		"ec",
252 	"ecr",		"ecs",		"el",		"em",
253 	"EN",		"eo",		"EP",		"EQ",
254 	"errprint",	"ev",		"evc",		"ex",
255 	"fallback",	"fam",		"fc",		"fchar",
256 	"fcolor",	"fdeferlig",	"feature",	"fkern",
257 	"fl",		"flig",		"fp",		"fps",
258 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
259 	"fzoom",	"gcolor",	"hc",		"hcode",
260 	"hidechar",	"hla",		"hlm",		"hpf",
261 	"hpfa",		"hpfcode",	"hw",		"hy",
262 	"hylang",	"hylen",	"hym",		"hypp",
263 	"hys",		"ie",		"if",		"ig",
264 	"index",	"it",		"itc",		"IX",
265 	"kern",		"kernafter",	"kernbefore",	"kernpair",
266 	"lc",		"lc_ctype",	"lds",		"length",
267 	"letadj",	"lf",		"lg",		"lhang",
268 	"linetabs",	"lnr",		"lnrf",		"lpfx",
269 	"ls",		"lsm",		"lt",
270 	"mediasize",	"minss",	"mk",		"mso",
271 	"na",		"ne",		"nh",		"nhychar",
272 	"nm",		"nn",		"nop",		"nr",
273 	"nrf",		"nroff",	"ns",		"nx",
274 	"open",		"opena",	"os",		"output",
275 	"padj",		"papersize",	"pc",		"pev",
276 	"pi",		"PI",		"pl",		"pm",
277 	"pn",		"pnr",		"ps",
278 	"psbb",		"pshape",	"pso",		"ptr",
279 	"pvs",		"rchar",	"rd",		"recursionlimit",
280 	"return",	"rfschar",	"rhang",
281 	"rm",		"rn",		"rnn",		"rr",
282 	"rs",		"rt",		"schar",	"sentchar",
283 	"shc",		"shift",	"sizes",	"so",
284 	"spacewidth",	"special",	"spreadwarn",	"ss",
285 	"sty",		"substring",	"sv",		"sy",
286 	"T&",		"tc",		"TE",
287 	"TH",		"tkf",		"tl",
288 	"tm",		"tm1",		"tmc",		"tr",
289 	"track",	"transchar",	"trf",		"trimat",
290 	"trin",		"trnt",		"troff",	"TS",
291 	"uf",		"ul",		"unformat",	"unwatch",
292 	"unwatchn",	"vpt",		"vs",		"warn",
293 	"warnscale",	"watch",	"watchlength",	"watchn",
294 	"wh",		"while",	"write",	"writec",
295 	"writem",	"xflag",	".",		NULL,  /* NULL: cf. roff_renamed slot */
296 	NULL,		"text",  /* NULL: cf. roff_userdef slot in roffs[] */
297 	"Dd",		"Dt",		"Os",		"Sh",  /* presumably mdoc(7) from here */
298 	"Ss",		"Pp",		"D1",		"Dl",
299 	"Bd",		"Ed",		"Bl",		"El",
300 	"It",		"Ad",		"An",		"Ap",
301 	"Ar",		"Cd",		"Cm",		"Dv",
302 	"Er",		"Ev",		"Ex",		"Fa",
303 	"Fd",		"Fl",		"Fn",		"Ft",
304 	"Ic",		"In",		"Li",		"Nd",
305 	"Nm",		"Op",		"Ot",		"Pa",
306 	"Rv",		"St",		"Va",		"Vt",
307 	"Xr",		"%A",		"%B",		"%D",
308 	"%I",		"%J",		"%N",		"%O",
309 	"%P",		"%R",		"%T",		"%V",
310 	"Ac",		"Ao",		"Aq",		"At",
311 	"Bc",		"Bf",		"Bo",		"Bq",
312 	"Bsx",		"Bx",		"Db",		"Dc",
313 	"Do",		"Dq",		"Ec",		"Ef",
314 	"Em",		"Eo",		"Fx",		"Ms",
315 	"No",		"Ns",		"Nx",		"Ox",
316 	"Pc",		"Pf",		"Po",		"Pq",
317 	"Qc",		"Ql",		"Qo",		"Qq",
318 	"Re",		"Rs",		"Sc",		"So",
319 	"Sq",		"Sm",		"Sx",		"Sy",
320 	"Tn",		"Ux",		"Xc",		"Xo",
321 	"Fo",		"Fc",		"Oo",		"Oc",
322 	"Bk",		"Ek",		"Bt",		"Hf",
323 	"Fr",		"Ud",		"Lb",		"Lp",
324 	"Lk",		"Mt",		"Brq",		"Bro",
325 	"Brc",		"%C",		"Es",		"En",
326 	"Dx",		"%Q",		"%U",		"Ta",
327 	NULL,  /* separator slot without a name */
328 	"TH",		"SH",		"SS",		"TP",  /* presumably man(7) from here */
329 	"LP",		"PP",		"P",		"IP",
330 	"HP",		"SM",		"SB",		"BI",
331 	"IB",		"BR",		"RB",		"R",
332 	"B",		"I",		"IR",		"RI",
333 	"nf",		"fi",
334 	"RE",		"RS",		"DT",		"UC",
335 	"PD",		"AT",		"in",
336 	"OP",		"EX",		"EE",		"UR",
337 	"UE",		"MT",		"ME",		NULL
338 };
339 const	char *const *roff_name = __roff_name;  /* read-only view of the table */
340 
/*
 * Dispatch table of the roff requests, one struct roffmac per token.
 * The fields of each entry are { proc, text, sub, flags }; the
 * trailing comment on each line names the request the entry is for.
 * Only block-like requests (am, de, ig, ...) and the conditionals
 * (el, ie, if) provide text and sub handlers, and only the
 * conditionals set ROFFMAC_STRUCT.
 */
341 static	struct roffmac	 roffs[TOKEN_NONE] = {
342 	{ roff_br, NULL, NULL, 0 },  /* br */
343 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
344 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
345 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
346 	{ roff_onearg, NULL, NULL, 0 },  /* mc */
347 	{ roff_onearg, NULL, NULL, 0 },  /* po */
348 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
349 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
350 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
351 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
352 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
353 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
354 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
355 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
356 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
357 	{ roff_als, NULL, NULL, 0 },  /* als */
358 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
359 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
360 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
361 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
362 	{ roff_ds, NULL, NULL, 0 },  /* as */
363 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
364 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
365 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
366 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
367 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
368 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
369 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
370 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
371 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
372 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
373 	{ roff_unsupp, NULL, NULL, 0 },  /* break */
374 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
375 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
376 	{ roff_br, NULL, NULL, 0 },  /* brp */
377 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
378 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
379 	{ roff_cc, NULL, NULL, 0 },  /* cc */
380 	{ roff_insec, NULL, NULL, 0 },  /* cf */
381 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
382 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
383 	{ roff_unsupp, NULL, NULL, 0 },  /* char */
384 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
385 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
386 	{ roff_insec, NULL, NULL, 0 },  /* close */
387 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
388 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
389 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
390 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
391 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
392 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
393 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
394 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
395 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
396 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
397 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
398 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
399 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
400 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
401 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
402 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
403 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
404 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
405 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
406 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
407 	{ roff_ds, NULL, NULL, 0 },  /* ds */
408 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
409 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
410 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
411 	{ roff_ec, NULL, NULL, 0 },  /* ec */
412 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
413 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
414 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
415 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
416 	{ roff_EN, NULL, NULL, 0 },  /* EN */
417 	{ roff_eo, NULL, NULL, 0 },  /* eo */
418 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
419 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
420 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
421 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
422 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
423 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
425 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
426 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
427 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
428 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
429 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
430 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
431 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
432 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
433 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
434 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
435 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
436 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
437 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
438 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
439 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
440 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
441 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
442 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
443 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
444 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
445 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
446 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
447 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
448 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
449 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
450 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
451 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
452 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
453 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
454 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
455 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
456 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
457 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
458 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
459 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
460 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
461 	{ roff_it, NULL, NULL, 0 },  /* it */
462 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
465 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
468 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
469 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
470 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
471 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
473 	{ roff_insec, NULL, NULL, 0 },  /* lf */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
476 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
477 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
478 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
479 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
481 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
486 	{ roff_insec, NULL, NULL, 0 },  /* mso */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
488 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
489 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
490 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
491 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
492 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
493 	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
494 	{ roff_nr, NULL, NULL, 0 },  /* nr */
495 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
496 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
497 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
498 	{ roff_insec, NULL, NULL, 0 },  /* nx */
499 	{ roff_insec, NULL, NULL, 0 },  /* open */
500 	{ roff_insec, NULL, NULL, 0 },  /* opena */
501 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
502 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
504 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
505 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
506 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
507 	{ roff_insec, NULL, NULL, 0 },  /* pi */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
509 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
510 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
512 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
513 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
514 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
515 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
516 	{ roff_insec, NULL, NULL, 0 },  /* pso */
517 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
518 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
519 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
522 	{ roff_unsupp, NULL, NULL, 0 },  /* return */
523 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
524 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
525 	{ roff_rm, NULL, NULL, 0 },  /* rm */
526 	{ roff_rn, NULL, NULL, 0 },  /* rn */
527 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
528 	{ roff_rr, NULL, NULL, 0 },  /* rr */
529 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
530 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
531 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
532 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
534 	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
536 	{ roff_so, NULL, NULL, 0 },  /* so */
537 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
538 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
539 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
542 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
543 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
544 	{ roff_insec, NULL, NULL, 0 },  /* sy */
545 	{ roff_T_, NULL, NULL, 0 },  /* T& */
546 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
547 	{ roff_TE, NULL, NULL, 0 },  /* TE */
548 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
550 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
551 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
552 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
553 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
554 	{ roff_tr, NULL, NULL, 0 },  /* tr */
555 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
556 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
557 	{ roff_insec, NULL, NULL, 0 },  /* trf */
558 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
559 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
560 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
561 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
562 	{ roff_TS, NULL, NULL, 0 },  /* TS */
563 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
564 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
565 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
567 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
571 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
573 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
574 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
575 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
576 	{ roff_unsupp, NULL, NULL, 0 },  /* while */
577 	{ roff_insec, NULL, NULL, 0 },  /* write */
578 	{ roff_insec, NULL, NULL, 0 },  /* writec */
579 	{ roff_insec, NULL, NULL, 0 },  /* writem */
580 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
581 	{ roff_cblock, NULL, NULL, 0 },  /* . */
582 	{ roff_renamed, NULL, NULL, 0 },  /* renamed request; presumably ROFF_RENAMED */
583 	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro; no name in the table */
584 };
585 
586 /*
 * Array of injected predefined strings.
 * The initializers come from the included file predefs.in.
 * PREDEFS_MAX fixes the array size: more initializers than that do
 * not compile, and with fewer the remaining elements are
 * zero-initialized, i.e. have NULL name and str.
 */
587 #define	PREDEFS_MAX	 38
588 static	const struct predef predefs[PREDEFS_MAX] = {
589 #include "predefs.in"
590 };
591 
/*
 * File-scope parser state.  These variables are not part of
 * struct roff, so they are shared by every parser instance in the
 * process; roff_reset() returns all four to 0 or NULL.
 * NOTE(review): roff_reset() clears roffit_macro without freeing it;
 * confirm who owns that string before changing either place.
 */
592 static	int	 roffce_lines;	/* number of input lines to center */
593 static	struct roff_node *roffce_node;  /* active request */
594 static	int	 roffit_lines;  /* number of lines to delay */
595 static	char	*roffit_macro;  /* nil-terminated macro line */
596 
597 
598 /* --- request table ------------------------------------------------------ */
599 
600 struct ohash *
601 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
602 {
603 	struct ohash	*htab;
604 	struct roffreq	*req;
605 	enum roff_tok	 tok;
606 	size_t		 sz;
607 	unsigned int	 slot;
608 
609 	htab = mandoc_malloc(sizeof(*htab));
610 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
611 
612 	for (tok = mintok; tok < maxtok; tok++) {
613 		if (roff_name[tok] == NULL)
614 			continue;
615 		sz = strlen(roff_name[tok]);
616 		req = mandoc_malloc(sizeof(*req) + sz + 1);
617 		req->tok = tok;
618 		memcpy(req->name, roff_name[tok], sz + 1);
619 		slot = ohash_qlookup(htab, req->name);
620 		ohash_insert(htab, slot, req);
621 	}
622 	return htab;
623 }
624 
/*
 * Release a table built by roffhash_alloc(): first every stored
 * entry, then the hash table internals, finally the table object.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
639 
640 enum roff_tok
641 roffhash_find(struct ohash *htab, const char *name, size_t sz)
642 {
643 	struct roffreq	*req;
644 	const char	*end;
645 
646 	if (sz) {
647 		end = name + sz;
648 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
649 	} else
650 		req = ohash_find(htab, ohash_qlookup(htab, name));
651 	return req == NULL ? TOKEN_NONE : req->tok;
652 }
653 
654 /* --- stack of request blocks -------------------------------------------- */
655 
656 /*
657  * Pop the current node off of the stack of roff instructions currently
658  * pending.
659  */
660 static void
661 roffnode_pop(struct roff *r)
662 {
663 	struct roffnode	*p;
664 
665 	assert(r->last);
666 	p = r->last;
667 
668 	r->last = r->last->parent;
669 	free(p->name);
670 	free(p->end);
671 	free(p);
672 }
673 
674 /*
675  * Push a roff node onto the instruction stack.  This must later be
676  * removed with roffnode_pop().
677  */
678 static void
679 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
680 		int line, int col)
681 {
682 	struct roffnode	*p;
683 
684 	p = mandoc_calloc(1, sizeof(struct roffnode));
685 	p->tok = tok;
686 	if (name)
687 		p->name = mandoc_strdup(name);
688 	p->parent = r->last;
689 	p->line = line;
690 	p->col = col;
691 	p->rule = p->parent ? p->parent->rule : 0;
692 
693 	r->last = p;
694 }
695 
696 /* --- roff parser state data management ---------------------------------- */
697 
698 static void
699 roff_free1(struct roff *r)
700 {
701 	struct tbl_node	*tbl;
702 	int		 i;
703 
704 	while (NULL != (tbl = r->first_tbl)) {
705 		r->first_tbl = tbl->next;
706 		tbl_free(tbl);
707 	}
708 	r->first_tbl = r->last_tbl = r->tbl = NULL;
709 
710 	if (r->last_eqn != NULL)
711 		eqn_free(r->last_eqn);
712 	r->last_eqn = r->eqn = NULL;
713 
714 	while (r->last)
715 		roffnode_pop(r);
716 
717 	free (r->rstack);
718 	r->rstack = NULL;
719 	r->rstacksz = 0;
720 	r->rstackpos = -1;
721 
722 	roff_freereg(r->regtab);
723 	r->regtab = NULL;
724 
725 	roff_freestr(r->strtab);
726 	roff_freestr(r->rentab);
727 	roff_freestr(r->xmbtab);
728 	r->strtab = r->rentab = r->xmbtab = NULL;
729 
730 	if (r->xtab)
731 		for (i = 0; i < 128; i++)
732 			free(r->xtab[i].p);
733 	free(r->xtab);
734 	r->xtab = NULL;
735 }
736 
737 void
738 roff_reset(struct roff *r)
739 {
740 	roff_free1(r);
741 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
742 	r->control = '\0';
743 	r->escape = '\\';
744 	roffce_lines = 0;
745 	roffce_node = NULL;
746 	roffit_lines = 0;
747 	roffit_macro = NULL;
748 }
749 
750 void
751 roff_free(struct roff *r)
752 {
753 	roff_free1(r);
754 	roffhash_free(r->reqtab);
755 	free(r);
756 }
757 
758 struct roff *
759 roff_alloc(struct mparse *parse, int options)
760 {
761 	struct roff	*r;
762 
763 	r = mandoc_calloc(1, sizeof(struct roff));
764 	r->parse = parse;
765 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
766 	r->options = options;
767 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
768 	r->rstackpos = -1;
769 	r->escape = '\\';
770 	return r;
771 }
772 
773 /* --- syntax tree state data management ---------------------------------- */
774 
775 static void
776 roff_man_free1(struct roff_man *man)
777 {
778 
779 	if (man->first != NULL)
780 		roff_node_delete(man, man->first);
781 	free(man->meta.msec);
782 	free(man->meta.vol);
783 	free(man->meta.os);
784 	free(man->meta.arch);
785 	free(man->meta.title);
786 	free(man->meta.name);
787 	free(man->meta.date);
788 }
789 
790 static void
791 roff_man_alloc1(struct roff_man *man)
792 {
793 
794 	memset(&man->meta, 0, sizeof(man->meta));
795 	man->first = mandoc_calloc(1, sizeof(*man->first));
796 	man->first->type = ROFFT_ROOT;
797 	man->last = man->first;
798 	man->last_es = NULL;
799 	man->flags = 0;
800 	man->macroset = MACROSET_NONE;
801 	man->lastsec = man->lastnamed = SEC_NONE;
802 	man->next = ROFF_NEXT_CHILD;
803 }
804 
/*
 * Return a syntax tree object to its initial state so that it can
 * be used for another document.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and meta data */
	roff_man_alloc1(man);	/* start over with a new root node */
}
812 
/*
 * Destroy a syntax tree object: its contents first, then the
 * object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* node tree and meta data */
	free(man);		/* the object itself */
}
820 
821 struct roff_man *
822 roff_man_alloc(struct roff *roff, struct mparse *parse,
823 	const char *os_s, int quick)
824 {
825 	struct roff_man *man;
826 
827 	man = mandoc_calloc(1, sizeof(*man));
828 	man->parse = parse;
829 	man->roff = roff;
830 	man->os_s = os_s;
831 	man->quick = quick;
832 	roff_man_alloc1(man);
833 	roff->man = man;
834 	return man;
835 }
836 
837 /* --- syntax tree handling ----------------------------------------------- */
838 
839 struct roff_node *
840 roff_node_alloc(struct roff_man *man, int line, int pos,
841 	enum roff_type type, int tok)
842 {
843 	struct roff_node	*n;
844 
845 	n = mandoc_calloc(1, sizeof(*n));
846 	n->line = line;
847 	n->pos = pos;
848 	n->tok = tok;
849 	n->type = type;
850 	n->sec = man->lastsec;
851 
852 	if (man->flags & MDOC_SYNOPSIS)
853 		n->flags |= NODE_SYNPRETTY;
854 	else
855 		n->flags &= ~NODE_SYNPRETTY;
856 	if (man->flags & MDOC_NEWLINE)
857 		n->flags |= NODE_LINE;
858 	man->flags &= ~MDOC_NEWLINE;
859 
860 	return n;
861 }
862 
863 void
864 roff_node_append(struct roff_man *man, struct roff_node *n)
865 {
866 
867 	switch (man->next) {
868 	case ROFF_NEXT_SIBLING:
869 		if (man->last->next != NULL) {
870 			n->next = man->last->next;
871 			man->last->next->prev = n;
872 		} else
873 			man->last->parent->last = n;
874 		man->last->next = n;
875 		n->prev = man->last;
876 		n->parent = man->last->parent;
877 		break;
878 	case ROFF_NEXT_CHILD:
879 		if (man->last->child != NULL) {
880 			n->next = man->last->child;
881 			man->last->child->prev = n;
882 		} else
883 			man->last->last = n;
884 		man->last->child = n;
885 		n->parent = man->last;
886 		break;
887 	default:
888 		abort();
889 	}
890 	man->last = n;
891 
892 	switch (n->type) {
893 	case ROFFT_HEAD:
894 		n->parent->head = n;
895 		break;
896 	case ROFFT_BODY:
897 		if (n->end != ENDBODY_NOT)
898 			return;
899 		n->parent->body = n;
900 		break;
901 	case ROFFT_TAIL:
902 		n->parent->tail = n;
903 		break;
904 	default:
905 		return;
906 	}
907 
908 	/*
909 	 * Copy over the normalised-data pointer of our parent.  Not
910 	 * everybody has one, but copying a null pointer is fine.
911 	 */
912 
913 	n->norm = n->parent->norm;
914 	assert(n->parent->type == ROFFT_BLOCK);
915 }
916 
917 void
918 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
919 {
920 	struct roff_node	*n;
921 
922 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
923 	n->string = roff_strdup(man->roff, word);
924 	roff_node_append(man, n);
925 	n->flags |= NODE_VALID | NODE_ENDED;
926 	man->next = ROFF_NEXT_SIBLING;
927 }
928 
929 void
930 roff_word_append(struct roff_man *man, const char *word)
931 {
932 	struct roff_node	*n;
933 	char			*addstr, *newstr;
934 
935 	n = man->last;
936 	addstr = roff_strdup(man->roff, word);
937 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
938 	free(addstr);
939 	free(n->string);
940 	n->string = newstr;
941 	man->next = ROFF_NEXT_SIBLING;
942 }
943 
944 void
945 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
946 {
947 	struct roff_node	*n;
948 
949 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
950 	roff_node_append(man, n);
951 	man->next = ROFF_NEXT_CHILD;
952 }
953 
954 struct roff_node *
955 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
956 {
957 	struct roff_node	*n;
958 
959 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
960 	roff_node_append(man, n);
961 	man->next = ROFF_NEXT_CHILD;
962 	return n;
963 }
964 
965 struct roff_node *
966 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
967 {
968 	struct roff_node	*n;
969 
970 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
971 	roff_node_append(man, n);
972 	man->next = ROFF_NEXT_CHILD;
973 	return n;
974 }
975 
976 struct roff_node *
977 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
978 {
979 	struct roff_node	*n;
980 
981 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
982 	roff_node_append(man, n);
983 	man->next = ROFF_NEXT_CHILD;
984 	return n;
985 }
986 
987 static void
988 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
989 {
990 	struct roff_node	*n;
991 	const struct tbl_span	*span;
992 
993 	if (man->macroset == MACROSET_MAN)
994 		man_breakscope(man, ROFF_TS);
995 	while ((span = tbl_span(tbl)) != NULL) {
996 		n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
997 		n->span = span;
998 		roff_node_append(man, n);
999 		n->flags |= NODE_VALID | NODE_ENDED;
1000 		man->next = ROFF_NEXT_SIBLING;
1001 	}
1002 }
1003 
1004 void
1005 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1006 {
1007 
1008 	/* Adjust siblings. */
1009 
1010 	if (n->prev)
1011 		n->prev->next = n->next;
1012 	if (n->next)
1013 		n->next->prev = n->prev;
1014 
1015 	/* Adjust parent. */
1016 
1017 	if (n->parent != NULL) {
1018 		if (n->parent->child == n)
1019 			n->parent->child = n->next;
1020 		if (n->parent->last == n)
1021 			n->parent->last = n->prev;
1022 	}
1023 
1024 	/* Adjust parse point. */
1025 
1026 	if (man == NULL)
1027 		return;
1028 	if (man->last == n) {
1029 		if (n->prev == NULL) {
1030 			man->last = n->parent;
1031 			man->next = ROFF_NEXT_CHILD;
1032 		} else {
1033 			man->last = n->prev;
1034 			man->next = ROFF_NEXT_SIBLING;
1035 		}
1036 	}
1037 	if (man->first == n)
1038 		man->first = NULL;
1039 }
1040 
1041 void
1042 roff_node_free(struct roff_node *n)
1043 {
1044 
1045 	if (n->args != NULL)
1046 		mdoc_argv_free(n->args);
1047 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1048 		free(n->norm);
1049 	if (n->eqn != NULL)
1050 		eqn_box_free(n->eqn);
1051 	free(n->string);
1052 	free(n);
1053 }
1054 
1055 void
1056 roff_node_delete(struct roff_man *man, struct roff_node *n)
1057 {
1058 
1059 	while (n->child != NULL)
1060 		roff_node_delete(man, n->child);
1061 	roff_node_unlink(man, n);
1062 	roff_node_free(n);
1063 }
1064 
1065 void
1066 deroff(char **dest, const struct roff_node *n)
1067 {
1068 	char	*cp;
1069 	size_t	 sz;
1070 
1071 	if (n->type != ROFFT_TEXT) {
1072 		for (n = n->child; n != NULL; n = n->next)
1073 			deroff(dest, n);
1074 		return;
1075 	}
1076 
1077 	/* Skip leading whitespace. */
1078 
1079 	for (cp = n->string; *cp != '\0'; cp++) {
1080 		if (cp[0] == '\\' && cp[1] != '\0' &&
1081 		    strchr(" %&0^|~", cp[1]) != NULL)
1082 			cp++;
1083 		else if ( ! isspace((unsigned char)*cp))
1084 			break;
1085 	}
1086 
1087 	/* Skip trailing backslash. */
1088 
1089 	sz = strlen(cp);
1090 	if (sz > 0 && cp[sz - 1] == '\\')
1091 		sz--;
1092 
1093 	/* Skip trailing whitespace. */
1094 
1095 	for (; sz; sz--)
1096 		if ( ! isspace((unsigned char)cp[sz-1]))
1097 			break;
1098 
1099 	/* Skip empty strings. */
1100 
1101 	if (sz == 0)
1102 		return;
1103 
1104 	if (*dest == NULL) {
1105 		*dest = mandoc_strndup(cp, sz);
1106 		return;
1107 	}
1108 
1109 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1110 	free(*dest);
1111 	*dest = cp;
1112 }
1113 
1114 /* --- main functions of the roff parser ---------------------------------- */
1115 
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line in buf is processed starting at offset pos.  Comments
 * are handled first and cut off.  The rest is then scanned from the
 * end towards the beginning, and each \*, \B, \n, and \w sequence is
 * replaced by its value, reallocating buf->buf as needed.
 *
 * Returns ROFF_CONT when the line can be processed further,
 * ROFF_APPEND when the line ends in a lone escape character and the
 * input did not end with a newline or comment, and ROFF_IGN when the
 * expansion limit or the maximum line length is exceeded.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != r->escape || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/*
		 * Only accept the keyword if it is not followed by
		 * an alphanumeric character and a closing '$' exists.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
				    ln, stesc + 1 - buf->buf, stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/* From here on, stesc points to the escape character. */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
			    ln, ep - buf->buf, NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			/* Skip the escape character and the '"' or '#'. */
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Discard comments. */

		while (stesc > start && stesc[-1] == ' ')
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process if the line is (now) empty. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the last character of the line. */

	expand_count = 0;
	while (stesc >= start) {

		/* Search backwards for the next backslash. */

		if (*stesc != r->escape) {
			/*
			 * A backslash that is not the current escape
			 * character is rewritten to "\e", which
			 * requires a buffer that is one byte larger.
			 */
			if (*stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}
			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * stesc - cp is the length of the run of escape
		 * characters ending at stesc.  An even length means
		 * that all of them pair up; they are all rewritten
		 * to backslashes and skipped.
		 */

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Lone escape character at the end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: only validate the syntax. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* \B and \w: skip the letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		/* maxl == 0 means: read up to the terminator term. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			/*
			 * Inside the argument of \w, only the escape
			 * types listed here add to the length.
			 */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);
			}
			break;
		case 'B':
			/*
			 * Yield "1" if the complete argument, up to
			 * the closing delimiter, evaluates as a
			 * numeric expression, or "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Resume at the end of the inserted text, such that
		 * escape sequences contained in it get processed, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1419 
1420 /*
1421  * Process text streams.
1422  */
1423 static enum rofferr
1424 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1425 {
1426 	size_t		 sz;
1427 	const char	*start;
1428 	char		*p;
1429 	int		 isz;
1430 	enum mandoc_esc	 esc;
1431 
1432 	/* Spring the input line trap. */
1433 
1434 	if (roffit_lines == 1) {
1435 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1436 		free(buf->buf);
1437 		buf->buf = p;
1438 		buf->sz = isz + 1;
1439 		*offs = 0;
1440 		free(roffit_macro);
1441 		roffit_lines = 0;
1442 		return ROFF_REPARSE;
1443 	} else if (roffit_lines > 1)
1444 		--roffit_lines;
1445 
1446 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1447 		if (roffce_lines < 1) {
1448 			r->man->last = roffce_node;
1449 			r->man->next = ROFF_NEXT_SIBLING;
1450 			roffce_lines = 0;
1451 			roffce_node = NULL;
1452 		} else
1453 			roffce_lines--;
1454 	}
1455 
1456 	/* Convert all breakable hyphens into ASCII_HYPH. */
1457 
1458 	start = p = buf->buf + pos;
1459 
1460 	while (*p != '\0') {
1461 		sz = strcspn(p, "-\\");
1462 		p += sz;
1463 
1464 		if (*p == '\0')
1465 			break;
1466 
1467 		if (*p == '\\') {
1468 			/* Skip over escapes. */
1469 			p++;
1470 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1471 			if (esc == ESCAPE_ERROR)
1472 				break;
1473 			while (*p == '-')
1474 				p++;
1475 			continue;
1476 		} else if (p == start) {
1477 			p++;
1478 			continue;
1479 		}
1480 
1481 		if (isalpha((unsigned char)p[-1]) &&
1482 		    isalpha((unsigned char)p[1]))
1483 			*p = ASCII_HYPH;
1484 		p++;
1485 	}
1486 	return ROFF_CONT;
1487 }
1488 
/*
 * Parse one input line, stored in buf and starting at offset *offs.
 * In this order: handle in-line equation delimiters, expand escape
 * sequences with roff_res(), hand the line to the handler of an open
 * request block, to an open equation, or to an open table, process
 * plain text with roff_parsetext(), and finally look up and execute
 * a request or user-defined macro.
 *
 * Returns the result of whichever handler consumed the line.
 * ROFF_CONT is returned for text lines and for macro lines naming
 * neither a request nor a user-defined macro.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN || e == ROFF_APPEND)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos, whereas ppos keeps the line start. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if (e == ROFF_IGN)
			return e;
		assert(e == ROFF_CONT);
	}
	/* Inside an equation, every line except ".EN" is equation data. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return ROFF_IGN;
	}
	/* Inside a table, text lines and empty request lines are data. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs);

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
		    ln, pos, buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For an unknown macro, skip its name and the blanks
		 * following it and treat the rest as table data.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1613 
1614 void
1615 roff_endparse(struct roff *r)
1616 {
1617 	if (r->last != NULL)
1618 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1619 		    r->last->line, r->last->col,
1620 		    roff_name[r->last->tok]);
1621 
1622 	if (r->eqn != NULL) {
1623 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1624 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1625 		eqn_parse(r->eqn);
1626 		r->eqn = NULL;
1627 	}
1628 
1629 	if (r->tbl != NULL) {
1630 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1631 		    r->tbl->line, r->tbl->pos, "TS");
1632 		tbl_end(r->tbl);
1633 		r->tbl = NULL;
1634 	}
1635 }
1636 
1637 /*
1638  * Parse a roff node's type from the input buffer.  This must be in the
1639  * form of ".foo xxx" in the usual way.
1640  */
1641 static enum roff_tok
1642 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1643 {
1644 	char		*cp;
1645 	const char	*mac;
1646 	size_t		 maclen;
1647 	int		 deftype;
1648 	enum roff_tok	 t;
1649 
1650 	cp = buf + *pos;
1651 
1652 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1653 		return TOKEN_NONE;
1654 
1655 	mac = cp;
1656 	maclen = roff_getname(r, &cp, ln, ppos);
1657 
1658 	deftype = ROFFDEF_USER | ROFFDEF_REN;
1659 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1660 	switch (deftype) {
1661 	case ROFFDEF_USER:
1662 		t = ROFF_USERDEF;
1663 		break;
1664 	case ROFFDEF_REN:
1665 		t = ROFF_RENAMED;
1666 		break;
1667 	default:
1668 		t = roffhash_find(r->reqtab, mac, maclen);
1669 		break;
1670 	}
1671 	if (t != TOKEN_NONE)
1672 		*pos = cp - buf;
1673 	else if (deftype == ROFFDEF_UNDEF) {
1674 		/* Using an undefined macro defines it to be empty. */
1675 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1676 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1677 	}
1678 	return t;
1679 }
1680 
1681 /* --- handling of request blocks ----------------------------------------- */
1682 
1683 static enum rofferr
1684 roff_cblock(ROFF_ARGS)
1685 {
1686 
1687 	/*
1688 	 * A block-close `..' should only be invoked as a child of an
1689 	 * ignore macro, otherwise raise a warning and just ignore it.
1690 	 */
1691 
1692 	if (r->last == NULL) {
1693 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1694 		    ln, ppos, "..");
1695 		return ROFF_IGN;
1696 	}
1697 
1698 	switch (r->last->tok) {
1699 	case ROFF_am:
1700 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1701 	case ROFF_ami:
1702 	case ROFF_de:
1703 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1704 	case ROFF_dei:
1705 	case ROFF_ig:
1706 		break;
1707 	default:
1708 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1709 		    ln, ppos, "..");
1710 		return ROFF_IGN;
1711 	}
1712 
1713 	if (buf->buf[pos] != '\0')
1714 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1715 		    ".. %s", buf->buf + pos);
1716 
1717 	roffnode_pop(r);
1718 	roffnode_cleanscope(r);
1719 	return ROFF_IGN;
1720 
1721 }
1722 
1723 static void
1724 roffnode_cleanscope(struct roff *r)
1725 {
1726 
1727 	while (r->last) {
1728 		if (--r->last->endspan != 0)
1729 			break;
1730 		roffnode_pop(r);
1731 	}
1732 }
1733 
1734 static void
1735 roff_ccond(struct roff *r, int ln, int ppos)
1736 {
1737 
1738 	if (NULL == r->last) {
1739 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1740 		    ln, ppos, "\\}");
1741 		return;
1742 	}
1743 
1744 	switch (r->last->tok) {
1745 	case ROFF_el:
1746 	case ROFF_ie:
1747 	case ROFF_if:
1748 		break;
1749 	default:
1750 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1751 		    ln, ppos, "\\}");
1752 		return;
1753 	}
1754 
1755 	if (r->last->endspan > -1) {
1756 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1757 		    ln, ppos, "\\}");
1758 		return;
1759 	}
1760 
1761 	roffnode_pop(r);
1762 	roffnode_cleanscope(r);
1763 	return;
1764 }
1765 
1766 static enum rofferr
1767 roff_block(ROFF_ARGS)
1768 {
1769 	const char	*name, *value;
1770 	char		*call, *cp, *iname, *rname;
1771 	size_t		 csz, namesz, rsz;
1772 	int		 deftype;
1773 
1774 	/* Ignore groff compatibility mode for now. */
1775 
1776 	if (tok == ROFF_de1)
1777 		tok = ROFF_de;
1778 	else if (tok == ROFF_dei1)
1779 		tok = ROFF_dei;
1780 	else if (tok == ROFF_am1)
1781 		tok = ROFF_am;
1782 	else if (tok == ROFF_ami1)
1783 		tok = ROFF_ami;
1784 
1785 	/* Parse the macro name argument. */
1786 
1787 	cp = buf->buf + pos;
1788 	if (tok == ROFF_ig) {
1789 		iname = NULL;
1790 		namesz = 0;
1791 	} else {
1792 		iname = cp;
1793 		namesz = roff_getname(r, &cp, ln, ppos);
1794 		iname[namesz] = '\0';
1795 	}
1796 
1797 	/* Resolve the macro name argument if it is indirect. */
1798 
1799 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1800 		deftype = ROFFDEF_USER;
1801 		name = roff_getstrn(r, iname, namesz, &deftype);
1802 		if (name == NULL) {
1803 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1804 			    r->parse, ln, (int)(iname - buf->buf),
1805 			    "%.*s", (int)namesz, iname);
1806 			namesz = 0;
1807 		} else
1808 			namesz = strlen(name);
1809 	} else
1810 		name = iname;
1811 
1812 	if (namesz == 0 && tok != ROFF_ig) {
1813 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1814 		    ln, ppos, roff_name[tok]);
1815 		return ROFF_IGN;
1816 	}
1817 
1818 	roffnode_push(r, tok, name, ln, ppos);
1819 
1820 	/*
1821 	 * At the beginning of a `de' macro, clear the existing string
1822 	 * with the same name, if there is one.  New content will be
1823 	 * appended from roff_block_text() in multiline mode.
1824 	 */
1825 
1826 	if (tok == ROFF_de || tok == ROFF_dei) {
1827 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1828 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1829 	} else if (tok == ROFF_am || tok == ROFF_ami) {
1830 		deftype = ROFFDEF_ANY;
1831 		value = roff_getstrn(r, iname, namesz, &deftype);
1832 		switch (deftype) {  /* Before appending, ... */
1833 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
1834 			roff_setstrn(&r->strtab, name, namesz,
1835 			    value, strlen(value), 0);
1836 			break;
1837 		case ROFFDEF_REN: /* call original standard macro. */
1838 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1839 			    (int)strlen(value), value);
1840 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1841 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1842 			free(call);
1843 			break;
1844 		case ROFFDEF_STD:  /* rename and call standard macro. */
1845 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1846 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1847 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1848 			    (int)rsz, rname);
1849 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1850 			free(call);
1851 			free(rname);
1852 			break;
1853 		default:
1854 			break;
1855 		}
1856 	}
1857 
1858 	if (*cp == '\0')
1859 		return ROFF_IGN;
1860 
1861 	/* Get the custom end marker. */
1862 
1863 	iname = cp;
1864 	namesz = roff_getname(r, &cp, ln, ppos);
1865 
1866 	/* Resolve the end marker if it is indirect. */
1867 
1868 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1869 		deftype = ROFFDEF_USER;
1870 		name = roff_getstrn(r, iname, namesz, &deftype);
1871 		if (name == NULL) {
1872 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1873 			    r->parse, ln, (int)(iname - buf->buf),
1874 			    "%.*s", (int)namesz, iname);
1875 			namesz = 0;
1876 		} else
1877 			namesz = strlen(name);
1878 	} else
1879 		name = iname;
1880 
1881 	if (namesz)
1882 		r->last->end = mandoc_strndup(name, namesz);
1883 
1884 	if (*cp != '\0')
1885 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1886 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
1887 
1888 	return ROFF_IGN;
1889 }
1890 
1891 static enum rofferr
1892 roff_block_sub(ROFF_ARGS)
1893 {
1894 	enum roff_tok	t;
1895 	int		i, j;
1896 
1897 	/*
1898 	 * First check whether a custom macro exists at this level.  If
1899 	 * it does, then check against it.  This is some of groff's
1900 	 * stranger behaviours.  If we encountered a custom end-scope
1901 	 * tag and that tag also happens to be a "real" macro, then we
1902 	 * need to try interpreting it again as a real macro.  If it's
1903 	 * not, then return ignore.  Else continue.
1904 	 */
1905 
1906 	if (r->last->end) {
1907 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1908 			if (buf->buf[i] != r->last->end[j])
1909 				break;
1910 
1911 		if (r->last->end[j] == '\0' &&
1912 		    (buf->buf[i] == '\0' ||
1913 		     buf->buf[i] == ' ' ||
1914 		     buf->buf[i] == '\t')) {
1915 			roffnode_pop(r);
1916 			roffnode_cleanscope(r);
1917 
1918 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1919 				i++;
1920 
1921 			pos = i;
1922 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1923 			    TOKEN_NONE)
1924 				return ROFF_RERUN;
1925 			return ROFF_IGN;
1926 		}
1927 	}
1928 
1929 	/*
1930 	 * If we have no custom end-query or lookup failed, then try
1931 	 * pulling it out of the hashtable.
1932 	 */
1933 
1934 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1935 
1936 	if (t != ROFF_cblock) {
1937 		if (tok != ROFF_ig)
1938 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1939 		return ROFF_IGN;
1940 	}
1941 
1942 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1943 }
1944 
1945 static enum rofferr
1946 roff_block_text(ROFF_ARGS)
1947 {
1948 
1949 	if (tok != ROFF_ig)
1950 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1951 
1952 	return ROFF_IGN;
1953 }
1954 
1955 static enum rofferr
1956 roff_cond_sub(ROFF_ARGS)
1957 {
1958 	enum roff_tok	 t;
1959 	char		*ep;
1960 	int		 rr;
1961 
1962 	rr = r->last->rule;
1963 	roffnode_cleanscope(r);
1964 
1965 	/*
1966 	 * If `\}' occurs on a macro line without a preceding macro,
1967 	 * drop the line completely.
1968 	 */
1969 
1970 	ep = buf->buf + pos;
1971 	if (ep[0] == '\\' && ep[1] == '}')
1972 		rr = 0;
1973 
1974 	/* Always check for the closing delimiter `\}'. */
1975 
1976 	while ((ep = strchr(ep, '\\')) != NULL) {
1977 		switch (ep[1]) {
1978 		case '}':
1979 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
1980 			roff_ccond(r, ln, ep - buf->buf);
1981 			break;
1982 		case '\0':
1983 			++ep;
1984 			break;
1985 		default:
1986 			ep += 2;
1987 			break;
1988 		}
1989 	}
1990 
1991 	/*
1992 	 * Fully handle known macros when they are structurally
1993 	 * required or when the conditional evaluated to true.
1994 	 */
1995 
1996 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1997 	return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1998 	    ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1999 	    ? ROFF_CONT : ROFF_IGN;
2000 }
2001 
2002 static enum rofferr
2003 roff_cond_text(ROFF_ARGS)
2004 {
2005 	char		*ep;
2006 	int		 rr;
2007 
2008 	rr = r->last->rule;
2009 	roffnode_cleanscope(r);
2010 
2011 	ep = buf->buf + pos;
2012 	while ((ep = strchr(ep, '\\')) != NULL) {
2013 		if (*(++ep) == '}') {
2014 			*ep = '&';
2015 			roff_ccond(r, ln, ep - buf->buf - 1);
2016 		}
2017 		if (*ep != '\0')
2018 			++ep;
2019 	}
2020 	return rr ? ROFF_CONT : ROFF_IGN;
2021 }
2022 
2023 /* --- handling of numeric and conditional expressions -------------------- */
2024 
2025 /*
2026  * Parse a single signed integer number.  Stop at the first non-digit.
2027  * If there is at least one digit, return success and advance the
2028  * parse point, else return failure and let the parse point unchanged.
2029  * Ignore overflows, treat them just like the C language.
2030  */
2031 static int
2032 roff_getnum(const char *v, int *pos, int *res, int flags)
2033 {
2034 	int	 myres, scaled, n, p;
2035 
2036 	if (NULL == res)
2037 		res = &myres;
2038 
2039 	p = *pos;
2040 	n = v[p] == '-';
2041 	if (n || v[p] == '+')
2042 		p++;
2043 
2044 	if (flags & ROFFNUM_WHITE)
2045 		while (isspace((unsigned char)v[p]))
2046 			p++;
2047 
2048 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2049 		*res = 10 * *res + v[p] - '0';
2050 	if (p == *pos + n)
2051 		return 0;
2052 
2053 	if (n)
2054 		*res = -*res;
2055 
2056 	/* Each number may be followed by one optional scaling unit. */
2057 
2058 	switch (v[p]) {
2059 	case 'f':
2060 		scaled = *res * 65536;
2061 		break;
2062 	case 'i':
2063 		scaled = *res * 240;
2064 		break;
2065 	case 'c':
2066 		scaled = *res * 240 / 2.54;
2067 		break;
2068 	case 'v':
2069 	case 'P':
2070 		scaled = *res * 40;
2071 		break;
2072 	case 'm':
2073 	case 'n':
2074 		scaled = *res * 24;
2075 		break;
2076 	case 'p':
2077 		scaled = *res * 10 / 3;
2078 		break;
2079 	case 'u':
2080 		scaled = *res;
2081 		break;
2082 	case 'M':
2083 		scaled = *res * 6 / 25;
2084 		break;
2085 	default:
2086 		scaled = *res;
2087 		p--;
2088 		break;
2089 	}
2090 	if (flags & ROFFNUM_SCALE)
2091 		*res = scaled;
2092 
2093 	*pos = p + 1;
2094 	return 1;
2095 }
2096 
/*
 * Evaluate a string comparison condition of the form
 * <delim>first<delim>second<delim>, where the delimiter is
 * whatever character is found at v[*pos].
 * Returns 1 if the text between the first and second occurrence
 * of the delimiter equals the text between the second and third
 * occurrence, 0 otherwise.
 * On return, *pos is just behind the third occurrence of the
 * delimiter or, if there is none, at the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* The opening delimiter. */
	const char	*lhs;	/* Cursor in the first string. */
	const char	*rhs;	/* Cursor in the second string. */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);	/* The middle delimiter. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Differ: look for the end delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2139 
2140 /*
2141  * Evaluate an optionally negated single character, numerical,
2142  * or string condition.
2143  */
2144 static int
2145 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2146 {
2147 	char	*cp, *name;
2148 	size_t	 sz;
2149 	int	 deftype, number, savepos, istrue, wanttrue;
2150 
2151 	if ('!' == v[*pos]) {
2152 		wanttrue = 0;
2153 		(*pos)++;
2154 	} else
2155 		wanttrue = 1;
2156 
2157 	switch (v[*pos]) {
2158 	case '\0':
2159 		return 0;
2160 	case 'n':
2161 	case 'o':
2162 		(*pos)++;
2163 		return wanttrue;
2164 	case 'c':
2165 	case 'e':
2166 	case 't':
2167 	case 'v':
2168 		(*pos)++;
2169 		return !wanttrue;
2170 	case 'd':
2171 	case 'r':
2172 		cp = v + *pos + 1;
2173 		while (*cp == ' ')
2174 			cp++;
2175 		name = cp;
2176 		sz = roff_getname(r, &cp, ln, cp - v);
2177 		if (sz == 0)
2178 			istrue = 0;
2179 		else if (v[*pos] == 'r')
2180 			istrue = roff_hasregn(r, name, sz);
2181 		else {
2182 			deftype = ROFFDEF_ANY;
2183 		        roff_getstrn(r, name, sz, &deftype);
2184 			istrue = !!deftype;
2185 		}
2186 		*pos = cp - v;
2187 		return istrue == wanttrue;
2188 	default:
2189 		break;
2190 	}
2191 
2192 	savepos = *pos;
2193 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2194 		return (number > 0) == wanttrue;
2195 	else if (*pos == savepos)
2196 		return roff_evalstrcond(v, pos) == wanttrue;
2197 	else
2198 		return 0;
2199 }
2200 
2201 static enum rofferr
2202 roff_line_ignore(ROFF_ARGS)
2203 {
2204 
2205 	return ROFF_IGN;
2206 }
2207 
2208 static enum rofferr
2209 roff_insec(ROFF_ARGS)
2210 {
2211 
2212 	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2213 	    ln, ppos, roff_name[tok]);
2214 	return ROFF_IGN;
2215 }
2216 
2217 static enum rofferr
2218 roff_unsupp(ROFF_ARGS)
2219 {
2220 
2221 	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2222 	    ln, ppos, roff_name[tok]);
2223 	return ROFF_IGN;
2224 }
2225 
2226 static enum rofferr
2227 roff_cond(ROFF_ARGS)
2228 {
2229 
2230 	roffnode_push(r, tok, NULL, ln, ppos);
2231 
2232 	/*
2233 	 * An `.el' has no conditional body: it will consume the value
2234 	 * of the current rstack entry set in prior `ie' calls or
2235 	 * defaults to DENY.
2236 	 *
2237 	 * If we're not an `el', however, then evaluate the conditional.
2238 	 */
2239 
2240 	r->last->rule = tok == ROFF_el ?
2241 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2242 	    roff_evalcond(r, ln, buf->buf, &pos);
2243 
2244 	/*
2245 	 * An if-else will put the NEGATION of the current evaluated
2246 	 * conditional into the stack of rules.
2247 	 */
2248 
2249 	if (tok == ROFF_ie) {
2250 		if (r->rstackpos + 1 == r->rstacksz) {
2251 			r->rstacksz += 16;
2252 			r->rstack = mandoc_reallocarray(r->rstack,
2253 			    r->rstacksz, sizeof(int));
2254 		}
2255 		r->rstack[++r->rstackpos] = !r->last->rule;
2256 	}
2257 
2258 	/* If the parent has false as its rule, then so do we. */
2259 
2260 	if (r->last->parent && !r->last->parent->rule)
2261 		r->last->rule = 0;
2262 
2263 	/*
2264 	 * Determine scope.
2265 	 * If there is nothing on the line after the conditional,
2266 	 * not even whitespace, use next-line scope.
2267 	 */
2268 
2269 	if (buf->buf[pos] == '\0') {
2270 		r->last->endspan = 2;
2271 		goto out;
2272 	}
2273 
2274 	while (buf->buf[pos] == ' ')
2275 		pos++;
2276 
2277 	/* An opening brace requests multiline scope. */
2278 
2279 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2280 		r->last->endspan = -1;
2281 		pos += 2;
2282 		while (buf->buf[pos] == ' ')
2283 			pos++;
2284 		goto out;
2285 	}
2286 
2287 	/*
2288 	 * Anything else following the conditional causes
2289 	 * single-line scope.  Warn if the scope contains
2290 	 * nothing but trailing whitespace.
2291 	 */
2292 
2293 	if (buf->buf[pos] == '\0')
2294 		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2295 		    ln, ppos, roff_name[tok]);
2296 
2297 	r->last->endspan = 1;
2298 
2299 out:
2300 	*offs = pos;
2301 	return ROFF_RERUN;
2302 }
2303 
2304 static enum rofferr
2305 roff_ds(ROFF_ARGS)
2306 {
2307 	char		*string;
2308 	const char	*name;
2309 	size_t		 namesz;
2310 
2311 	/* Ignore groff compatibility mode for now. */
2312 
2313 	if (tok == ROFF_ds1)
2314 		tok = ROFF_ds;
2315 	else if (tok == ROFF_as1)
2316 		tok = ROFF_as;
2317 
2318 	/*
2319 	 * The first word is the name of the string.
2320 	 * If it is empty or terminated by an escape sequence,
2321 	 * abort the `ds' request without defining anything.
2322 	 */
2323 
2324 	name = string = buf->buf + pos;
2325 	if (*name == '\0')
2326 		return ROFF_IGN;
2327 
2328 	namesz = roff_getname(r, &string, ln, pos);
2329 	if (name[namesz] == '\\')
2330 		return ROFF_IGN;
2331 
2332 	/* Read past the initial double-quote, if any. */
2333 	if (*string == '"')
2334 		string++;
2335 
2336 	/* The rest is the value. */
2337 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2338 	    ROFF_as == tok);
2339 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2340 	return ROFF_IGN;
2341 }
2342 
/*
 * Recognize one operator, one or two characters long, at v[*pos].
 * The character found at v[*pos] is always stored into *res first.
 * Two-character operators replace it with a one-letter code:
 *   <=  'l'     >=  'g'     <>  '!'     <?  'i'     >?  'a'
 * while `==' is reported as plain '='.
 * Returns the (non-zero) operator code and moves *pos behind the
 * operator on success; returns 0 and leaves *pos alone otherwise.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;	/* First character of the operator. */
	int		 len;	/* Number of characters consumed. */

	cp = v + *pos;
	*res = *cp;
	len = 1;

	/* The second character is only inspected when it can matter. */

	if (*cp == '<') {
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (*cp == '>') {
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (*cp == '=') {
		if (cp[1] == '=')
			len = 2;
	} else if (*cp == '\0' || strchr("+-*/%&:", *cp) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2406 
2407 /*
2408  * Evaluate either a parenthesized numeric expression
2409  * or a single signed integer number.
2410  */
2411 static int
2412 roff_evalpar(struct roff *r, int ln,
2413 	const char *v, int *pos, int *res, int flags)
2414 {
2415 
2416 	if ('(' != v[*pos])
2417 		return roff_getnum(v, pos, res, flags);
2418 
2419 	(*pos)++;
2420 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2421 		return 0;
2422 
2423 	/*
2424 	 * Omission of the closing parenthesis
2425 	 * is an error in validation mode,
2426 	 * but ignored in evaluation mode.
2427 	 */
2428 
2429 	if (')' == v[*pos])
2430 		(*pos)++;
2431 	else if (NULL == res)
2432 		return 0;
2433 
2434 	return 1;
2435 }
2436 
2437 /*
2438  * Evaluate a complete numeric expression.
2439  * Proceed left to right, there is no concept of precedence.
2440  */
2441 static int
2442 roff_evalnum(struct roff *r, int ln, const char *v,
2443 	int *pos, int *res, int flags)
2444 {
2445 	int		 mypos, operand2;
2446 	char		 operator;
2447 
2448 	if (NULL == pos) {
2449 		mypos = 0;
2450 		pos = &mypos;
2451 	}
2452 
2453 	if (flags & ROFFNUM_WHITE)
2454 		while (isspace((unsigned char)v[*pos]))
2455 			(*pos)++;
2456 
2457 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2458 		return 0;
2459 
2460 	while (1) {
2461 		if (flags & ROFFNUM_WHITE)
2462 			while (isspace((unsigned char)v[*pos]))
2463 				(*pos)++;
2464 
2465 		if ( ! roff_getop(v, pos, &operator))
2466 			break;
2467 
2468 		if (flags & ROFFNUM_WHITE)
2469 			while (isspace((unsigned char)v[*pos]))
2470 				(*pos)++;
2471 
2472 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2473 			return 0;
2474 
2475 		if (flags & ROFFNUM_WHITE)
2476 			while (isspace((unsigned char)v[*pos]))
2477 				(*pos)++;
2478 
2479 		if (NULL == res)
2480 			continue;
2481 
2482 		switch (operator) {
2483 		case '+':
2484 			*res += operand2;
2485 			break;
2486 		case '-':
2487 			*res -= operand2;
2488 			break;
2489 		case '*':
2490 			*res *= operand2;
2491 			break;
2492 		case '/':
2493 			if (operand2 == 0) {
2494 				mandoc_msg(MANDOCERR_DIVZERO,
2495 					r->parse, ln, *pos, v);
2496 				*res = 0;
2497 				break;
2498 			}
2499 			*res /= operand2;
2500 			break;
2501 		case '%':
2502 			if (operand2 == 0) {
2503 				mandoc_msg(MANDOCERR_DIVZERO,
2504 					r->parse, ln, *pos, v);
2505 				*res = 0;
2506 				break;
2507 			}
2508 			*res %= operand2;
2509 			break;
2510 		case '<':
2511 			*res = *res < operand2;
2512 			break;
2513 		case '>':
2514 			*res = *res > operand2;
2515 			break;
2516 		case 'l':
2517 			*res = *res <= operand2;
2518 			break;
2519 		case 'g':
2520 			*res = *res >= operand2;
2521 			break;
2522 		case '=':
2523 			*res = *res == operand2;
2524 			break;
2525 		case '!':
2526 			*res = *res != operand2;
2527 			break;
2528 		case '&':
2529 			*res = *res && operand2;
2530 			break;
2531 		case ':':
2532 			*res = *res || operand2;
2533 			break;
2534 		case 'i':
2535 			if (operand2 < *res)
2536 				*res = operand2;
2537 			break;
2538 		case 'a':
2539 			if (operand2 > *res)
2540 				*res = operand2;
2541 			break;
2542 		default:
2543 			abort();
2544 		}
2545 	}
2546 	return 1;
2547 }
2548 
2549 /* --- register management ------------------------------------------------ */
2550 
/*
 * Set, increment, or decrement (depending on sign) the number
 * register with the NUL-terminated name given.  Passing INT_MIN
 * as the step leaves the register's step value untouched.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2556 
2557 static void
2558 roff_setregn(struct roff *r, const char *name, size_t len,
2559     int val, char sign, int step)
2560 {
2561 	struct roffreg	*reg;
2562 
2563 	/* Search for an existing register with the same name. */
2564 	reg = r->regtab;
2565 
2566 	while (reg != NULL && (reg->key.sz != len ||
2567 	    strncmp(reg->key.p, name, len) != 0))
2568 		reg = reg->next;
2569 
2570 	if (NULL == reg) {
2571 		/* Create a new register. */
2572 		reg = mandoc_malloc(sizeof(struct roffreg));
2573 		reg->key.p = mandoc_strndup(name, len);
2574 		reg->key.sz = len;
2575 		reg->val = 0;
2576 		reg->step = 0;
2577 		reg->next = r->regtab;
2578 		r->regtab = reg;
2579 	}
2580 
2581 	if ('+' == sign)
2582 		reg->val += val;
2583 	else if ('-' == sign)
2584 		reg->val -= val;
2585 	else
2586 		reg->val = val;
2587 	if (step != INT_MIN)
2588 		reg->step = step;
2589 }
2590 
2591 /*
2592  * Handle some predefined read-only number registers.
2593  * For now, return -1 if the requested register is not predefined;
2594  * in case a predefined read-only register having the value -1
2595  * were to turn up, another special value would have to be chosen.
2596  */
2597 static int
2598 roff_getregro(const struct roff *r, const char *name)
2599 {
2600 
2601 	switch (*name) {
2602 	case '$':  /* Number of arguments of the last macro evaluated. */
2603 		return r->argc;
2604 	case 'A':  /* ASCII approximation mode is always off. */
2605 		return 0;
2606 	case 'g':  /* Groff compatibility mode is always on. */
2607 		return 1;
2608 	case 'H':  /* Fixed horizontal resolution. */
2609 		return 24;
2610 	case 'j':  /* Always adjust left margin only. */
2611 		return 0;
2612 	case 'T':  /* Some output device is always defined. */
2613 		return 1;
2614 	case 'V':  /* Fixed vertical resolution. */
2615 		return 40;
2616 	default:
2617 		return -1;
2618 	}
2619 }
2620 
/*
 * Return the value of the number register with the NUL-terminated
 * name given, without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
2626 
2627 static int
2628 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2629 {
2630 	struct roffreg	*reg;
2631 	int		 val;
2632 
2633 	if ('.' == name[0] && 2 == len) {
2634 		val = roff_getregro(r, name + 1);
2635 		if (-1 != val)
2636 			return val;
2637 	}
2638 
2639 	for (reg = r->regtab; reg; reg = reg->next) {
2640 		if (len == reg->key.sz &&
2641 		    0 == strncmp(name, reg->key.p, len)) {
2642 			switch (sign) {
2643 			case '+':
2644 				reg->val += reg->step;
2645 				break;
2646 			case '-':
2647 				reg->val -= reg->step;
2648 				break;
2649 			default:
2650 				break;
2651 			}
2652 			return reg->val;
2653 		}
2654 	}
2655 
2656 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
2657 	return 0;
2658 }
2659 
2660 static int
2661 roff_hasregn(const struct roff *r, const char *name, size_t len)
2662 {
2663 	struct roffreg	*reg;
2664 	int		 val;
2665 
2666 	if ('.' == name[0] && 2 == len) {
2667 		val = roff_getregro(r, name + 1);
2668 		if (-1 != val)
2669 			return 1;
2670 	}
2671 
2672 	for (reg = r->regtab; reg; reg = reg->next)
2673 		if (len == reg->key.sz &&
2674 		    0 == strncmp(name, reg->key.p, len))
2675 			return 1;
2676 
2677 	return 0;
2678 }
2679 
2680 static void
2681 roff_freereg(struct roffreg *reg)
2682 {
2683 	struct roffreg	*old_reg;
2684 
2685 	while (NULL != reg) {
2686 		free(reg->key.p);
2687 		old_reg = reg;
2688 		reg = reg->next;
2689 		free(old_reg);
2690 	}
2691 }
2692 
2693 static enum rofferr
2694 roff_nr(ROFF_ARGS)
2695 {
2696 	char		*key, *val, *step;
2697 	size_t		 keysz;
2698 	int		 iv, is, len;
2699 	char		 sign;
2700 
2701 	key = val = buf->buf + pos;
2702 	if (*key == '\0')
2703 		return ROFF_IGN;
2704 
2705 	keysz = roff_getname(r, &val, ln, pos);
2706 	if (key[keysz] == '\\')
2707 		return ROFF_IGN;
2708 
2709 	sign = *val;
2710 	if (sign == '+' || sign == '-')
2711 		val++;
2712 
2713 	len = 0;
2714 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2715 		return ROFF_IGN;
2716 
2717 	step = val + len;
2718 	while (isspace((unsigned char)*step))
2719 		step++;
2720 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2721 		is = INT_MIN;
2722 
2723 	roff_setregn(r, key, keysz, iv, sign, is);
2724 	return ROFF_IGN;
2725 }
2726 
2727 static enum rofferr
2728 roff_rr(ROFF_ARGS)
2729 {
2730 	struct roffreg	*reg, **prev;
2731 	char		*name, *cp;
2732 	size_t		 namesz;
2733 
2734 	name = cp = buf->buf + pos;
2735 	if (*name == '\0')
2736 		return ROFF_IGN;
2737 	namesz = roff_getname(r, &cp, ln, pos);
2738 	name[namesz] = '\0';
2739 
2740 	prev = &r->regtab;
2741 	while (1) {
2742 		reg = *prev;
2743 		if (reg == NULL || !strcmp(name, reg->key.p))
2744 			break;
2745 		prev = &reg->next;
2746 	}
2747 	if (reg != NULL) {
2748 		*prev = reg->next;
2749 		free(reg->key.p);
2750 		free(reg);
2751 	}
2752 	return ROFF_IGN;
2753 }
2754 
2755 /* --- handler functions for roff requests -------------------------------- */
2756 
2757 static enum rofferr
2758 roff_rm(ROFF_ARGS)
2759 {
2760 	const char	 *name;
2761 	char		 *cp;
2762 	size_t		  namesz;
2763 
2764 	cp = buf->buf + pos;
2765 	while (*cp != '\0') {
2766 		name = cp;
2767 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2768 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2769 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2770 		if (name[namesz] == '\\')
2771 			break;
2772 	}
2773 	return ROFF_IGN;
2774 }
2775 
2776 static enum rofferr
2777 roff_it(ROFF_ARGS)
2778 {
2779 	int		 iv;
2780 
2781 	/* Parse the number of lines. */
2782 
2783 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2784 		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2785 		    ln, ppos, buf->buf + 1);
2786 		return ROFF_IGN;
2787 	}
2788 
2789 	while (isspace((unsigned char)buf->buf[pos]))
2790 		pos++;
2791 
2792 	/*
2793 	 * Arm the input line trap.
2794 	 * Special-casing "an-trap" is an ugly workaround to cope
2795 	 * with DocBook stupidly fiddling with man(7) internals.
2796 	 */
2797 
2798 	roffit_lines = iv;
2799 	roffit_macro = mandoc_strdup(iv != 1 ||
2800 	    strcmp(buf->buf + pos, "an-trap") ?
2801 	    buf->buf + pos : "br");
2802 	return ROFF_IGN;
2803 }
2804 
2805 static enum rofferr
2806 roff_Dd(ROFF_ARGS)
2807 {
2808 	int		 mask;
2809 	enum roff_tok	 t, te;
2810 
2811 	switch (tok) {
2812 	case ROFF_Dd:
2813 		tok = MDOC_Dd;
2814 		te = MDOC_MAX;
2815 		if (r->format == 0)
2816 			r->format = MPARSE_MDOC;
2817 		mask = MPARSE_MDOC | MPARSE_QUICK;
2818 		break;
2819 	case ROFF_TH:
2820 		tok = MAN_TH;
2821 		te = MAN_MAX;
2822 		if (r->format == 0)
2823 			r->format = MPARSE_MAN;
2824 		mask = MPARSE_QUICK;
2825 		break;
2826 	default:
2827 		abort();
2828 	}
2829 	if ((r->options & mask) == 0)
2830 		for (t = tok; t < te; t++)
2831 			roff_setstr(r, roff_name[t], NULL, 0);
2832 	return ROFF_CONT;
2833 }
2834 
2835 static enum rofferr
2836 roff_TE(ROFF_ARGS)
2837 {
2838 	if (r->tbl == NULL) {
2839 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2840 		    ln, ppos, "TE");
2841 		return ROFF_IGN;
2842 	}
2843 	if (tbl_end(r->tbl) == 0) {
2844 		r->tbl = NULL;
2845 		free(buf->buf);
2846 		buf->buf = mandoc_strdup(".sp");
2847 		buf->sz = 4;
2848 		*offs = 0;
2849 		return ROFF_REPARSE;
2850 	}
2851 	r->tbl = NULL;
2852 	return ROFF_IGN;
2853 }
2854 
2855 static enum rofferr
2856 roff_T_(ROFF_ARGS)
2857 {
2858 
2859 	if (NULL == r->tbl)
2860 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2861 		    ln, ppos, "T&");
2862 	else
2863 		tbl_restart(ln, ppos, r->tbl);
2864 
2865 	return ROFF_IGN;
2866 }
2867 
2868 /*
2869  * Handle in-line equation delimiters.
2870  */
2871 static enum rofferr
2872 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2873 {
2874 	char		*cp1, *cp2;
2875 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2876 
2877 	/*
2878 	 * Outside equations, look for an opening delimiter.
2879 	 * If we are inside an equation, we already know it is
2880 	 * in-line, or this function wouldn't have been called;
2881 	 * so look for a closing delimiter.
2882 	 */
2883 
2884 	cp1 = buf->buf + pos;
2885 	cp2 = strchr(cp1, r->eqn == NULL ?
2886 	    r->last_eqn->odelim : r->last_eqn->cdelim);
2887 	if (cp2 == NULL)
2888 		return ROFF_CONT;
2889 
2890 	*cp2++ = '\0';
2891 	bef_pr = bef_nl = aft_nl = aft_pr = "";
2892 
2893 	/* Handle preceding text, protecting whitespace. */
2894 
2895 	if (*buf->buf != '\0') {
2896 		if (r->eqn == NULL)
2897 			bef_pr = "\\&";
2898 		bef_nl = "\n";
2899 	}
2900 
2901 	/*
2902 	 * Prepare replacing the delimiter with an equation macro
2903 	 * and drop leading white space from the equation.
2904 	 */
2905 
2906 	if (r->eqn == NULL) {
2907 		while (*cp2 == ' ')
2908 			cp2++;
2909 		mac = ".EQ";
2910 	} else
2911 		mac = ".EN";
2912 
2913 	/* Handle following text, protecting whitespace. */
2914 
2915 	if (*cp2 != '\0') {
2916 		aft_nl = "\n";
2917 		if (r->eqn != NULL)
2918 			aft_pr = "\\&";
2919 	}
2920 
2921 	/* Do the actual replacement. */
2922 
2923 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2924 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2925 	free(buf->buf);
2926 	buf->buf = cp1;
2927 
2928 	/* Toggle the in-line state of the eqn subsystem. */
2929 
2930 	r->eqn_inline = r->eqn == NULL;
2931 	return ROFF_REPARSE;
2932 }
2933 
2934 static enum rofferr
2935 roff_EQ(ROFF_ARGS)
2936 {
2937 	struct roff_node	*n;
2938 
2939 	if (r->man->macroset == MACROSET_MAN)
2940 		man_breakscope(r->man, ROFF_EQ);
2941 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2942 	if (ln > r->man->last->line)
2943 		n->flags |= NODE_LINE;
2944 	n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2945 	n->eqn->expectargs = UINT_MAX;
2946 	roff_node_append(r->man, n);
2947 	r->man->next = ROFF_NEXT_SIBLING;
2948 
2949 	assert(r->eqn == NULL);
2950 	if (r->last_eqn == NULL)
2951 		r->last_eqn = eqn_alloc(r->parse);
2952 	else
2953 		eqn_reset(r->last_eqn);
2954 	r->eqn = r->last_eqn;
2955 	r->eqn->node = n;
2956 
2957 	if (buf->buf[pos] != '\0')
2958 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2959 		    ".EQ %s", buf->buf + pos);
2960 
2961 	return ROFF_IGN;
2962 }
2963 
2964 static enum rofferr
2965 roff_EN(ROFF_ARGS)
2966 {
2967 	if (r->eqn != NULL) {
2968 		eqn_parse(r->eqn);
2969 		r->eqn = NULL;
2970 	} else
2971 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2972 	if (buf->buf[pos] != '\0')
2973 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2974 		    "EN %s", buf->buf + pos);
2975 	return ROFF_IGN;
2976 }
2977 
2978 static enum rofferr
2979 roff_TS(ROFF_ARGS)
2980 {
2981 	if (r->tbl != NULL) {
2982 		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2983 		    ln, ppos, "TS breaks TS");
2984 		tbl_end(r->tbl);
2985 	}
2986 	r->tbl = tbl_alloc(ppos, ln, r->parse);
2987 	if (r->last_tbl)
2988 		r->last_tbl->next = r->tbl;
2989 	else
2990 		r->first_tbl = r->tbl;
2991 	r->last_tbl = r->tbl;
2992 	return ROFF_IGN;
2993 }
2994 
2995 static enum rofferr
2996 roff_onearg(ROFF_ARGS)
2997 {
2998 	struct roff_node	*n;
2999 	char			*cp;
3000 	int			 npos;
3001 
3002 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3003 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3004 	     tok == ROFF_ti))
3005 		man_breakscope(r->man, tok);
3006 
3007 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3008 		r->man->last = roffce_node;
3009 		r->man->next = ROFF_NEXT_SIBLING;
3010 	}
3011 
3012 	roff_elem_alloc(r->man, ln, ppos, tok);
3013 	n = r->man->last;
3014 
3015 	cp = buf->buf + pos;
3016 	if (*cp != '\0') {
3017 		while (*cp != '\0' && *cp != ' ')
3018 			cp++;
3019 		while (*cp == ' ')
3020 			*cp++ = '\0';
3021 		if (*cp != '\0')
3022 			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3023 			    r->parse, ln, cp - buf->buf,
3024 			    "%s ... %s", roff_name[tok], cp);
3025 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3026 	}
3027 
3028 	if (tok == ROFF_ce || tok == ROFF_rj) {
3029 		if (r->man->last->type == ROFFT_ELEM) {
3030 			roff_word_alloc(r->man, ln, pos, "1");
3031 			r->man->last->flags |= NODE_NOSRC;
3032 		}
3033 		npos = 0;
3034 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3035 		    &roffce_lines, 0) == 0) {
3036 			mandoc_vmsg(MANDOCERR_CE_NONUM,
3037 			    r->parse, ln, pos, "ce %s", buf->buf + pos);
3038 			roffce_lines = 1;
3039 		}
3040 		if (roffce_lines < 1) {
3041 			r->man->last = r->man->last->parent;
3042 			roffce_node = NULL;
3043 			roffce_lines = 0;
3044 		} else
3045 			roffce_node = r->man->last->parent;
3046 	} else {
3047 		n->flags |= NODE_VALID | NODE_ENDED;
3048 		r->man->last = n;
3049 	}
3050 	n->flags |= NODE_LINE;
3051 	r->man->next = ROFF_NEXT_SIBLING;
3052 	return ROFF_IGN;
3053 }
3054 
3055 static enum rofferr
3056 roff_manyarg(ROFF_ARGS)
3057 {
3058 	struct roff_node	*n;
3059 	char			*sp, *ep;
3060 
3061 	roff_elem_alloc(r->man, ln, ppos, tok);
3062 	n = r->man->last;
3063 
3064 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3065 		while (*ep != '\0' && *ep != ' ')
3066 			ep++;
3067 		while (*ep == ' ')
3068 			*ep++ = '\0';
3069 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3070 	}
3071 
3072 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3073 	r->man->last = n;
3074 	r->man->next = ROFF_NEXT_SIBLING;
3075 	return ROFF_IGN;
3076 }
3077 
3078 static enum rofferr
3079 roff_als(ROFF_ARGS)
3080 {
3081 	char		*oldn, *newn, *end, *value;
3082 	size_t		 oldsz, newsz, valsz;
3083 
3084 	newn = oldn = buf->buf + pos;
3085 	if (*newn == '\0')
3086 		return ROFF_IGN;
3087 
3088 	newsz = roff_getname(r, &oldn, ln, pos);
3089 	if (newn[newsz] == '\\' || *oldn == '\0')
3090 		return ROFF_IGN;
3091 
3092 	end = oldn;
3093 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3094 	if (oldsz == 0)
3095 		return ROFF_IGN;
3096 
3097 	valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3098 	    (int)oldsz, oldn);
3099 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3100 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3101 	free(value);
3102 	return ROFF_IGN;
3103 }
3104 
3105 static enum rofferr
3106 roff_br(ROFF_ARGS)
3107 {
3108 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3109 		man_breakscope(r->man, ROFF_br);
3110 	roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3111 	if (buf->buf[pos] != '\0')
3112 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3113 		    "%s %s", roff_name[tok], buf->buf + pos);
3114 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3115 	r->man->next = ROFF_NEXT_SIBLING;
3116 	return ROFF_IGN;
3117 }
3118 
3119 static enum rofferr
3120 roff_cc(ROFF_ARGS)
3121 {
3122 	const char	*p;
3123 
3124 	p = buf->buf + pos;
3125 
3126 	if (*p == '\0' || (r->control = *p++) == '.')
3127 		r->control = '\0';
3128 
3129 	if (*p != '\0')
3130 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3131 		    ln, p - buf->buf, "cc ... %s", p);
3132 
3133 	return ROFF_IGN;
3134 }
3135 
3136 static enum rofferr
3137 roff_ec(ROFF_ARGS)
3138 {
3139 	const char	*p;
3140 
3141 	p = buf->buf + pos;
3142 	if (*p == '\0')
3143 		r->escape = '\\';
3144 	else {
3145 		r->escape = *p;
3146 		if (*++p != '\0')
3147 			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3148 			    ln, p - buf->buf, "ec ... %s", p);
3149 	}
3150 	return ROFF_IGN;
3151 }
3152 
3153 static enum rofferr
3154 roff_eo(ROFF_ARGS)
3155 {
3156 	r->escape = '\0';
3157 	if (buf->buf[pos] != '\0')
3158 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3159 		    ln, pos, "eo %s", buf->buf + pos);
3160 	return ROFF_IGN;
3161 }
3162 
3163 static enum rofferr
3164 roff_tr(ROFF_ARGS)
3165 {
3166 	const char	*p, *first, *second;
3167 	size_t		 fsz, ssz;
3168 	enum mandoc_esc	 esc;
3169 
3170 	p = buf->buf + pos;
3171 
3172 	if (*p == '\0') {
3173 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3174 		return ROFF_IGN;
3175 	}
3176 
3177 	while (*p != '\0') {
3178 		fsz = ssz = 1;
3179 
3180 		first = p++;
3181 		if (*first == '\\') {
3182 			esc = mandoc_escape(&p, NULL, NULL);
3183 			if (esc == ESCAPE_ERROR) {
3184 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3185 				    ln, (int)(p - buf->buf), first);
3186 				return ROFF_IGN;
3187 			}
3188 			fsz = (size_t)(p - first);
3189 		}
3190 
3191 		second = p++;
3192 		if (*second == '\\') {
3193 			esc = mandoc_escape(&p, NULL, NULL);
3194 			if (esc == ESCAPE_ERROR) {
3195 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3196 				    ln, (int)(p - buf->buf), second);
3197 				return ROFF_IGN;
3198 			}
3199 			ssz = (size_t)(p - second);
3200 		} else if (*second == '\0') {
3201 			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3202 			    ln, first - buf->buf, "tr %s", first);
3203 			second = " ";
3204 			p--;
3205 		}
3206 
3207 		if (fsz > 1) {
3208 			roff_setstrn(&r->xmbtab, first, fsz,
3209 			    second, ssz, 0);
3210 			continue;
3211 		}
3212 
3213 		if (r->xtab == NULL)
3214 			r->xtab = mandoc_calloc(128,
3215 			    sizeof(struct roffstr));
3216 
3217 		free(r->xtab[(int)*first].p);
3218 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3219 		r->xtab[(int)*first].sz = ssz;
3220 	}
3221 
3222 	return ROFF_IGN;
3223 }
3224 
3225 static enum rofferr
3226 roff_rn(ROFF_ARGS)
3227 {
3228 	const char	*value;
3229 	char		*oldn, *newn, *end;
3230 	size_t		 oldsz, newsz;
3231 	int		 deftype;
3232 
3233 	oldn = newn = buf->buf + pos;
3234 	if (*oldn == '\0')
3235 		return ROFF_IGN;
3236 
3237 	oldsz = roff_getname(r, &newn, ln, pos);
3238 	if (oldn[oldsz] == '\\' || *newn == '\0')
3239 		return ROFF_IGN;
3240 
3241 	end = newn;
3242 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3243 	if (newsz == 0)
3244 		return ROFF_IGN;
3245 
3246 	deftype = ROFFDEF_ANY;
3247 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3248 	switch (deftype) {
3249 	case ROFFDEF_USER:
3250 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3251 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3252 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3253 		break;
3254 	case ROFFDEF_PRE:
3255 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3256 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3257 		break;
3258 	case ROFFDEF_REN:
3259 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3260 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3261 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3262 		break;
3263 	case ROFFDEF_STD:
3264 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3265 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3266 		break;
3267 	default:
3268 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3269 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3270 		break;
3271 	}
3272 	return ROFF_IGN;
3273 }
3274 
3275 static enum rofferr
3276 roff_so(ROFF_ARGS)
3277 {
3278 	char *name, *cp;
3279 
3280 	name = buf->buf + pos;
3281 	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3282 
3283 	/*
3284 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3285 	 * opening anything that's not in our cwd or anything beneath
3286 	 * it.  Thus, explicitly disallow traversing up the file-system
3287 	 * or using absolute paths.
3288 	 */
3289 
3290 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3291 		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3292 		    ".so %s", name);
3293 		buf->sz = mandoc_asprintf(&cp,
3294 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3295 		free(buf->buf);
3296 		buf->buf = cp;
3297 		*offs = 0;
3298 		return ROFF_REPARSE;
3299 	}
3300 
3301 	*offs = pos;
3302 	return ROFF_SO;
3303 }
3304 
3305 /* --- user defined strings and macros ------------------------------------ */
3306 
3307 static enum rofferr
3308 roff_userdef(ROFF_ARGS)
3309 {
3310 	const char	 *arg[16], *ap;
3311 	char		 *cp, *n1, *n2;
3312 	int		  expand_count, i, ib, ie;
3313 	size_t		  asz, rsz;
3314 
3315 	/*
3316 	 * Collect pointers to macro argument strings
3317 	 * and NUL-terminate them.
3318 	 */
3319 
3320 	r->argc = 0;
3321 	cp = buf->buf + pos;
3322 	for (i = 0; i < 16; i++) {
3323 		if (*cp == '\0')
3324 			arg[i] = "";
3325 		else {
3326 			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3327 			r->argc = i + 1;
3328 		}
3329 	}
3330 
3331 	/*
3332 	 * Expand macro arguments.
3333 	 */
3334 
3335 	buf->sz = strlen(r->current_string) + 1;
3336 	n1 = n2 = cp = mandoc_malloc(buf->sz);
3337 	memcpy(n1, r->current_string, buf->sz);
3338 	expand_count = 0;
3339 	while (*cp != '\0') {
3340 
3341 		/* Scan ahead for the next argument invocation. */
3342 
3343 		if (*cp++ != '\\')
3344 			continue;
3345 		if (*cp++ != '$')
3346 			continue;
3347 		if (*cp == '*') {  /* \\$* inserts all arguments */
3348 			ib = 0;
3349 			ie = r->argc - 1;
3350 		} else {  /* \\$1 .. \\$9 insert one argument */
3351 			ib = ie = *cp - '1';
3352 			if (ib < 0 || ib > 8)
3353 				continue;
3354 		}
3355 		cp -= 2;
3356 
3357 		/*
3358 		 * Prevent infinite recursion.
3359 		 */
3360 
3361 		if (cp >= n2)
3362 			expand_count = 1;
3363 		else if (++expand_count > EXPAND_LIMIT) {
3364 			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3365 			    ln, (int)(cp - n1), NULL);
3366 			free(buf->buf);
3367 			buf->buf = n1;
3368 			*offs = 0;
3369 			return ROFF_IGN;
3370 		}
3371 
3372 		/*
3373 		 * Determine the size of the expanded argument,
3374 		 * taking escaping of quotes into account.
3375 		 */
3376 
3377 		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3378 		for (i = ib; i <= ie; i++) {
3379 			for (ap = arg[i]; *ap != '\0'; ap++) {
3380 				asz++;
3381 				if (*ap == '"')
3382 					asz += 3;
3383 			}
3384 		}
3385 		if (asz != 3) {
3386 
3387 			/*
3388 			 * Determine the size of the rest of the
3389 			 * unexpanded macro, including the NUL.
3390 			 */
3391 
3392 			rsz = buf->sz - (cp - n1) - 3;
3393 
3394 			/*
3395 			 * When shrinking, move before
3396 			 * releasing the storage.
3397 			 */
3398 
3399 			if (asz < 3)
3400 				memmove(cp + asz, cp + 3, rsz);
3401 
3402 			/*
3403 			 * Resize the storage for the macro
3404 			 * and readjust the parse pointer.
3405 			 */
3406 
3407 			buf->sz += asz - 3;
3408 			n2 = mandoc_realloc(n1, buf->sz);
3409 			cp = n2 + (cp - n1);
3410 			n1 = n2;
3411 
3412 			/*
3413 			 * When growing, make room
3414 			 * for the expanded argument.
3415 			 */
3416 
3417 			if (asz > 3)
3418 				memmove(cp + asz, cp + 3, rsz);
3419 		}
3420 
3421 		/* Copy the expanded argument, escaping quotes. */
3422 
3423 		n2 = cp;
3424 		for (i = ib; i <= ie; i++) {
3425 			for (ap = arg[i]; *ap != '\0'; ap++) {
3426 				if (*ap == '"') {
3427 					memcpy(n2, "\\(dq", 4);
3428 					n2 += 4;
3429 				} else
3430 					*n2++ = *ap;
3431 			}
3432 			if (i < ie)
3433 				*n2++ = ' ';
3434 		}
3435 	}
3436 
3437 	/*
3438 	 * Replace the macro invocation
3439 	 * by the expanded macro.
3440 	 */
3441 
3442 	free(buf->buf);
3443 	buf->buf = n1;
3444 	*offs = 0;
3445 
3446 	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3447 	   ROFF_REPARSE : ROFF_APPEND;
3448 }
3449 
3450 /*
3451  * Calling a high-level macro that was renamed with .rn.
3452  * r->current_string has already been set up by roff_parse().
3453  */
3454 static enum rofferr
3455 roff_renamed(ROFF_ARGS)
3456 {
3457 	char	*nbuf;
3458 
3459 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3460 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3461 	free(buf->buf);
3462 	buf->buf = nbuf;
3463 	*offs = 0;
3464 	return ROFF_CONT;
3465 }
3466 
3467 static size_t
3468 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3469 {
3470 	char	 *name, *cp;
3471 	size_t	  namesz;
3472 
3473 	name = *cpp;
3474 	if ('\0' == *name)
3475 		return 0;
3476 
3477 	/* Read until end of name and terminate it with NUL. */
3478 	for (cp = name; 1; cp++) {
3479 		if ('\0' == *cp || ' ' == *cp) {
3480 			namesz = cp - name;
3481 			break;
3482 		}
3483 		if ('\\' != *cp)
3484 			continue;
3485 		namesz = cp - name;
3486 		if ('{' == cp[1] || '}' == cp[1])
3487 			break;
3488 		cp++;
3489 		if ('\\' == *cp)
3490 			continue;
3491 		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3492 		    "%.*s", (int)(cp - name + 1), name);
3493 		mandoc_escape((const char **)&cp, NULL, NULL);
3494 		break;
3495 	}
3496 
3497 	/* Read past spaces. */
3498 	while (' ' == *cp)
3499 		cp++;
3500 
3501 	*cpp = cp;
3502 	return namesz;
3503 }
3504 
3505 /*
3506  * Store *string into the user-defined string called *name.
3507  * To clear an existing entry, call with (*r, *name, NULL, 0).
3508  * append == 0: replace mode
3509  * append == 1: single-line append mode
3510  * append == 2: multiline append mode, append '\n' after each call
3511  */
3512 static void
3513 roff_setstr(struct roff *r, const char *name, const char *string,
3514 	int append)
3515 {
3516 	size_t	 namesz;
3517 
3518 	namesz = strlen(name);
3519 	roff_setstrn(&r->strtab, name, namesz, string,
3520 	    string ? strlen(string) : 0, append);
3521 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3522 }
3523 
3524 static void
3525 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3526 		const char *string, size_t stringsz, int append)
3527 {
3528 	struct roffkv	*n;
3529 	char		*c;
3530 	int		 i;
3531 	size_t		 oldch, newch;
3532 
3533 	/* Search for an existing string with the same name. */
3534 	n = *r;
3535 
3536 	while (n && (namesz != n->key.sz ||
3537 			strncmp(n->key.p, name, namesz)))
3538 		n = n->next;
3539 
3540 	if (NULL == n) {
3541 		/* Create a new string table entry. */
3542 		n = mandoc_malloc(sizeof(struct roffkv));
3543 		n->key.p = mandoc_strndup(name, namesz);
3544 		n->key.sz = namesz;
3545 		n->val.p = NULL;
3546 		n->val.sz = 0;
3547 		n->next = *r;
3548 		*r = n;
3549 	} else if (0 == append) {
3550 		free(n->val.p);
3551 		n->val.p = NULL;
3552 		n->val.sz = 0;
3553 	}
3554 
3555 	if (NULL == string)
3556 		return;
3557 
3558 	/*
3559 	 * One additional byte for the '\n' in multiline mode,
3560 	 * and one for the terminating '\0'.
3561 	 */
3562 	newch = stringsz + (1 < append ? 2u : 1u);
3563 
3564 	if (NULL == n->val.p) {
3565 		n->val.p = mandoc_malloc(newch);
3566 		*n->val.p = '\0';
3567 		oldch = 0;
3568 	} else {
3569 		oldch = n->val.sz;
3570 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3571 	}
3572 
3573 	/* Skip existing content in the destination buffer. */
3574 	c = n->val.p + (int)oldch;
3575 
3576 	/* Append new content to the destination buffer. */
3577 	i = 0;
3578 	while (i < (int)stringsz) {
3579 		/*
3580 		 * Rudimentary roff copy mode:
3581 		 * Handle escaped backslashes.
3582 		 */
3583 		if ('\\' == string[i] && '\\' == string[i + 1])
3584 			i++;
3585 		*c++ = string[i++];
3586 	}
3587 
3588 	/* Append terminating bytes. */
3589 	if (1 < append)
3590 		*c++ = '\n';
3591 
3592 	*c = '\0';
3593 	n->val.sz = (int)(c - n->val.p);
3594 }
3595 
3596 static const char *
3597 roff_getstrn(struct roff *r, const char *name, size_t len,
3598     int *deftype)
3599 {
3600 	const struct roffkv	*n;
3601 	int			 found, i;
3602 	enum roff_tok		 tok;
3603 
3604 	found = 0;
3605 	for (n = r->strtab; n != NULL; n = n->next) {
3606 		if (strncmp(name, n->key.p, len) != 0 ||
3607 		    n->key.p[len] != '\0' || n->val.p == NULL)
3608 			continue;
3609 		if (*deftype & ROFFDEF_USER) {
3610 			*deftype = ROFFDEF_USER;
3611 			return n->val.p;
3612 		} else {
3613 			found = 1;
3614 			break;
3615 		}
3616 	}
3617 	for (n = r->rentab; n != NULL; n = n->next) {
3618 		if (strncmp(name, n->key.p, len) != 0 ||
3619 		    n->key.p[len] != '\0' || n->val.p == NULL)
3620 			continue;
3621 		if (*deftype & ROFFDEF_REN) {
3622 			*deftype = ROFFDEF_REN;
3623 			return n->val.p;
3624 		} else {
3625 			found = 1;
3626 			break;
3627 		}
3628 	}
3629 	for (i = 0; i < PREDEFS_MAX; i++) {
3630 		if (strncmp(name, predefs[i].name, len) != 0 ||
3631 		    predefs[i].name[len] != '\0')
3632 			continue;
3633 		if (*deftype & ROFFDEF_PRE) {
3634 			*deftype = ROFFDEF_PRE;
3635 			return predefs[i].str;
3636 		} else {
3637 			found = 1;
3638 			break;
3639 		}
3640 	}
3641 	if (r->man->macroset != MACROSET_MAN) {
3642 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3643 			if (strncmp(name, roff_name[tok], len) != 0 ||
3644 			    roff_name[tok][len] != '\0')
3645 				continue;
3646 			if (*deftype & ROFFDEF_STD) {
3647 				*deftype = ROFFDEF_STD;
3648 				return NULL;
3649 			} else {
3650 				found = 1;
3651 				break;
3652 			}
3653 		}
3654 	}
3655 	if (r->man->macroset != MACROSET_MDOC) {
3656 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3657 			if (strncmp(name, roff_name[tok], len) != 0 ||
3658 			    roff_name[tok][len] != '\0')
3659 				continue;
3660 			if (*deftype & ROFFDEF_STD) {
3661 				*deftype = ROFFDEF_STD;
3662 				return NULL;
3663 			} else {
3664 				found = 1;
3665 				break;
3666 			}
3667 		}
3668 	}
3669 
3670 	if (found == 0 && *deftype != ROFFDEF_ANY) {
3671 		if (*deftype & ROFFDEF_REN) {
3672 			/*
3673 			 * This might still be a request,
3674 			 * so do not treat it as undefined yet.
3675 			 */
3676 			*deftype = ROFFDEF_UNDEF;
3677 			return NULL;
3678 		}
3679 
3680 		/* Using an undefined string defines it to be empty. */
3681 
3682 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
3683 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3684 	}
3685 
3686 	*deftype = 0;
3687 	return NULL;
3688 }
3689 
3690 static void
3691 roff_freestr(struct roffkv *r)
3692 {
3693 	struct roffkv	 *n, *nn;
3694 
3695 	for (n = r; n; n = nn) {
3696 		free(n->key.p);
3697 		free(n->val.p);
3698 		nn = n->next;
3699 		free(n);
3700 	}
3701 }
3702 
3703 /* --- accessors and utility functions ------------------------------------ */
3704 
3705 /*
3706  * Duplicate an input string, making the appropriate character
3707  * conversations (as stipulated by `tr') along the way.
3708  * Returns a heap-allocated string with all the replacements made.
3709  */
3710 char *
3711 roff_strdup(const struct roff *r, const char *p)
3712 {
3713 	const struct roffkv *cp;
3714 	char		*res;
3715 	const char	*pp;
3716 	size_t		 ssz, sz;
3717 	enum mandoc_esc	 esc;
3718 
3719 	if (NULL == r->xmbtab && NULL == r->xtab)
3720 		return mandoc_strdup(p);
3721 	else if ('\0' == *p)
3722 		return mandoc_strdup("");
3723 
3724 	/*
3725 	 * Step through each character looking for term matches
3726 	 * (remember that a `tr' can be invoked with an escape, which is
3727 	 * a glyph but the escape is multi-character).
3728 	 * We only do this if the character hash has been initialised
3729 	 * and the string is >0 length.
3730 	 */
3731 
3732 	res = NULL;
3733 	ssz = 0;
3734 
3735 	while ('\0' != *p) {
3736 		assert((unsigned int)*p < 128);
3737 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3738 			sz = r->xtab[(int)*p].sz;
3739 			res = mandoc_realloc(res, ssz + sz + 1);
3740 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3741 			ssz += sz;
3742 			p++;
3743 			continue;
3744 		} else if ('\\' != *p) {
3745 			res = mandoc_realloc(res, ssz + 2);
3746 			res[ssz++] = *p++;
3747 			continue;
3748 		}
3749 
3750 		/* Search for term matches. */
3751 		for (cp = r->xmbtab; cp; cp = cp->next)
3752 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3753 				break;
3754 
3755 		if (NULL != cp) {
3756 			/*
3757 			 * A match has been found.
3758 			 * Append the match to the array and move
3759 			 * forward by its keysize.
3760 			 */
3761 			res = mandoc_realloc(res,
3762 			    ssz + cp->val.sz + 1);
3763 			memcpy(res + ssz, cp->val.p, cp->val.sz);
3764 			ssz += cp->val.sz;
3765 			p += (int)cp->key.sz;
3766 			continue;
3767 		}
3768 
3769 		/*
3770 		 * Handle escapes carefully: we need to copy
3771 		 * over just the escape itself, or else we might
3772 		 * do replacements within the escape itself.
3773 		 * Make sure to pass along the bogus string.
3774 		 */
3775 		pp = p++;
3776 		esc = mandoc_escape(&p, NULL, NULL);
3777 		if (ESCAPE_ERROR == esc) {
3778 			sz = strlen(pp);
3779 			res = mandoc_realloc(res, ssz + sz + 1);
3780 			memcpy(res + ssz, pp, sz);
3781 			break;
3782 		}
3783 		/*
3784 		 * We bail out on bad escapes.
3785 		 * No need to warn: we already did so when
3786 		 * roff_res() was called.
3787 		 */
3788 		sz = (int)(p - pp);
3789 		res = mandoc_realloc(res, ssz + sz + 1);
3790 		memcpy(res + ssz, pp, sz);
3791 		ssz += sz;
3792 	}
3793 
3794 	res[(int)ssz] = '\0';
3795 	return res;
3796 }
3797 
3798 int
3799 roff_getformat(const struct roff *r)
3800 {
3801 
3802 	return r->format;
3803 }
3804 
3805 /*
3806  * Find out whether a line is a macro line or not.
3807  * If it is, adjust the current position and return one; if it isn't,
3808  * return zero and don't change the current position.
3809  * If the control character has been set with `.cc', then let that grain
3810  * precedence.
3811  * This is slighly contrary to groff, where using the non-breaking
3812  * control character when `cc' has been invoked will cause the
3813  * non-breaking macro contents to be printed verbatim.
3814  */
3815 int
3816 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3817 {
3818 	int		pos;
3819 
3820 	pos = *ppos;
3821 
3822 	if (r->control != '\0' && cp[pos] == r->control)
3823 		pos++;
3824 	else if (r->control != '\0')
3825 		return 0;
3826 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3827 		pos += 2;
3828 	else if ('.' == cp[pos] || '\'' == cp[pos])
3829 		pos++;
3830 	else
3831 		return 0;
3832 
3833 	while (' ' == cp[pos] || '\t' == cp[pos])
3834 		pos++;
3835 
3836 	*ppos = pos;
3837 	return 1;
3838 }
3839