xref: /illumos-gate/usr/src/cmd/mandoc/roff.c (revision 9c88ac3ab4edaa5e8c0130ed1b4b376ea57c545a)
1 /*	$Id: roff.c,v 1.324 2017/07/14 17:16:16 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38 
/*
 * Maximum number of string expansions per line, to break infinite loops.
 */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies a bit of its own, so several types can be
 * combined into one mask; ROFFDEF_ANY is the mask of all four.
 * Bit 0 is not assigned to any type here.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
49 
50 /* --- data types --------------------------------------------------------- */
51 
/*
 * A very simple string buffer: a pointer to the text
 * together with its cached length.
 */
struct	roffstr {
	char		*p; /* NUL-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
59 
/*
 * A key-value pair of roffstr buffers that is one element
 * of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* text stored under that name */
	struct roffkv	*next; /* next in list */
};
68 
/*
 * A single number register that is one element
 * of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
77 
/*
 * Association of request and macro names with token IDs.
 * This is the entry type of the tables built by roffhash_alloc(),
 * which allocates each entry large enough to hold the
 * NUL-terminated name in the trailing flexible array member
 * and uses that member as the hash key.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* NUL-terminated name, hash key */
};
85 
/*
 * State of one roff parser instance.
 * Allocated by roff_alloc(), emptied for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, -1 when empty */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
112 
/*
 * One entry of the stack of pending roff blocks.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
123 
/*
 * Common parameter list of all request handlers.
 * Using the macro in both prototypes and definitions
 * keeps every handler compatible with the roffproc type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameter list. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
133 
/*
 * Handlers and flags for one request; this is the
 * element type of the roffs[] dispatch table.
 * Handlers that do not apply to a request are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* flags */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
141 
/*
 * One predefined string: an input name and its replacement.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer, including the
 * trailing comma, so that invocations can simply be listed
 * one after another inside an array initializer.
 * NOTE(review): presumably invoked by the lines of "predefs.in",
 * which is not part of this file - confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
149 
150 /* --- function prototypes ------------------------------------------------ */
151 
/* Helper functions that are not used as request handlers. */

static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Request handlers; all of them share the ROFF_ARGS parameter list. */

static	enum rofferr	 roff_als(ROFF_ARGS);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
223 
224 /* --- constant data ------------------------------------------------------ */
225 
/* Flag bits that control how numeric expressions are parsed. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
228 
229 const char *__roff_name[MAN_MAX + 1] = {
230 	"br",		"ce",		"ft",		"ll",
231 	"mc",		"po",		"rj",		"sp",
232 	"ta",		"ti",		NULL,
233 	"ab",		"ad",		"af",		"aln",
234 	"als",		"am",		"am1",		"ami",
235 	"ami1",		"as",		"as1",		"asciify",
236 	"backtrace",	"bd",		"bleedat",	"blm",
237         "box",		"boxa",		"bp",		"BP",
238 	"break",	"breakchar",	"brnl",		"brp",
239 	"brpnl",	"c2",		"cc",
240 	"cf",		"cflags",	"ch",		"char",
241 	"chop",		"class",	"close",	"CL",
242 	"color",	"composite",	"continue",	"cp",
243 	"cropat",	"cs",		"cu",		"da",
244 	"dch",		"Dd",		"de",		"de1",
245 	"defcolor",	"dei",		"dei1",		"device",
246 	"devicem",	"di",		"do",		"ds",
247 	"ds1",		"dwh",		"dt",		"ec",
248 	"ecr",		"ecs",		"el",		"em",
249 	"EN",		"eo",		"EP",		"EQ",
250 	"errprint",	"ev",		"evc",		"ex",
251 	"fallback",	"fam",		"fc",		"fchar",
252 	"fcolor",	"fdeferlig",	"feature",	"fkern",
253 	"fl",		"flig",		"fp",		"fps",
254 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
255 	"fzoom",	"gcolor",	"hc",		"hcode",
256 	"hidechar",	"hla",		"hlm",		"hpf",
257 	"hpfa",		"hpfcode",	"hw",		"hy",
258 	"hylang",	"hylen",	"hym",		"hypp",
259 	"hys",		"ie",		"if",		"ig",
260 	"index",	"it",		"itc",		"IX",
261 	"kern",		"kernafter",	"kernbefore",	"kernpair",
262 	"lc",		"lc_ctype",	"lds",		"length",
263 	"letadj",	"lf",		"lg",		"lhang",
264 	"linetabs",	"lnr",		"lnrf",		"lpfx",
265 	"ls",		"lsm",		"lt",
266 	"mediasize",	"minss",	"mk",		"mso",
267 	"na",		"ne",		"nh",		"nhychar",
268 	"nm",		"nn",		"nop",		"nr",
269 	"nrf",		"nroff",	"ns",		"nx",
270 	"open",		"opena",	"os",		"output",
271 	"padj",		"papersize",	"pc",		"pev",
272 	"pi",		"PI",		"pl",		"pm",
273 	"pn",		"pnr",		"ps",
274 	"psbb",		"pshape",	"pso",		"ptr",
275 	"pvs",		"rchar",	"rd",		"recursionlimit",
276 	"return",	"rfschar",	"rhang",
277 	"rm",		"rn",		"rnn",		"rr",
278 	"rs",		"rt",		"schar",	"sentchar",
279 	"shc",		"shift",	"sizes",	"so",
280 	"spacewidth",	"special",	"spreadwarn",	"ss",
281 	"sty",		"substring",	"sv",		"sy",
282 	"T&",		"tc",		"TE",
283 	"TH",		"tkf",		"tl",
284 	"tm",		"tm1",		"tmc",		"tr",
285 	"track",	"transchar",	"trf",		"trimat",
286 	"trin",		"trnt",		"troff",	"TS",
287 	"uf",		"ul",		"unformat",	"unwatch",
288 	"unwatchn",	"vpt",		"vs",		"warn",
289 	"warnscale",	"watch",	"watchlength",	"watchn",
290 	"wh",		"while",	"write",	"writec",
291 	"writem",	"xflag",	".",		NULL,
292 	NULL,		"text",
293 	"Dd",		"Dt",		"Os",		"Sh",
294 	"Ss",		"Pp",		"D1",		"Dl",
295 	"Bd",		"Ed",		"Bl",		"El",
296 	"It",		"Ad",		"An",		"Ap",
297 	"Ar",		"Cd",		"Cm",		"Dv",
298 	"Er",		"Ev",		"Ex",		"Fa",
299 	"Fd",		"Fl",		"Fn",		"Ft",
300 	"Ic",		"In",		"Li",		"Nd",
301 	"Nm",		"Op",		"Ot",		"Pa",
302 	"Rv",		"St",		"Va",		"Vt",
303 	"Xr",		"%A",		"%B",		"%D",
304 	"%I",		"%J",		"%N",		"%O",
305 	"%P",		"%R",		"%T",		"%V",
306 	"Ac",		"Ao",		"Aq",		"At",
307 	"Bc",		"Bf",		"Bo",		"Bq",
308 	"Bsx",		"Bx",		"Db",		"Dc",
309 	"Do",		"Dq",		"Ec",		"Ef",
310 	"Em",		"Eo",		"Fx",		"Ms",
311 	"No",		"Ns",		"Nx",		"Ox",
312 	"Pc",		"Pf",		"Po",		"Pq",
313 	"Qc",		"Ql",		"Qo",		"Qq",
314 	"Re",		"Rs",		"Sc",		"So",
315 	"Sq",		"Sm",		"Sx",		"Sy",
316 	"Tn",		"Ux",		"Xc",		"Xo",
317 	"Fo",		"Fc",		"Oo",		"Oc",
318 	"Bk",		"Ek",		"Bt",		"Hf",
319 	"Fr",		"Ud",		"Lb",		"Lp",
320 	"Lk",		"Mt",		"Brq",		"Bro",
321 	"Brc",		"%C",		"Es",		"En",
322 	"Dx",		"%Q",		"%U",		"Ta",
323 	NULL,
324 	"TH",		"SH",		"SS",		"TP",
325 	"LP",		"PP",		"P",		"IP",
326 	"HP",		"SM",		"SB",		"BI",
327 	"IB",		"BR",		"RB",		"R",
328 	"B",		"I",		"IR",		"RI",
329 	"nf",		"fi",
330 	"RE",		"RS",		"DT",		"UC",
331 	"PD",		"AT",		"in",
332 	"OP",		"EX",		"EE",		"UR",
333 	"UE",		"MT",		"ME",		NULL
334 };
335 const	char *const *roff_name = __roff_name;
336 
337 static	struct roffmac	 roffs[TOKEN_NONE] = {
338 	{ roff_br, NULL, NULL, 0 },  /* br */
339 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
340 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
341 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
342 	{ roff_onearg, NULL, NULL, 0 },  /* mc */
343 	{ roff_onearg, NULL, NULL, 0 },  /* po */
344 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
345 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
346 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
347 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
348 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
349 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
350 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
351 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
352 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
353 	{ roff_als, NULL, NULL, 0 },  /* als */
354 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
355 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
356 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
357 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
358 	{ roff_ds, NULL, NULL, 0 },  /* as */
359 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
360 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
361 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
362 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
363 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
364 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
365 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
366 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
367 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
368 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
369 	{ roff_unsupp, NULL, NULL, 0 },  /* break */
370 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
371 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
372 	{ roff_br, NULL, NULL, 0 },  /* brp */
373 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
374 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
375 	{ roff_cc, NULL, NULL, 0 },  /* cc */
376 	{ roff_insec, NULL, NULL, 0 },  /* cf */
377 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
378 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
379 	{ roff_unsupp, NULL, NULL, 0 },  /* char */
380 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
381 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
382 	{ roff_insec, NULL, NULL, 0 },  /* close */
383 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
384 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
385 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
386 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
387 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
388 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
389 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
390 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
391 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
392 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
393 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
394 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
395 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
396 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
397 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
398 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
399 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
400 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
401 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
402 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
403 	{ roff_ds, NULL, NULL, 0 },  /* ds */
404 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
405 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
406 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
407 	{ roff_ec, NULL, NULL, 0 },  /* ec */
408 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
409 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
410 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
411 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
412 	{ roff_EN, NULL, NULL, 0 },  /* EN */
413 	{ roff_eo, NULL, NULL, 0 },  /* eo */
414 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
415 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
416 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
417 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
418 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
419 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
420 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
421 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
422 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
423 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
425 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
426 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
427 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
428 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
429 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
430 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
431 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
432 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
433 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
434 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
435 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
436 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
437 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
438 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
439 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
440 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
441 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
442 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
443 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
444 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
445 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
446 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
447 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
448 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
449 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
450 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
451 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
452 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
453 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
454 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
455 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
456 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
457 	{ roff_it, NULL, NULL, 0 },  /* it */
458 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
459 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
460 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
462 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
464 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
465 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
466 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
467 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
469 	{ roff_insec, NULL, NULL, 0 },  /* lf */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
472 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
473 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
474 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
475 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
477 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
482 	{ roff_insec, NULL, NULL, 0 },  /* mso */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
487 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
488 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
489 	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
490 	{ roff_nr, NULL, NULL, 0 },  /* nr */
491 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
492 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
493 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
494 	{ roff_insec, NULL, NULL, 0 },  /* nx */
495 	{ roff_insec, NULL, NULL, 0 },  /* open */
496 	{ roff_insec, NULL, NULL, 0 },  /* opena */
497 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
498 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
499 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
500 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
501 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
502 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
503 	{ roff_insec, NULL, NULL, 0 },  /* pi */
504 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
505 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
506 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
507 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
508 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
509 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
510 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
511 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
512 	{ roff_insec, NULL, NULL, 0 },  /* pso */
513 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
514 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
515 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
516 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
517 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
518 	{ roff_unsupp, NULL, NULL, 0 },  /* return */
519 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
521 	{ roff_rm, NULL, NULL, 0 },  /* rm */
522 	{ roff_rn, NULL, NULL, 0 },  /* rn */
523 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
524 	{ roff_rr, NULL, NULL, 0 },  /* rr */
525 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
526 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
527 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
528 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
529 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
530 	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
531 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
532 	{ roff_so, NULL, NULL, 0 },  /* so */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
534 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
536 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
537 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
538 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
539 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
540 	{ roff_insec, NULL, NULL, 0 },  /* sy */
541 	{ roff_T_, NULL, NULL, 0 },  /* T& */
542 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
543 	{ roff_TE, NULL, NULL, 0 },  /* TE */
544 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
545 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
546 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
547 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
548 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
550 	{ roff_tr, NULL, NULL, 0 },  /* tr */
551 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
552 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
553 	{ roff_insec, NULL, NULL, 0 },  /* trf */
554 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
555 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
556 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
557 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
558 	{ roff_TS, NULL, NULL, 0 },  /* TS */
559 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
560 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
561 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
562 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
563 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
564 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
565 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
567 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
571 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
572 	{ roff_unsupp, NULL, NULL, 0 },  /* while */
573 	{ roff_insec, NULL, NULL, 0 },  /* write */
574 	{ roff_insec, NULL, NULL, 0 },  /* writec */
575 	{ roff_insec, NULL, NULL, 0 },  /* writem */
576 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
577 	{ roff_cblock, NULL, NULL, 0 },  /* . */
578 	{ roff_renamed, NULL, NULL, 0 },
579 	{ roff_userdef, NULL, NULL, 0 }
580 };
581 
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in"; PREDEFS_MAX is the
 * declared array length.
 * NOTE(review): presumably PREDEFS_MAX has to be kept in sync with
 * the number of entries in "predefs.in" - confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/*
 * State of the `ce' and `it' requests.
 * These are file-scope variables rather than members of struct roff;
 * roff_reset() puts all four back to zero or NULL.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* NUL-terminated macro line */
592 
593 
594 /* --- request table ------------------------------------------------------ */
595 
596 struct ohash *
597 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
598 {
599 	struct ohash	*htab;
600 	struct roffreq	*req;
601 	enum roff_tok	 tok;
602 	size_t		 sz;
603 	unsigned int	 slot;
604 
605 	htab = mandoc_malloc(sizeof(*htab));
606 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
607 
608 	for (tok = mintok; tok < maxtok; tok++) {
609 		if (roff_name[tok] == NULL)
610 			continue;
611 		sz = strlen(roff_name[tok]);
612 		req = mandoc_malloc(sizeof(*req) + sz + 1);
613 		req->tok = tok;
614 		memcpy(req->name, roff_name[tok], sz + 1);
615 		slot = ohash_qlookup(htab, req->name);
616 		ohash_insert(htab, slot, req);
617 	}
618 	return htab;
619 }
620 
/*
 * Release a table built by roffhash_alloc(): all entries,
 * the hash table internals, and the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &pos);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &pos);
	}
	ohash_delete(htab);
	free(htab);
}
635 
636 enum roff_tok
637 roffhash_find(struct ohash *htab, const char *name, size_t sz)
638 {
639 	struct roffreq	*req;
640 	const char	*end;
641 
642 	if (sz) {
643 		end = name + sz;
644 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
645 	} else
646 		req = ohash_find(htab, ohash_qlookup(htab, name));
647 	return req == NULL ? TOKEN_NONE : req->tok;
648 }
649 
650 /* --- stack of request blocks -------------------------------------------- */
651 
652 /*
653  * Pop the current node off of the stack of roff instructions currently
654  * pending.
655  */
656 static void
657 roffnode_pop(struct roff *r)
658 {
659 	struct roffnode	*p;
660 
661 	assert(r->last);
662 	p = r->last;
663 
664 	r->last = r->last->parent;
665 	free(p->name);
666 	free(p->end);
667 	free(p);
668 }
669 
670 /*
671  * Push a roff node onto the instruction stack.  This must later be
672  * removed with roffnode_pop().
673  */
674 static void
675 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
676 		int line, int col)
677 {
678 	struct roffnode	*p;
679 
680 	p = mandoc_calloc(1, sizeof(struct roffnode));
681 	p->tok = tok;
682 	if (name)
683 		p->name = mandoc_strdup(name);
684 	p->parent = r->last;
685 	p->line = line;
686 	p->col = col;
687 	p->rule = p->parent ? p->parent->rule : 0;
688 
689 	r->last = p;
690 }
691 
692 /* --- roff parser state data management ---------------------------------- */
693 
694 static void
695 roff_free1(struct roff *r)
696 {
697 	struct tbl_node	*tbl;
698 	int		 i;
699 
700 	while (NULL != (tbl = r->first_tbl)) {
701 		r->first_tbl = tbl->next;
702 		tbl_free(tbl);
703 	}
704 	r->first_tbl = r->last_tbl = r->tbl = NULL;
705 
706 	if (r->last_eqn != NULL)
707 		eqn_free(r->last_eqn);
708 	r->last_eqn = r->eqn = NULL;
709 
710 	while (r->last)
711 		roffnode_pop(r);
712 
713 	free (r->rstack);
714 	r->rstack = NULL;
715 	r->rstacksz = 0;
716 	r->rstackpos = -1;
717 
718 	roff_freereg(r->regtab);
719 	r->regtab = NULL;
720 
721 	roff_freestr(r->strtab);
722 	roff_freestr(r->rentab);
723 	roff_freestr(r->xmbtab);
724 	r->strtab = r->rentab = r->xmbtab = NULL;
725 
726 	if (r->xtab)
727 		for (i = 0; i < 128; i++)
728 			free(r->xtab[i].p);
729 	free(r->xtab);
730 	r->xtab = NULL;
731 }
732 
733 void
734 roff_reset(struct roff *r)
735 {
736 	roff_free1(r);
737 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
738 	r->control = '\0';
739 	r->escape = '\\';
740 	roffce_lines = 0;
741 	roffce_node = NULL;
742 	roffit_lines = 0;
743 	roffit_macro = NULL;
744 }
745 
746 void
747 roff_free(struct roff *r)
748 {
749 	roff_free1(r);
750 	roffhash_free(r->reqtab);
751 	free(r);
752 }
753 
754 struct roff *
755 roff_alloc(struct mparse *parse, int options)
756 {
757 	struct roff	*r;
758 
759 	r = mandoc_calloc(1, sizeof(struct roff));
760 	r->parse = parse;
761 	r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
762 	r->options = options;
763 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
764 	r->rstackpos = -1;
765 	r->escape = '\\';
766 	return r;
767 }
768 
769 /* --- syntax tree state data management ---------------------------------- */
770 
771 static void
772 roff_man_free1(struct roff_man *man)
773 {
774 
775 	if (man->first != NULL)
776 		roff_node_delete(man, man->first);
777 	free(man->meta.msec);
778 	free(man->meta.vol);
779 	free(man->meta.os);
780 	free(man->meta.arch);
781 	free(man->meta.title);
782 	free(man->meta.name);
783 	free(man->meta.date);
784 }
785 
786 static void
787 roff_man_alloc1(struct roff_man *man)
788 {
789 
790 	memset(&man->meta, 0, sizeof(man->meta));
791 	man->first = mandoc_calloc(1, sizeof(*man->first));
792 	man->first->type = ROFFT_ROOT;
793 	man->last = man->first;
794 	man->last_es = NULL;
795 	man->flags = 0;
796 	man->macroset = MACROSET_NONE;
797 	man->lastsec = man->lastnamed = SEC_NONE;
798 	man->next = ROFF_NEXT_CHILD;
799 }
800 
/*
 * Throw away the current document and start over with
 * an empty tree, keeping the roff_man object itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop tree and meta data */
	roff_man_alloc1(man);	/* back to the initial state */
}
808 
/*
 * Destroy an object created by roff_man_alloc(): release the
 * syntax tree, the meta data strings, and the object itself.
 * Like free() and roffhash_free(), this accepts NULL and
 * does nothing in that case, so cleanup paths can call it
 * unconditionally.
 */
void
roff_man_free(struct roff_man *man)
{
	if (man == NULL)
		return;
	roff_man_free1(man);
	free(man);
}
816 
817 struct roff_man *
818 roff_man_alloc(struct roff *roff, struct mparse *parse,
819 	const char *os_s, int quick)
820 {
821 	struct roff_man *man;
822 
823 	man = mandoc_calloc(1, sizeof(*man));
824 	man->parse = parse;
825 	man->roff = roff;
826 	man->os_s = os_s;
827 	man->quick = quick;
828 	roff_man_alloc1(man);
829 	roff->man = man;
830 	return man;
831 }
832 
833 /* --- syntax tree handling ----------------------------------------------- */
834 
835 struct roff_node *
836 roff_node_alloc(struct roff_man *man, int line, int pos,
837 	enum roff_type type, int tok)
838 {
839 	struct roff_node	*n;
840 
841 	n = mandoc_calloc(1, sizeof(*n));
842 	n->line = line;
843 	n->pos = pos;
844 	n->tok = tok;
845 	n->type = type;
846 	n->sec = man->lastsec;
847 
848 	if (man->flags & MDOC_SYNOPSIS)
849 		n->flags |= NODE_SYNPRETTY;
850 	else
851 		n->flags &= ~NODE_SYNPRETTY;
852 	if (man->flags & MDOC_NEWLINE)
853 		n->flags |= NODE_LINE;
854 	man->flags &= ~MDOC_NEWLINE;
855 
856 	return n;
857 }
858 
859 void
860 roff_node_append(struct roff_man *man, struct roff_node *n)
861 {
862 
863 	switch (man->next) {
864 	case ROFF_NEXT_SIBLING:
865 		if (man->last->next != NULL) {
866 			n->next = man->last->next;
867 			man->last->next->prev = n;
868 		} else
869 			man->last->parent->last = n;
870 		man->last->next = n;
871 		n->prev = man->last;
872 		n->parent = man->last->parent;
873 		break;
874 	case ROFF_NEXT_CHILD:
875 		if (man->last->child != NULL) {
876 			n->next = man->last->child;
877 			man->last->child->prev = n;
878 		} else
879 			man->last->last = n;
880 		man->last->child = n;
881 		n->parent = man->last;
882 		break;
883 	default:
884 		abort();
885 	}
886 	man->last = n;
887 
888 	switch (n->type) {
889 	case ROFFT_HEAD:
890 		n->parent->head = n;
891 		break;
892 	case ROFFT_BODY:
893 		if (n->end != ENDBODY_NOT)
894 			return;
895 		n->parent->body = n;
896 		break;
897 	case ROFFT_TAIL:
898 		n->parent->tail = n;
899 		break;
900 	default:
901 		return;
902 	}
903 
904 	/*
905 	 * Copy over the normalised-data pointer of our parent.  Not
906 	 * everybody has one, but copying a null pointer is fine.
907 	 */
908 
909 	n->norm = n->parent->norm;
910 	assert(n->parent->type == ROFFT_BLOCK);
911 }
912 
913 void
914 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
915 {
916 	struct roff_node	*n;
917 
918 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
919 	n->string = roff_strdup(man->roff, word);
920 	roff_node_append(man, n);
921 	n->flags |= NODE_VALID | NODE_ENDED;
922 	man->next = ROFF_NEXT_SIBLING;
923 }
924 
925 void
926 roff_word_append(struct roff_man *man, const char *word)
927 {
928 	struct roff_node	*n;
929 	char			*addstr, *newstr;
930 
931 	n = man->last;
932 	addstr = roff_strdup(man->roff, word);
933 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
934 	free(addstr);
935 	free(n->string);
936 	n->string = newstr;
937 	man->next = ROFF_NEXT_SIBLING;
938 }
939 
940 void
941 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
942 {
943 	struct roff_node	*n;
944 
945 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
946 	roff_node_append(man, n);
947 	man->next = ROFF_NEXT_CHILD;
948 }
949 
950 struct roff_node *
951 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
952 {
953 	struct roff_node	*n;
954 
955 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
956 	roff_node_append(man, n);
957 	man->next = ROFF_NEXT_CHILD;
958 	return n;
959 }
960 
961 struct roff_node *
962 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
963 {
964 	struct roff_node	*n;
965 
966 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
967 	roff_node_append(man, n);
968 	man->next = ROFF_NEXT_CHILD;
969 	return n;
970 }
971 
972 struct roff_node *
973 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
974 {
975 	struct roff_node	*n;
976 
977 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
978 	roff_node_append(man, n);
979 	man->next = ROFF_NEXT_CHILD;
980 	return n;
981 }
982 
983 static void
984 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
985 {
986 	struct roff_node	*n;
987 	const struct tbl_span	*span;
988 
989 	if (man->macroset == MACROSET_MAN)
990 		man_breakscope(man, ROFF_TS);
991 	while ((span = tbl_span(tbl)) != NULL) {
992 		n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
993 		n->span = span;
994 		roff_node_append(man, n);
995 		n->flags |= NODE_VALID | NODE_ENDED;
996 		man->next = ROFF_NEXT_SIBLING;
997 	}
998 }
999 
1000 void
1001 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1002 {
1003 
1004 	/* Adjust siblings. */
1005 
1006 	if (n->prev)
1007 		n->prev->next = n->next;
1008 	if (n->next)
1009 		n->next->prev = n->prev;
1010 
1011 	/* Adjust parent. */
1012 
1013 	if (n->parent != NULL) {
1014 		if (n->parent->child == n)
1015 			n->parent->child = n->next;
1016 		if (n->parent->last == n)
1017 			n->parent->last = n->prev;
1018 	}
1019 
1020 	/* Adjust parse point. */
1021 
1022 	if (man == NULL)
1023 		return;
1024 	if (man->last == n) {
1025 		if (n->prev == NULL) {
1026 			man->last = n->parent;
1027 			man->next = ROFF_NEXT_CHILD;
1028 		} else {
1029 			man->last = n->prev;
1030 			man->next = ROFF_NEXT_SIBLING;
1031 		}
1032 	}
1033 	if (man->first == n)
1034 		man->first = NULL;
1035 }
1036 
1037 void
1038 roff_node_free(struct roff_node *n)
1039 {
1040 
1041 	if (n->args != NULL)
1042 		mdoc_argv_free(n->args);
1043 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1044 		free(n->norm);
1045 	if (n->eqn != NULL)
1046 		eqn_box_free(n->eqn);
1047 	free(n->string);
1048 	free(n);
1049 }
1050 
1051 void
1052 roff_node_delete(struct roff_man *man, struct roff_node *n)
1053 {
1054 
1055 	while (n->child != NULL)
1056 		roff_node_delete(man, n->child);
1057 	roff_node_unlink(man, n);
1058 	roff_node_free(n);
1059 }
1060 
1061 void
1062 deroff(char **dest, const struct roff_node *n)
1063 {
1064 	char	*cp;
1065 	size_t	 sz;
1066 
1067 	if (n->type != ROFFT_TEXT) {
1068 		for (n = n->child; n != NULL; n = n->next)
1069 			deroff(dest, n);
1070 		return;
1071 	}
1072 
1073 	/* Skip leading whitespace. */
1074 
1075 	for (cp = n->string; *cp != '\0'; cp++) {
1076 		if (cp[0] == '\\' && cp[1] != '\0' &&
1077 		    strchr(" %&0^|~", cp[1]) != NULL)
1078 			cp++;
1079 		else if ( ! isspace((unsigned char)*cp))
1080 			break;
1081 	}
1082 
1083 	/* Skip trailing backslash. */
1084 
1085 	sz = strlen(cp);
1086 	if (sz > 0 && cp[sz - 1] == '\\')
1087 		sz--;
1088 
1089 	/* Skip trailing whitespace. */
1090 
1091 	for (; sz; sz--)
1092 		if ( ! isspace((unsigned char)cp[sz-1]))
1093 			break;
1094 
1095 	/* Skip empty strings. */
1096 
1097 	if (sz == 0)
1098 		return;
1099 
1100 	if (*dest == NULL) {
1101 		*dest = mandoc_strndup(cp, sz);
1102 		return;
1103 	}
1104 
1105 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1106 	free(*dest);
1107 	*dest = cp;
1108 }
1109 
1110 /* --- main functions of the roff parser ---------------------------------- */
1111 
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line starting at buf->buf + pos is first truncated at a
 * comment, if there is one.  It is then scanned backwards from its
 * end; the escapes \* (string), \B (validity of a numeric
 * expression), \n (register) and \w (width) are replaced by their
 * values, reallocating buf->buf for each replacement.
 *
 * Returns ROFF_CONT when the line can be parsed further,
 * ROFF_APPEND when the line ends with an unescaped escape character
 * and no end of input was seen, and ROFF_IGN when more than
 * EXPAND_LIMIT expansions were attempted or the line would grow
 * beyond SHRT_MAX bytes.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != r->escape || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/*
		 * Only accept the keyword if it is not followed by an
		 * alphanumeric character and a closing '$' exists.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
				    ln, stesc + 1 - buf->buf, stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/* Afterwards, stesc points to the escape character again. */
		cp = strchr(stesc--, '\0') - 1;
		if (*cp == '\n') {
			done = 1;
			cp--;
		}
		if (*cp == ' ' || *cp == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
			    ln, cp - buf->buf, NULL);
		/* Cut off the comment and the blanks preceding it. */
		while (stesc > start && stesc[-1] == ' ')
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	expand_count = 0;
	while (stesc >= start) {

		/* Search backwards for the next backslash. */

		if (*stesc != r->escape) {
			/*
			 * A backslash that is not the escape character:
			 * replace it by "\e" in a reallocated buffer.
			 */
			if (*stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}
			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * An even run of escape characters contains no active
		 * escape: rewrite the whole run to backslashes.
		 * Otherwise, rewrite the active escape character to a
		 * backslash, or handle an escape at the end of the line.
		 */
		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The character after the letter is the delimiter. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here: only check the syntax. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/*
			 * Inside \w, an escape sequence counts as one
			 * character if it is of one of the types listed
			 * below, and as none otherwise.
			 */
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			/* With an incomplete name, res remains NULL. */
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);
			}
			break;
		case 'B':
			/*
			 * Yield "1" if the complete argument evaluates as
			 * a numeric expression ending right before the
			 * closing delimiter, and "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Resume the backward scan at the end of the inserted
		 * text, such that escapes contained in it are seen, too.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1388 
1389 /*
1390  * Process text streams.
1391  */
1392 static enum rofferr
1393 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1394 {
1395 	size_t		 sz;
1396 	const char	*start;
1397 	char		*p;
1398 	int		 isz;
1399 	enum mandoc_esc	 esc;
1400 
1401 	/* Spring the input line trap. */
1402 
1403 	if (roffit_lines == 1) {
1404 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1405 		free(buf->buf);
1406 		buf->buf = p;
1407 		buf->sz = isz + 1;
1408 		*offs = 0;
1409 		free(roffit_macro);
1410 		roffit_lines = 0;
1411 		return ROFF_REPARSE;
1412 	} else if (roffit_lines > 1)
1413 		--roffit_lines;
1414 
1415 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1416 		if (roffce_lines < 1) {
1417 			r->man->last = roffce_node;
1418 			r->man->next = ROFF_NEXT_SIBLING;
1419 			roffce_lines = 0;
1420 			roffce_node = NULL;
1421 		} else
1422 			roffce_lines--;
1423 	}
1424 
1425 	/* Convert all breakable hyphens into ASCII_HYPH. */
1426 
1427 	start = p = buf->buf + pos;
1428 
1429 	while (*p != '\0') {
1430 		sz = strcspn(p, "-\\");
1431 		p += sz;
1432 
1433 		if (*p == '\0')
1434 			break;
1435 
1436 		if (*p == '\\') {
1437 			/* Skip over escapes. */
1438 			p++;
1439 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1440 			if (esc == ESCAPE_ERROR)
1441 				break;
1442 			while (*p == '-')
1443 				p++;
1444 			continue;
1445 		} else if (p == start) {
1446 			p++;
1447 			continue;
1448 		}
1449 
1450 		if (isalpha((unsigned char)p[-1]) &&
1451 		    isalpha((unsigned char)p[1]))
1452 			*p = ASCII_HYPH;
1453 		p++;
1454 	}
1455 	return ROFF_CONT;
1456 }
1457 
/*
 * Parse one input line, starting at offset *offs of buf->buf.
 *
 * In-line equation delimiters and escape sequences are handled
 * first.  After that, the line is passed to the text or sub handler
 * of an open roff block, to an open equation or table, to the text
 * parser, or to the handler of the request or user-defined macro
 * found on the line, in that order of precedence.
 *
 * Returns the result of whichever handler took the line.
 * ROFF_CONT is returned for macro lines that are neither roff
 * requests nor user-defined macros, leaving them to the caller.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN || e == ROFF_APPEND)
		return e;
	assert(e == ROFF_CONT);

	/* On a macro line, this moves pos to the macro name. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if (e == ROFF_IGN)
			return e;
		assert(e == ROFF_CONT);
	}
	/* strncmp() is non-zero for every line not starting with ".EN". */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return ROFF_IGN;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs);

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
		    ln, pos, buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For an unknown macro, skip its name and the following
		 * blanks and feed the rest of the line to the table.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1582 
1583 void
1584 roff_endparse(struct roff *r)
1585 {
1586 	if (r->last != NULL)
1587 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1588 		    r->last->line, r->last->col,
1589 		    roff_name[r->last->tok]);
1590 
1591 	if (r->eqn != NULL) {
1592 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1593 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1594 		eqn_parse(r->eqn);
1595 		r->eqn = NULL;
1596 	}
1597 
1598 	if (r->tbl != NULL) {
1599 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1600 		    r->tbl->line, r->tbl->pos, "TS");
1601 		tbl_end(r->tbl);
1602 		r->tbl = NULL;
1603 	}
1604 }
1605 
1606 /*
1607  * Parse a roff node's type from the input buffer.  This must be in the
1608  * form of ".foo xxx" in the usual way.
1609  */
1610 static enum roff_tok
1611 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1612 {
1613 	char		*cp;
1614 	const char	*mac;
1615 	size_t		 maclen;
1616 	int		 deftype;
1617 	enum roff_tok	 t;
1618 
1619 	cp = buf + *pos;
1620 
1621 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1622 		return TOKEN_NONE;
1623 
1624 	mac = cp;
1625 	maclen = roff_getname(r, &cp, ln, ppos);
1626 
1627 	deftype = ROFFDEF_USER | ROFFDEF_REN;
1628 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1629 	switch (deftype) {
1630 	case ROFFDEF_USER:
1631 		t = ROFF_USERDEF;
1632 		break;
1633 	case ROFFDEF_REN:
1634 		t = ROFF_RENAMED;
1635 		break;
1636 	default:
1637 		t = roffhash_find(r->reqtab, mac, maclen);
1638 		break;
1639 	}
1640 	if (t != TOKEN_NONE)
1641 		*pos = cp - buf;
1642 	return t;
1643 }
1644 
1645 /* --- handling of request blocks ----------------------------------------- */
1646 
1647 static enum rofferr
1648 roff_cblock(ROFF_ARGS)
1649 {
1650 
1651 	/*
1652 	 * A block-close `..' should only be invoked as a child of an
1653 	 * ignore macro, otherwise raise a warning and just ignore it.
1654 	 */
1655 
1656 	if (r->last == NULL) {
1657 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1658 		    ln, ppos, "..");
1659 		return ROFF_IGN;
1660 	}
1661 
1662 	switch (r->last->tok) {
1663 	case ROFF_am:
1664 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1665 	case ROFF_ami:
1666 	case ROFF_de:
1667 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1668 	case ROFF_dei:
1669 	case ROFF_ig:
1670 		break;
1671 	default:
1672 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1673 		    ln, ppos, "..");
1674 		return ROFF_IGN;
1675 	}
1676 
1677 	if (buf->buf[pos] != '\0')
1678 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1679 		    ".. %s", buf->buf + pos);
1680 
1681 	roffnode_pop(r);
1682 	roffnode_cleanscope(r);
1683 	return ROFF_IGN;
1684 
1685 }
1686 
1687 static void
1688 roffnode_cleanscope(struct roff *r)
1689 {
1690 
1691 	while (r->last) {
1692 		if (--r->last->endspan != 0)
1693 			break;
1694 		roffnode_pop(r);
1695 	}
1696 }
1697 
1698 static void
1699 roff_ccond(struct roff *r, int ln, int ppos)
1700 {
1701 
1702 	if (NULL == r->last) {
1703 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1704 		    ln, ppos, "\\}");
1705 		return;
1706 	}
1707 
1708 	switch (r->last->tok) {
1709 	case ROFF_el:
1710 	case ROFF_ie:
1711 	case ROFF_if:
1712 		break;
1713 	default:
1714 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1715 		    ln, ppos, "\\}");
1716 		return;
1717 	}
1718 
1719 	if (r->last->endspan > -1) {
1720 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1721 		    ln, ppos, "\\}");
1722 		return;
1723 	}
1724 
1725 	roffnode_pop(r);
1726 	roffnode_cleanscope(r);
1727 	return;
1728 }
1729 
1730 static enum rofferr
1731 roff_block(ROFF_ARGS)
1732 {
1733 	const char	*name, *value;
1734 	char		*call, *cp, *iname, *rname;
1735 	size_t		 csz, namesz, rsz;
1736 	int		 deftype;
1737 
1738 	/* Ignore groff compatibility mode for now. */
1739 
1740 	if (tok == ROFF_de1)
1741 		tok = ROFF_de;
1742 	else if (tok == ROFF_dei1)
1743 		tok = ROFF_dei;
1744 	else if (tok == ROFF_am1)
1745 		tok = ROFF_am;
1746 	else if (tok == ROFF_ami1)
1747 		tok = ROFF_ami;
1748 
1749 	/* Parse the macro name argument. */
1750 
1751 	cp = buf->buf + pos;
1752 	if (tok == ROFF_ig) {
1753 		iname = NULL;
1754 		namesz = 0;
1755 	} else {
1756 		iname = cp;
1757 		namesz = roff_getname(r, &cp, ln, ppos);
1758 		iname[namesz] = '\0';
1759 	}
1760 
1761 	/* Resolve the macro name argument if it is indirect. */
1762 
1763 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1764 		deftype = ROFFDEF_USER;
1765 		name = roff_getstrn(r, iname, namesz, &deftype);
1766 		if (name == NULL) {
1767 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1768 			    r->parse, ln, (int)(iname - buf->buf),
1769 			    "%.*s", (int)namesz, iname);
1770 			namesz = 0;
1771 		} else
1772 			namesz = strlen(name);
1773 	} else
1774 		name = iname;
1775 
1776 	if (namesz == 0 && tok != ROFF_ig) {
1777 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1778 		    ln, ppos, roff_name[tok]);
1779 		return ROFF_IGN;
1780 	}
1781 
1782 	roffnode_push(r, tok, name, ln, ppos);
1783 
1784 	/*
1785 	 * At the beginning of a `de' macro, clear the existing string
1786 	 * with the same name, if there is one.  New content will be
1787 	 * appended from roff_block_text() in multiline mode.
1788 	 */
1789 
1790 	if (tok == ROFF_de || tok == ROFF_dei) {
1791 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1792 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1793 	} else if (tok == ROFF_am || tok == ROFF_ami) {
1794 		deftype = ROFFDEF_ANY;
1795 		value = roff_getstrn(r, iname, namesz, &deftype);
1796 		switch (deftype) {  /* Before appending, ... */
1797 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
1798 			roff_setstrn(&r->strtab, name, namesz,
1799 			    value, strlen(value), 0);
1800 			break;
1801 		case ROFFDEF_REN: /* call original standard macro. */
1802 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1803 			    (int)strlen(value), value);
1804 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1805 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1806 			free(call);
1807 			break;
1808 		case ROFFDEF_STD:  /* rename and call standard macro. */
1809 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1810 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1811 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1812 			    (int)rsz, rname);
1813 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1814 			free(call);
1815 			free(rname);
1816 			break;
1817 		default:
1818 			break;
1819 		}
1820 	}
1821 
1822 	if (*cp == '\0')
1823 		return ROFF_IGN;
1824 
1825 	/* Get the custom end marker. */
1826 
1827 	iname = cp;
1828 	namesz = roff_getname(r, &cp, ln, ppos);
1829 
1830 	/* Resolve the end marker if it is indirect. */
1831 
1832 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1833 		deftype = ROFFDEF_USER;
1834 		name = roff_getstrn(r, iname, namesz, &deftype);
1835 		if (name == NULL) {
1836 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1837 			    r->parse, ln, (int)(iname - buf->buf),
1838 			    "%.*s", (int)namesz, iname);
1839 			namesz = 0;
1840 		} else
1841 			namesz = strlen(name);
1842 	} else
1843 		name = iname;
1844 
1845 	if (namesz)
1846 		r->last->end = mandoc_strndup(name, namesz);
1847 
1848 	if (*cp != '\0')
1849 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1850 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
1851 
1852 	return ROFF_IGN;
1853 }
1854 
1855 static enum rofferr
1856 roff_block_sub(ROFF_ARGS)
1857 {
1858 	enum roff_tok	t;
1859 	int		i, j;
1860 
1861 	/*
1862 	 * First check whether a custom macro exists at this level.  If
1863 	 * it does, then check against it.  This is some of groff's
1864 	 * stranger behaviours.  If we encountered a custom end-scope
1865 	 * tag and that tag also happens to be a "real" macro, then we
1866 	 * need to try interpreting it again as a real macro.  If it's
1867 	 * not, then return ignore.  Else continue.
1868 	 */
1869 
1870 	if (r->last->end) {
1871 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1872 			if (buf->buf[i] != r->last->end[j])
1873 				break;
1874 
1875 		if (r->last->end[j] == '\0' &&
1876 		    (buf->buf[i] == '\0' ||
1877 		     buf->buf[i] == ' ' ||
1878 		     buf->buf[i] == '\t')) {
1879 			roffnode_pop(r);
1880 			roffnode_cleanscope(r);
1881 
1882 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1883 				i++;
1884 
1885 			pos = i;
1886 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1887 			    TOKEN_NONE)
1888 				return ROFF_RERUN;
1889 			return ROFF_IGN;
1890 		}
1891 	}
1892 
1893 	/*
1894 	 * If we have no custom end-query or lookup failed, then try
1895 	 * pulling it out of the hashtable.
1896 	 */
1897 
1898 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1899 
1900 	if (t != ROFF_cblock) {
1901 		if (tok != ROFF_ig)
1902 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1903 		return ROFF_IGN;
1904 	}
1905 
1906 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1907 }
1908 
1909 static enum rofferr
1910 roff_block_text(ROFF_ARGS)
1911 {
1912 
1913 	if (tok != ROFF_ig)
1914 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1915 
1916 	return ROFF_IGN;
1917 }
1918 
1919 static enum rofferr
1920 roff_cond_sub(ROFF_ARGS)
1921 {
1922 	enum roff_tok	 t;
1923 	char		*ep;
1924 	int		 rr;
1925 
1926 	rr = r->last->rule;
1927 	roffnode_cleanscope(r);
1928 
1929 	/*
1930 	 * If `\}' occurs on a macro line without a preceding macro,
1931 	 * drop the line completely.
1932 	 */
1933 
1934 	ep = buf->buf + pos;
1935 	if (ep[0] == '\\' && ep[1] == '}')
1936 		rr = 0;
1937 
1938 	/* Always check for the closing delimiter `\}'. */
1939 
1940 	while ((ep = strchr(ep, '\\')) != NULL) {
1941 		switch (ep[1]) {
1942 		case '}':
1943 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
1944 			roff_ccond(r, ln, ep - buf->buf);
1945 			break;
1946 		case '\0':
1947 			++ep;
1948 			break;
1949 		default:
1950 			ep += 2;
1951 			break;
1952 		}
1953 	}
1954 
1955 	/*
1956 	 * Fully handle known macros when they are structurally
1957 	 * required or when the conditional evaluated to true.
1958 	 */
1959 
1960 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1961 	return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1962 	    ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1963 	    ? ROFF_CONT : ROFF_IGN;
1964 }
1965 
1966 static enum rofferr
1967 roff_cond_text(ROFF_ARGS)
1968 {
1969 	char		*ep;
1970 	int		 rr;
1971 
1972 	rr = r->last->rule;
1973 	roffnode_cleanscope(r);
1974 
1975 	ep = buf->buf + pos;
1976 	while ((ep = strchr(ep, '\\')) != NULL) {
1977 		if (*(++ep) == '}') {
1978 			*ep = '&';
1979 			roff_ccond(r, ln, ep - buf->buf - 1);
1980 		}
1981 		if (*ep != '\0')
1982 			++ep;
1983 	}
1984 	return rr ? ROFF_CONT : ROFF_IGN;
1985 }
1986 
1987 /* --- handling of numeric and conditional expressions -------------------- */
1988 
1989 /*
1990  * Parse a single signed integer number.  Stop at the first non-digit.
1991  * If there is at least one digit, return success and advance the
1992  * parse point, else return failure and let the parse point unchanged.
1993  * Ignore overflows, treat them just like the C language.
1994  */
1995 static int
1996 roff_getnum(const char *v, int *pos, int *res, int flags)
1997 {
1998 	int	 myres, scaled, n, p;
1999 
2000 	if (NULL == res)
2001 		res = &myres;
2002 
2003 	p = *pos;
2004 	n = v[p] == '-';
2005 	if (n || v[p] == '+')
2006 		p++;
2007 
2008 	if (flags & ROFFNUM_WHITE)
2009 		while (isspace((unsigned char)v[p]))
2010 			p++;
2011 
2012 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2013 		*res = 10 * *res + v[p] - '0';
2014 	if (p == *pos + n)
2015 		return 0;
2016 
2017 	if (n)
2018 		*res = -*res;
2019 
2020 	/* Each number may be followed by one optional scaling unit. */
2021 
2022 	switch (v[p]) {
2023 	case 'f':
2024 		scaled = *res * 65536;
2025 		break;
2026 	case 'i':
2027 		scaled = *res * 240;
2028 		break;
2029 	case 'c':
2030 		scaled = *res * 240 / 2.54;
2031 		break;
2032 	case 'v':
2033 	case 'P':
2034 		scaled = *res * 40;
2035 		break;
2036 	case 'm':
2037 	case 'n':
2038 		scaled = *res * 24;
2039 		break;
2040 	case 'p':
2041 		scaled = *res * 10 / 3;
2042 		break;
2043 	case 'u':
2044 		scaled = *res;
2045 		break;
2046 	case 'M':
2047 		scaled = *res * 6 / 25;
2048 		break;
2049 	default:
2050 		scaled = *res;
2051 		p--;
2052 		break;
2053 	}
2054 	if (flags & ROFFNUM_SCALE)
2055 		*res = scaled;
2056 
2057 	*pos = p + 1;
2058 	return 1;
2059 }
2060 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string between its first and second occurrence
 * matches the string between its second and third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* scans the first string */
	rhs = strchr(lhs, *delim);	/* scans the second string */

	if (rhs != NULL) {
		/* Compare both strings in lockstep. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended together. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
2103 
2104 /*
2105  * Evaluate an optionally negated single character, numerical,
2106  * or string condition.
2107  */
2108 static int
2109 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2110 {
2111 	char	*cp, *name;
2112 	size_t	 sz;
2113 	int	 deftype, number, savepos, istrue, wanttrue;
2114 
2115 	if ('!' == v[*pos]) {
2116 		wanttrue = 0;
2117 		(*pos)++;
2118 	} else
2119 		wanttrue = 1;
2120 
2121 	switch (v[*pos]) {
2122 	case '\0':
2123 		return 0;
2124 	case 'n':
2125 	case 'o':
2126 		(*pos)++;
2127 		return wanttrue;
2128 	case 'c':
2129 	case 'e':
2130 	case 't':
2131 	case 'v':
2132 		(*pos)++;
2133 		return !wanttrue;
2134 	case 'd':
2135 	case 'r':
2136 		cp = v + *pos + 1;
2137 		while (*cp == ' ')
2138 			cp++;
2139 		name = cp;
2140 		sz = roff_getname(r, &cp, ln, cp - v);
2141 		if (sz == 0)
2142 			istrue = 0;
2143 		else if (v[*pos] == 'r')
2144 			istrue = roff_hasregn(r, name, sz);
2145 		else {
2146 			deftype = ROFFDEF_ANY;
2147 		        roff_getstrn(r, name, sz, &deftype);
2148 			istrue = !!deftype;
2149 		}
2150 		*pos = cp - v;
2151 		return istrue == wanttrue;
2152 	default:
2153 		break;
2154 	}
2155 
2156 	savepos = *pos;
2157 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2158 		return (number > 0) == wanttrue;
2159 	else if (*pos == savepos)
2160 		return roff_evalstrcond(v, pos) == wanttrue;
2161 	else
2162 		return 0;
2163 }
2164 
2165 static enum rofferr
2166 roff_line_ignore(ROFF_ARGS)
2167 {
2168 
2169 	return ROFF_IGN;
2170 }
2171 
2172 static enum rofferr
2173 roff_insec(ROFF_ARGS)
2174 {
2175 
2176 	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2177 	    ln, ppos, roff_name[tok]);
2178 	return ROFF_IGN;
2179 }
2180 
2181 static enum rofferr
2182 roff_unsupp(ROFF_ARGS)
2183 {
2184 
2185 	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2186 	    ln, ppos, roff_name[tok]);
2187 	return ROFF_IGN;
2188 }
2189 
2190 static enum rofferr
2191 roff_cond(ROFF_ARGS)
2192 {
2193 
2194 	roffnode_push(r, tok, NULL, ln, ppos);
2195 
2196 	/*
2197 	 * An `.el' has no conditional body: it will consume the value
2198 	 * of the current rstack entry set in prior `ie' calls or
2199 	 * defaults to DENY.
2200 	 *
2201 	 * If we're not an `el', however, then evaluate the conditional.
2202 	 */
2203 
2204 	r->last->rule = tok == ROFF_el ?
2205 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2206 	    roff_evalcond(r, ln, buf->buf, &pos);
2207 
2208 	/*
2209 	 * An if-else will put the NEGATION of the current evaluated
2210 	 * conditional into the stack of rules.
2211 	 */
2212 
2213 	if (tok == ROFF_ie) {
2214 		if (r->rstackpos + 1 == r->rstacksz) {
2215 			r->rstacksz += 16;
2216 			r->rstack = mandoc_reallocarray(r->rstack,
2217 			    r->rstacksz, sizeof(int));
2218 		}
2219 		r->rstack[++r->rstackpos] = !r->last->rule;
2220 	}
2221 
2222 	/* If the parent has false as its rule, then so do we. */
2223 
2224 	if (r->last->parent && !r->last->parent->rule)
2225 		r->last->rule = 0;
2226 
2227 	/*
2228 	 * Determine scope.
2229 	 * If there is nothing on the line after the conditional,
2230 	 * not even whitespace, use next-line scope.
2231 	 */
2232 
2233 	if (buf->buf[pos] == '\0') {
2234 		r->last->endspan = 2;
2235 		goto out;
2236 	}
2237 
2238 	while (buf->buf[pos] == ' ')
2239 		pos++;
2240 
2241 	/* An opening brace requests multiline scope. */
2242 
2243 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2244 		r->last->endspan = -1;
2245 		pos += 2;
2246 		while (buf->buf[pos] == ' ')
2247 			pos++;
2248 		goto out;
2249 	}
2250 
2251 	/*
2252 	 * Anything else following the conditional causes
2253 	 * single-line scope.  Warn if the scope contains
2254 	 * nothing but trailing whitespace.
2255 	 */
2256 
2257 	if (buf->buf[pos] == '\0')
2258 		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2259 		    ln, ppos, roff_name[tok]);
2260 
2261 	r->last->endspan = 1;
2262 
2263 out:
2264 	*offs = pos;
2265 	return ROFF_RERUN;
2266 }
2267 
2268 static enum rofferr
2269 roff_ds(ROFF_ARGS)
2270 {
2271 	char		*string;
2272 	const char	*name;
2273 	size_t		 namesz;
2274 
2275 	/* Ignore groff compatibility mode for now. */
2276 
2277 	if (tok == ROFF_ds1)
2278 		tok = ROFF_ds;
2279 	else if (tok == ROFF_as1)
2280 		tok = ROFF_as;
2281 
2282 	/*
2283 	 * The first word is the name of the string.
2284 	 * If it is empty or terminated by an escape sequence,
2285 	 * abort the `ds' request without defining anything.
2286 	 */
2287 
2288 	name = string = buf->buf + pos;
2289 	if (*name == '\0')
2290 		return ROFF_IGN;
2291 
2292 	namesz = roff_getname(r, &string, ln, pos);
2293 	if (name[namesz] == '\\')
2294 		return ROFF_IGN;
2295 
2296 	/* Read past the initial double-quote, if any. */
2297 	if (*string == '"')
2298 		string++;
2299 
2300 	/* The rest is the value. */
2301 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2302 	    ROFF_as == tok);
2303 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2304 	return ROFF_IGN;
2305 }
2306 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * On success, store a one-character code for the operator in *res,
 * advance *pos past the operator, and return that code.
 * Two-character operators use the codes 'l' (<=), 'g' (>=),
 * '!' (<>), 'i' (<?) and 'a' (>?); "==" is reported as '='.
 * If no operator is found, return 0 and leave *pos unchanged;
 * *res then holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 len;

	first = v[*pos];
	*res = first;
	len = 1;

	switch (first) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		second = v[*pos + 1];
		if (second == '=') {
			*res = 'l';
			len = 2;
		} else if (second == '>') {
			*res = '!';
			len = 2;
		} else if (second == '?') {
			*res = 'i';
			len = 2;
		}
		break;
	case '>':
		second = v[*pos + 1];
		if (second == '=') {
			*res = 'g';
			len = 2;
		} else if (second == '?') {
			*res = 'a';
			len = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			len = 2;
		break;
	default:
		return 0;
	}

	*pos += len;
	return *res;
}
2370 
2371 /*
2372  * Evaluate either a parenthesized numeric expression
2373  * or a single signed integer number.
2374  */
2375 static int
2376 roff_evalpar(struct roff *r, int ln,
2377 	const char *v, int *pos, int *res, int flags)
2378 {
2379 
2380 	if ('(' != v[*pos])
2381 		return roff_getnum(v, pos, res, flags);
2382 
2383 	(*pos)++;
2384 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2385 		return 0;
2386 
2387 	/*
2388 	 * Omission of the closing parenthesis
2389 	 * is an error in validation mode,
2390 	 * but ignored in evaluation mode.
2391 	 */
2392 
2393 	if (')' == v[*pos])
2394 		(*pos)++;
2395 	else if (NULL == res)
2396 		return 0;
2397 
2398 	return 1;
2399 }
2400 
2401 /*
2402  * Evaluate a complete numeric expression.
2403  * Proceed left to right, there is no concept of precedence.
2404  */
2405 static int
2406 roff_evalnum(struct roff *r, int ln, const char *v,
2407 	int *pos, int *res, int flags)
2408 {
2409 	int		 mypos, operand2;
2410 	char		 operator;
2411 
2412 	if (NULL == pos) {
2413 		mypos = 0;
2414 		pos = &mypos;
2415 	}
2416 
2417 	if (flags & ROFFNUM_WHITE)
2418 		while (isspace((unsigned char)v[*pos]))
2419 			(*pos)++;
2420 
2421 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2422 		return 0;
2423 
2424 	while (1) {
2425 		if (flags & ROFFNUM_WHITE)
2426 			while (isspace((unsigned char)v[*pos]))
2427 				(*pos)++;
2428 
2429 		if ( ! roff_getop(v, pos, &operator))
2430 			break;
2431 
2432 		if (flags & ROFFNUM_WHITE)
2433 			while (isspace((unsigned char)v[*pos]))
2434 				(*pos)++;
2435 
2436 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2437 			return 0;
2438 
2439 		if (flags & ROFFNUM_WHITE)
2440 			while (isspace((unsigned char)v[*pos]))
2441 				(*pos)++;
2442 
2443 		if (NULL == res)
2444 			continue;
2445 
2446 		switch (operator) {
2447 		case '+':
2448 			*res += operand2;
2449 			break;
2450 		case '-':
2451 			*res -= operand2;
2452 			break;
2453 		case '*':
2454 			*res *= operand2;
2455 			break;
2456 		case '/':
2457 			if (operand2 == 0) {
2458 				mandoc_msg(MANDOCERR_DIVZERO,
2459 					r->parse, ln, *pos, v);
2460 				*res = 0;
2461 				break;
2462 			}
2463 			*res /= operand2;
2464 			break;
2465 		case '%':
2466 			if (operand2 == 0) {
2467 				mandoc_msg(MANDOCERR_DIVZERO,
2468 					r->parse, ln, *pos, v);
2469 				*res = 0;
2470 				break;
2471 			}
2472 			*res %= operand2;
2473 			break;
2474 		case '<':
2475 			*res = *res < operand2;
2476 			break;
2477 		case '>':
2478 			*res = *res > operand2;
2479 			break;
2480 		case 'l':
2481 			*res = *res <= operand2;
2482 			break;
2483 		case 'g':
2484 			*res = *res >= operand2;
2485 			break;
2486 		case '=':
2487 			*res = *res == operand2;
2488 			break;
2489 		case '!':
2490 			*res = *res != operand2;
2491 			break;
2492 		case '&':
2493 			*res = *res && operand2;
2494 			break;
2495 		case ':':
2496 			*res = *res || operand2;
2497 			break;
2498 		case 'i':
2499 			if (operand2 < *res)
2500 				*res = operand2;
2501 			break;
2502 		case 'a':
2503 			if (operand2 > *res)
2504 				*res = operand2;
2505 			break;
2506 		default:
2507 			abort();
2508 		}
2509 	}
2510 	return 1;
2511 }
2512 
2513 /* --- register management ------------------------------------------------ */
2514 
2515 void
2516 roff_setreg(struct roff *r, const char *name, int val, char sign)
2517 {
2518 	struct roffreg	*reg;
2519 
2520 	/* Search for an existing register with the same name. */
2521 	reg = r->regtab;
2522 
2523 	while (reg && strcmp(name, reg->key.p))
2524 		reg = reg->next;
2525 
2526 	if (NULL == reg) {
2527 		/* Create a new register. */
2528 		reg = mandoc_malloc(sizeof(struct roffreg));
2529 		reg->key.p = mandoc_strdup(name);
2530 		reg->key.sz = strlen(name);
2531 		reg->val = 0;
2532 		reg->next = r->regtab;
2533 		r->regtab = reg;
2534 	}
2535 
2536 	if ('+' == sign)
2537 		reg->val += val;
2538 	else if ('-' == sign)
2539 		reg->val -= val;
2540 	else
2541 		reg->val = val;
2542 }
2543 
2544 /*
2545  * Handle some predefined read-only number registers.
2546  * For now, return -1 if the requested register is not predefined;
2547  * in case a predefined read-only register having the value -1
2548  * were to turn up, another special value would have to be chosen.
2549  */
2550 static int
2551 roff_getregro(const struct roff *r, const char *name)
2552 {
2553 
2554 	switch (*name) {
2555 	case '$':  /* Number of arguments of the last macro evaluated. */
2556 		return r->argc;
2557 	case 'A':  /* ASCII approximation mode is always off. */
2558 		return 0;
2559 	case 'g':  /* Groff compatibility mode is always on. */
2560 		return 1;
2561 	case 'H':  /* Fixed horizontal resolution. */
2562 		return 24;
2563 	case 'j':  /* Always adjust left margin only. */
2564 		return 0;
2565 	case 'T':  /* Some output device is always defined. */
2566 		return 1;
2567 	case 'V':  /* Fixed vertical resolution. */
2568 		return 40;
2569 	default:
2570 		return -1;
2571 	}
2572 }
2573 
2574 int
2575 roff_getreg(const struct roff *r, const char *name)
2576 {
2577 	struct roffreg	*reg;
2578 	int		 val;
2579 
2580 	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2581 		val = roff_getregro(r, name + 1);
2582 		if (-1 != val)
2583 			return val;
2584 	}
2585 
2586 	for (reg = r->regtab; reg; reg = reg->next)
2587 		if (0 == strcmp(name, reg->key.p))
2588 			return reg->val;
2589 
2590 	return 0;
2591 }
2592 
2593 static int
2594 roff_getregn(const struct roff *r, const char *name, size_t len)
2595 {
2596 	struct roffreg	*reg;
2597 	int		 val;
2598 
2599 	if ('.' == name[0] && 2 == len) {
2600 		val = roff_getregro(r, name + 1);
2601 		if (-1 != val)
2602 			return val;
2603 	}
2604 
2605 	for (reg = r->regtab; reg; reg = reg->next)
2606 		if (len == reg->key.sz &&
2607 		    0 == strncmp(name, reg->key.p, len))
2608 			return reg->val;
2609 
2610 	return 0;
2611 }
2612 
2613 static int
2614 roff_hasregn(const struct roff *r, const char *name, size_t len)
2615 {
2616 	struct roffreg	*reg;
2617 	int		 val;
2618 
2619 	if ('.' == name[0] && 2 == len) {
2620 		val = roff_getregro(r, name + 1);
2621 		if (-1 != val)
2622 			return 1;
2623 	}
2624 
2625 	for (reg = r->regtab; reg; reg = reg->next)
2626 		if (len == reg->key.sz &&
2627 		    0 == strncmp(name, reg->key.p, len))
2628 			return 1;
2629 
2630 	return 0;
2631 }
2632 
2633 static void
2634 roff_freereg(struct roffreg *reg)
2635 {
2636 	struct roffreg	*old_reg;
2637 
2638 	while (NULL != reg) {
2639 		free(reg->key.p);
2640 		old_reg = reg;
2641 		reg = reg->next;
2642 		free(old_reg);
2643 	}
2644 }
2645 
2646 static enum rofferr
2647 roff_nr(ROFF_ARGS)
2648 {
2649 	char		*key, *val;
2650 	size_t		 keysz;
2651 	int		 iv;
2652 	char		 sign;
2653 
2654 	key = val = buf->buf + pos;
2655 	if (*key == '\0')
2656 		return ROFF_IGN;
2657 
2658 	keysz = roff_getname(r, &val, ln, pos);
2659 	if (key[keysz] == '\\')
2660 		return ROFF_IGN;
2661 	key[keysz] = '\0';
2662 
2663 	sign = *val;
2664 	if (sign == '+' || sign == '-')
2665 		val++;
2666 
2667 	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2668 		roff_setreg(r, key, iv, sign);
2669 
2670 	return ROFF_IGN;
2671 }
2672 
2673 static enum rofferr
2674 roff_rr(ROFF_ARGS)
2675 {
2676 	struct roffreg	*reg, **prev;
2677 	char		*name, *cp;
2678 	size_t		 namesz;
2679 
2680 	name = cp = buf->buf + pos;
2681 	if (*name == '\0')
2682 		return ROFF_IGN;
2683 	namesz = roff_getname(r, &cp, ln, pos);
2684 	name[namesz] = '\0';
2685 
2686 	prev = &r->regtab;
2687 	while (1) {
2688 		reg = *prev;
2689 		if (reg == NULL || !strcmp(name, reg->key.p))
2690 			break;
2691 		prev = &reg->next;
2692 	}
2693 	if (reg != NULL) {
2694 		*prev = reg->next;
2695 		free(reg->key.p);
2696 		free(reg);
2697 	}
2698 	return ROFF_IGN;
2699 }
2700 
2701 /* --- handler functions for roff requests -------------------------------- */
2702 
2703 static enum rofferr
2704 roff_rm(ROFF_ARGS)
2705 {
2706 	const char	 *name;
2707 	char		 *cp;
2708 	size_t		  namesz;
2709 
2710 	cp = buf->buf + pos;
2711 	while (*cp != '\0') {
2712 		name = cp;
2713 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2714 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2715 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2716 		if (name[namesz] == '\\')
2717 			break;
2718 	}
2719 	return ROFF_IGN;
2720 }
2721 
2722 static enum rofferr
2723 roff_it(ROFF_ARGS)
2724 {
2725 	int		 iv;
2726 
2727 	/* Parse the number of lines. */
2728 
2729 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2730 		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2731 		    ln, ppos, buf->buf + 1);
2732 		return ROFF_IGN;
2733 	}
2734 
2735 	while (isspace((unsigned char)buf->buf[pos]))
2736 		pos++;
2737 
2738 	/*
2739 	 * Arm the input line trap.
2740 	 * Special-casing "an-trap" is an ugly workaround to cope
2741 	 * with DocBook stupidly fiddling with man(7) internals.
2742 	 */
2743 
2744 	roffit_lines = iv;
2745 	roffit_macro = mandoc_strdup(iv != 1 ||
2746 	    strcmp(buf->buf + pos, "an-trap") ?
2747 	    buf->buf + pos : "br");
2748 	return ROFF_IGN;
2749 }
2750 
2751 static enum rofferr
2752 roff_Dd(ROFF_ARGS)
2753 {
2754 	int		 mask;
2755 	enum roff_tok	 t, te;
2756 
2757 	switch (tok) {
2758 	case ROFF_Dd:
2759 		tok = MDOC_Dd;
2760 		te = MDOC_MAX;
2761 		if (r->format == 0)
2762 			r->format = MPARSE_MDOC;
2763 		mask = MPARSE_MDOC | MPARSE_QUICK;
2764 		break;
2765 	case ROFF_TH:
2766 		tok = MAN_TH;
2767 		te = MAN_MAX;
2768 		if (r->format == 0)
2769 			r->format = MPARSE_MAN;
2770 		mask = MPARSE_QUICK;
2771 		break;
2772 	default:
2773 		abort();
2774 	}
2775 	if ((r->options & mask) == 0)
2776 		for (t = tok; t < te; t++)
2777 			roff_setstr(r, roff_name[t], NULL, 0);
2778 	return ROFF_CONT;
2779 }
2780 
2781 static enum rofferr
2782 roff_TE(ROFF_ARGS)
2783 {
2784 	if (r->tbl == NULL) {
2785 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2786 		    ln, ppos, "TE");
2787 		return ROFF_IGN;
2788 	}
2789 	if (tbl_end(r->tbl) == 0) {
2790 		r->tbl = NULL;
2791 		free(buf->buf);
2792 		buf->buf = mandoc_strdup(".sp");
2793 		buf->sz = 4;
2794 		return ROFF_REPARSE;
2795 	}
2796 	r->tbl = NULL;
2797 	return ROFF_IGN;
2798 }
2799 
2800 static enum rofferr
2801 roff_T_(ROFF_ARGS)
2802 {
2803 
2804 	if (NULL == r->tbl)
2805 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2806 		    ln, ppos, "T&");
2807 	else
2808 		tbl_restart(ln, ppos, r->tbl);
2809 
2810 	return ROFF_IGN;
2811 }
2812 
2813 /*
2814  * Handle in-line equation delimiters.
2815  */
2816 static enum rofferr
2817 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2818 {
2819 	char		*cp1, *cp2;
2820 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2821 
2822 	/*
2823 	 * Outside equations, look for an opening delimiter.
2824 	 * If we are inside an equation, we already know it is
2825 	 * in-line, or this function wouldn't have been called;
2826 	 * so look for a closing delimiter.
2827 	 */
2828 
2829 	cp1 = buf->buf + pos;
2830 	cp2 = strchr(cp1, r->eqn == NULL ?
2831 	    r->last_eqn->odelim : r->last_eqn->cdelim);
2832 	if (cp2 == NULL)
2833 		return ROFF_CONT;
2834 
2835 	*cp2++ = '\0';
2836 	bef_pr = bef_nl = aft_nl = aft_pr = "";
2837 
2838 	/* Handle preceding text, protecting whitespace. */
2839 
2840 	if (*buf->buf != '\0') {
2841 		if (r->eqn == NULL)
2842 			bef_pr = "\\&";
2843 		bef_nl = "\n";
2844 	}
2845 
2846 	/*
2847 	 * Prepare replacing the delimiter with an equation macro
2848 	 * and drop leading white space from the equation.
2849 	 */
2850 
2851 	if (r->eqn == NULL) {
2852 		while (*cp2 == ' ')
2853 			cp2++;
2854 		mac = ".EQ";
2855 	} else
2856 		mac = ".EN";
2857 
2858 	/* Handle following text, protecting whitespace. */
2859 
2860 	if (*cp2 != '\0') {
2861 		aft_nl = "\n";
2862 		if (r->eqn != NULL)
2863 			aft_pr = "\\&";
2864 	}
2865 
2866 	/* Do the actual replacement. */
2867 
2868 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2869 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2870 	free(buf->buf);
2871 	buf->buf = cp1;
2872 
2873 	/* Toggle the in-line state of the eqn subsystem. */
2874 
2875 	r->eqn_inline = r->eqn == NULL;
2876 	return ROFF_REPARSE;
2877 }
2878 
2879 static enum rofferr
2880 roff_EQ(ROFF_ARGS)
2881 {
2882 	struct roff_node	*n;
2883 
2884 	if (r->man->macroset == MACROSET_MAN)
2885 		man_breakscope(r->man, ROFF_EQ);
2886 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2887 	if (ln > r->man->last->line)
2888 		n->flags |= NODE_LINE;
2889 	n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2890 	n->eqn->expectargs = UINT_MAX;
2891 	roff_node_append(r->man, n);
2892 	r->man->next = ROFF_NEXT_SIBLING;
2893 
2894 	assert(r->eqn == NULL);
2895 	if (r->last_eqn == NULL)
2896 		r->last_eqn = eqn_alloc(r->parse);
2897 	else
2898 		eqn_reset(r->last_eqn);
2899 	r->eqn = r->last_eqn;
2900 	r->eqn->node = n;
2901 
2902 	if (buf->buf[pos] != '\0')
2903 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2904 		    ".EQ %s", buf->buf + pos);
2905 
2906 	return ROFF_IGN;
2907 }
2908 
2909 static enum rofferr
2910 roff_EN(ROFF_ARGS)
2911 {
2912 	if (r->eqn != NULL) {
2913 		eqn_parse(r->eqn);
2914 		r->eqn = NULL;
2915 	} else
2916 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2917 	if (buf->buf[pos] != '\0')
2918 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2919 		    "EN %s", buf->buf + pos);
2920 	return ROFF_IGN;
2921 }
2922 
2923 static enum rofferr
2924 roff_TS(ROFF_ARGS)
2925 {
2926 	if (r->tbl != NULL) {
2927 		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2928 		    ln, ppos, "TS breaks TS");
2929 		tbl_end(r->tbl);
2930 	}
2931 	r->tbl = tbl_alloc(ppos, ln, r->parse);
2932 	if (r->last_tbl)
2933 		r->last_tbl->next = r->tbl;
2934 	else
2935 		r->first_tbl = r->tbl;
2936 	r->last_tbl = r->tbl;
2937 	return ROFF_IGN;
2938 }
2939 
2940 static enum rofferr
2941 roff_onearg(ROFF_ARGS)
2942 {
2943 	struct roff_node	*n;
2944 	char			*cp;
2945 	int			 npos;
2946 
2947 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2948 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
2949 	     tok == ROFF_ti))
2950 		man_breakscope(r->man, tok);
2951 
2952 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2953 		r->man->last = roffce_node;
2954 		r->man->next = ROFF_NEXT_SIBLING;
2955 	}
2956 
2957 	roff_elem_alloc(r->man, ln, ppos, tok);
2958 	n = r->man->last;
2959 
2960 	cp = buf->buf + pos;
2961 	if (*cp != '\0') {
2962 		while (*cp != '\0' && *cp != ' ')
2963 			cp++;
2964 		while (*cp == ' ')
2965 			*cp++ = '\0';
2966 		if (*cp != '\0')
2967 			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2968 			    r->parse, ln, cp - buf->buf,
2969 			    "%s ... %s", roff_name[tok], cp);
2970 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2971 	}
2972 
2973 	if (tok == ROFF_ce || tok == ROFF_rj) {
2974 		if (r->man->last->type == ROFFT_ELEM) {
2975 			roff_word_alloc(r->man, ln, pos, "1");
2976 			r->man->last->flags |= NODE_NOSRC;
2977 		}
2978 		npos = 0;
2979 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
2980 		    &roffce_lines, 0) == 0) {
2981 			mandoc_vmsg(MANDOCERR_CE_NONUM,
2982 			    r->parse, ln, pos, "ce %s", buf->buf + pos);
2983 			roffce_lines = 1;
2984 		}
2985 		if (roffce_lines < 1) {
2986 			r->man->last = r->man->last->parent;
2987 			roffce_node = NULL;
2988 			roffce_lines = 0;
2989 		} else
2990 			roffce_node = r->man->last->parent;
2991 	} else {
2992 		n->flags |= NODE_VALID | NODE_ENDED;
2993 		r->man->last = n;
2994 	}
2995 	n->flags |= NODE_LINE;
2996 	r->man->next = ROFF_NEXT_SIBLING;
2997 	return ROFF_IGN;
2998 }
2999 
3000 static enum rofferr
3001 roff_manyarg(ROFF_ARGS)
3002 {
3003 	struct roff_node	*n;
3004 	char			*sp, *ep;
3005 
3006 	roff_elem_alloc(r->man, ln, ppos, tok);
3007 	n = r->man->last;
3008 
3009 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3010 		while (*ep != '\0' && *ep != ' ')
3011 			ep++;
3012 		while (*ep == ' ')
3013 			*ep++ = '\0';
3014 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3015 	}
3016 
3017 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3018 	r->man->last = n;
3019 	r->man->next = ROFF_NEXT_SIBLING;
3020 	return ROFF_IGN;
3021 }
3022 
3023 static enum rofferr
3024 roff_als(ROFF_ARGS)
3025 {
3026 	char		*oldn, *newn, *end, *value;
3027 	size_t		 oldsz, newsz, valsz;
3028 
3029 	newn = oldn = buf->buf + pos;
3030 	if (*newn == '\0')
3031 		return ROFF_IGN;
3032 
3033 	newsz = roff_getname(r, &oldn, ln, pos);
3034 	if (newn[newsz] == '\\' || *oldn == '\0')
3035 		return ROFF_IGN;
3036 
3037 	end = oldn;
3038 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3039 	if (oldsz == 0)
3040 		return ROFF_IGN;
3041 
3042 	valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3043 	    (int)oldsz, oldn);
3044 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3045 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3046 	free(value);
3047 	return ROFF_IGN;
3048 }
3049 
3050 static enum rofferr
3051 roff_br(ROFF_ARGS)
3052 {
3053 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3054 		man_breakscope(r->man, ROFF_br);
3055 	roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3056 	if (buf->buf[pos] != '\0')
3057 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3058 		    "%s %s", roff_name[tok], buf->buf + pos);
3059 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3060 	r->man->next = ROFF_NEXT_SIBLING;
3061 	return ROFF_IGN;
3062 }
3063 
3064 static enum rofferr
3065 roff_cc(ROFF_ARGS)
3066 {
3067 	const char	*p;
3068 
3069 	p = buf->buf + pos;
3070 
3071 	if (*p == '\0' || (r->control = *p++) == '.')
3072 		r->control = '\0';
3073 
3074 	if (*p != '\0')
3075 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3076 		    ln, p - buf->buf, "cc ... %s", p);
3077 
3078 	return ROFF_IGN;
3079 }
3080 
3081 static enum rofferr
3082 roff_ec(ROFF_ARGS)
3083 {
3084 	const char	*p;
3085 
3086 	p = buf->buf + pos;
3087 	if (*p == '\0')
3088 		r->escape = '\\';
3089 	else {
3090 		r->escape = *p;
3091 		if (*++p != '\0')
3092 			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3093 			    ln, p - buf->buf, "ec ... %s", p);
3094 	}
3095 	return ROFF_IGN;
3096 }
3097 
3098 static enum rofferr
3099 roff_eo(ROFF_ARGS)
3100 {
3101 	r->escape = '\0';
3102 	if (buf->buf[pos] != '\0')
3103 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3104 		    ln, pos, "eo %s", buf->buf + pos);
3105 	return ROFF_IGN;
3106 }
3107 
3108 static enum rofferr
3109 roff_tr(ROFF_ARGS)
3110 {
3111 	const char	*p, *first, *second;
3112 	size_t		 fsz, ssz;
3113 	enum mandoc_esc	 esc;
3114 
3115 	p = buf->buf + pos;
3116 
3117 	if (*p == '\0') {
3118 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3119 		return ROFF_IGN;
3120 	}
3121 
3122 	while (*p != '\0') {
3123 		fsz = ssz = 1;
3124 
3125 		first = p++;
3126 		if (*first == '\\') {
3127 			esc = mandoc_escape(&p, NULL, NULL);
3128 			if (esc == ESCAPE_ERROR) {
3129 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3130 				    ln, (int)(p - buf->buf), first);
3131 				return ROFF_IGN;
3132 			}
3133 			fsz = (size_t)(p - first);
3134 		}
3135 
3136 		second = p++;
3137 		if (*second == '\\') {
3138 			esc = mandoc_escape(&p, NULL, NULL);
3139 			if (esc == ESCAPE_ERROR) {
3140 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3141 				    ln, (int)(p - buf->buf), second);
3142 				return ROFF_IGN;
3143 			}
3144 			ssz = (size_t)(p - second);
3145 		} else if (*second == '\0') {
3146 			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3147 			    ln, first - buf->buf, "tr %s", first);
3148 			second = " ";
3149 			p--;
3150 		}
3151 
3152 		if (fsz > 1) {
3153 			roff_setstrn(&r->xmbtab, first, fsz,
3154 			    second, ssz, 0);
3155 			continue;
3156 		}
3157 
3158 		if (r->xtab == NULL)
3159 			r->xtab = mandoc_calloc(128,
3160 			    sizeof(struct roffstr));
3161 
3162 		free(r->xtab[(int)*first].p);
3163 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3164 		r->xtab[(int)*first].sz = ssz;
3165 	}
3166 
3167 	return ROFF_IGN;
3168 }
3169 
3170 static enum rofferr
3171 roff_rn(ROFF_ARGS)
3172 {
3173 	const char	*value;
3174 	char		*oldn, *newn, *end;
3175 	size_t		 oldsz, newsz;
3176 	int		 deftype;
3177 
3178 	oldn = newn = buf->buf + pos;
3179 	if (*oldn == '\0')
3180 		return ROFF_IGN;
3181 
3182 	oldsz = roff_getname(r, &newn, ln, pos);
3183 	if (oldn[oldsz] == '\\' || *newn == '\0')
3184 		return ROFF_IGN;
3185 
3186 	end = newn;
3187 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3188 	if (newsz == 0)
3189 		return ROFF_IGN;
3190 
3191 	deftype = ROFFDEF_ANY;
3192 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3193 	switch (deftype) {
3194 	case ROFFDEF_USER:
3195 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3196 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3197 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3198 		break;
3199 	case ROFFDEF_PRE:
3200 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3201 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3202 		break;
3203 	case ROFFDEF_REN:
3204 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3205 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3206 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3207 		break;
3208 	case ROFFDEF_STD:
3209 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3210 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3211 		break;
3212 	default:
3213 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3214 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3215 		break;
3216 	}
3217 	return ROFF_IGN;
3218 }
3219 
3220 static enum rofferr
3221 roff_so(ROFF_ARGS)
3222 {
3223 	char *name, *cp;
3224 
3225 	name = buf->buf + pos;
3226 	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3227 
3228 	/*
3229 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3230 	 * opening anything that's not in our cwd or anything beneath
3231 	 * it.  Thus, explicitly disallow traversing up the file-system
3232 	 * or using absolute paths.
3233 	 */
3234 
3235 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3236 		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3237 		    ".so %s", name);
3238 		buf->sz = mandoc_asprintf(&cp,
3239 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3240 		free(buf->buf);
3241 		buf->buf = cp;
3242 		*offs = 0;
3243 		return ROFF_REPARSE;
3244 	}
3245 
3246 	*offs = pos;
3247 	return ROFF_SO;
3248 }
3249 
3250 /* --- user defined strings and macros ------------------------------------ */
3251 
3252 static enum rofferr
3253 roff_userdef(ROFF_ARGS)
3254 {
3255 	const char	 *arg[16], *ap;
3256 	char		 *cp, *n1, *n2;
3257 	int		  expand_count, i, ib, ie;
3258 	size_t		  asz, rsz;
3259 
3260 	/*
3261 	 * Collect pointers to macro argument strings
3262 	 * and NUL-terminate them.
3263 	 */
3264 
3265 	r->argc = 0;
3266 	cp = buf->buf + pos;
3267 	for (i = 0; i < 16; i++) {
3268 		if (*cp == '\0')
3269 			arg[i] = "";
3270 		else {
3271 			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3272 			r->argc = i + 1;
3273 		}
3274 	}
3275 
3276 	/*
3277 	 * Expand macro arguments.
3278 	 */
3279 
3280 	buf->sz = strlen(r->current_string) + 1;
3281 	n1 = n2 = cp = mandoc_malloc(buf->sz);
3282 	memcpy(n1, r->current_string, buf->sz);
3283 	expand_count = 0;
3284 	while (*cp != '\0') {
3285 
3286 		/* Scan ahead for the next argument invocation. */
3287 
3288 		if (*cp++ != '\\')
3289 			continue;
3290 		if (*cp++ != '$')
3291 			continue;
3292 		if (*cp == '*') {  /* \\$* inserts all arguments */
3293 			ib = 0;
3294 			ie = r->argc - 1;
3295 		} else {  /* \\$1 .. \\$9 insert one argument */
3296 			ib = ie = *cp - '1';
3297 			if (ib < 0 || ib > 8)
3298 				continue;
3299 		}
3300 		cp -= 2;
3301 
3302 		/*
3303 		 * Prevent infinite recursion.
3304 		 */
3305 
3306 		if (cp >= n2)
3307 			expand_count = 1;
3308 		else if (++expand_count > EXPAND_LIMIT) {
3309 			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3310 			    ln, (int)(cp - n1), NULL);
3311 			free(buf->buf);
3312 			buf->buf = n1;
3313 			return ROFF_IGN;
3314 		}
3315 
3316 		/*
3317 		 * Determine the size of the expanded argument,
3318 		 * taking escaping of quotes into account.
3319 		 */
3320 
3321 		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3322 		for (i = ib; i <= ie; i++) {
3323 			for (ap = arg[i]; *ap != '\0'; ap++) {
3324 				asz++;
3325 				if (*ap == '"')
3326 					asz += 3;
3327 			}
3328 		}
3329 		if (asz != 3) {
3330 
3331 			/*
3332 			 * Determine the size of the rest of the
3333 			 * unexpanded macro, including the NUL.
3334 			 */
3335 
3336 			rsz = buf->sz - (cp - n1) - 3;
3337 
3338 			/*
3339 			 * When shrinking, move before
3340 			 * releasing the storage.
3341 			 */
3342 
3343 			if (asz < 3)
3344 				memmove(cp + asz, cp + 3, rsz);
3345 
3346 			/*
3347 			 * Resize the storage for the macro
3348 			 * and readjust the parse pointer.
3349 			 */
3350 
3351 			buf->sz += asz - 3;
3352 			n2 = mandoc_realloc(n1, buf->sz);
3353 			cp = n2 + (cp - n1);
3354 			n1 = n2;
3355 
3356 			/*
3357 			 * When growing, make room
3358 			 * for the expanded argument.
3359 			 */
3360 
3361 			if (asz > 3)
3362 				memmove(cp + asz, cp + 3, rsz);
3363 		}
3364 
3365 		/* Copy the expanded argument, escaping quotes. */
3366 
3367 		n2 = cp;
3368 		for (i = ib; i <= ie; i++) {
3369 			for (ap = arg[i]; *ap != '\0'; ap++) {
3370 				if (*ap == '"') {
3371 					memcpy(n2, "\\(dq", 4);
3372 					n2 += 4;
3373 				} else
3374 					*n2++ = *ap;
3375 			}
3376 			if (i < ie)
3377 				*n2++ = ' ';
3378 		}
3379 	}
3380 
3381 	/*
3382 	 * Replace the macro invocation
3383 	 * by the expanded macro.
3384 	 */
3385 
3386 	free(buf->buf);
3387 	buf->buf = n1;
3388 	*offs = 0;
3389 
3390 	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3391 	   ROFF_REPARSE : ROFF_APPEND;
3392 }
3393 
3394 /*
3395  * Calling a high-level macro that was renamed with .rn.
3396  * r->current_string has already been set up by roff_parse().
3397  */
3398 static enum rofferr
3399 roff_renamed(ROFF_ARGS)
3400 {
3401 	char	*nbuf;
3402 
3403 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3404 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3405 	free(buf->buf);
3406 	buf->buf = nbuf;
3407 	return ROFF_CONT;
3408 }
3409 
3410 static size_t
3411 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3412 {
3413 	char	 *name, *cp;
3414 	size_t	  namesz;
3415 
3416 	name = *cpp;
3417 	if ('\0' == *name)
3418 		return 0;
3419 
3420 	/* Read until end of name and terminate it with NUL. */
3421 	for (cp = name; 1; cp++) {
3422 		if ('\0' == *cp || ' ' == *cp) {
3423 			namesz = cp - name;
3424 			break;
3425 		}
3426 		if ('\\' != *cp)
3427 			continue;
3428 		namesz = cp - name;
3429 		if ('{' == cp[1] || '}' == cp[1])
3430 			break;
3431 		cp++;
3432 		if ('\\' == *cp)
3433 			continue;
3434 		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3435 		    "%.*s", (int)(cp - name + 1), name);
3436 		mandoc_escape((const char **)&cp, NULL, NULL);
3437 		break;
3438 	}
3439 
3440 	/* Read past spaces. */
3441 	while (' ' == *cp)
3442 		cp++;
3443 
3444 	*cpp = cp;
3445 	return namesz;
3446 }
3447 
3448 /*
3449  * Store *string into the user-defined string called *name.
3450  * To clear an existing entry, call with (*r, *name, NULL, 0).
3451  * append == 0: replace mode
3452  * append == 1: single-line append mode
3453  * append == 2: multiline append mode, append '\n' after each call
3454  */
3455 static void
3456 roff_setstr(struct roff *r, const char *name, const char *string,
3457 	int append)
3458 {
3459 	size_t	 namesz;
3460 
3461 	namesz = strlen(name);
3462 	roff_setstrn(&r->strtab, name, namesz, string,
3463 	    string ? strlen(string) : 0, append);
3464 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3465 }
3466 
3467 static void
3468 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3469 		const char *string, size_t stringsz, int append)
3470 {
3471 	struct roffkv	*n;
3472 	char		*c;
3473 	int		 i;
3474 	size_t		 oldch, newch;
3475 
3476 	/* Search for an existing string with the same name. */
3477 	n = *r;
3478 
3479 	while (n && (namesz != n->key.sz ||
3480 			strncmp(n->key.p, name, namesz)))
3481 		n = n->next;
3482 
3483 	if (NULL == n) {
3484 		/* Create a new string table entry. */
3485 		n = mandoc_malloc(sizeof(struct roffkv));
3486 		n->key.p = mandoc_strndup(name, namesz);
3487 		n->key.sz = namesz;
3488 		n->val.p = NULL;
3489 		n->val.sz = 0;
3490 		n->next = *r;
3491 		*r = n;
3492 	} else if (0 == append) {
3493 		free(n->val.p);
3494 		n->val.p = NULL;
3495 		n->val.sz = 0;
3496 	}
3497 
3498 	if (NULL == string)
3499 		return;
3500 
3501 	/*
3502 	 * One additional byte for the '\n' in multiline mode,
3503 	 * and one for the terminating '\0'.
3504 	 */
3505 	newch = stringsz + (1 < append ? 2u : 1u);
3506 
3507 	if (NULL == n->val.p) {
3508 		n->val.p = mandoc_malloc(newch);
3509 		*n->val.p = '\0';
3510 		oldch = 0;
3511 	} else {
3512 		oldch = n->val.sz;
3513 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3514 	}
3515 
3516 	/* Skip existing content in the destination buffer. */
3517 	c = n->val.p + (int)oldch;
3518 
3519 	/* Append new content to the destination buffer. */
3520 	i = 0;
3521 	while (i < (int)stringsz) {
3522 		/*
3523 		 * Rudimentary roff copy mode:
3524 		 * Handle escaped backslashes.
3525 		 */
3526 		if ('\\' == string[i] && '\\' == string[i + 1])
3527 			i++;
3528 		*c++ = string[i++];
3529 	}
3530 
3531 	/* Append terminating bytes. */
3532 	if (1 < append)
3533 		*c++ = '\n';
3534 
3535 	*c = '\0';
3536 	n->val.sz = (int)(c - n->val.p);
3537 }
3538 
3539 static const char *
3540 roff_getstrn(const struct roff *r, const char *name, size_t len,
3541     int *deftype)
3542 {
3543 	const struct roffkv	*n;
3544 	int			 i;
3545 	enum roff_tok		 tok;
3546 
3547 	if (*deftype & ROFFDEF_USER) {
3548 		for (n = r->strtab; n != NULL; n = n->next) {
3549 			if (strncmp(name, n->key.p, len) == 0 &&
3550 			    n->key.p[len] == '\0' &&
3551 			    n->val.p != NULL) {
3552 				*deftype = ROFFDEF_USER;
3553 				return n->val.p;
3554 			}
3555 		}
3556 	}
3557 	if (*deftype & ROFFDEF_PRE) {
3558 		for (i = 0; i < PREDEFS_MAX; i++) {
3559 			if (strncmp(name, predefs[i].name, len) == 0 &&
3560 			    predefs[i].name[len] == '\0') {
3561 				*deftype = ROFFDEF_PRE;
3562 				return predefs[i].str;
3563 			}
3564 		}
3565 	}
3566 	if (*deftype & ROFFDEF_REN) {
3567 		for (n = r->rentab; n != NULL; n = n->next) {
3568 			if (strncmp(name, n->key.p, len) == 0 &&
3569 			    n->key.p[len] == '\0' &&
3570 			    n->val.p != NULL) {
3571 				*deftype = ROFFDEF_REN;
3572 				return n->val.p;
3573 			}
3574 		}
3575 	}
3576 	if (*deftype & ROFFDEF_STD) {
3577 		if (r->man->macroset != MACROSET_MAN) {
3578 			for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3579 				if (strncmp(name, roff_name[tok], len) == 0 &&
3580 				    roff_name[tok][len] == '\0') {
3581 					*deftype = ROFFDEF_STD;
3582 					return NULL;
3583 				}
3584 			}
3585 		}
3586 		if (r->man->macroset != MACROSET_MDOC) {
3587 			for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3588 				if (strncmp(name, roff_name[tok], len) == 0 &&
3589 				    roff_name[tok][len] == '\0') {
3590 					*deftype = ROFFDEF_STD;
3591 					return NULL;
3592 				}
3593 			}
3594 		}
3595 	}
3596 	*deftype = 0;
3597 	return NULL;
3598 }
3599 
3600 static void
3601 roff_freestr(struct roffkv *r)
3602 {
3603 	struct roffkv	 *n, *nn;
3604 
3605 	for (n = r; n; n = nn) {
3606 		free(n->key.p);
3607 		free(n->val.p);
3608 		nn = n->next;
3609 		free(n);
3610 	}
3611 }
3612 
3613 /* --- accessors and utility functions ------------------------------------ */
3614 
3615 /*
3616  * Duplicate an input string, making the appropriate character
3617  * conversations (as stipulated by `tr') along the way.
3618  * Returns a heap-allocated string with all the replacements made.
3619  */
3620 char *
3621 roff_strdup(const struct roff *r, const char *p)
3622 {
3623 	const struct roffkv *cp;
3624 	char		*res;
3625 	const char	*pp;
3626 	size_t		 ssz, sz;
3627 	enum mandoc_esc	 esc;
3628 
3629 	if (NULL == r->xmbtab && NULL == r->xtab)
3630 		return mandoc_strdup(p);
3631 	else if ('\0' == *p)
3632 		return mandoc_strdup("");
3633 
3634 	/*
3635 	 * Step through each character looking for term matches
3636 	 * (remember that a `tr' can be invoked with an escape, which is
3637 	 * a glyph but the escape is multi-character).
3638 	 * We only do this if the character hash has been initialised
3639 	 * and the string is >0 length.
3640 	 */
3641 
3642 	res = NULL;
3643 	ssz = 0;
3644 
3645 	while ('\0' != *p) {
3646 		assert((unsigned int)*p < 128);
3647 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3648 			sz = r->xtab[(int)*p].sz;
3649 			res = mandoc_realloc(res, ssz + sz + 1);
3650 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3651 			ssz += sz;
3652 			p++;
3653 			continue;
3654 		} else if ('\\' != *p) {
3655 			res = mandoc_realloc(res, ssz + 2);
3656 			res[ssz++] = *p++;
3657 			continue;
3658 		}
3659 
3660 		/* Search for term matches. */
3661 		for (cp = r->xmbtab; cp; cp = cp->next)
3662 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3663 				break;
3664 
3665 		if (NULL != cp) {
3666 			/*
3667 			 * A match has been found.
3668 			 * Append the match to the array and move
3669 			 * forward by its keysize.
3670 			 */
3671 			res = mandoc_realloc(res,
3672 			    ssz + cp->val.sz + 1);
3673 			memcpy(res + ssz, cp->val.p, cp->val.sz);
3674 			ssz += cp->val.sz;
3675 			p += (int)cp->key.sz;
3676 			continue;
3677 		}
3678 
3679 		/*
3680 		 * Handle escapes carefully: we need to copy
3681 		 * over just the escape itself, or else we might
3682 		 * do replacements within the escape itself.
3683 		 * Make sure to pass along the bogus string.
3684 		 */
3685 		pp = p++;
3686 		esc = mandoc_escape(&p, NULL, NULL);
3687 		if (ESCAPE_ERROR == esc) {
3688 			sz = strlen(pp);
3689 			res = mandoc_realloc(res, ssz + sz + 1);
3690 			memcpy(res + ssz, pp, sz);
3691 			break;
3692 		}
3693 		/*
3694 		 * We bail out on bad escapes.
3695 		 * No need to warn: we already did so when
3696 		 * roff_res() was called.
3697 		 */
3698 		sz = (int)(p - pp);
3699 		res = mandoc_realloc(res, ssz + sz + 1);
3700 		memcpy(res + ssz, pp, sz);
3701 		ssz += sz;
3702 	}
3703 
3704 	res[(int)ssz] = '\0';
3705 	return res;
3706 }
3707 
3708 int
3709 roff_getformat(const struct roff *r)
3710 {
3711 
3712 	return r->format;
3713 }
3714 
3715 /*
3716  * Find out whether a line is a macro line or not.
3717  * If it is, adjust the current position and return one; if it isn't,
3718  * return zero and don't change the current position.
3719  * If the control character has been set with `.cc', then let that grain
3720  * precedence.
3721  * This is slighly contrary to groff, where using the non-breaking
3722  * control character when `cc' has been invoked will cause the
3723  * non-breaking macro contents to be printed verbatim.
3724  */
3725 int
3726 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3727 {
3728 	int		pos;
3729 
3730 	pos = *ppos;
3731 
3732 	if (r->control != '\0' && cp[pos] == r->control)
3733 		pos++;
3734 	else if (r->control != '\0')
3735 		return 0;
3736 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3737 		pos += 2;
3738 	else if ('.' == cp[pos] || '\'' == cp[pos])
3739 		pos++;
3740 	else
3741 		return 0;
3742 
3743 	while (' ' == cp[pos] || '\t' == cp[pos])
3744 		pos++;
3745 
3746 	*ppos = pos;
3747 	return 1;
3748 }
3749