xref: /illumos-gate/usr/src/cmd/mandoc/roff.c (revision da37308cbe221e77b90426b63bf79a8b67c490b0)
1 /* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42 
/*
 * roff_getarg() emits ASCII_ESC to tell roff_expand() that an escape
 * sequence came out of copy-in processing and still has to be checked
 * or interpolated.  No other code uses it, which is why it lives in
 * this file instead of a header.
 */
#define	ASCII_ESC	27

/* Cap on string expansions within one line; guards against endless loops. */
#define	EXPAND_LIMIT	1000

/*
 * Bit flags describing how a macro or string was defined.
 */
/* Defined by the user. */
#define	ROFFDEF_USER	(1 << 1)
/* Predefined. */
#define	ROFFDEF_PRE	(1 << 2)
/* Standard macro that has been renamed. */
#define	ROFFDEF_REN	(1 << 3)
/* Macro of the mdoc(7) or man(7) language. */
#define	ROFFDEF_STD	(1 << 4)
/* Union of all four kinds of definition above. */
#define	ROFFDEF_ANY \
	(ROFFDEF_USER | ROFFDEF_PRE | ROFFDEF_REN | ROFFDEF_STD)
/* Not defined in any way. */
#define	ROFFDEF_UNDEF	(1 << 5)
62 
63 /* --- data types --------------------------------------------------------- */
64 
/*
 * A very small string buffer: the text plus its cached length.
 */
struct roffstr {
	/* NUL-terminated buffer. */
	char		*p;
	/* Saved result of strlen(p). */
	size_t		 sz;
};
72 
73 /*
74  * A key-value roffstr pair as part of a singly-linked list.
75  */
76 struct	roffkv {
77 	struct roffstr	 key;
78 	struct roffstr	 val;
79 	struct roffkv	*next; /* next in list */
80 };
81 
82 /*
83  * A single number register as part of a singly-linked list.
84  */
85 struct	roffreg {
86 	struct roffstr	 key;
87 	int		 val;
88 	int		 step;
89 	struct roffreg	*next;
90 };
91 
92 /*
93  * Association of request and macro names with token IDs.
94  */
95 struct	roffreq {
96 	enum roff_tok	 tok;
97 	char		 name[];
98 };
99 
/*
 * Context of one macro being processed.
 * Nested macro calls need one context each.
 */
struct mctx {
	char		**argv;
	int		  argc;
	int		  argsz;
};
109 
/*
 * State of the roff parser.
 */
struct roff {
	/* The mdoc or man parser. */
	struct roff_man	*man;
	/* Leaf of the stack of roffnode blocks. */
	struct roffnode	*last;
	/* Stack of macro contexts. */
	struct mctx	*mstack;
	/* Stack of inverted `ie' values. */
	int		*rstack;
	/* Lookup table for requests. */
	struct ohash	*reqtab;
	/* Number registers. */
	struct roffreg	*regtab;
	/* Strings and macros defined by the user. */
	struct roffkv	*strtab;
	/* Strings and macros that were renamed. */
	struct roffkv	*rentab;
	/* Multi-byte translation table (`tr'). */
	struct roffkv	*xmbtab;
	/* Single-byte translation table (`tr'). */
	struct roffstr	*xtab;
	/* Value of the user macro called last. */
	const char	*current_string;
	/* First table that was parsed. */
	struct tbl_node	*first_tbl;
	/* Last table that was parsed. */
	struct tbl_node	*last_tbl;
	/* Table being parsed right now. */
	struct tbl_node	*tbl;
	/* Equation parser. */
	struct eqn_node	*last_eqn;
	/* Equation parser that is active. */
	struct eqn_node	*eqn;
	/* Whether the current equation is inline. */
	int		 eqn_inline;
	/* Parse options. */
	int		 options;
	/* Current size of mstack. */
	int		 mstacksz;
	/* Position in mstack. */
	int		 mstackpos;
	/* Current size limit of rstack. */
	int		 rstacksz;
	/* Position in rstack. */
	int		 rstackpos;
	/* Whether the current file is in mdoc or man format. */
	int		 format;
	/* Control character. */
	char		 control;
	/* Escape character. */
	char		 escape;
};
137 
138 /*
139  * A macro definition, condition, or ignored block.
140  */
141 struct	roffnode {
142 	enum roff_tok	 tok; /* type of node */
143 	struct roffnode	*parent; /* up one in stack */
144 	int		 line; /* parse line */
145 	int		 col; /* parse col */
146 	char		*name; /* node name, e.g. macro name */
147 	char		*end; /* custom end macro of the block */
148 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
149 	int		 rule; /* content is: 1=evaluated 0=skipped */
150 };
151 
/*
 * Parameter list shared by the functions of type roffproc:
 *   r     parse context
 *   tok   token of the macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS \
	struct roff *r, enum roff_tok tok, struct buf *buf, \
	int ln, int ppos, int pos, int *offs

typedef	int (*roffproc)(ROFF_ARGS);
161 
162 struct	roffmac {
163 	roffproc	 proc; /* process new macro */
164 	roffproc	 text; /* process as child text of macro */
165 	roffproc	 sub; /* process as child of macro */
166 	int		 flags;
167 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
168 };
169 
/*
 * One predefined string: its input name and what replaces it.
 */
struct predef {
	/* Name as it appears in the input. */
	const char	*name;
	/* Symbol substituted for the name. */
	const char	*str;
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(n_, s_) \
	{ (n_), (s_) },
177 
178 /* --- function prototypes ------------------------------------------------ */
179 
/* Functions with their own parameter lists. */
static int roffnode_cleanscope(struct roff *);
static int roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum roff_tok,
    const char *, int, int);
static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
static int roff_ccond(struct roff *, int, int);
static int roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *, int, char *, int *);
static int roff_evalnum(struct roff *, int, const char *,
    int *, int *, int);
static int roff_evalpar(struct roff *, int, const char *,
    int *, int *, int);
static int roff_evalstrcond(const char *, int *);
static int roff_expand(struct roff *, struct buf *, int, int, char);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(struct roff *, const char *, size_t, char);
static int roff_getregro(const struct roff *, const char *name);
static const char *roff_getstrn(struct roff *, const char *,
    size_t, int *);
static int roff_hasregn(const struct roff *, const char *, size_t);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static enum roff_tok roff_parse(struct roff *, char *, int *, int, int);
static int roff_parsetext(struct roff *, struct buf *, int, int *);
static void roff_setregn(struct roff *, const char *, size_t,
    int, char, int);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *, size_t,
    const char *, size_t, int);

/* Functions taking the common ROFF_ARGS parameter list. */
static int roff_als(ROFF_ARGS);
static int roff_block(ROFF_ARGS);
static int roff_block_text(ROFF_ARGS);
static int roff_block_sub(ROFF_ARGS);
static int roff_break(ROFF_ARGS);
static int roff_cblock(ROFF_ARGS);
static int roff_cc(ROFF_ARGS);
static int roff_char(ROFF_ARGS);
static int roff_cond(ROFF_ARGS);
static int roff_cond_checkend(ROFF_ARGS);
static int roff_cond_text(ROFF_ARGS);
static int roff_cond_sub(ROFF_ARGS);
static int roff_ds(ROFF_ARGS);
static int roff_ec(ROFF_ARGS);
static int roff_eo(ROFF_ARGS);
static int roff_insec(ROFF_ARGS);
static int roff_it(ROFF_ARGS);
static int roff_line_ignore(ROFF_ARGS);
static int roff_manyarg(ROFF_ARGS);
static int roff_noarg(ROFF_ARGS);
static int roff_nop(ROFF_ARGS);
static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static int roff_renamed(ROFF_ARGS);
static int roff_return(ROFF_ARGS);
static int roff_rm(ROFF_ARGS);
static int roff_rn(ROFF_ARGS);
static int roff_rr(ROFF_ARGS);
static int roff_shift(ROFF_ARGS);
static int roff_so(ROFF_ARGS);
static int roff_tr(ROFF_ARGS);
static int roff_Dd(ROFF_ARGS);
static int roff_TE(ROFF_ARGS);
static int roff_TS(ROFF_ARGS);
static int roff_EQ(ROFF_ARGS);
static int roff_EN(ROFF_ARGS);
static int roff_T_(ROFF_ARGS);
static int roff_unsupp(ROFF_ARGS);
static int roff_userdef(ROFF_ARGS);
260 
261 /* --- constant data ------------------------------------------------------ */
262 
/* Flag: roff_getnum() honours scaling. */
#define	ROFFNUM_SCALE	(1 << 0)
/* Flag: roff_evalnum() skips whitespace. */
#define	ROFFNUM_WHITE	(1 << 1)
265 
/*
 * Names of all requests and macros, stored so that the array can be
 * indexed by token ID: roffhash_alloc() reads roff_name[tok] for each
 * token it registers.  The position of every entry is therefore
 * significant.  A NULL entry stands for a token without a name;
 * roffhash_alloc() skips those.
 * NOTE(review): the entries before "text" look like roff requests and
 * those after it like mdoc(7) macros followed by man(7) macros, each
 * group closed by a NULL - confirm against the token enum in roff.h.
 */
266 const char *__roff_name[MAN_MAX + 1] = {
267 	"br",		"ce",		"fi",		"ft",
268 	"ll",		"mc",		"nf",
269 	"po",		"rj",		"sp",
270 	"ta",		"ti",		NULL,
271 	"ab",		"ad",		"af",		"aln",
272 	"als",		"am",		"am1",		"ami",
273 	"ami1",		"as",		"as1",		"asciify",
274 	"backtrace",	"bd",		"bleedat",	"blm",
275         "box",		"boxa",		"bp",		"BP",
276 	"break",	"breakchar",	"brnl",		"brp",
277 	"brpnl",	"c2",		"cc",
278 	"cf",		"cflags",	"ch",		"char",
279 	"chop",		"class",	"close",	"CL",
280 	"color",	"composite",	"continue",	"cp",
281 	"cropat",	"cs",		"cu",		"da",
282 	"dch",		"Dd",		"de",		"de1",
283 	"defcolor",	"dei",		"dei1",		"device",
284 	"devicem",	"di",		"do",		"ds",
285 	"ds1",		"dwh",		"dt",		"ec",
286 	"ecr",		"ecs",		"el",		"em",
287 	"EN",		"eo",		"EP",		"EQ",
288 	"errprint",	"ev",		"evc",		"ex",
289 	"fallback",	"fam",		"fc",		"fchar",
290 	"fcolor",	"fdeferlig",	"feature",	"fkern",
291 	"fl",		"flig",		"fp",		"fps",
292 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
293 	"fzoom",	"gcolor",	"hc",		"hcode",
294 	"hidechar",	"hla",		"hlm",		"hpf",
295 	"hpfa",		"hpfcode",	"hw",		"hy",
296 	"hylang",	"hylen",	"hym",		"hypp",
297 	"hys",		"ie",		"if",		"ig",
298 	"index",	"it",		"itc",		"IX",
299 	"kern",		"kernafter",	"kernbefore",	"kernpair",
300 	"lc",		"lc_ctype",	"lds",		"length",
301 	"letadj",	"lf",		"lg",		"lhang",
302 	"linetabs",	"lnr",		"lnrf",		"lpfx",
303 	"ls",		"lsm",		"lt",
304 	"mediasize",	"minss",	"mk",		"mso",
305 	"na",		"ne",		"nh",		"nhychar",
306 	"nm",		"nn",		"nop",		"nr",
307 	"nrf",		"nroff",	"ns",		"nx",
308 	"open",		"opena",	"os",		"output",
309 	"padj",		"papersize",	"pc",		"pev",
310 	"pi",		"PI",		"pl",		"pm",
311 	"pn",		"pnr",		"ps",
312 	"psbb",		"pshape",	"pso",		"ptr",
313 	"pvs",		"rchar",	"rd",		"recursionlimit",
314 	"return",	"rfschar",	"rhang",
315 	"rm",		"rn",		"rnn",		"rr",
316 	"rs",		"rt",		"schar",	"sentchar",
317 	"shc",		"shift",	"sizes",	"so",
318 	"spacewidth",	"special",	"spreadwarn",	"ss",
319 	"sty",		"substring",	"sv",		"sy",
320 	"T&",		"tc",		"TE",
321 	"TH",		"tkf",		"tl",
322 	"tm",		"tm1",		"tmc",		"tr",
323 	"track",	"transchar",	"trf",		"trimat",
324 	"trin",		"trnt",		"troff",	"TS",
325 	"uf",		"ul",		"unformat",	"unwatch",
326 	"unwatchn",	"vpt",		"vs",		"warn",
327 	"warnscale",	"watch",	"watchlength",	"watchn",
328 	"wh",		"while",	"write",	"writec",
329 	"writem",	"xflag",	".",		NULL,
330 	NULL,		"text",
331 	"Dd",		"Dt",		"Os",		"Sh",
332 	"Ss",		"Pp",		"D1",		"Dl",
333 	"Bd",		"Ed",		"Bl",		"El",
334 	"It",		"Ad",		"An",		"Ap",
335 	"Ar",		"Cd",		"Cm",		"Dv",
336 	"Er",		"Ev",		"Ex",		"Fa",
337 	"Fd",		"Fl",		"Fn",		"Ft",
338 	"Ic",		"In",		"Li",		"Nd",
339 	"Nm",		"Op",		"Ot",		"Pa",
340 	"Rv",		"St",		"Va",		"Vt",
341 	"Xr",		"%A",		"%B",		"%D",
342 	"%I",		"%J",		"%N",		"%O",
343 	"%P",		"%R",		"%T",		"%V",
344 	"Ac",		"Ao",		"Aq",		"At",
345 	"Bc",		"Bf",		"Bo",		"Bq",
346 	"Bsx",		"Bx",		"Db",		"Dc",
347 	"Do",		"Dq",		"Ec",		"Ef",
348 	"Em",		"Eo",		"Fx",		"Ms",
349 	"No",		"Ns",		"Nx",		"Ox",
350 	"Pc",		"Pf",		"Po",		"Pq",
351 	"Qc",		"Ql",		"Qo",		"Qq",
352 	"Re",		"Rs",		"Sc",		"So",
353 	"Sq",		"Sm",		"Sx",		"Sy",
354 	"Tn",		"Ux",		"Xc",		"Xo",
355 	"Fo",		"Fc",		"Oo",		"Oc",
356 	"Bk",		"Ek",		"Bt",		"Hf",
357 	"Fr",		"Ud",		"Lb",		"Lp",
358 	"Lk",		"Mt",		"Brq",		"Bro",
359 	"Brc",		"%C",		"Es",		"En",
360 	"Dx",		"%Q",		"%U",		"Ta",
361 	"Tg",		NULL,
362 	"TH",		"SH",		"SS",		"TP",
363 	"TQ",
364 	"LP",		"PP",		"P",		"IP",
365 	"HP",		"SM",		"SB",		"BI",
366 	"IB",		"BR",		"RB",		"R",
367 	"B",		"I",		"IR",		"RI",
368 	"RE",		"RS",		"DT",		"UC",
369 	"PD",		"AT",		"in",
370 	"SY",		"YS",		"OP",
371 	"EX",		"EE",		"UR",
372 	"UE",		"MT",		"ME",		NULL
373 };
/* Pointer-to-const alias of the table above; roffhash_alloc() reads through it. */
374 const	char *const *roff_name = __roff_name;
375 
/*
 * Handler table with TOKEN_NONE entries.  Each initializer fills the
 * struct roffmac members in declaration order: proc (process new
 * macro), text (process as child text of macro), sub (process as
 * child of macro) and flags.  The trailing comment on each row names
 * the request the row belongs to, so the order of the rows matters.
 * NOTE(review): the final two rows carry no comment; presumably they
 * are the slots for renamed and user-defined macros - confirm against
 * the token enum in roff.h.
 */
376 static	struct roffmac	 roffs[TOKEN_NONE] = {
377 	{ roff_noarg, NULL, NULL, 0 },  /* br */
378 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
379 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
380 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
381 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
382 	{ roff_onearg, NULL, NULL, 0 },  /* mc */
383 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
384 	{ roff_onearg, NULL, NULL, 0 },  /* po */
385 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
386 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
387 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
388 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
389 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
390 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
391 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
392 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
393 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
394 	{ roff_als, NULL, NULL, 0 },  /* als */
395 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
396 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
397 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
398 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
399 	{ roff_ds, NULL, NULL, 0 },  /* as */
400 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
401 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
402 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
403 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
404 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
405 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
406 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
407 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
408 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
409 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
410 	{ roff_break, NULL, NULL, 0 },  /* break */
411 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
412 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
413 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
414 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
415 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
416 	{ roff_cc, NULL, NULL, 0 },  /* cc */
417 	{ roff_insec, NULL, NULL, 0 },  /* cf */
418 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
419 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
420 	{ roff_char, NULL, NULL, 0 },  /* char */
421 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
422 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
423 	{ roff_insec, NULL, NULL, 0 },  /* close */
424 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
425 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
426 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
427 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
428 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
429 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
430 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
431 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
432 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
433 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
434 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
435 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
436 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
437 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
438 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
439 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
440 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
441 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
442 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
443 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
444 	{ roff_ds, NULL, NULL, 0 },  /* ds */
445 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
446 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
447 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
448 	{ roff_ec, NULL, NULL, 0 },  /* ec */
449 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
450 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
451 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
452 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
453 	{ roff_EN, NULL, NULL, 0 },  /* EN */
454 	{ roff_eo, NULL, NULL, 0 },  /* eo */
455 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
456 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
457 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
458 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
459 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
460 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
462 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
463 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
464 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
465 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
473 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
488 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
489 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
490 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
491 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
492 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
493 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
494 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
495 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
496 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
497 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
498 	{ roff_it, NULL, NULL, 0 },  /* it */
499 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
500 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
501 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
502 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
504 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
505 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
506 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
507 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
509 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
510 	{ roff_insec, NULL, NULL, 0 },  /* lf */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
512 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
513 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
514 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
515 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
516 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
517 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
518 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
522 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
523 	{ roff_insec, NULL, NULL, 0 },  /* mso */
524 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
525 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
526 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
527 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
528 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
529 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
530 	{ roff_nop, NULL, NULL, 0 },  /* nop */
531 	{ roff_nr, NULL, NULL, 0 },  /* nr */
532 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
534 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
535 	{ roff_insec, NULL, NULL, 0 },  /* nx */
536 	{ roff_insec, NULL, NULL, 0 },  /* open */
537 	{ roff_insec, NULL, NULL, 0 },  /* opena */
538 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
539 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
543 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
544 	{ roff_insec, NULL, NULL, 0 },  /* pi */
545 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
546 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
547 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
548 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
550 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
551 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
552 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
553 	{ roff_insec, NULL, NULL, 0 },  /* pso */
554 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
555 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
556 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
557 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
558 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
559 	{ roff_return, NULL, NULL, 0 },  /* return */
560 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
561 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
562 	{ roff_rm, NULL, NULL, 0 },  /* rm */
563 	{ roff_rn, NULL, NULL, 0 },  /* rn */
564 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
565 	{ roff_rr, NULL, NULL, 0 },  /* rr */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
567 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
568 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
571 	{ roff_shift, NULL, NULL, 0 },  /* shift */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
573 	{ roff_so, NULL, NULL, 0 },  /* so */
574 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
575 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
576 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
577 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
578 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
579 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
580 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
581 	{ roff_insec, NULL, NULL, 0 },  /* sy */
582 	{ roff_T_, NULL, NULL, 0 },  /* T& */
583 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
584 	{ roff_TE, NULL, NULL, 0 },  /* TE */
585 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
586 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
587 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
588 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
589 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
590 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
591 	{ roff_tr, NULL, NULL, 0 },  /* tr */
592 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
593 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
594 	{ roff_insec, NULL, NULL, 0 },  /* trf */
595 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
596 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
597 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
598 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
599 	{ roff_TS, NULL, NULL, 0 },  /* TS */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
601 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
602 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
604 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
605 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
606 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
607 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
608 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
609 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
610 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
611 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
612 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
613 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
614 	{ roff_insec, NULL, NULL, 0 },  /* write */
615 	{ roff_insec, NULL, NULL, 0 },  /* writec */
616 	{ roff_insec, NULL, NULL, 0 },  /* writem */
617 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
618 	{ roff_cblock, NULL, NULL, 0 },  /* . */
619 	{ roff_renamed, NULL, NULL, 0 },
620 	{ roff_userdef, NULL, NULL, 0 }
621 };
622 
623 /* Array of injected predefined strings. */
624 #define	PREDEFS_MAX	 38
625 static	const struct predef predefs[PREDEFS_MAX] = {
626 #include "predefs.in"
627 };
628 
/* Number of input lines to center. */
static	int	 roffce_lines;
/* The request that is active. */
static	struct roff_node *roffce_node;
/* Number of lines to delay. */
static	int	 roffit_lines;
/* NUL-terminated macro line. */
static	char	*roffit_macro;
633 
634 
635 /* --- request table ------------------------------------------------------ */
636 
637 struct ohash *
638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640 	struct ohash	*htab;
641 	struct roffreq	*req;
642 	enum roff_tok	 tok;
643 	size_t		 sz;
644 	unsigned int	 slot;
645 
646 	htab = mandoc_malloc(sizeof(*htab));
647 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648 
649 	for (tok = mintok; tok < maxtok; tok++) {
650 		if (roff_name[tok] == NULL)
651 			continue;
652 		sz = strlen(roff_name[tok]);
653 		req = mandoc_malloc(sizeof(*req) + sz + 1);
654 		req->tok = tok;
655 		memcpy(req->name, roff_name[tok], sz + 1);
656 		slot = ohash_qlookup(htab, req->name);
657 		ohash_insert(htab, slot, req);
658 	}
659 	return htab;
660 }
661 
/*
 * Release every entry of the table, then the table itself.
 * A NULL table is accepted and nothing is done for it.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676 
677 enum roff_tok
678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680 	struct roffreq	*req;
681 	const char	*end;
682 
683 	if (sz) {
684 		end = name + sz;
685 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686 	} else
687 		req = ohash_find(htab, ohash_qlookup(htab, name));
688 	return req == NULL ? TOKEN_NONE : req->tok;
689 }
690 
691 /* --- stack of request blocks -------------------------------------------- */
692 
693 /*
694  * Pop the current node off of the stack of roff instructions currently
695  * pending.  Return 1 if it is a loop or 0 otherwise.
696  */
697 static int
698 roffnode_pop(struct roff *r)
699 {
700 	struct roffnode	*p;
701 	int		 inloop;
702 
703 	p = r->last;
704 	inloop = p->tok == ROFF_while;
705 	r->last = p->parent;
706 	free(p->name);
707 	free(p->end);
708 	free(p);
709 	return inloop;
710 }
711 
712 /*
713  * Push a roff node onto the instruction stack.  This must later be
714  * removed with roffnode_pop().
715  */
716 static void
717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718 		int line, int col)
719 {
720 	struct roffnode	*p;
721 
722 	p = mandoc_calloc(1, sizeof(struct roffnode));
723 	p->tok = tok;
724 	if (name)
725 		p->name = mandoc_strdup(name);
726 	p->parent = r->last;
727 	p->line = line;
728 	p->col = col;
729 	p->rule = p->parent ? p->parent->rule : 0;
730 
731 	r->last = p;
732 }
733 
734 /* --- roff parser state data management ---------------------------------- */
735 
736 static void
737 roff_free1(struct roff *r)
738 {
739 	int		 i;
740 
741 	tbl_free(r->first_tbl);
742 	r->first_tbl = r->last_tbl = r->tbl = NULL;
743 
744 	eqn_free(r->last_eqn);
745 	r->last_eqn = r->eqn = NULL;
746 
747 	while (r->mstackpos >= 0)
748 		roff_userret(r);
749 
750 	while (r->last)
751 		roffnode_pop(r);
752 
753 	free (r->rstack);
754 	r->rstack = NULL;
755 	r->rstacksz = 0;
756 	r->rstackpos = -1;
757 
758 	roff_freereg(r->regtab);
759 	r->regtab = NULL;
760 
761 	roff_freestr(r->strtab);
762 	roff_freestr(r->rentab);
763 	roff_freestr(r->xmbtab);
764 	r->strtab = r->rentab = r->xmbtab = NULL;
765 
766 	if (r->xtab)
767 		for (i = 0; i < 128; i++)
768 			free(r->xtab[i].p);
769 	free(r->xtab);
770 	r->xtab = NULL;
771 }
772 
773 void
774 roff_reset(struct roff *r)
775 {
776 	roff_free1(r);
777 	r->options |= MPARSE_COMMENT;
778 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779 	r->control = '\0';
780 	r->escape = '\\';
781 	roffce_lines = 0;
782 	roffce_node = NULL;
783 	roffit_lines = 0;
784 	roffit_macro = NULL;
785 }
786 
787 void
788 roff_free(struct roff *r)
789 {
790 	int		 i;
791 
792 	roff_free1(r);
793 	for (i = 0; i < r->mstacksz; i++)
794 		free(r->mstack[i].argv);
795 	free(r->mstack);
796 	roffhash_free(r->reqtab);
797 	free(r);
798 }
799 
800 struct roff *
801 roff_alloc(int options)
802 {
803 	struct roff	*r;
804 
805 	r = mandoc_calloc(1, sizeof(struct roff));
806 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807 	r->options = options | MPARSE_COMMENT;
808 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809 	r->mstackpos = -1;
810 	r->rstackpos = -1;
811 	r->escape = '\\';
812 	return r;
813 }
814 
815 /* --- syntax tree state data management ---------------------------------- */
816 
817 static void
818 roff_man_free1(struct roff_man *man)
819 {
820 	if (man->meta.first != NULL)
821 		roff_node_delete(man, man->meta.first);
822 	free(man->meta.msec);
823 	free(man->meta.vol);
824 	free(man->meta.os);
825 	free(man->meta.arch);
826 	free(man->meta.title);
827 	free(man->meta.name);
828 	free(man->meta.date);
829 	free(man->meta.sodest);
830 }
831 
832 void
833 roff_state_reset(struct roff_man *man)
834 {
835 	man->last = man->meta.first;
836 	man->last_es = NULL;
837 	man->flags = 0;
838 	man->lastsec = man->lastnamed = SEC_NONE;
839 	man->next = ROFF_NEXT_CHILD;
840 	roff_setreg(man->roff, "nS", 0, '=');
841 }
842 
843 static void
844 roff_man_alloc1(struct roff_man *man)
845 {
846 	memset(&man->meta, 0, sizeof(man->meta));
847 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848 	man->meta.first->type = ROFFT_ROOT;
849 	man->meta.macroset = MACROSET_NONE;
850 	roff_state_reset(man);
851 }
852 
/*
 * Throw away the tree and the metadata of a syntax tree object,
 * then set it up again with nothing but a new root node.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
859 
/*
 * Destroy a syntax tree object: first the tree and the metadata
 * strings it owns, then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
866 
867 struct roff_man *
868 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869 {
870 	struct roff_man *man;
871 
872 	man = mandoc_calloc(1, sizeof(*man));
873 	man->roff = roff;
874 	man->os_s = os_s;
875 	man->quick = quick;
876 	roff_man_alloc1(man);
877 	roff->man = man;
878 	return man;
879 }
880 
881 /* --- syntax tree handling ----------------------------------------------- */
882 
883 struct roff_node *
884 roff_node_alloc(struct roff_man *man, int line, int pos,
885 	enum roff_type type, int tok)
886 {
887 	struct roff_node	*n;
888 
889 	n = mandoc_calloc(1, sizeof(*n));
890 	n->line = line;
891 	n->pos = pos;
892 	n->tok = tok;
893 	n->type = type;
894 	n->sec = man->lastsec;
895 
896 	if (man->flags & MDOC_SYNOPSIS)
897 		n->flags |= NODE_SYNPRETTY;
898 	else
899 		n->flags &= ~NODE_SYNPRETTY;
900 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901 		n->flags |= NODE_NOFILL;
902 	else
903 		n->flags &= ~NODE_NOFILL;
904 	if (man->flags & MDOC_NEWLINE)
905 		n->flags |= NODE_LINE;
906 	man->flags &= ~MDOC_NEWLINE;
907 
908 	return n;
909 }
910 
911 void
912 roff_node_append(struct roff_man *man, struct roff_node *n)
913 {
914 
915 	switch (man->next) {
916 	case ROFF_NEXT_SIBLING:
917 		if (man->last->next != NULL) {
918 			n->next = man->last->next;
919 			man->last->next->prev = n;
920 		} else
921 			man->last->parent->last = n;
922 		man->last->next = n;
923 		n->prev = man->last;
924 		n->parent = man->last->parent;
925 		break;
926 	case ROFF_NEXT_CHILD:
927 		if (man->last->child != NULL) {
928 			n->next = man->last->child;
929 			man->last->child->prev = n;
930 		} else
931 			man->last->last = n;
932 		man->last->child = n;
933 		n->parent = man->last;
934 		break;
935 	default:
936 		abort();
937 	}
938 	man->last = n;
939 
940 	switch (n->type) {
941 	case ROFFT_HEAD:
942 		n->parent->head = n;
943 		break;
944 	case ROFFT_BODY:
945 		if (n->end != ENDBODY_NOT)
946 			return;
947 		n->parent->body = n;
948 		break;
949 	case ROFFT_TAIL:
950 		n->parent->tail = n;
951 		break;
952 	default:
953 		return;
954 	}
955 
956 	/*
957 	 * Copy over the normalised-data pointer of our parent.  Not
958 	 * everybody has one, but copying a null pointer is fine.
959 	 */
960 
961 	n->norm = n->parent->norm;
962 	assert(n->parent->type == ROFFT_BLOCK);
963 }
964 
965 void
966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967 {
968 	struct roff_node	*n;
969 
970 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971 	n->string = roff_strdup(man->roff, word);
972 	roff_node_append(man, n);
973 	n->flags |= NODE_VALID | NODE_ENDED;
974 	man->next = ROFF_NEXT_SIBLING;
975 }
976 
977 void
978 roff_word_append(struct roff_man *man, const char *word)
979 {
980 	struct roff_node	*n;
981 	char			*addstr, *newstr;
982 
983 	n = man->last;
984 	addstr = roff_strdup(man->roff, word);
985 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986 	free(addstr);
987 	free(n->string);
988 	n->string = newstr;
989 	man->next = ROFF_NEXT_SIBLING;
990 }
991 
992 void
993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 	struct roff_node	*n;
996 
997 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998 	roff_node_append(man, n);
999 	man->next = ROFF_NEXT_CHILD;
1000 }
1001 
1002 struct roff_node *
1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004 {
1005 	struct roff_node	*n;
1006 
1007 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008 	roff_node_append(man, n);
1009 	man->next = ROFF_NEXT_CHILD;
1010 	return n;
1011 }
1012 
1013 struct roff_node *
1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015 {
1016 	struct roff_node	*n;
1017 
1018 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019 	roff_node_append(man, n);
1020 	man->next = ROFF_NEXT_CHILD;
1021 	return n;
1022 }
1023 
1024 struct roff_node *
1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026 {
1027 	struct roff_node	*n;
1028 
1029 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030 	roff_node_append(man, n);
1031 	man->next = ROFF_NEXT_CHILD;
1032 	return n;
1033 }
1034 
1035 static void
1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037 {
1038 	struct roff_node	*n;
1039 	struct tbl_span		*span;
1040 
1041 	if (man->meta.macroset == MACROSET_MAN)
1042 		man_breakscope(man, ROFF_TS);
1043 	while ((span = tbl_span(tbl)) != NULL) {
1044 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045 		n->span = span;
1046 		roff_node_append(man, n);
1047 		n->flags |= NODE_VALID | NODE_ENDED;
1048 		man->next = ROFF_NEXT_SIBLING;
1049 	}
1050 }
1051 
1052 void
1053 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054 {
1055 
1056 	/* Adjust siblings. */
1057 
1058 	if (n->prev)
1059 		n->prev->next = n->next;
1060 	if (n->next)
1061 		n->next->prev = n->prev;
1062 
1063 	/* Adjust parent. */
1064 
1065 	if (n->parent != NULL) {
1066 		if (n->parent->child == n)
1067 			n->parent->child = n->next;
1068 		if (n->parent->last == n)
1069 			n->parent->last = n->prev;
1070 	}
1071 
1072 	/* Adjust parse point. */
1073 
1074 	if (man == NULL)
1075 		return;
1076 	if (man->last == n) {
1077 		if (n->prev == NULL) {
1078 			man->last = n->parent;
1079 			man->next = ROFF_NEXT_CHILD;
1080 		} else {
1081 			man->last = n->prev;
1082 			man->next = ROFF_NEXT_SIBLING;
1083 		}
1084 	}
1085 	if (man->meta.first == n)
1086 		man->meta.first = NULL;
1087 }
1088 
1089 void
1090 roff_node_relink(struct roff_man *man, struct roff_node *n)
1091 {
1092 	roff_node_unlink(man, n);
1093 	n->prev = n->next = NULL;
1094 	roff_node_append(man, n);
1095 }
1096 
1097 void
1098 roff_node_free(struct roff_node *n)
1099 {
1100 
1101 	if (n->args != NULL)
1102 		mdoc_argv_free(n->args);
1103 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104 		free(n->norm);
1105 	eqn_box_free(n->eqn);
1106 	free(n->string);
1107 	free(n->tag);
1108 	free(n);
1109 }
1110 
1111 void
1112 roff_node_delete(struct roff_man *man, struct roff_node *n)
1113 {
1114 
1115 	while (n->child != NULL)
1116 		roff_node_delete(man, n->child);
1117 	roff_node_unlink(man, n);
1118 	roff_node_free(n);
1119 }
1120 
1121 int
1122 roff_node_transparent(struct roff_node *n)
1123 {
1124 	if (n == NULL)
1125 		return 0;
1126 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127 		return 1;
1128 	return roff_tok_transparent(n->tok);
1129 }
1130 
1131 int
1132 roff_tok_transparent(enum roff_tok tok)
1133 {
1134 	switch (tok) {
1135 	case ROFF_ft:
1136 	case ROFF_ll:
1137 	case ROFF_mc:
1138 	case ROFF_po:
1139 	case ROFF_ta:
1140 	case MDOC_Db:
1141 	case MDOC_Es:
1142 	case MDOC_Sm:
1143 	case MDOC_Tg:
1144 	case MAN_DT:
1145 	case MAN_UC:
1146 	case MAN_PD:
1147 	case MAN_AT:
1148 		return 1;
1149 	default:
1150 		return 0;
1151 	}
1152 }
1153 
1154 struct roff_node *
1155 roff_node_child(struct roff_node *n)
1156 {
1157 	for (n = n->child; roff_node_transparent(n); n = n->next)
1158 		continue;
1159 	return n;
1160 }
1161 
1162 struct roff_node *
1163 roff_node_prev(struct roff_node *n)
1164 {
1165 	do {
1166 		n = n->prev;
1167 	} while (roff_node_transparent(n));
1168 	return n;
1169 }
1170 
1171 struct roff_node *
1172 roff_node_next(struct roff_node *n)
1173 {
1174 	do {
1175 		n = n->next;
1176 	} while (roff_node_transparent(n));
1177 	return n;
1178 }
1179 
1180 void
1181 deroff(char **dest, const struct roff_node *n)
1182 {
1183 	char	*cp;
1184 	size_t	 sz;
1185 
1186 	if (n->string == NULL) {
1187 		for (n = n->child; n != NULL; n = n->next)
1188 			deroff(dest, n);
1189 		return;
1190 	}
1191 
1192 	/* Skip leading whitespace. */
1193 
1194 	for (cp = n->string; *cp != '\0'; cp++) {
1195 		if (cp[0] == '\\' && cp[1] != '\0' &&
1196 		    strchr(" %&0^|~", cp[1]) != NULL)
1197 			cp++;
1198 		else if ( ! isspace((unsigned char)*cp))
1199 			break;
1200 	}
1201 
1202 	/* Skip trailing backslash. */
1203 
1204 	sz = strlen(cp);
1205 	if (sz > 0 && cp[sz - 1] == '\\')
1206 		sz--;
1207 
1208 	/* Skip trailing whitespace. */
1209 
1210 	for (; sz; sz--)
1211 		if ( ! isspace((unsigned char)cp[sz-1]))
1212 			break;
1213 
1214 	/* Skip empty strings. */
1215 
1216 	if (sz == 0)
1217 		return;
1218 
1219 	if (*dest == NULL) {
1220 		*dest = mandoc_strndup(cp, sz);
1221 		return;
1222 	}
1223 
1224 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225 	free(*dest);
1226 	*dest = cp;
1227 }
1228 
1229 /* --- main functions of the roff parser ---------------------------------- */
1230 
1231 /*
1232  * In the current line, expand escape sequences that produce parsable
1233  * input text.  Also check the syntax of the remaining escape sequences,
1234  * which typically produce output glyphs or change formatter state.
 *
 * The text starting at buf->buf + pos is first scanned forward for a
 * comment, which is cut off, and then scanned backward for occurrences
 * of the escape character newesc.  Each expansion replaces buf->buf
 * by a newly allocated or reallocated buffer and updates buf->sz.
 *
 * Returns ROFF_CONT when the line was processed to the end,
 * ROFF_IGN | ROFF_APPEND when the line ends with a continuation
 * (presumably asking the caller to append the next input line), or
 * ROFF_IGN when expansion was given up because more than EXPAND_LIMIT
 * expansions were needed or the buffer would exceed SHRT_MAX bytes.
1235  */
1236 static int
1237 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1238 {
1239 	struct mctx	*ctx;	/* current macro call context */
1240 	char		 ubuf[24]; /* buffer to print the number */
1241 	struct roff_node *n;	/* used for header comments */
1242 	const char	*start;	/* start of the string to process */
1243 	char		*stesc;	/* start of an escape sequence ('\\') */
1244 	const char	*esct;	/* type of escape sequence */
1245 	char		*ep;	/* end of comment string */
1246 	const char	*stnam;	/* start of the name, after "[(*" */
1247 	const char	*cp;	/* end of the name, e.g. before ']' */
1248 	const char	*res;	/* the string to be substituted */
1249 	char		*nbuf;	/* new buffer to copy buf->buf to */
1250 	size_t		 maxl;  /* expected length of the escape name */
1251 	size_t		 naml;	/* actual length of the escape name */
1252 	size_t		 asz;	/* length of the replacement */
1253 	size_t		 rsz;	/* length of the rest of the string */
1254 	int		 inaml;	/* length returned from mandoc_escape() */
1255 	int		 expand_count;	/* to avoid infinite loops */
1256 	int		 npos;	/* position in numeric expression */
1257 	int		 arg_complete; /* argument not interrupted by eol */
1258 	int		 quote_args; /* true for \\$@, false for \\$* */
1259 	int		 done;	/* no more input available */
1260 	int		 deftype; /* type of definition to paste */
1261 	int		 rcsid;	/* kind of RCS id seen */
1262 	enum mandocerr	 err;	/* for escape sequence problems */
1263 	char		 sign;	/* increment number register */
1264 	char		 term;	/* character terminating the escape */
1265 
1266 	/* Search forward for comments. */
1267 
1268 	done = 0;
1269 	start = buf->buf + pos;
1270 	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1271 		if (stesc[0] != newesc || stesc[1] == '\0')
1272 			continue;
1273 		stesc++;
1274 		if (*stesc != '"' && *stesc != '#')
1275 			continue;
1276 
1277 		/* Comment found, look for RCS id. */
1278 
1279 		rcsid = 0;
1280 		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1281 			rcsid = 1 << MANDOC_OS_OPENBSD;
1282 			cp += 8;
1283 		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1284 			rcsid = 1 << MANDOC_OS_NETBSD;
1285 			cp += 7;
1286 		}
1287 		if (cp != NULL &&
1288 		    isalnum((unsigned char)*cp) == 0 &&
1289 		    strchr(cp, '$') != NULL) {
1290 			if (r->man->meta.rcsids & rcsid)
1291 				mandoc_msg(MANDOCERR_RCS_REP, ln,
1292 				    (int)(stesc - buf->buf) + 1,
1293 				    "%s", stesc + 1);
1294 			r->man->meta.rcsids |= rcsid;
1295 		}
1296 
1297 		/* Handle trailing whitespace. */
1298 
		/*
		 * Let ep point to the last byte of the line and move
		 * stesc back onto the escape character itself.
		 */
1299 		ep = strchr(stesc--, '\0') - 1;
1300 		if (*ep == '\n') {
1301 			done = 1;
1302 			ep--;
1303 		}
1304 		if (*ep == ' ' || *ep == '\t')
1305 			mandoc_msg(MANDOCERR_SPACE_EOL,
1306 			    ln, (int)(ep - buf->buf), NULL);
1307 
1308 		/*
1309 		 * Save comments preceding the title macro
1310 		 * in the syntax tree.
1311 		 */
1312 
1313 		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1314 			while (*ep == ' ' || *ep == '\t')
1315 				ep--;
1316 			ep[1] = '\0';
1317 			n = roff_node_alloc(r->man,
1318 			    ln, stesc + 1 - buf->buf,
1319 			    ROFFT_COMMENT, TOKEN_NONE);
1320 			n->string = mandoc_strdup(stesc + 2);
1321 			roff_node_append(r->man, n);
1322 			n->flags |= NODE_VALID | NODE_ENDED;
1323 			r->man->next = ROFF_NEXT_SIBLING;
1324 		}
1325 
1326 		/* Line continuation with comment. */
1327 
1328 		if (stesc[1] == '#') {
1329 			*stesc = '\0';
1330 			return ROFF_IGN | ROFF_APPEND;
1331 		}
1332 
1333 		/* Discard normal comments. */
1334 
		/* Also drop unescaped blanks right before the comment. */
1335 		while (stesc > start && stesc[-1] == ' ' &&
1336 		    (stesc == start + 1 || stesc[-2] != '\\'))
1337 			stesc--;
1338 		*stesc = '\0';
1339 		break;
1340 	}
1341 	if (stesc == start)
1342 		return ROFF_CONT;
1343 	stesc--;
1344 
1345 	/* Notice the end of the input. */
1346 
1347 	if (*stesc == '\n') {
1348 		*stesc-- = '\0';
1349 		done = 1;
1350 	}
1351 
	/* From here on, stesc walks backward through the line. */

1352 	expand_count = 0;
1353 	while (stesc >= start) {
1354 		if (*stesc != newesc) {
1355 
1356 			/*
1357 			 * If we have a non-standard escape character,
1358 			 * escape literal backslashes because all
1359 			 * processing in subsequent functions uses
1360 			 * the standard escaping rules.
1361 			 */
1362 
1363 			if (newesc != ASCII_ESC && *stesc == '\\') {
1364 				*stesc = '\0';
1365 				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1366 				    buf->buf, stesc + 1) + 1;
1367 				start = nbuf + pos;
1368 				stesc = nbuf + (stesc - buf->buf);
1369 				free(buf->buf);
1370 				buf->buf = nbuf;
1371 			}
1372 
1373 			/* Search backwards for the next escape. */
1374 
1375 			stesc--;
1376 			continue;
1377 		}
1378 
1379 		/* If it is escaped, skip it. */
1380 
1381 		for (cp = stesc - 1; cp >= start; cp--)
1382 			if (*cp != r->escape)
1383 				break;
1384 
		/*
		 * stesc - cp is the length of the run of escape
		 * characters ending at stesc.  An even length means
		 * they pair up; the whole run is then rewritten to
		 * backslashes and skipped.  A single trailing escape
		 * character at the end of the line is removed.
		 */
1385 		if ((stesc - cp) % 2 == 0) {
1386 			while (stesc > cp)
1387 				*stesc-- = '\\';
1388 			continue;
1389 		} else if (stesc[1] != '\0') {
1390 			*stesc = '\\';
1391 		} else {
1392 			*stesc-- = '\0';
1393 			if (done)
1394 				continue;
1395 			else
1396 				return ROFF_IGN | ROFF_APPEND;
1397 		}
1398 
1399 		/* Decide whether to expand or to check only. */
1400 
1401 		term = '\0';
1402 		cp = stesc + 1;
1403 		if (*cp == 'E')
1404 			cp++;
1405 		esct = cp;
1406 		switch (*esct) {
1407 		case '*':
1408 		case '$':
1409 			res = NULL;
1410 			break;
1411 		case 'B':
1412 		case 'w':
1413 			term = cp[1];
1414 			/* FALLTHROUGH */
1415 		case 'n':
1416 			sign = cp[1];
1417 			if (sign == '+' || sign == '-')
1418 				cp++;
1419 			res = ubuf;
1420 			break;
1421 		default:
1422 			err = MANDOCERR_OK;
1423 			switch(mandoc_escape(&cp, &stnam, &inaml)) {
1424 			case ESCAPE_SPECIAL:
1425 				if (mchars_spec2cp(stnam, inaml) >= 0)
1426 					break;
1427 				/* FALLTHROUGH */
1428 			case ESCAPE_ERROR:
1429 				err = MANDOCERR_ESC_BAD;
1430 				break;
1431 			case ESCAPE_UNDEF:
1432 				err = MANDOCERR_ESC_UNDEF;
1433 				break;
1434 			case ESCAPE_UNSUPP:
1435 				err = MANDOCERR_ESC_UNSUPP;
1436 				break;
1437 			default:
1438 				break;
1439 			}
1440 			if (err != MANDOCERR_OK)
1441 				mandoc_msg(err, ln, (int)(stesc - buf->buf),
1442 				    "%.*s", (int)(cp - stesc), stesc);
1443 			stesc--;
1444 			continue;
1445 		}
1446 
1447 		if (EXPAND_LIMIT < ++expand_count) {
1448 			mandoc_msg(MANDOCERR_ROFFLOOP,
1449 			    ln, (int)(stesc - buf->buf), NULL);
1450 			return ROFF_IGN;
1451 		}
1452 
1453 		/*
1454 		 * The third character decides the length
1455 		 * of the name of the string or register.
1456 		 * Save a pointer to the name.
1457 		 */
1458 
1459 		if (term == '\0') {
1460 			switch (*++cp) {
1461 			case '\0':
1462 				maxl = 0;
1463 				break;
1464 			case '(':
1465 				cp++;
1466 				maxl = 2;
1467 				break;
1468 			case '[':
1469 				cp++;
1470 				term = ']';
1471 				maxl = 0;
1472 				break;
1473 			default:
1474 				maxl = 1;
1475 				break;
1476 			}
1477 		} else {
1478 			cp += 2;
1479 			maxl = 0;
1480 		}
1481 		stnam = cp;
1482 
1483 		/* Advance to the end of the name. */
1484 
		/* A maxl of 0 means: read up to the terminator term. */
1485 		naml = 0;
1486 		arg_complete = 1;
1487 		while (maxl == 0 || naml < maxl) {
1488 			if (*cp == '\0') {
1489 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
1490 				    (int)(stesc - buf->buf), "%s", stesc);
1491 				arg_complete = 0;
1492 				break;
1493 			}
1494 			if (maxl == 0 && *cp == term) {
1495 				cp++;
1496 				break;
1497 			}
1498 			if (*cp++ != '\\' || *esct != 'w') {
1499 				naml++;
1500 				continue;
1501 			}
			/*
			 * Inside \w, an escape sequence counts as one
			 * character or as none, depending on its class.
			 */
1502 			switch (mandoc_escape(&cp, NULL, NULL)) {
1503 			case ESCAPE_SPECIAL:
1504 			case ESCAPE_UNICODE:
1505 			case ESCAPE_NUMBERED:
1506 			case ESCAPE_UNDEF:
1507 			case ESCAPE_OVERSTRIKE:
1508 				naml++;
1509 				break;
1510 			default:
1511 				break;
1512 			}
1513 		}
1514 
1515 		/*
1516 		 * Retrieve the replacement string; if it is
1517 		 * undefined, resume searching for escapes.
1518 		 */
1519 
1520 		switch (*esct) {
1521 		case '*':
1522 			if (arg_complete) {
1523 				deftype = ROFFDEF_USER | ROFFDEF_PRE;
1524 				res = roff_getstrn(r, stnam, naml, &deftype);
1525 
1526 				/*
1527 				 * If not overridden, let \*(.T
1528 				 * through to the formatters.
1529 				 */
1530 
1531 				if (res == NULL && naml == 2 &&
1532 				    stnam[0] == '.' && stnam[1] == 'T') {
1533 					roff_setstrn(&r->strtab,
1534 					    ".T", 2, NULL, 0, 0);
1535 					stesc--;
1536 					continue;
1537 				}
1538 			}
1539 			break;
1540 		case '$':
1541 			if (r->mstackpos < 0) {
1542 				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1543 				    (int)(stesc - buf->buf), "%.3s", stesc);
1544 				break;
1545 			}
1546 			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
1547 			npos = esct[1] - '1';
1548 			if (npos >= 0 && npos <= 8) {
1549 				res = npos < ctx->argc ?
1550 				    ctx->argv[npos] : "";
1551 				break;
1552 			}
1553 			if (esct[1] == '*')
1554 				quote_args = 0;
1555 			else if (esct[1] == '@')
1556 				quote_args = 1;
1557 			else {
1558 				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1559 				    (int)(stesc - buf->buf), "%.3s", stesc);
1560 				break;
1561 			}
			/* Compute the length of all arguments joined. */
1562 			asz = 0;
1563 			for (npos = 0; npos < ctx->argc; npos++) {
1564 				if (npos)
1565 					asz++;  /* blank */
1566 				if (quote_args)
1567 					asz += 2;  /* quotes */
1568 				asz += strlen(ctx->argv[npos]);
1569 			}
			/*
			 * The escape sequence occupies three bytes.
			 * Unless the replacement has the same length,
			 * move the rest of the line and resize the
			 * buffer such that the replacement fits in.
			 */
1570 			if (asz != 3) {
1571 				rsz = buf->sz - (stesc - buf->buf) - 3;
1572 				if (asz < 3)
1573 					memmove(stesc + asz, stesc + 3, rsz);
1574 				buf->sz += asz - 3;
1575 				nbuf = mandoc_realloc(buf->buf, buf->sz);
1576 				start = nbuf + pos;
1577 				stesc = nbuf + (stesc - buf->buf);
1578 				buf->buf = nbuf;
1579 				if (asz > 3)
1580 					memmove(stesc + asz, stesc + 3, rsz);
1581 			}
			/* Write the arguments over the escape sequence. */
1582 			for (npos = 0; npos < ctx->argc; npos++) {
1583 				if (npos)
1584 					*stesc++ = ' ';
1585 				if (quote_args)
1586 					*stesc++ = '"';
1587 				cp = ctx->argv[npos];
1588 				while (*cp != '\0')
1589 					*stesc++ = *cp++;
1590 				if (quote_args)
1591 					*stesc++ = '"';
1592 			}
1593 			continue;
1594 		case 'B':
			/* '1' if the whole argument is a valid number. */
1595 			npos = 0;
1596 			ubuf[0] = arg_complete &&
1597 			    roff_evalnum(r, ln, stnam, &npos,
1598 			      NULL, ROFFNUM_SCALE) &&
1599 			    stnam + npos + 1 == cp ? '1' : '0';
1600 			ubuf[1] = '\0';
1601 			break;
1602 		case 'n':
1603 			if (arg_complete)
1604 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1605 				    roff_getregn(r, stnam, naml, sign));
1606 			else
1607 				ubuf[0] = '\0';
1608 			break;
1609 		case 'w':
1610 			/* use even incomplete args */
1611 			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1612 			    24 * (int)naml);
1613 			break;
1614 		}
1615 
1616 		if (res == NULL) {
1617 			if (*esct == '*')
1618 				mandoc_msg(MANDOCERR_STR_UNDEF,
1619 				    ln, (int)(stesc - buf->buf),
1620 				    "%.*s", (int)naml, stnam);
1621 			res = "";
1622 		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1623 			mandoc_msg(MANDOCERR_ROFFLOOP,
1624 			    ln, (int)(stesc - buf->buf), NULL);
1625 			return ROFF_IGN;
1626 		}
1627 
1628 		/* Replace the escape sequence by the string. */
1629 
1630 		*stesc = '\0';
1631 		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1632 		    buf->buf, res, cp) + 1;
1633 
1634 		/* Prepare for the next replacement. */
1635 
		/*
		 * Resume at the end of the inserted text, such that
		 * escape sequences in the replacement are seen, too.
		 */
1636 		start = nbuf + pos;
1637 		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1638 		free(buf->buf);
1639 		buf->buf = nbuf;
1640 	}
1641 	return ROFF_CONT;
1642 }
1643 
1644 /*
1645  * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a newly allocated copy of the parsed argument; the argument
 * itself starts at the original pointer, or one byte further
 * in case the argument is quoted.
1648  * NUL-terminate the argument in place.
1649  * Collapse pairs of quotes inside quoted arguments.
1650  * Advance the argument pointer to the next argument,
1651  * or to the NUL byte terminating the argument line.
 * Also advance *pos by the number of bytes consumed.
 * If the argument contained a doubled backslash, the copy is passed
 * through roff_expand() with ASCII_ESC as the escape character, and
 * an empty string is returned if that yields ROFF_IGN.
1652  */
1653 char *
1654 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1655 {
1656 	struct buf	 buf;
1657 	char		*cp, *start;
1658 	int		 newesc, pairs, quoted, white;
1659 
1660 	/* Quoting can only start with a new word. */
1661 	start = *cpp;
1662 	quoted = 0;
1663 	if ('"' == *start) {
1664 		quoted = 1;
1665 		start++;
1666 	}
1667 
	/*
	 * pairs counts the bytes dropped so far, that is, the
	 * distance by which the remaining text is shifted left.
	 */
1668 	newesc = pairs = white = 0;
1669 	for (cp = start; '\0' != *cp; cp++) {
1670 
1671 		/*
1672 		 * Move the following text left
1673 		 * after quoted quotes and after "\\" and "\t".
1674 		 */
1675 		if (pairs)
1676 			cp[-pairs] = cp[0];
1677 
1678 		if ('\\' == cp[0]) {
1679 			/*
1680 			 * In copy mode, translate double to single
1681 			 * backslashes and backslash-t to literal tabs.
1682 			 */
1683 			switch (cp[1]) {
1684 			case 'a':
1685 			case 't':
1686 				cp[-pairs] = '\t';
1687 				pairs++;
1688 				cp++;
1689 				break;
1690 			case '\\':
				/* Mark it for roff_expand() below. */
1691 				newesc = 1;
1692 				cp[-pairs] = ASCII_ESC;
1693 				pairs++;
1694 				cp++;
1695 				break;
1696 			case ' ':
1697 				/* Skip escaped blanks. */
1698 				if (0 == quoted)
1699 					cp++;
1700 				break;
1701 			default:
1702 				break;
1703 			}
1704 		} else if (0 == quoted) {
1705 			if (' ' == cp[0]) {
1706 				/* Unescaped blanks end unquoted args. */
1707 				white = 1;
1708 				break;
1709 			}
1710 		} else if ('"' == cp[0]) {
1711 			if ('"' == cp[1]) {
1712 				/* Quoted quotes collapse. */
1713 				pairs++;
1714 				cp++;
1715 			} else {
1716 				/* Unquoted quotes end quoted args. */
1717 				quoted = 2;
1718 				break;
1719 			}
1720 		}
1721 	}
1722 
1723 	/* Quoted argument without a closing quote. */
1724 	if (1 == quoted)
1725 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1726 
1727 	/* NUL-terminate this argument and move to the next one. */
1728 	if (pairs)
1729 		cp[-pairs] = '\0';
1730 	if ('\0' != *cp) {
1731 		*cp++ = '\0';
1732 		while (' ' == *cp)
1733 			cp++;
1734 	}
1735 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1736 	*cpp = cp;
1737 
1738 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1739 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1740 
	/* From here on, work on a private copy of the argument. */
1741 	start = mandoc_strdup(start);
1742 	if (newesc == 0)
1743 		return start;
1744 
	/*
	 * Copy-in processing left ASCII_ESC markers behind;
	 * let roff_expand() check or interpolate them.
	 */
1745 	buf.buf = start;
1746 	buf.sz = strlen(start) + 1;
1747 	buf.next = NULL;
1748 	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1749 		free(buf.buf);
1750 		buf.buf = mandoc_strdup("");
1751 	}
1752 	return buf.buf;
1753 }
1754 
1755 
1756 /*
1757  * Process text streams.
1758  */
1759 static int
1760 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1761 {
1762 	size_t		 sz;
1763 	const char	*start;
1764 	char		*p;
1765 	int		 isz;
1766 	enum mandoc_esc	 esc;
1767 
1768 	/* Spring the input line trap. */
1769 
1770 	if (roffit_lines == 1) {
1771 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1772 		free(buf->buf);
1773 		buf->buf = p;
1774 		buf->sz = isz + 1;
1775 		*offs = 0;
1776 		free(roffit_macro);
1777 		roffit_lines = 0;
1778 		return ROFF_REPARSE;
1779 	} else if (roffit_lines > 1)
1780 		--roffit_lines;
1781 
1782 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1783 		if (roffce_lines < 1) {
1784 			r->man->last = roffce_node;
1785 			r->man->next = ROFF_NEXT_SIBLING;
1786 			roffce_lines = 0;
1787 			roffce_node = NULL;
1788 		} else
1789 			roffce_lines--;
1790 	}
1791 
1792 	/* Convert all breakable hyphens into ASCII_HYPH. */
1793 
1794 	start = p = buf->buf + pos;
1795 
1796 	while (*p != '\0') {
1797 		sz = strcspn(p, "-\\");
1798 		p += sz;
1799 
1800 		if (*p == '\0')
1801 			break;
1802 
1803 		if (*p == '\\') {
1804 			/* Skip over escapes. */
1805 			p++;
1806 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1807 			if (esc == ESCAPE_ERROR)
1808 				break;
1809 			while (*p == '-')
1810 				p++;
1811 			continue;
1812 		} else if (p == start) {
1813 			p++;
1814 			continue;
1815 		}
1816 
1817 		if (isalpha((unsigned char)p[-1]) &&
1818 		    isalpha((unsigned char)p[1]))
1819 			*p = ASCII_HYPH;
1820 		p++;
1821 	}
1822 	return ROFF_CONT;
1823 }
1824 
/*
 * Parse one input line at the roff level.
 * The line is in buf->buf, starting at offset *offs; len is only
 * used for the warning about long text lines.
 * After handling in-line equation delimiters and expanding escape
 * sequences, the line is handed, in this order of precedence, to
 * the text filter of an open roff block, to the equation parser,
 * to the table parser, to the text handler, to the sub-handler of
 * an open roff block, or to the handler of the request or
 * user-defined macro found on the line.
 * Returns a combination of ROFF_* result flags; in particular,
 * ROFF_CONT is returned for a macro line that is neither a roff
 * request nor a user-defined macro.
 */
1825 int
1826 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1827 {
1828 	enum roff_tok	 t;
1829 	int		 e;
1830 	int		 pos;	/* parse point */
1831 	int		 spos;	/* saved parse point for messages */
1832 	int		 ppos;	/* original offset in buf->buf */
1833 	int		 ctl;	/* macro line (boolean) */
1834 
1835 	ppos = pos = *offs;
1836 
	/* Warn about long text lines outside tables and equations. */

1837 	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1838 	    (r->man->flags & ROFF_NOFILL) == 0 &&
1839 	    strchr(" .\\", buf->buf[pos]) == NULL &&
1840 	    buf->buf[pos] != r->control &&
1841 	    strcspn(buf->buf, " ") < 80)
1842 		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1843 		    "%.20s...", buf->buf + pos);
1844 
1845 	/* Handle in-line equation delimiters. */
1846 
1847 	if (r->tbl == NULL &&
1848 	    r->last_eqn != NULL && r->last_eqn->delim &&
1849 	    (r->eqn == NULL || r->eqn_inline)) {
1850 		e = roff_eqndelim(r, buf, pos);
1851 		if (e == ROFF_REPARSE)
1852 			return e;
1853 		assert(e == ROFF_CONT);
1854 	}
1855 
1856 	/* Expand some escape sequences. */
1857 
1858 	e = roff_expand(r, buf, ln, pos, r->escape);
1859 	if ((e & ROFF_MASK) == ROFF_IGN)
1860 		return e;
1861 	assert(e == ROFF_CONT);
1862 
1863 	ctl = roff_getcontrol(r, buf->buf, &pos);
1864 
1865 	/*
1866 	 * First, if a scope is open and we're not a macro, pass the
1867 	 * text through the macro's filter.
1868 	 * Equations process all content themselves.
1869 	 * Tables process almost all content themselves, but we want
1870 	 * to warn about macros before passing it there.
1871 	 */
1872 
1873 	if (r->last != NULL && ! ctl) {
1874 		t = r->last->tok;
1875 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1876 		if ((e & ROFF_MASK) == ROFF_IGN)
1877 			return e;
		/* Keep only the flag bits outside ROFF_MASK. */
1878 		e &= ~ROFF_MASK;
1879 	} else
1880 		e = ROFF_IGN;
	/* Inside an equation, everything except ".EN" is equation input. */
1881 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1882 		eqn_read(r->eqn, buf->buf + ppos);
1883 		return e;
1884 	}
	/* Inside a table, text lines and empty macro lines are table input. */
1885 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1886 		tbl_read(r->tbl, ln, buf->buf, ppos);
1887 		roff_addtbl(r->man, ln, r->tbl);
1888 		return e;
1889 	}
	/* The first text line ends the saving of leading comments. */
1890 	if ( ! ctl) {
1891 		r->options &= ~MPARSE_COMMENT;
1892 		return roff_parsetext(r, buf, pos, offs) | e;
1893 	}
1894 
1895 	/* Skip empty request lines. */
1896 
1897 	if (buf->buf[pos] == '"') {
1898 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1899 		return ROFF_IGN;
1900 	} else if (buf->buf[pos] == '\0')
1901 		return ROFF_IGN;
1902 
1903 	/*
1904 	 * If a scope is open, go to the child handler for that macro,
1905 	 * as it may want to preprocess before doing anything with it.
1906 	 * Don't do so if an equation is open.
1907 	 */
1908 
1909 	if (r->last) {
1910 		t = r->last->tok;
1911 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1912 	}
1913 
1914 	/* No scope is open.  This is a new request or macro. */
1915 
1916 	r->options &= ~MPARSE_COMMENT;
1917 	spos = pos;
1918 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1919 
1920 	/* Tables ignore most macros. */
1921 
1922 	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1923 	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1924 		mandoc_msg(MANDOCERR_TBLMACRO,
1925 		    ln, pos, "%s", buf->buf + spos);
1926 		if (t != TOKEN_NONE)
1927 			return ROFF_IGN;
		/* Skip the macro name and feed the rest to the table. */
1928 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1929 			pos++;
1930 		while (buf->buf[pos] == ' ')
1931 			pos++;
1932 		tbl_read(r->tbl, ln, buf->buf, pos);
1933 		roff_addtbl(r->man, ln, r->tbl);
1934 		return ROFF_IGN;
1935 	}
1936 
1937 	/* For now, let high level macros abort .ce mode. */
1938 
1939 	if (ctl && roffce_node != NULL &&
1940 	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1941 	     t == ROFF_TH || t == ROFF_TS)) {
1942 		r->man->last = roffce_node;
1943 		r->man->next = ROFF_NEXT_SIBLING;
1944 		roffce_lines = 0;
1945 		roffce_node = NULL;
1946 	}
1947 
1948 	/*
1949 	 * This is neither a roff request nor a user-defined macro.
1950 	 * Let the standard macro set parsers handle it.
1951 	 */
1952 
1953 	if (t == TOKEN_NONE)
1954 		return ROFF_CONT;
1955 
1956 	/* Execute a roff request or a user defined macro. */
1957 
1958 	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1959 }
1960 
1961 /*
1962  * Internal interface function to tell the roff parser that execution
1963  * of the current macro ended.  This is required because macro
1964  * definitions usually do not end with a .return request.
1965  */
1966 void
1967 roff_userret(struct roff *r)
1968 {
1969 	struct mctx	*ctx;
1970 	int		 i;
1971 
1972 	assert(r->mstackpos >= 0);
1973 	ctx = r->mstack + r->mstackpos;
1974 	for (i = 0; i < ctx->argc; i++)
1975 		free(ctx->argv[i]);
1976 	ctx->argc = 0;
1977 	r->mstackpos--;
1978 }
1979 
1980 void
1981 roff_endparse(struct roff *r)
1982 {
1983 	if (r->last != NULL)
1984 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1985 		    r->last->col, "%s", roff_name[r->last->tok]);
1986 
1987 	if (r->eqn != NULL) {
1988 		mandoc_msg(MANDOCERR_BLK_NOEND,
1989 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1990 		eqn_parse(r->eqn);
1991 		r->eqn = NULL;
1992 	}
1993 
1994 	if (r->tbl != NULL) {
1995 		tbl_end(r->tbl, 1);
1996 		r->tbl = NULL;
1997 	}
1998 }
1999 
2000 /*
2001  * Parse a roff node's type from the input buffer.  This must be in the
2002  * form of ".foo xxx" in the usual way.
2003  */
2004 static enum roff_tok
2005 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2006 {
2007 	char		*cp;
2008 	const char	*mac;
2009 	size_t		 maclen;
2010 	int		 deftype;
2011 	enum roff_tok	 t;
2012 
2013 	cp = buf + *pos;
2014 
2015 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2016 		return TOKEN_NONE;
2017 
2018 	mac = cp;
2019 	maclen = roff_getname(r, &cp, ln, ppos);
2020 
2021 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2022 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2023 	switch (deftype) {
2024 	case ROFFDEF_USER:
2025 		t = ROFF_USERDEF;
2026 		break;
2027 	case ROFFDEF_REN:
2028 		t = ROFF_RENAMED;
2029 		break;
2030 	default:
2031 		t = roffhash_find(r->reqtab, mac, maclen);
2032 		break;
2033 	}
2034 	if (t != TOKEN_NONE)
2035 		*pos = cp - buf;
2036 	else if (deftype == ROFFDEF_UNDEF) {
2037 		/* Using an undefined macro defines it to be empty. */
2038 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2039 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2040 	}
2041 	return t;
2042 }
2043 
2044 /* --- handling of request blocks ----------------------------------------- */
2045 
2046 /*
2047  * Close a macro definition block or an "ignore" block.
2048  */
2049 static int
2050 roff_cblock(ROFF_ARGS)
2051 {
2052 	int	 rr;
2053 
2054 	if (r->last == NULL) {
2055 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056 		return ROFF_IGN;
2057 	}
2058 
2059 	switch (r->last->tok) {
2060 	case ROFF_am:
2061 	case ROFF_ami:
2062 	case ROFF_de:
2063 	case ROFF_dei:
2064 	case ROFF_ig:
2065 		break;
2066 	case ROFF_am1:
2067 	case ROFF_de1:
2068 		/* Remapped in roff_block(). */
2069 		abort();
2070 	default:
2071 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2072 		return ROFF_IGN;
2073 	}
2074 
2075 	roffnode_pop(r);
2076 	roffnode_cleanscope(r);
2077 
2078 	/*
2079 	 * If a conditional block with braces is still open,
2080 	 * check for "\}" block end markers.
2081 	 */
2082 
2083 	if (r->last != NULL && r->last->endspan < 0) {
2084 		rr = 1;  /* If arguments follow "\}", warn about them. */
2085 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2086 	}
2087 
2088 	if (buf->buf[pos] != '\0')
2089 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2090 		    ".. %s", buf->buf + pos);
2091 
2092 	return ROFF_IGN;
2093 }
2094 
2095 /*
2096  * Pop all nodes ending at the end of the current input line.
2097  * Return the number of loops ended.
2098  */
2099 static int
2100 roffnode_cleanscope(struct roff *r)
2101 {
2102 	int inloop;
2103 
2104 	inloop = 0;
2105 	while (r->last != NULL && r->last->endspan > 0) {
2106 		if (--r->last->endspan != 0)
2107 			break;
2108 		inloop += roffnode_pop(r);
2109 	}
2110 	return inloop;
2111 }
2112 
2113 /*
2114  * Handle the closing "\}" of a conditional block.
2115  * Apart from generating warnings, this only pops nodes.
2116  * Return the number of loops ended.
2117  */
2118 static int
2119 roff_ccond(struct roff *r, int ln, int ppos)
2120 {
2121 	if (NULL == r->last) {
2122 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123 		return 0;
2124 	}
2125 
2126 	switch (r->last->tok) {
2127 	case ROFF_el:
2128 	case ROFF_ie:
2129 	case ROFF_if:
2130 	case ROFF_while:
2131 		break;
2132 	default:
2133 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2134 		return 0;
2135 	}
2136 
2137 	if (r->last->endspan > -1) {
2138 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2139 		return 0;
2140 	}
2141 
2142 	return roffnode_pop(r) + roffnode_cleanscope(r);
2143 }
2144 
2145 static int
2146 roff_block(ROFF_ARGS)
2147 {
2148 	const char	*name, *value;
2149 	char		*call, *cp, *iname, *rname;
2150 	size_t		 csz, namesz, rsz;
2151 	int		 deftype;
2152 
2153 	/* Ignore groff compatibility mode for now. */
2154 
2155 	if (tok == ROFF_de1)
2156 		tok = ROFF_de;
2157 	else if (tok == ROFF_dei1)
2158 		tok = ROFF_dei;
2159 	else if (tok == ROFF_am1)
2160 		tok = ROFF_am;
2161 	else if (tok == ROFF_ami1)
2162 		tok = ROFF_ami;
2163 
2164 	/* Parse the macro name argument. */
2165 
2166 	cp = buf->buf + pos;
2167 	if (tok == ROFF_ig) {
2168 		iname = NULL;
2169 		namesz = 0;
2170 	} else {
2171 		iname = cp;
2172 		namesz = roff_getname(r, &cp, ln, ppos);
2173 		iname[namesz] = '\0';
2174 	}
2175 
2176 	/* Resolve the macro name argument if it is indirect. */
2177 
2178 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2179 		deftype = ROFFDEF_USER;
2180 		name = roff_getstrn(r, iname, namesz, &deftype);
2181 		if (name == NULL) {
2182 			mandoc_msg(MANDOCERR_STR_UNDEF,
2183 			    ln, (int)(iname - buf->buf),
2184 			    "%.*s", (int)namesz, iname);
2185 			namesz = 0;
2186 		} else
2187 			namesz = strlen(name);
2188 	} else
2189 		name = iname;
2190 
2191 	if (namesz == 0 && tok != ROFF_ig) {
2192 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2193 		    ln, ppos, "%s", roff_name[tok]);
2194 		return ROFF_IGN;
2195 	}
2196 
2197 	roffnode_push(r, tok, name, ln, ppos);
2198 
2199 	/*
2200 	 * At the beginning of a `de' macro, clear the existing string
2201 	 * with the same name, if there is one.  New content will be
2202 	 * appended from roff_block_text() in multiline mode.
2203 	 */
2204 
2205 	if (tok == ROFF_de || tok == ROFF_dei) {
2206 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2207 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2208 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2209 		deftype = ROFFDEF_ANY;
2210 		value = roff_getstrn(r, iname, namesz, &deftype);
2211 		switch (deftype) {  /* Before appending, ... */
2212 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2213 			roff_setstrn(&r->strtab, name, namesz,
2214 			    value, strlen(value), 0);
2215 			break;
2216 		case ROFFDEF_REN: /* call original standard macro. */
2217 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2218 			    (int)strlen(value), value);
2219 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2220 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2221 			free(call);
2222 			break;
2223 		case ROFFDEF_STD:  /* rename and call standard macro. */
2224 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2225 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2226 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2227 			    (int)rsz, rname);
2228 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2229 			free(call);
2230 			free(rname);
2231 			break;
2232 		default:
2233 			break;
2234 		}
2235 	}
2236 
2237 	if (*cp == '\0')
2238 		return ROFF_IGN;
2239 
2240 	/* Get the custom end marker. */
2241 
2242 	iname = cp;
2243 	namesz = roff_getname(r, &cp, ln, ppos);
2244 
2245 	/* Resolve the end marker if it is indirect. */
2246 
2247 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2248 		deftype = ROFFDEF_USER;
2249 		name = roff_getstrn(r, iname, namesz, &deftype);
2250 		if (name == NULL) {
2251 			mandoc_msg(MANDOCERR_STR_UNDEF,
2252 			    ln, (int)(iname - buf->buf),
2253 			    "%.*s", (int)namesz, iname);
2254 			namesz = 0;
2255 		} else
2256 			namesz = strlen(name);
2257 	} else
2258 		name = iname;
2259 
2260 	if (namesz)
2261 		r->last->end = mandoc_strndup(name, namesz);
2262 
2263 	if (*cp != '\0')
2264 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2265 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2266 
2267 	return ROFF_IGN;
2268 }
2269 
2270 static int
2271 roff_block_sub(ROFF_ARGS)
2272 {
2273 	enum roff_tok	t;
2274 	int		i, j;
2275 
2276 	/*
2277 	 * First check whether a custom macro exists at this level.  If
2278 	 * it does, then check against it.  This is some of groff's
2279 	 * stranger behaviours.  If we encountered a custom end-scope
2280 	 * tag and that tag also happens to be a "real" macro, then we
2281 	 * need to try interpreting it again as a real macro.  If it's
2282 	 * not, then return ignore.  Else continue.
2283 	 */
2284 
2285 	if (r->last->end) {
2286 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2287 			if (buf->buf[i] != r->last->end[j])
2288 				break;
2289 
2290 		if (r->last->end[j] == '\0' &&
2291 		    (buf->buf[i] == '\0' ||
2292 		     buf->buf[i] == ' ' ||
2293 		     buf->buf[i] == '\t')) {
2294 			roffnode_pop(r);
2295 			roffnode_cleanscope(r);
2296 
2297 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2298 				i++;
2299 
2300 			pos = i;
2301 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2302 			    TOKEN_NONE)
2303 				return ROFF_RERUN;
2304 			return ROFF_IGN;
2305 		}
2306 	}
2307 
2308 	/*
2309 	 * If we have no custom end-query or lookup failed, then try
2310 	 * pulling it out of the hashtable.
2311 	 */
2312 
2313 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2314 
2315 	if (t != ROFF_cblock) {
2316 		if (tok != ROFF_ig)
2317 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2318 		return ROFF_IGN;
2319 	}
2320 
2321 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2322 }
2323 
2324 static int
2325 roff_block_text(ROFF_ARGS)
2326 {
2327 
2328 	if (tok != ROFF_ig)
2329 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2330 
2331 	return ROFF_IGN;
2332 }
2333 
2334 /*
2335  * Check for a closing "\}" and handle it.
2336  * In this function, the final "int *offs" argument is used for
2337  * different purposes than elsewhere:
2338  * Input: *offs == 0: caller wants to discard arguments following \}
2339  *        *offs == 1: caller wants to preserve text following \}
2340  * Output: *offs = 0: tell caller to discard input line
2341  *         *offs = 1: tell caller to use input line
2342  */
2343 static int
2344 roff_cond_checkend(ROFF_ARGS)
2345 {
2346 	char		*ep;
2347 	int		 endloop, irc, rr;
2348 
2349 	irc = ROFF_IGN;
2350 	rr = r->last->rule;
2351 	endloop = tok != ROFF_while ? ROFF_IGN :
2352 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2353 	if (roffnode_cleanscope(r))
2354 		irc |= endloop;
2355 
2356 	/*
2357 	 * If "\}" occurs on a macro line without a preceding macro or
2358 	 * a text line contains nothing else, drop the line completely.
2359 	 */
2360 
2361 	ep = buf->buf + pos;
2362 	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2363 		rr = 0;
2364 
2365 	/*
2366 	 * The closing delimiter "\}" rewinds the conditional scope
2367 	 * but is otherwise ignored when interpreting the line.
2368 	 */
2369 
2370 	while ((ep = strchr(ep, '\\')) != NULL) {
2371 		switch (ep[1]) {
2372 		case '}':
2373 			if (ep[2] == '\0')
2374 				ep[0] = '\0';
2375 			else if (rr)
2376 				ep[1] = '&';
2377 			else
2378 				memmove(ep, ep + 2, strlen(ep + 2) + 1);
2379 			if (roff_ccond(r, ln, ep - buf->buf))
2380 				irc |= endloop;
2381 			break;
2382 		case '\0':
2383 			++ep;
2384 			break;
2385 		default:
2386 			ep += 2;
2387 			break;
2388 		}
2389 	}
2390 	*offs = rr;
2391 	return irc;
2392 }
2393 
2394 /*
2395  * Parse and process a request or macro line in conditional scope.
2396  */
2397 static int
2398 roff_cond_sub(ROFF_ARGS)
2399 {
2400 	struct roffnode	*bl;
2401 	int		 irc, rr;
2402 	enum roff_tok	 t;
2403 
2404 	rr = 0;  /* If arguments follow "\}", skip them. */
2405 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2406 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2407 
2408 	/* For now, let high level macros abort .ce mode. */
2409 
2410 	if (roffce_node != NULL &&
2411 	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2412              t == ROFF_TH || t == ROFF_TS)) {
2413 		r->man->last = roffce_node;
2414 		r->man->next = ROFF_NEXT_SIBLING;
2415 		roffce_lines = 0;
2416 		roffce_node = NULL;
2417 	}
2418 
2419 	/*
2420 	 * Fully handle known macros when they are structurally
2421 	 * required or when the conditional evaluated to true.
2422 	 */
2423 
2424 	if (t == ROFF_break) {
2425 		if (irc & ROFF_LOOPMASK)
2426 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2427 		else if (rr) {
2428 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2429 				bl->rule = 0;
2430 				if (bl->tok == ROFF_while)
2431 					break;
2432 			}
2433 		}
2434 	} else if (t != TOKEN_NONE &&
2435 	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
2436 		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2437 	else
2438 		irc |= rr ? ROFF_CONT : ROFF_IGN;
2439 	return irc;
2440 }
2441 
2442 /*
2443  * Parse and process a text line in conditional scope.
2444  */
2445 static int
2446 roff_cond_text(ROFF_ARGS)
2447 {
2448 	int	 irc, rr;
2449 
2450 	rr = 1;  /* If arguments follow "\}", preserve them. */
2451 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2452 	if (rr)
2453 		irc |= ROFF_CONT;
2454 	return irc;
2455 }
2456 
2457 /* --- handling of numeric and conditional expressions -------------------- */
2458 
2459 /*
2460  * Parse a single signed integer number.  Stop at the first non-digit.
2461  * If there is at least one digit, return success and advance the
2462  * parse point, else return failure and let the parse point unchanged.
2463  * Ignore overflows, treat them just like the C language.
2464  */
2465 static int
2466 roff_getnum(const char *v, int *pos, int *res, int flags)
2467 {
2468 	int	 myres, scaled, n, p;
2469 
2470 	if (NULL == res)
2471 		res = &myres;
2472 
2473 	p = *pos;
2474 	n = v[p] == '-';
2475 	if (n || v[p] == '+')
2476 		p++;
2477 
2478 	if (flags & ROFFNUM_WHITE)
2479 		while (isspace((unsigned char)v[p]))
2480 			p++;
2481 
2482 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2483 		*res = 10 * *res + v[p] - '0';
2484 	if (p == *pos + n)
2485 		return 0;
2486 
2487 	if (n)
2488 		*res = -*res;
2489 
2490 	/* Each number may be followed by one optional scaling unit. */
2491 
2492 	switch (v[p]) {
2493 	case 'f':
2494 		scaled = *res * 65536;
2495 		break;
2496 	case 'i':
2497 		scaled = *res * 240;
2498 		break;
2499 	case 'c':
2500 		scaled = *res * 240 / 2.54;
2501 		break;
2502 	case 'v':
2503 	case 'P':
2504 		scaled = *res * 40;
2505 		break;
2506 	case 'm':
2507 	case 'n':
2508 		scaled = *res * 24;
2509 		break;
2510 	case 'p':
2511 		scaled = *res * 10 / 3;
2512 		break;
2513 	case 'u':
2514 		scaled = *res;
2515 		break;
2516 	case 'M':
2517 		scaled = *res * 6 / 25;
2518 		break;
2519 	default:
2520 		scaled = *res;
2521 		p--;
2522 		break;
2523 	}
2524 	if (flags & ROFFNUM_SCALE)
2525 		*res = scaled;
2526 
2527 	*pos = p + 1;
2528 	return 1;
2529 }
2530 
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Return 1 if the string up to its second occurrence
 * equals the string up to its third occurrence, 0 otherwise.
 * Advance *pos after the third occurrence
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in lockstep. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended together. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
2573 
2574 /*
2575  * Evaluate an optionally negated single character, numerical,
2576  * or string condition.
2577  */
2578 static int
2579 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2580 {
2581 	const char	*start, *end;
2582 	char		*cp, *name;
2583 	size_t		 sz;
2584 	int		 deftype, len, number, savepos, istrue, wanttrue;
2585 
2586 	if ('!' == v[*pos]) {
2587 		wanttrue = 0;
2588 		(*pos)++;
2589 	} else
2590 		wanttrue = 1;
2591 
2592 	switch (v[*pos]) {
2593 	case '\0':
2594 		return 0;
2595 	case 'n':
2596 	case 'o':
2597 		(*pos)++;
2598 		return wanttrue;
2599 	case 'e':
2600 	case 't':
2601 	case 'v':
2602 		(*pos)++;
2603 		return !wanttrue;
2604 	case 'c':
2605 		do {
2606 			(*pos)++;
2607 		} while (v[*pos] == ' ');
2608 
2609 		/*
2610 		 * Quirk for groff compatibility:
2611 		 * The horizontal tab is neither available nor unavailable.
2612 		 */
2613 
2614 		if (v[*pos] == '\t') {
2615 			(*pos)++;
2616 			return 0;
2617 		}
2618 
2619 		/* Printable ASCII characters are available. */
2620 
2621 		if (v[*pos] != '\\') {
2622 			(*pos)++;
2623 			return wanttrue;
2624 		}
2625 
2626 		end = v + ++*pos;
2627 		switch (mandoc_escape(&end, &start, &len)) {
2628 		case ESCAPE_SPECIAL:
2629 			istrue = mchars_spec2cp(start, len) != -1;
2630 			break;
2631 		case ESCAPE_UNICODE:
2632 			istrue = 1;
2633 			break;
2634 		case ESCAPE_NUMBERED:
2635 			istrue = mchars_num2char(start, len) != -1;
2636 			break;
2637 		default:
2638 			istrue = !wanttrue;
2639 			break;
2640 		}
2641 		*pos = end - v;
2642 		return istrue == wanttrue;
2643 	case 'd':
2644 	case 'r':
2645 		cp = v + *pos + 1;
2646 		while (*cp == ' ')
2647 			cp++;
2648 		name = cp;
2649 		sz = roff_getname(r, &cp, ln, cp - v);
2650 		if (sz == 0)
2651 			istrue = 0;
2652 		else if (v[*pos] == 'r')
2653 			istrue = roff_hasregn(r, name, sz);
2654 		else {
2655 			deftype = ROFFDEF_ANY;
2656 		        roff_getstrn(r, name, sz, &deftype);
2657 			istrue = !!deftype;
2658 		}
2659 		*pos = (name + sz) - v;
2660 		return istrue == wanttrue;
2661 	default:
2662 		break;
2663 	}
2664 
2665 	savepos = *pos;
2666 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2667 		return (number > 0) == wanttrue;
2668 	else if (*pos == savepos)
2669 		return roff_evalstrcond(v, pos) == wanttrue;
2670 	else
2671 		return 0;
2672 }
2673 
2674 static int
2675 roff_line_ignore(ROFF_ARGS)
2676 {
2677 
2678 	return ROFF_IGN;
2679 }
2680 
2681 static int
2682 roff_insec(ROFF_ARGS)
2683 {
2684 
2685 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2686 	return ROFF_IGN;
2687 }
2688 
2689 static int
2690 roff_unsupp(ROFF_ARGS)
2691 {
2692 
2693 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2694 	return ROFF_IGN;
2695 }
2696 
2697 static int
2698 roff_cond(ROFF_ARGS)
2699 {
2700 	int	 irc;
2701 
2702 	roffnode_push(r, tok, NULL, ln, ppos);
2703 
2704 	/*
2705 	 * An `.el' has no conditional body: it will consume the value
2706 	 * of the current rstack entry set in prior `ie' calls or
2707 	 * defaults to DENY.
2708 	 *
2709 	 * If we're not an `el', however, then evaluate the conditional.
2710 	 */
2711 
2712 	r->last->rule = tok == ROFF_el ?
2713 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2714 	    roff_evalcond(r, ln, buf->buf, &pos);
2715 
2716 	/*
2717 	 * An if-else will put the NEGATION of the current evaluated
2718 	 * conditional into the stack of rules.
2719 	 */
2720 
2721 	if (tok == ROFF_ie) {
2722 		if (r->rstackpos + 1 == r->rstacksz) {
2723 			r->rstacksz += 16;
2724 			r->rstack = mandoc_reallocarray(r->rstack,
2725 			    r->rstacksz, sizeof(int));
2726 		}
2727 		r->rstack[++r->rstackpos] = !r->last->rule;
2728 	}
2729 
2730 	/* If the parent has false as its rule, then so do we. */
2731 
2732 	if (r->last->parent && !r->last->parent->rule)
2733 		r->last->rule = 0;
2734 
2735 	/*
2736 	 * Determine scope.
2737 	 * If there is nothing on the line after the conditional,
2738 	 * not even whitespace, use next-line scope.
2739 	 * Except that .while does not support next-line scope.
2740 	 */
2741 
2742 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2743 		r->last->endspan = 2;
2744 		goto out;
2745 	}
2746 
2747 	while (buf->buf[pos] == ' ')
2748 		pos++;
2749 
2750 	/* An opening brace requests multiline scope. */
2751 
2752 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2753 		r->last->endspan = -1;
2754 		pos += 2;
2755 		while (buf->buf[pos] == ' ')
2756 			pos++;
2757 		goto out;
2758 	}
2759 
2760 	/*
2761 	 * Anything else following the conditional causes
2762 	 * single-line scope.  Warn if the scope contains
2763 	 * nothing but trailing whitespace.
2764 	 */
2765 
2766 	if (buf->buf[pos] == '\0')
2767 		mandoc_msg(MANDOCERR_COND_EMPTY,
2768 		    ln, ppos, "%s", roff_name[tok]);
2769 
2770 	r->last->endspan = 1;
2771 
2772 out:
2773 	*offs = pos;
2774 	irc = ROFF_RERUN;
2775 	if (tok == ROFF_while)
2776 		irc |= ROFF_WHILE;
2777 	return irc;
2778 }
2779 
2780 static int
2781 roff_ds(ROFF_ARGS)
2782 {
2783 	char		*string;
2784 	const char	*name;
2785 	size_t		 namesz;
2786 
2787 	/* Ignore groff compatibility mode for now. */
2788 
2789 	if (tok == ROFF_ds1)
2790 		tok = ROFF_ds;
2791 	else if (tok == ROFF_as1)
2792 		tok = ROFF_as;
2793 
2794 	/*
2795 	 * The first word is the name of the string.
2796 	 * If it is empty or terminated by an escape sequence,
2797 	 * abort the `ds' request without defining anything.
2798 	 */
2799 
2800 	name = string = buf->buf + pos;
2801 	if (*name == '\0')
2802 		return ROFF_IGN;
2803 
2804 	namesz = roff_getname(r, &string, ln, pos);
2805 	switch (name[namesz]) {
2806 	case '\\':
2807 		return ROFF_IGN;
2808 	case '\t':
2809 		string = buf->buf + pos + namesz;
2810 		break;
2811 	default:
2812 		break;
2813 	}
2814 
2815 	/* Read past the initial double-quote, if any. */
2816 	if (*string == '"')
2817 		string++;
2818 
2819 	/* The rest is the value. */
2820 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2821 	    ROFF_as == tok);
2822 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2823 	return ROFF_IGN;
2824 }
2825 
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-character code for
 * it in *res, advance the parse point, and return that code;
 * else return 0 and leave the parse point unchanged.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i',
 * ">=" as 'g', ">?" as 'a', and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = *cp;
	oplen = 1;

	switch (*cp) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2889 
2890 /*
2891  * Evaluate either a parenthesized numeric expression
2892  * or a single signed integer number.
2893  */
2894 static int
2895 roff_evalpar(struct roff *r, int ln,
2896 	const char *v, int *pos, int *res, int flags)
2897 {
2898 
2899 	if ('(' != v[*pos])
2900 		return roff_getnum(v, pos, res, flags);
2901 
2902 	(*pos)++;
2903 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2904 		return 0;
2905 
2906 	/*
2907 	 * Omission of the closing parenthesis
2908 	 * is an error in validation mode,
2909 	 * but ignored in evaluation mode.
2910 	 */
2911 
2912 	if (')' == v[*pos])
2913 		(*pos)++;
2914 	else if (NULL == res)
2915 		return 0;
2916 
2917 	return 1;
2918 }
2919 
2920 /*
2921  * Evaluate a complete numeric expression.
2922  * Proceed left to right, there is no concept of precedence.
2923  */
2924 static int
2925 roff_evalnum(struct roff *r, int ln, const char *v,
2926 	int *pos, int *res, int flags)
2927 {
2928 	int		 mypos, operand2;
2929 	char		 operator;
2930 
2931 	if (NULL == pos) {
2932 		mypos = 0;
2933 		pos = &mypos;
2934 	}
2935 
2936 	if (flags & ROFFNUM_WHITE)
2937 		while (isspace((unsigned char)v[*pos]))
2938 			(*pos)++;
2939 
2940 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2941 		return 0;
2942 
2943 	while (1) {
2944 		if (flags & ROFFNUM_WHITE)
2945 			while (isspace((unsigned char)v[*pos]))
2946 				(*pos)++;
2947 
2948 		if ( ! roff_getop(v, pos, &operator))
2949 			break;
2950 
2951 		if (flags & ROFFNUM_WHITE)
2952 			while (isspace((unsigned char)v[*pos]))
2953 				(*pos)++;
2954 
2955 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2956 			return 0;
2957 
2958 		if (flags & ROFFNUM_WHITE)
2959 			while (isspace((unsigned char)v[*pos]))
2960 				(*pos)++;
2961 
2962 		if (NULL == res)
2963 			continue;
2964 
2965 		switch (operator) {
2966 		case '+':
2967 			*res += operand2;
2968 			break;
2969 		case '-':
2970 			*res -= operand2;
2971 			break;
2972 		case '*':
2973 			*res *= operand2;
2974 			break;
2975 		case '/':
2976 			if (operand2 == 0) {
2977 				mandoc_msg(MANDOCERR_DIVZERO,
2978 					ln, *pos, "%s", v);
2979 				*res = 0;
2980 				break;
2981 			}
2982 			*res /= operand2;
2983 			break;
2984 		case '%':
2985 			if (operand2 == 0) {
2986 				mandoc_msg(MANDOCERR_DIVZERO,
2987 					ln, *pos, "%s", v);
2988 				*res = 0;
2989 				break;
2990 			}
2991 			*res %= operand2;
2992 			break;
2993 		case '<':
2994 			*res = *res < operand2;
2995 			break;
2996 		case '>':
2997 			*res = *res > operand2;
2998 			break;
2999 		case 'l':
3000 			*res = *res <= operand2;
3001 			break;
3002 		case 'g':
3003 			*res = *res >= operand2;
3004 			break;
3005 		case '=':
3006 			*res = *res == operand2;
3007 			break;
3008 		case '!':
3009 			*res = *res != operand2;
3010 			break;
3011 		case '&':
3012 			*res = *res && operand2;
3013 			break;
3014 		case ':':
3015 			*res = *res || operand2;
3016 			break;
3017 		case 'i':
3018 			if (operand2 < *res)
3019 				*res = operand2;
3020 			break;
3021 		case 'a':
3022 			if (operand2 > *res)
3023 				*res = operand2;
3024 			break;
3025 		default:
3026 			abort();
3027 		}
3028 	}
3029 	return 1;
3030 }
3031 
3032 /* --- register management ------------------------------------------------ */
3033 
/*
 * Set, increment ('+'), or decrement ('-') the number register
 * with the NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3039 
3040 static void
3041 roff_setregn(struct roff *r, const char *name, size_t len,
3042     int val, char sign, int step)
3043 {
3044 	struct roffreg	*reg;
3045 
3046 	/* Search for an existing register with the same name. */
3047 	reg = r->regtab;
3048 
3049 	while (reg != NULL && (reg->key.sz != len ||
3050 	    strncmp(reg->key.p, name, len) != 0))
3051 		reg = reg->next;
3052 
3053 	if (NULL == reg) {
3054 		/* Create a new register. */
3055 		reg = mandoc_malloc(sizeof(struct roffreg));
3056 		reg->key.p = mandoc_strndup(name, len);
3057 		reg->key.sz = len;
3058 		reg->val = 0;
3059 		reg->step = 0;
3060 		reg->next = r->regtab;
3061 		r->regtab = reg;
3062 	}
3063 
3064 	if ('+' == sign)
3065 		reg->val += val;
3066 	else if ('-' == sign)
3067 		reg->val -= val;
3068 	else
3069 		reg->val = val;
3070 	if (step != INT_MIN)
3071 		reg->step = step;
3072 }
3073 
3074 /*
3075  * Handle some predefined read-only number registers.
3076  * For now, return -1 if the requested register is not predefined;
3077  * in case a predefined read-only register having the value -1
3078  * were to turn up, another special value would have to be chosen.
3079  */
3080 static int
3081 roff_getregro(const struct roff *r, const char *name)
3082 {
3083 
3084 	switch (*name) {
3085 	case '$':  /* Number of arguments of the last macro evaluated. */
3086 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3087 	case 'A':  /* ASCII approximation mode is always off. */
3088 		return 0;
3089 	case 'g':  /* Groff compatibility mode is always on. */
3090 		return 1;
3091 	case 'H':  /* Fixed horizontal resolution. */
3092 		return 24;
3093 	case 'j':  /* Always adjust left margin only. */
3094 		return 0;
3095 	case 'T':  /* Some output device is always defined. */
3096 		return 1;
3097 	case 'V':  /* Fixed vertical resolution. */
3098 		return 40;
3099 	default:
3100 		return -1;
3101 	}
3102 }
3103 
/*
 * Return the value of the number register with the
 * NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3109 
3110 static int
3111 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3112 {
3113 	struct roffreg	*reg;
3114 	int		 val;
3115 
3116 	if ('.' == name[0] && 2 == len) {
3117 		val = roff_getregro(r, name + 1);
3118 		if (-1 != val)
3119 			return val;
3120 	}
3121 
3122 	for (reg = r->regtab; reg; reg = reg->next) {
3123 		if (len == reg->key.sz &&
3124 		    0 == strncmp(name, reg->key.p, len)) {
3125 			switch (sign) {
3126 			case '+':
3127 				reg->val += reg->step;
3128 				break;
3129 			case '-':
3130 				reg->val -= reg->step;
3131 				break;
3132 			default:
3133 				break;
3134 			}
3135 			return reg->val;
3136 		}
3137 	}
3138 
3139 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3140 	return 0;
3141 }
3142 
3143 static int
3144 roff_hasregn(const struct roff *r, const char *name, size_t len)
3145 {
3146 	struct roffreg	*reg;
3147 	int		 val;
3148 
3149 	if ('.' == name[0] && 2 == len) {
3150 		val = roff_getregro(r, name + 1);
3151 		if (-1 != val)
3152 			return 1;
3153 	}
3154 
3155 	for (reg = r->regtab; reg; reg = reg->next)
3156 		if (len == reg->key.sz &&
3157 		    0 == strncmp(name, reg->key.p, len))
3158 			return 1;
3159 
3160 	return 0;
3161 }
3162 
3163 static void
3164 roff_freereg(struct roffreg *reg)
3165 {
3166 	struct roffreg	*old_reg;
3167 
3168 	while (NULL != reg) {
3169 		free(reg->key.p);
3170 		old_reg = reg;
3171 		reg = reg->next;
3172 		free(old_reg);
3173 	}
3174 }
3175 
3176 static int
3177 roff_nr(ROFF_ARGS)
3178 {
3179 	char		*key, *val, *step;
3180 	size_t		 keysz;
3181 	int		 iv, is, len;
3182 	char		 sign;
3183 
3184 	key = val = buf->buf + pos;
3185 	if (*key == '\0')
3186 		return ROFF_IGN;
3187 
3188 	keysz = roff_getname(r, &val, ln, pos);
3189 	if (key[keysz] == '\\' || key[keysz] == '\t')
3190 		return ROFF_IGN;
3191 
3192 	sign = *val;
3193 	if (sign == '+' || sign == '-')
3194 		val++;
3195 
3196 	len = 0;
3197 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3198 		return ROFF_IGN;
3199 
3200 	step = val + len;
3201 	while (isspace((unsigned char)*step))
3202 		step++;
3203 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3204 		is = INT_MIN;
3205 
3206 	roff_setregn(r, key, keysz, iv, sign, is);
3207 	return ROFF_IGN;
3208 }
3209 
3210 static int
3211 roff_rr(ROFF_ARGS)
3212 {
3213 	struct roffreg	*reg, **prev;
3214 	char		*name, *cp;
3215 	size_t		 namesz;
3216 
3217 	name = cp = buf->buf + pos;
3218 	if (*name == '\0')
3219 		return ROFF_IGN;
3220 	namesz = roff_getname(r, &cp, ln, pos);
3221 	name[namesz] = '\0';
3222 
3223 	prev = &r->regtab;
3224 	while (1) {
3225 		reg = *prev;
3226 		if (reg == NULL || !strcmp(name, reg->key.p))
3227 			break;
3228 		prev = &reg->next;
3229 	}
3230 	if (reg != NULL) {
3231 		*prev = reg->next;
3232 		free(reg->key.p);
3233 		free(reg);
3234 	}
3235 	return ROFF_IGN;
3236 }
3237 
3238 /* --- handler functions for roff requests -------------------------------- */
3239 
3240 static int
3241 roff_rm(ROFF_ARGS)
3242 {
3243 	const char	 *name;
3244 	char		 *cp;
3245 	size_t		  namesz;
3246 
3247 	cp = buf->buf + pos;
3248 	while (*cp != '\0') {
3249 		name = cp;
3250 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3251 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3252 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3253 		if (name[namesz] == '\\' || name[namesz] == '\t')
3254 			break;
3255 	}
3256 	return ROFF_IGN;
3257 }
3258 
3259 static int
3260 roff_it(ROFF_ARGS)
3261 {
3262 	int		 iv;
3263 
3264 	/* Parse the number of lines. */
3265 
3266 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3267 		mandoc_msg(MANDOCERR_IT_NONUM,
3268 		    ln, ppos, "%s", buf->buf + 1);
3269 		return ROFF_IGN;
3270 	}
3271 
3272 	while (isspace((unsigned char)buf->buf[pos]))
3273 		pos++;
3274 
3275 	/*
3276 	 * Arm the input line trap.
3277 	 * Special-casing "an-trap" is an ugly workaround to cope
3278 	 * with DocBook stupidly fiddling with man(7) internals.
3279 	 */
3280 
3281 	roffit_lines = iv;
3282 	roffit_macro = mandoc_strdup(iv != 1 ||
3283 	    strcmp(buf->buf + pos, "an-trap") ?
3284 	    buf->buf + pos : "br");
3285 	return ROFF_IGN;
3286 }
3287 
3288 static int
3289 roff_Dd(ROFF_ARGS)
3290 {
3291 	int		 mask;
3292 	enum roff_tok	 t, te;
3293 
3294 	switch (tok) {
3295 	case ROFF_Dd:
3296 		tok = MDOC_Dd;
3297 		te = MDOC_MAX;
3298 		if (r->format == 0)
3299 			r->format = MPARSE_MDOC;
3300 		mask = MPARSE_MDOC | MPARSE_QUICK;
3301 		break;
3302 	case ROFF_TH:
3303 		tok = MAN_TH;
3304 		te = MAN_MAX;
3305 		if (r->format == 0)
3306 			r->format = MPARSE_MAN;
3307 		mask = MPARSE_QUICK;
3308 		break;
3309 	default:
3310 		abort();
3311 	}
3312 	if ((r->options & mask) == 0)
3313 		for (t = tok; t < te; t++)
3314 			roff_setstr(r, roff_name[t], NULL, 0);
3315 	return ROFF_CONT;
3316 }
3317 
3318 static int
3319 roff_TE(ROFF_ARGS)
3320 {
3321 	r->man->flags &= ~ROFF_NONOFILL;
3322 	if (r->tbl == NULL) {
3323 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3324 		return ROFF_IGN;
3325 	}
3326 	if (tbl_end(r->tbl, 0) == 0) {
3327 		r->tbl = NULL;
3328 		free(buf->buf);
3329 		buf->buf = mandoc_strdup(".sp");
3330 		buf->sz = 4;
3331 		*offs = 0;
3332 		return ROFF_REPARSE;
3333 	}
3334 	r->tbl = NULL;
3335 	return ROFF_IGN;
3336 }
3337 
3338 static int
3339 roff_T_(ROFF_ARGS)
3340 {
3341 
3342 	if (NULL == r->tbl)
3343 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3344 	else
3345 		tbl_restart(ln, ppos, r->tbl);
3346 
3347 	return ROFF_IGN;
3348 }
3349 
3350 /*
3351  * Handle in-line equation delimiters.
3352  */
3353 static int
3354 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3355 {
3356 	char		*cp1, *cp2;
3357 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3358 
3359 	/*
3360 	 * Outside equations, look for an opening delimiter.
3361 	 * If we are inside an equation, we already know it is
3362 	 * in-line, or this function wouldn't have been called;
3363 	 * so look for a closing delimiter.
3364 	 */
3365 
3366 	cp1 = buf->buf + pos;
3367 	cp2 = strchr(cp1, r->eqn == NULL ?
3368 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3369 	if (cp2 == NULL)
3370 		return ROFF_CONT;
3371 
3372 	*cp2++ = '\0';
3373 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3374 
3375 	/* Handle preceding text, protecting whitespace. */
3376 
3377 	if (*buf->buf != '\0') {
3378 		if (r->eqn == NULL)
3379 			bef_pr = "\\&";
3380 		bef_nl = "\n";
3381 	}
3382 
3383 	/*
3384 	 * Prepare replacing the delimiter with an equation macro
3385 	 * and drop leading white space from the equation.
3386 	 */
3387 
3388 	if (r->eqn == NULL) {
3389 		while (*cp2 == ' ')
3390 			cp2++;
3391 		mac = ".EQ";
3392 	} else
3393 		mac = ".EN";
3394 
3395 	/* Handle following text, protecting whitespace. */
3396 
3397 	if (*cp2 != '\0') {
3398 		aft_nl = "\n";
3399 		if (r->eqn != NULL)
3400 			aft_pr = "\\&";
3401 	}
3402 
3403 	/* Do the actual replacement. */
3404 
3405 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3406 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3407 	free(buf->buf);
3408 	buf->buf = cp1;
3409 
3410 	/* Toggle the in-line state of the eqn subsystem. */
3411 
3412 	r->eqn_inline = r->eqn == NULL;
3413 	return ROFF_REPARSE;
3414 }
3415 
3416 static int
3417 roff_EQ(ROFF_ARGS)
3418 {
3419 	struct roff_node	*n;
3420 
3421 	if (r->man->meta.macroset == MACROSET_MAN)
3422 		man_breakscope(r->man, ROFF_EQ);
3423 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3424 	if (ln > r->man->last->line)
3425 		n->flags |= NODE_LINE;
3426 	n->eqn = eqn_box_new();
3427 	roff_node_append(r->man, n);
3428 	r->man->next = ROFF_NEXT_SIBLING;
3429 
3430 	assert(r->eqn == NULL);
3431 	if (r->last_eqn == NULL)
3432 		r->last_eqn = eqn_alloc();
3433 	else
3434 		eqn_reset(r->last_eqn);
3435 	r->eqn = r->last_eqn;
3436 	r->eqn->node = n;
3437 
3438 	if (buf->buf[pos] != '\0')
3439 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3440 		    ".EQ %s", buf->buf + pos);
3441 
3442 	return ROFF_IGN;
3443 }
3444 
3445 static int
3446 roff_EN(ROFF_ARGS)
3447 {
3448 	if (r->eqn != NULL) {
3449 		eqn_parse(r->eqn);
3450 		r->eqn = NULL;
3451 	} else
3452 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3453 	if (buf->buf[pos] != '\0')
3454 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3455 		    "EN %s", buf->buf + pos);
3456 	return ROFF_IGN;
3457 }
3458 
3459 static int
3460 roff_TS(ROFF_ARGS)
3461 {
3462 	if (r->tbl != NULL) {
3463 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3464 		tbl_end(r->tbl, 0);
3465 	}
3466 	r->man->flags |= ROFF_NONOFILL;
3467 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3468 	if (r->last_tbl == NULL)
3469 		r->first_tbl = r->tbl;
3470 	r->last_tbl = r->tbl;
3471 	return ROFF_IGN;
3472 }
3473 
3474 static int
3475 roff_noarg(ROFF_ARGS)
3476 {
3477 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3478 		man_breakscope(r->man, tok);
3479 	if (tok == ROFF_brp)
3480 		tok = ROFF_br;
3481 	roff_elem_alloc(r->man, ln, ppos, tok);
3482 	if (buf->buf[pos] != '\0')
3483 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3484 		   "%s %s", roff_name[tok], buf->buf + pos);
3485 	if (tok == ROFF_nf)
3486 		r->man->flags |= ROFF_NOFILL;
3487 	else if (tok == ROFF_fi)
3488 		r->man->flags &= ~ROFF_NOFILL;
3489 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3490 	r->man->next = ROFF_NEXT_SIBLING;
3491 	return ROFF_IGN;
3492 }
3493 
3494 static int
3495 roff_onearg(ROFF_ARGS)
3496 {
3497 	struct roff_node	*n;
3498 	char			*cp;
3499 	int			 npos;
3500 
3501 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3502 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3503 	     tok == ROFF_ti))
3504 		man_breakscope(r->man, tok);
3505 
3506 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3507 		r->man->last = roffce_node;
3508 		r->man->next = ROFF_NEXT_SIBLING;
3509 	}
3510 
3511 	roff_elem_alloc(r->man, ln, ppos, tok);
3512 	n = r->man->last;
3513 
3514 	cp = buf->buf + pos;
3515 	if (*cp != '\0') {
3516 		while (*cp != '\0' && *cp != ' ')
3517 			cp++;
3518 		while (*cp == ' ')
3519 			*cp++ = '\0';
3520 		if (*cp != '\0')
3521 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3522 			    ln, (int)(cp - buf->buf),
3523 			    "%s ... %s", roff_name[tok], cp);
3524 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3525 	}
3526 
3527 	if (tok == ROFF_ce || tok == ROFF_rj) {
3528 		if (r->man->last->type == ROFFT_ELEM) {
3529 			roff_word_alloc(r->man, ln, pos, "1");
3530 			r->man->last->flags |= NODE_NOSRC;
3531 		}
3532 		npos = 0;
3533 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3534 		    &roffce_lines, 0) == 0) {
3535 			mandoc_msg(MANDOCERR_CE_NONUM,
3536 			    ln, pos, "ce %s", buf->buf + pos);
3537 			roffce_lines = 1;
3538 		}
3539 		if (roffce_lines < 1) {
3540 			r->man->last = r->man->last->parent;
3541 			roffce_node = NULL;
3542 			roffce_lines = 0;
3543 		} else
3544 			roffce_node = r->man->last->parent;
3545 	} else {
3546 		n->flags |= NODE_VALID | NODE_ENDED;
3547 		r->man->last = n;
3548 	}
3549 	n->flags |= NODE_LINE;
3550 	r->man->next = ROFF_NEXT_SIBLING;
3551 	return ROFF_IGN;
3552 }
3553 
3554 static int
3555 roff_manyarg(ROFF_ARGS)
3556 {
3557 	struct roff_node	*n;
3558 	char			*sp, *ep;
3559 
3560 	roff_elem_alloc(r->man, ln, ppos, tok);
3561 	n = r->man->last;
3562 
3563 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3564 		while (*ep != '\0' && *ep != ' ')
3565 			ep++;
3566 		while (*ep == ' ')
3567 			*ep++ = '\0';
3568 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3569 	}
3570 
3571 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3572 	r->man->last = n;
3573 	r->man->next = ROFF_NEXT_SIBLING;
3574 	return ROFF_IGN;
3575 }
3576 
3577 static int
3578 roff_als(ROFF_ARGS)
3579 {
3580 	char		*oldn, *newn, *end, *value;
3581 	size_t		 oldsz, newsz, valsz;
3582 
3583 	newn = oldn = buf->buf + pos;
3584 	if (*newn == '\0')
3585 		return ROFF_IGN;
3586 
3587 	newsz = roff_getname(r, &oldn, ln, pos);
3588 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3589 		return ROFF_IGN;
3590 
3591 	end = oldn;
3592 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3593 	if (oldsz == 0)
3594 		return ROFF_IGN;
3595 
3596 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3597 	    (int)oldsz, oldn);
3598 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3599 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3600 	free(value);
3601 	return ROFF_IGN;
3602 }
3603 
3604 /*
3605  * The .break request only makes sense inside conditionals,
3606  * and that case is already handled in roff_cond_sub().
3607  */
3608 static int
3609 roff_break(ROFF_ARGS)
3610 {
3611 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3612 	return ROFF_IGN;
3613 }
3614 
3615 static int
3616 roff_cc(ROFF_ARGS)
3617 {
3618 	const char	*p;
3619 
3620 	p = buf->buf + pos;
3621 
3622 	if (*p == '\0' || (r->control = *p++) == '.')
3623 		r->control = '\0';
3624 
3625 	if (*p != '\0')
3626 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3627 		    ln, p - buf->buf, "cc ... %s", p);
3628 
3629 	return ROFF_IGN;
3630 }
3631 
3632 static int
3633 roff_char(ROFF_ARGS)
3634 {
3635 	const char	*p, *kp, *vp;
3636 	size_t		 ksz, vsz;
3637 	int		 font;
3638 
3639 	/* Parse the character to be replaced. */
3640 
3641 	kp = buf->buf + pos;
3642 	p = kp + 1;
3643 	if (*kp == '\0' || (*kp == '\\' &&
3644 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3645 	    (*p != ' ' && *p != '\0')) {
3646 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3647 		return ROFF_IGN;
3648 	}
3649 	ksz = p - kp;
3650 	while (*p == ' ')
3651 		p++;
3652 
3653 	/*
3654 	 * If the replacement string contains a font escape sequence,
3655 	 * we have to restore the font at the end.
3656 	 */
3657 
3658 	vp = p;
3659 	vsz = strlen(p);
3660 	font = 0;
3661 	while (*p != '\0') {
3662 		if (*p++ != '\\')
3663 			continue;
3664 		switch (mandoc_escape(&p, NULL, NULL)) {
3665 		case ESCAPE_FONT:
3666 		case ESCAPE_FONTROMAN:
3667 		case ESCAPE_FONTITALIC:
3668 		case ESCAPE_FONTBOLD:
3669 		case ESCAPE_FONTBI:
3670 		case ESCAPE_FONTCR:
3671 		case ESCAPE_FONTCB:
3672 		case ESCAPE_FONTCI:
3673 		case ESCAPE_FONTPREV:
3674 			font++;
3675 			break;
3676 		default:
3677 			break;
3678 		}
3679 	}
3680 	if (font > 1)
3681 		mandoc_msg(MANDOCERR_CHAR_FONT,
3682 		    ln, (int)(vp - buf->buf), "%s", vp);
3683 
3684 	/*
3685 	 * Approximate the effect of .char using the .tr tables.
3686 	 * XXX In groff, .char and .tr interact differently.
3687 	 */
3688 
3689 	if (ksz == 1) {
3690 		if (r->xtab == NULL)
3691 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3692 		assert((unsigned int)*kp < 128);
3693 		free(r->xtab[(int)*kp].p);
3694 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3695 		    "%s%s", vp, font ? "\fP" : "");
3696 	} else {
3697 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3698 		if (font)
3699 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3700 	}
3701 	return ROFF_IGN;
3702 }
3703 
3704 static int
3705 roff_ec(ROFF_ARGS)
3706 {
3707 	const char	*p;
3708 
3709 	p = buf->buf + pos;
3710 	if (*p == '\0')
3711 		r->escape = '\\';
3712 	else {
3713 		r->escape = *p;
3714 		if (*++p != '\0')
3715 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3716 			    (int)(p - buf->buf), "ec ... %s", p);
3717 	}
3718 	return ROFF_IGN;
3719 }
3720 
3721 static int
3722 roff_eo(ROFF_ARGS)
3723 {
3724 	r->escape = '\0';
3725 	if (buf->buf[pos] != '\0')
3726 		mandoc_msg(MANDOCERR_ARG_SKIP,
3727 		    ln, pos, "eo %s", buf->buf + pos);
3728 	return ROFF_IGN;
3729 }
3730 
3731 static int
3732 roff_nop(ROFF_ARGS)
3733 {
3734 	while (buf->buf[pos] == ' ')
3735 		pos++;
3736 	*offs = pos;
3737 	return ROFF_RERUN;
3738 }
3739 
3740 static int
3741 roff_tr(ROFF_ARGS)
3742 {
3743 	const char	*p, *first, *second;
3744 	size_t		 fsz, ssz;
3745 	enum mandoc_esc	 esc;
3746 
3747 	p = buf->buf + pos;
3748 
3749 	if (*p == '\0') {
3750 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3751 		return ROFF_IGN;
3752 	}
3753 
3754 	while (*p != '\0') {
3755 		fsz = ssz = 1;
3756 
3757 		first = p++;
3758 		if (*first == '\\') {
3759 			esc = mandoc_escape(&p, NULL, NULL);
3760 			if (esc == ESCAPE_ERROR) {
3761 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3762 				    (int)(p - buf->buf), "%s", first);
3763 				return ROFF_IGN;
3764 			}
3765 			fsz = (size_t)(p - first);
3766 		}
3767 
3768 		second = p++;
3769 		if (*second == '\\') {
3770 			esc = mandoc_escape(&p, NULL, NULL);
3771 			if (esc == ESCAPE_ERROR) {
3772 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3773 				    (int)(p - buf->buf), "%s", second);
3774 				return ROFF_IGN;
3775 			}
3776 			ssz = (size_t)(p - second);
3777 		} else if (*second == '\0') {
3778 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3779 			    (int)(first - buf->buf), "tr %s", first);
3780 			second = " ";
3781 			p--;
3782 		}
3783 
3784 		if (fsz > 1) {
3785 			roff_setstrn(&r->xmbtab, first, fsz,
3786 			    second, ssz, 0);
3787 			continue;
3788 		}
3789 
3790 		if (r->xtab == NULL)
3791 			r->xtab = mandoc_calloc(128,
3792 			    sizeof(struct roffstr));
3793 
3794 		free(r->xtab[(int)*first].p);
3795 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3796 		r->xtab[(int)*first].sz = ssz;
3797 	}
3798 
3799 	return ROFF_IGN;
3800 }
3801 
3802 /*
3803  * Implementation of the .return request.
3804  * There is no need to call roff_userret() from here.
3805  * The read module will call that after rewinding the reader stack
3806  * to the place from where the current macro was called.
3807  */
3808 static int
3809 roff_return(ROFF_ARGS)
3810 {
3811 	if (r->mstackpos >= 0)
3812 		return ROFF_IGN | ROFF_USERRET;
3813 
3814 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3815 	return ROFF_IGN;
3816 }
3817 
3818 static int
3819 roff_rn(ROFF_ARGS)
3820 {
3821 	const char	*value;
3822 	char		*oldn, *newn, *end;
3823 	size_t		 oldsz, newsz;
3824 	int		 deftype;
3825 
3826 	oldn = newn = buf->buf + pos;
3827 	if (*oldn == '\0')
3828 		return ROFF_IGN;
3829 
3830 	oldsz = roff_getname(r, &newn, ln, pos);
3831 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3832 		return ROFF_IGN;
3833 
3834 	end = newn;
3835 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3836 	if (newsz == 0)
3837 		return ROFF_IGN;
3838 
3839 	deftype = ROFFDEF_ANY;
3840 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3841 	switch (deftype) {
3842 	case ROFFDEF_USER:
3843 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3844 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3845 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3846 		break;
3847 	case ROFFDEF_PRE:
3848 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3849 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3850 		break;
3851 	case ROFFDEF_REN:
3852 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3853 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3854 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3855 		break;
3856 	case ROFFDEF_STD:
3857 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3858 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3859 		break;
3860 	default:
3861 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863 		break;
3864 	}
3865 	return ROFF_IGN;
3866 }
3867 
3868 static int
3869 roff_shift(ROFF_ARGS)
3870 {
3871 	struct mctx	*ctx;
3872 	int		 levels, i;
3873 
3874 	levels = 1;
3875 	if (buf->buf[pos] != '\0' &&
3876 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3877 		mandoc_msg(MANDOCERR_CE_NONUM,
3878 		    ln, pos, "shift %s", buf->buf + pos);
3879 		levels = 1;
3880 	}
3881 	if (r->mstackpos < 0) {
3882 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3883 		return ROFF_IGN;
3884 	}
3885 	ctx = r->mstack + r->mstackpos;
3886 	if (levels > ctx->argc) {
3887 		mandoc_msg(MANDOCERR_SHIFT,
3888 		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3889 		levels = ctx->argc;
3890 	}
3891 	if (levels == 0)
3892 		return ROFF_IGN;
3893 	for (i = 0; i < levels; i++)
3894 		free(ctx->argv[i]);
3895 	ctx->argc -= levels;
3896 	for (i = 0; i < ctx->argc; i++)
3897 		ctx->argv[i] = ctx->argv[i + levels];
3898 	return ROFF_IGN;
3899 }
3900 
3901 static int
3902 roff_so(ROFF_ARGS)
3903 {
3904 	char *name, *cp;
3905 
3906 	name = buf->buf + pos;
3907 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3908 
3909 	/*
3910 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3911 	 * opening anything that's not in our cwd or anything beneath
3912 	 * it.  Thus, explicitly disallow traversing up the file-system
3913 	 * or using absolute paths.
3914 	 */
3915 
3916 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3917 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3918 		buf->sz = mandoc_asprintf(&cp,
3919 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3920 		free(buf->buf);
3921 		buf->buf = cp;
3922 		*offs = 0;
3923 		return ROFF_REPARSE;
3924 	}
3925 
3926 	*offs = pos;
3927 	return ROFF_SO;
3928 }
3929 
3930 /* --- user defined strings and macros ------------------------------------ */
3931 
3932 static int
3933 roff_userdef(ROFF_ARGS)
3934 {
3935 	struct mctx	 *ctx;
3936 	char		 *arg, *ap, *dst, *src;
3937 	size_t		  sz;
3938 
3939 	/* If the macro is empty, ignore it altogether. */
3940 
3941 	if (*r->current_string == '\0')
3942 		return ROFF_IGN;
3943 
3944 	/* Initialize a new macro stack context. */
3945 
3946 	if (++r->mstackpos == r->mstacksz) {
3947 		r->mstack = mandoc_recallocarray(r->mstack,
3948 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3949 		r->mstacksz += 8;
3950 	}
3951 	ctx = r->mstack + r->mstackpos;
3952 	ctx->argsz = 0;
3953 	ctx->argc = 0;
3954 	ctx->argv = NULL;
3955 
3956 	/*
3957 	 * Collect pointers to macro argument strings,
3958 	 * NUL-terminating them and escaping quotes.
3959 	 */
3960 
3961 	src = buf->buf + pos;
3962 	while (*src != '\0') {
3963 		if (ctx->argc == ctx->argsz) {
3964 			ctx->argsz += 8;
3965 			ctx->argv = mandoc_reallocarray(ctx->argv,
3966 			    ctx->argsz, sizeof(*ctx->argv));
3967 		}
3968 		arg = roff_getarg(r, &src, ln, &pos);
3969 		sz = 1;  /* For the terminating NUL. */
3970 		for (ap = arg; *ap != '\0'; ap++)
3971 			sz += *ap == '"' ? 4 : 1;
3972 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3973 		for (ap = arg; *ap != '\0'; ap++) {
3974 			if (*ap == '"') {
3975 				memcpy(dst, "\\(dq", 4);
3976 				dst += 4;
3977 			} else
3978 				*dst++ = *ap;
3979 		}
3980 		*dst = '\0';
3981 		free(arg);
3982 	}
3983 
3984 	/* Replace the macro invocation by the macro definition. */
3985 
3986 	free(buf->buf);
3987 	buf->buf = mandoc_strdup(r->current_string);
3988 	buf->sz = strlen(buf->buf) + 1;
3989 	*offs = 0;
3990 
3991 	return buf->buf[buf->sz - 2] == '\n' ?
3992 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3993 }
3994 
3995 /*
3996  * Calling a high-level macro that was renamed with .rn.
3997  * r->current_string has already been set up by roff_parse().
3998  */
3999 static int
4000 roff_renamed(ROFF_ARGS)
4001 {
4002 	char	*nbuf;
4003 
4004 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4005 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4006 	free(buf->buf);
4007 	buf->buf = nbuf;
4008 	*offs = 0;
4009 	return ROFF_CONT;
4010 }
4011 
4012 /*
4013  * Measure the length in bytes of the roff identifier at *cpp
4014  * and advance the pointer to the next word.
4015  */
4016 static size_t
4017 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4018 {
4019 	char	 *name, *cp;
4020 	size_t	  namesz;
4021 
4022 	name = *cpp;
4023 	if (*name == '\0')
4024 		return 0;
4025 
4026 	/* Advance cp to the byte after the end of the name. */
4027 
4028 	for (cp = name; 1; cp++) {
4029 		namesz = cp - name;
4030 		if (*cp == '\0')
4031 			break;
4032 		if (*cp == ' ' || *cp == '\t') {
4033 			cp++;
4034 			break;
4035 		}
4036 		if (*cp != '\\')
4037 			continue;
4038 		if (cp[1] == '{' || cp[1] == '}')
4039 			break;
4040 		if (*++cp == '\\')
4041 			continue;
4042 		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4043 		    "%.*s", (int)(cp - name + 1), name);
4044 		mandoc_escape((const char **)&cp, NULL, NULL);
4045 		break;
4046 	}
4047 
4048 	/* Read past spaces. */
4049 
4050 	while (*cp == ' ')
4051 		cp++;
4052 
4053 	*cpp = cp;
4054 	return namesz;
4055 }
4056 
4057 /*
4058  * Store *string into the user-defined string called *name.
4059  * To clear an existing entry, call with (*r, *name, NULL, 0).
4060  * append == 0: replace mode
4061  * append == 1: single-line append mode
4062  * append == 2: multiline append mode, append '\n' after each call
4063  */
4064 static void
4065 roff_setstr(struct roff *r, const char *name, const char *string,
4066 	int append)
4067 {
4068 	size_t	 namesz;
4069 
4070 	namesz = strlen(name);
4071 	roff_setstrn(&r->strtab, name, namesz, string,
4072 	    string ? strlen(string) : 0, append);
4073 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4074 }
4075 
4076 static void
4077 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4078 		const char *string, size_t stringsz, int append)
4079 {
4080 	struct roffkv	*n;
4081 	char		*c;
4082 	int		 i;
4083 	size_t		 oldch, newch;
4084 
4085 	/* Search for an existing string with the same name. */
4086 	n = *r;
4087 
4088 	while (n && (namesz != n->key.sz ||
4089 			strncmp(n->key.p, name, namesz)))
4090 		n = n->next;
4091 
4092 	if (NULL == n) {
4093 		/* Create a new string table entry. */
4094 		n = mandoc_malloc(sizeof(struct roffkv));
4095 		n->key.p = mandoc_strndup(name, namesz);
4096 		n->key.sz = namesz;
4097 		n->val.p = NULL;
4098 		n->val.sz = 0;
4099 		n->next = *r;
4100 		*r = n;
4101 	} else if (0 == append) {
4102 		free(n->val.p);
4103 		n->val.p = NULL;
4104 		n->val.sz = 0;
4105 	}
4106 
4107 	if (NULL == string)
4108 		return;
4109 
4110 	/*
4111 	 * One additional byte for the '\n' in multiline mode,
4112 	 * and one for the terminating '\0'.
4113 	 */
4114 	newch = stringsz + (1 < append ? 2u : 1u);
4115 
4116 	if (NULL == n->val.p) {
4117 		n->val.p = mandoc_malloc(newch);
4118 		*n->val.p = '\0';
4119 		oldch = 0;
4120 	} else {
4121 		oldch = n->val.sz;
4122 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4123 	}
4124 
4125 	/* Skip existing content in the destination buffer. */
4126 	c = n->val.p + (int)oldch;
4127 
4128 	/* Append new content to the destination buffer. */
4129 	i = 0;
4130 	while (i < (int)stringsz) {
4131 		/*
4132 		 * Rudimentary roff copy mode:
4133 		 * Handle escaped backslashes.
4134 		 */
4135 		if ('\\' == string[i] && '\\' == string[i + 1])
4136 			i++;
4137 		*c++ = string[i++];
4138 	}
4139 
4140 	/* Append terminating bytes. */
4141 	if (1 < append)
4142 		*c++ = '\n';
4143 
4144 	*c = '\0';
4145 	n->val.sz = (int)(c - n->val.p);
4146 }
4147 
4148 static const char *
4149 roff_getstrn(struct roff *r, const char *name, size_t len,
4150     int *deftype)
4151 {
4152 	const struct roffkv	*n;
4153 	int			 found, i;
4154 	enum roff_tok		 tok;
4155 
4156 	found = 0;
4157 	for (n = r->strtab; n != NULL; n = n->next) {
4158 		if (strncmp(name, n->key.p, len) != 0 ||
4159 		    n->key.p[len] != '\0' || n->val.p == NULL)
4160 			continue;
4161 		if (*deftype & ROFFDEF_USER) {
4162 			*deftype = ROFFDEF_USER;
4163 			return n->val.p;
4164 		} else {
4165 			found = 1;
4166 			break;
4167 		}
4168 	}
4169 	for (n = r->rentab; n != NULL; n = n->next) {
4170 		if (strncmp(name, n->key.p, len) != 0 ||
4171 		    n->key.p[len] != '\0' || n->val.p == NULL)
4172 			continue;
4173 		if (*deftype & ROFFDEF_REN) {
4174 			*deftype = ROFFDEF_REN;
4175 			return n->val.p;
4176 		} else {
4177 			found = 1;
4178 			break;
4179 		}
4180 	}
4181 	for (i = 0; i < PREDEFS_MAX; i++) {
4182 		if (strncmp(name, predefs[i].name, len) != 0 ||
4183 		    predefs[i].name[len] != '\0')
4184 			continue;
4185 		if (*deftype & ROFFDEF_PRE) {
4186 			*deftype = ROFFDEF_PRE;
4187 			return predefs[i].str;
4188 		} else {
4189 			found = 1;
4190 			break;
4191 		}
4192 	}
4193 	if (r->man->meta.macroset != MACROSET_MAN) {
4194 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4195 			if (strncmp(name, roff_name[tok], len) != 0 ||
4196 			    roff_name[tok][len] != '\0')
4197 				continue;
4198 			if (*deftype & ROFFDEF_STD) {
4199 				*deftype = ROFFDEF_STD;
4200 				return NULL;
4201 			} else {
4202 				found = 1;
4203 				break;
4204 			}
4205 		}
4206 	}
4207 	if (r->man->meta.macroset != MACROSET_MDOC) {
4208 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4209 			if (strncmp(name, roff_name[tok], len) != 0 ||
4210 			    roff_name[tok][len] != '\0')
4211 				continue;
4212 			if (*deftype & ROFFDEF_STD) {
4213 				*deftype = ROFFDEF_STD;
4214 				return NULL;
4215 			} else {
4216 				found = 1;
4217 				break;
4218 			}
4219 		}
4220 	}
4221 
4222 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4223 		if (*deftype & ROFFDEF_REN) {
4224 			/*
4225 			 * This might still be a request,
4226 			 * so do not treat it as undefined yet.
4227 			 */
4228 			*deftype = ROFFDEF_UNDEF;
4229 			return NULL;
4230 		}
4231 
4232 		/* Using an undefined string defines it to be empty. */
4233 
4234 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4235 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4236 	}
4237 
4238 	*deftype = 0;
4239 	return NULL;
4240 }
4241 
4242 static void
4243 roff_freestr(struct roffkv *r)
4244 {
4245 	struct roffkv	 *n, *nn;
4246 
4247 	for (n = r; n; n = nn) {
4248 		free(n->key.p);
4249 		free(n->val.p);
4250 		nn = n->next;
4251 		free(n);
4252 	}
4253 }
4254 
4255 /* --- accessors and utility functions ------------------------------------ */
4256 
4257 /*
4258  * Duplicate an input string, making the appropriate character
4259  * conversations (as stipulated by `tr') along the way.
4260  * Returns a heap-allocated string with all the replacements made.
4261  */
4262 char *
4263 roff_strdup(const struct roff *r, const char *p)
4264 {
4265 	const struct roffkv *cp;
4266 	char		*res;
4267 	const char	*pp;
4268 	size_t		 ssz, sz;
4269 	enum mandoc_esc	 esc;
4270 
4271 	if (NULL == r->xmbtab && NULL == r->xtab)
4272 		return mandoc_strdup(p);
4273 	else if ('\0' == *p)
4274 		return mandoc_strdup("");
4275 
4276 	/*
4277 	 * Step through each character looking for term matches
4278 	 * (remember that a `tr' can be invoked with an escape, which is
4279 	 * a glyph but the escape is multi-character).
4280 	 * We only do this if the character hash has been initialised
4281 	 * and the string is >0 length.
4282 	 */
4283 
4284 	res = NULL;
4285 	ssz = 0;
4286 
4287 	while ('\0' != *p) {
4288 		assert((unsigned int)*p < 128);
4289 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4290 			sz = r->xtab[(int)*p].sz;
4291 			res = mandoc_realloc(res, ssz + sz + 1);
4292 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4293 			ssz += sz;
4294 			p++;
4295 			continue;
4296 		} else if ('\\' != *p) {
4297 			res = mandoc_realloc(res, ssz + 2);
4298 			res[ssz++] = *p++;
4299 			continue;
4300 		}
4301 
4302 		/* Search for term matches. */
4303 		for (cp = r->xmbtab; cp; cp = cp->next)
4304 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4305 				break;
4306 
4307 		if (NULL != cp) {
4308 			/*
4309 			 * A match has been found.
4310 			 * Append the match to the array and move
4311 			 * forward by its keysize.
4312 			 */
4313 			res = mandoc_realloc(res,
4314 			    ssz + cp->val.sz + 1);
4315 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4316 			ssz += cp->val.sz;
4317 			p += (int)cp->key.sz;
4318 			continue;
4319 		}
4320 
4321 		/*
4322 		 * Handle escapes carefully: we need to copy
4323 		 * over just the escape itself, or else we might
4324 		 * do replacements within the escape itself.
4325 		 * Make sure to pass along the bogus string.
4326 		 */
4327 		pp = p++;
4328 		esc = mandoc_escape(&p, NULL, NULL);
4329 		if (ESCAPE_ERROR == esc) {
4330 			sz = strlen(pp);
4331 			res = mandoc_realloc(res, ssz + sz + 1);
4332 			memcpy(res + ssz, pp, sz);
4333 			break;
4334 		}
4335 		/*
4336 		 * We bail out on bad escapes.
4337 		 * No need to warn: we already did so when
4338 		 * roff_expand() was called.
4339 		 */
4340 		sz = (int)(p - pp);
4341 		res = mandoc_realloc(res, ssz + sz + 1);
4342 		memcpy(res + ssz, pp, sz);
4343 		ssz += sz;
4344 	}
4345 
4346 	res[(int)ssz] = '\0';
4347 	return res;
4348 }
4349 
4350 int
4351 roff_getformat(const struct roff *r)
4352 {
4353 
4354 	return r->format;
4355 }
4356 
4357 /*
4358  * Find out whether a line is a macro line or not.
4359  * If it is, adjust the current position and return one; if it isn't,
4360  * return zero and don't change the current position.
4361  * If the control character has been set with `.cc', then let that grain
4362  * precedence.
4363  * This is slighly contrary to groff, where using the non-breaking
4364  * control character when `cc' has been invoked will cause the
4365  * non-breaking macro contents to be printed verbatim.
4366  */
4367 int
4368 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4369 {
4370 	int		pos;
4371 
4372 	pos = *ppos;
4373 
4374 	if (r->control != '\0' && cp[pos] == r->control)
4375 		pos++;
4376 	else if (r->control != '\0')
4377 		return 0;
4378 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4379 		pos += 2;
4380 	else if ('.' == cp[pos] || '\'' == cp[pos])
4381 		pos++;
4382 	else
4383 		return 0;
4384 
4385 	while (' ' == cp[pos] || '\t' == cp[pos])
4386 		pos++;
4387 
4388 	*ppos = pos;
4389 	return 1;
4390 }
4391