xref: /illumos-gate/usr/src/cmd/mandoc/roff.c (revision 371584c2eae4cf827fd406ba26c14f021adaaa70)
1 /*	$Id: roff.c,v 1.284 2016/01/08 17:48:10 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35 
/*
 * Upper bound on the number of string expansions performed on a single
 * input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
38 
39 /* --- data types --------------------------------------------------------- */
40 
/*
 * Tokens for every roff request known to this parser.
 *
 * The order of the enumerators is significant: roffs[] is initialised
 * positionally in exactly this order, and roffhash_find() converts a
 * pointer into roffs[] back into a token by subtracting the array base.
 * The comments between enumerators name requests that have no token
 * here.
 */
41 enum	rofft {
42 	ROFF_ab,
43 	ROFF_ad,
44 	ROFF_af,
45 	ROFF_aln,
46 	ROFF_als,
47 	ROFF_am,
48 	ROFF_am1,
49 	ROFF_ami,
50 	ROFF_ami1,
51 	ROFF_as,
52 	ROFF_as1,
53 	ROFF_asciify,
54 	ROFF_backtrace,
55 	ROFF_bd,
56 	ROFF_bleedat,
57 	ROFF_blm,
58 	ROFF_box,
59 	ROFF_boxa,
60 	ROFF_bp,
61 	ROFF_BP,
62 	/* MAN_br, MDOC_br */
63 	ROFF_break,
64 	ROFF_breakchar,
65 	ROFF_brnl,
66 	ROFF_brp,
67 	ROFF_brpnl,
68 	ROFF_c2,
69 	ROFF_cc,
70 	ROFF_ce,
71 	ROFF_cf,
72 	ROFF_cflags,
73 	ROFF_ch,
74 	ROFF_char,
75 	ROFF_chop,
76 	ROFF_class,
77 	ROFF_close,
78 	ROFF_CL,
79 	ROFF_color,
80 	ROFF_composite,
81 	ROFF_continue,
82 	ROFF_cp,
83 	ROFF_cropat,
84 	ROFF_cs,
85 	ROFF_cu,
86 	ROFF_da,
87 	ROFF_dch,
88 	ROFF_Dd,
89 	ROFF_de,
90 	ROFF_de1,
91 	ROFF_defcolor,
92 	ROFF_dei,
93 	ROFF_dei1,
94 	ROFF_device,
95 	ROFF_devicem,
96 	ROFF_di,
97 	ROFF_do,
98 	ROFF_ds,
99 	ROFF_ds1,
100 	ROFF_dwh,
101 	ROFF_dt,
102 	ROFF_ec,
103 	ROFF_ecr,
104 	ROFF_ecs,
105 	ROFF_el,
106 	ROFF_em,
107 	ROFF_EN,
108 	ROFF_eo,
109 	ROFF_EP,
110 	ROFF_EQ,
111 	ROFF_errprint,
112 	ROFF_ev,
113 	ROFF_evc,
114 	ROFF_ex,
115 	ROFF_fallback,
116 	ROFF_fam,
117 	ROFF_fc,
118 	ROFF_fchar,
119 	ROFF_fcolor,
120 	ROFF_fdeferlig,
121 	ROFF_feature,
122 	/* MAN_fi; ignored in mdoc(7) */
123 	ROFF_fkern,
124 	ROFF_fl,
125 	ROFF_flig,
126 	ROFF_fp,
127 	ROFF_fps,
128 	ROFF_fschar,
129 	ROFF_fspacewidth,
130 	ROFF_fspecial,
131 	/* MAN_ft; ignored in mdoc(7) */
132 	ROFF_ftr,
133 	ROFF_fzoom,
134 	ROFF_gcolor,
135 	ROFF_hc,
136 	ROFF_hcode,
137 	ROFF_hidechar,
138 	ROFF_hla,
139 	ROFF_hlm,
140 	ROFF_hpf,
141 	ROFF_hpfa,
142 	ROFF_hpfcode,
143 	ROFF_hw,
144 	ROFF_hy,
145 	ROFF_hylang,
146 	ROFF_hylen,
147 	ROFF_hym,
148 	ROFF_hypp,
149 	ROFF_hys,
150 	ROFF_ie,
151 	ROFF_if,
152 	ROFF_ig,
153 	/* MAN_in; ignored in mdoc(7) */
154 	ROFF_index,
155 	ROFF_it,
156 	ROFF_itc,
157 	ROFF_IX,
158 	ROFF_kern,
159 	ROFF_kernafter,
160 	ROFF_kernbefore,
161 	ROFF_kernpair,
162 	ROFF_lc,
163 	ROFF_lc_ctype,
164 	ROFF_lds,
165 	ROFF_length,
166 	ROFF_letadj,
167 	ROFF_lf,
168 	ROFF_lg,
169 	ROFF_lhang,
170 	ROFF_linetabs,
171 	/* MAN_ll, MDOC_ll */
172 	ROFF_lnr,
173 	ROFF_lnrf,
174 	ROFF_lpfx,
175 	ROFF_ls,
176 	ROFF_lsm,
177 	ROFF_lt,
178 	ROFF_mc,
179 	ROFF_mediasize,
180 	ROFF_minss,
181 	ROFF_mk,
182 	ROFF_mso,
183 	ROFF_na,
184 	ROFF_ne,
185 	/* MAN_nf; ignored in mdoc(7) */
186 	ROFF_nh,
187 	ROFF_nhychar,
188 	ROFF_nm,
189 	ROFF_nn,
190 	ROFF_nop,
191 	ROFF_nr,
192 	ROFF_nrf,
193 	ROFF_nroff,
194 	ROFF_ns,
195 	ROFF_nx,
196 	ROFF_open,
197 	ROFF_opena,
198 	ROFF_os,
199 	ROFF_output,
200 	ROFF_padj,
201 	ROFF_papersize,
202 	ROFF_pc,
203 	ROFF_pev,
204 	ROFF_pi,
205 	ROFF_PI,
206 	ROFF_pl,
207 	ROFF_pm,
208 	ROFF_pn,
209 	ROFF_pnr,
210 	ROFF_po,
211 	ROFF_ps,
212 	ROFF_psbb,
213 	ROFF_pshape,
214 	ROFF_pso,
215 	ROFF_ptr,
216 	ROFF_pvs,
217 	ROFF_rchar,
218 	ROFF_rd,
219 	ROFF_recursionlimit,
220 	ROFF_return,
221 	ROFF_rfschar,
222 	ROFF_rhang,
223 	ROFF_rj,
224 	ROFF_rm,
225 	ROFF_rn,
226 	ROFF_rnn,
227 	ROFF_rr,
228 	ROFF_rs,
229 	ROFF_rt,
230 	ROFF_schar,
231 	ROFF_sentchar,
232 	ROFF_shc,
233 	ROFF_shift,
234 	ROFF_sizes,
235 	ROFF_so,
236 	/* MAN_sp, MDOC_sp */
237 	ROFF_spacewidth,
238 	ROFF_special,
239 	ROFF_spreadwarn,
240 	ROFF_ss,
241 	ROFF_sty,
242 	ROFF_substring,
243 	ROFF_sv,
244 	ROFF_sy,
245 	ROFF_T_,
246 	ROFF_ta,
247 	ROFF_tc,
248 	ROFF_TE,
249 	ROFF_TH,
250 	ROFF_ti,
251 	ROFF_tkf,
252 	ROFF_tl,
253 	ROFF_tm,
254 	ROFF_tm1,
255 	ROFF_tmc,
256 	ROFF_tr,
257 	ROFF_track,
258 	ROFF_transchar,
259 	ROFF_trf,
260 	ROFF_trimat,
261 	ROFF_trin,
262 	ROFF_trnt,
263 	ROFF_troff,
264 	ROFF_TS,
265 	ROFF_uf,
266 	ROFF_ul,
267 	ROFF_unformat,
268 	ROFF_unwatch,
269 	ROFF_unwatchn,
270 	ROFF_vpt,
271 	ROFF_vs,
272 	ROFF_warn,
273 	ROFF_warnscale,
274 	ROFF_watch,
275 	ROFF_watchlength,
276 	ROFF_watchn,
277 	ROFF_wh,
278 	ROFF_while,
279 	ROFF_write,
280 	ROFF_writec,
281 	ROFF_writem,
282 	ROFF_xflag,
283 	ROFF_cblock, /* the "." entry of roffs[], handled by roff_cblock() */
284 	ROFF_USERDEF, /* nameless roffs[] entry, handled by roff_userdef() */
285 	ROFF_MAX /* size of roffs[]; also roffhash_find()'s "not found" */
286 };
287 
/*
 * A minimal string buffer: the text together with its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated text */
	size_t		 sz;	/* cached strlen(p) */
};
295 
296 /*
297  * A key-value roffstr pair as part of a singly-linked list.
298  */
299 struct	roffkv {
300 	struct roffstr	 key;
301 	struct roffstr	 val;
302 	struct roffkv	*next; /* next in list */
303 };
304 
305 /*
306  * A single number register as part of a singly-linked list.
307  */
308 struct	roffreg {
309 	struct roffstr	 key;
310 	int		 val;
311 	struct roffreg	*next;
312 };
313 
/*
 * State of one roff parser, created by roff_alloc() and released
 * by roff_free().
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roffnode	*last;		/* leaf of the block stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* equation currently being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
};
337 
338 struct	roffnode {
339 	enum rofft	 tok; /* type of node */
340 	struct roffnode	*parent; /* up one in stack */
341 	int		 line; /* parse line */
342 	int		 col; /* parse col */
343 	char		*name; /* node name, e.g. macro name */
344 	char		*end; /* end-rules: custom token */
345 	int		 endspan; /* end-rules: next-line or infty */
346 	int		 rule; /* current evaluation rule */
347 };
348 
/*
 * Parameter list shared by all request handlers, followed by the
 * handler function pointer type built from it.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	struct buf *buf,	/* input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
358 
359 struct	roffmac {
360 	const char	*name; /* macro name */
361 	roffproc	 proc; /* process new macro */
362 	roffproc	 text; /* process as child text of macro */
363 	roffproc	 sub; /* process as child of macro */
364 	int		 flags;
365 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
366 	struct roffmac	*next;
367 };
368 
/*
 * A predefined string: the input name and the text replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initialiser, trailing comma included. */
#define	PREDEF(pname, pstr) \
	{ (pname), (pstr) },
376 
377 /* --- function prototypes ------------------------------------------------ */
378 
379 static	enum rofft	 roffhash_find(const char *, size_t);
380 static	void		 roffhash_init(void);
381 static	void		 roffnode_cleanscope(struct roff *);
382 static	void		 roffnode_pop(struct roff *);
383 static	void		 roffnode_push(struct roff *, enum rofft,
384 				const char *, int, int);
385 static	enum rofferr	 roff_block(ROFF_ARGS);
386 static	enum rofferr	 roff_block_text(ROFF_ARGS);
387 static	enum rofferr	 roff_block_sub(ROFF_ARGS);
388 static	enum rofferr	 roff_brp(ROFF_ARGS);
389 static	enum rofferr	 roff_cblock(ROFF_ARGS);
390 static	enum rofferr	 roff_cc(ROFF_ARGS);
391 static	void		 roff_ccond(struct roff *, int, int);
392 static	enum rofferr	 roff_cond(ROFF_ARGS);
393 static	enum rofferr	 roff_cond_text(ROFF_ARGS);
394 static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
395 static	enum rofferr	 roff_ds(ROFF_ARGS);
396 static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
397 static	int		 roff_evalcond(struct roff *r, int, char *, int *);
398 static	int		 roff_evalnum(struct roff *, int,
399 				const char *, int *, int *, int);
400 static	int		 roff_evalpar(struct roff *, int,
401 				const char *, int *, int *, int);
402 static	int		 roff_evalstrcond(const char *, int *);
403 static	void		 roff_free1(struct roff *);
404 static	void		 roff_freereg(struct roffreg *);
405 static	void		 roff_freestr(struct roffkv *);
406 static	size_t		 roff_getname(struct roff *, char **, int, int);
407 static	int		 roff_getnum(const char *, int *, int *, int);
408 static	int		 roff_getop(const char *, int *, char *);
409 static	int		 roff_getregn(const struct roff *,
410 				const char *, size_t);
411 static	int		 roff_getregro(const struct roff *,
412 				const char *name);
413 static	const char	*roff_getstrn(const struct roff *,
414 				const char *, size_t);
415 static	int		 roff_hasregn(const struct roff *,
416 				const char *, size_t);
417 static	enum rofferr	 roff_insec(ROFF_ARGS);
418 static	enum rofferr	 roff_it(ROFF_ARGS);
419 static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
420 static	void		 roff_man_alloc1(struct roff_man *);
421 static	void		 roff_man_free1(struct roff_man *);
422 static	enum rofferr	 roff_nr(ROFF_ARGS);
423 static	enum rofft	 roff_parse(struct roff *, char *, int *,
424 				int, int);
425 static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
426 static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
427 static	enum rofferr	 roff_rm(ROFF_ARGS);
428 static	enum rofferr	 roff_rr(ROFF_ARGS);
429 static	void		 roff_setstr(struct roff *,
430 				const char *, const char *, int);
431 static	void		 roff_setstrn(struct roffkv **, const char *,
432 				size_t, const char *, size_t, int);
433 static	enum rofferr	 roff_so(ROFF_ARGS);
434 static	enum rofferr	 roff_tr(ROFF_ARGS);
435 static	enum rofferr	 roff_Dd(ROFF_ARGS);
436 static	enum rofferr	 roff_TH(ROFF_ARGS);
437 static	enum rofferr	 roff_TE(ROFF_ARGS);
438 static	enum rofferr	 roff_TS(ROFF_ARGS);
439 static	enum rofferr	 roff_EQ(ROFF_ARGS);
440 static	enum rofferr	 roff_EN(ROFF_ARGS);
441 static	enum rofferr	 roff_T_(ROFF_ARGS);
442 static	enum rofferr	 roff_unsupp(ROFF_ARGS);
443 static	enum rofferr	 roff_userdef(ROFF_ARGS);
444 
445 /* --- constant data ------------------------------------------------------ */
446 
/*
 * The request hash table is keyed on the first character of the
 * request name, which must lie in [ASCII_LO, ASCII_HI]; there is one
 * bucket per character in that range.  See roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Flag bits accepted by the numeric parsing functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/* Bucket heads of the request hash table. */
static	struct roffmac	*hash[HASHWIDTH];
457 
/*
 * Table of all known requests, listed in the same order as enum rofft:
 * roffhash_find() turns a pointer into this array back into a token by
 * subtracting the array base, so entries must never be reordered
 * independently of the enum.
 *
 * Fields of each entry: name, proc, text, sub, flags, next.
 * The next field starts out NULL and is filled in by roffhash_init()
 * when it chains entries into hash[].  The last entry, ROFF_USERDEF,
 * has no name and is not entered into the hash table.
 */
458 static	struct roffmac	 roffs[ROFF_MAX] = {
459 	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
460 	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
461 	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
462 	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
463 	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
464 	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
465 	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
466 	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
467 	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
468 	{ "as", roff_ds, NULL, NULL, 0, NULL },
469 	{ "as1", roff_ds, NULL, NULL, 0, NULL },
470 	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
471 	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
472 	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
473 	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
474 	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
475 	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
476 	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
477 	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
478 	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
479 	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
480 	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
481 	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
482 	{ "brp", roff_brp, NULL, NULL, 0, NULL },
483 	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
484 	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
485 	{ "cc", roff_cc, NULL, NULL, 0, NULL },
486 	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
487 	{ "cf", roff_insec, NULL, NULL, 0, NULL },
488 	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
489 	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
490 	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
491 	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
492 	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
493 	{ "close", roff_insec, NULL, NULL, 0, NULL },
494 	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
495 	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
496 	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
497 	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
498 	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
499 	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
500 	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
501 	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
502 	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
503 	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
504 	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
505 	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
506 	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
507 	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
508 	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
509 	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
510 	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
511 	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
512 	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
513 	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
514 	{ "ds", roff_ds, NULL, NULL, 0, NULL },
515 	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
516 	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
517 	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
518 	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
519 	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
520 	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
521 	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
522 	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
523 	{ "EN", roff_EN, NULL, NULL, 0, NULL },
524 	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
525 	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
526 	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
527 	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
528 	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
529 	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
530 	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
531 	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
532 	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
533 	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
534 	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
535 	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
536 	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
537 	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
538 	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
539 	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
540 	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
541 	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
542 	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
543 	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
544 	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
545 	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
546 	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
547 	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
548 	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
549 	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
550 	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
551 	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
552 	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
553 	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
554 	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
555 	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
556 	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
557 	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
558 	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
559 	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
560 	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
561 	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
562 	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
563 	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
564 	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
565 	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
566 	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
567 	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
568 	{ "it", roff_it, NULL, NULL, 0, NULL },
569 	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
570 	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
571 	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
572 	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
573 	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
574 	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
575 	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
576 	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
577 	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
578 	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
579 	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
580 	{ "lf", roff_insec, NULL, NULL, 0, NULL },
581 	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
582 	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
583 	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
584 	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
585 	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
586 	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
587 	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
588 	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
589 	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
590 	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
591 	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
592 	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
593 	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
594 	{ "mso", roff_insec, NULL, NULL, 0, NULL },
595 	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
596 	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
597 	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
598 	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
599 	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
600 	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
601 	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
602 	{ "nr", roff_nr, NULL, NULL, 0, NULL },
603 	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
604 	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
605 	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
606 	{ "nx", roff_insec, NULL, NULL, 0, NULL },
607 	{ "open", roff_insec, NULL, NULL, 0, NULL },
608 	{ "opena", roff_insec, NULL, NULL, 0, NULL },
609 	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
610 	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
611 	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
612 	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
613 	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
614 	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
615 	{ "pi", roff_insec, NULL, NULL, 0, NULL },
616 	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
617 	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
618 	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
619 	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
620 	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
621 	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
622 	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
623 	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
624 	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
625 	{ "pso", roff_insec, NULL, NULL, 0, NULL },
626 	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
627 	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
628 	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
629 	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
630 	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
631 	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
632 	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
633 	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
634 	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
635 	{ "rm", roff_rm, NULL, NULL, 0, NULL },
636 	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
637 	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
638 	{ "rr", roff_rr, NULL, NULL, 0, NULL },
639 	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
640 	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
641 	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
642 	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
643 	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
644 	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
645 	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
646 	{ "so", roff_so, NULL, NULL, 0, NULL },
647 	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
648 	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
649 	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
650 	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
651 	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
652 	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
653 	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
654 	{ "sy", roff_insec, NULL, NULL, 0, NULL },
655 	{ "T&", roff_T_, NULL, NULL, 0, NULL },
656 	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
657 	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
658 	{ "TE", roff_TE, NULL, NULL, 0, NULL },
659 	{ "TH", roff_TH, NULL, NULL, 0, NULL },
660 	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
661 	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
662 	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
663 	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
664 	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
665 	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
666 	{ "tr", roff_tr, NULL, NULL, 0, NULL },
667 	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
668 	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
669 	{ "trf", roff_insec, NULL, NULL, 0, NULL },
670 	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
671 	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
672 	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
673 	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
674 	{ "TS", roff_TS, NULL, NULL, 0, NULL },
675 	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
676 	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
677 	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
678 	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
679 	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
680 	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
681 	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
682 	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
683 	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
684 	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
685 	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
686 	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
687 	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
688 	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
689 	{ "write", roff_insec, NULL, NULL, 0, NULL },
690 	{ "writec", roff_insec, NULL, NULL, 0, NULL },
691 	{ "writem", roff_insec, NULL, NULL, 0, NULL },
692 	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
693 	{ ".", roff_cblock, NULL, NULL, 0, NULL },
694 	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
695 };
696 
/*
 * NULL-terminated list of mdoc(7) macro names.  The array has external
 * linkage; its consumers are not visible in this part of the file.
 * NOTE(review): presumably used to recognise names that must not be
 * treated as ordinary user-defined macros - confirm against callers.
 */
697 /* Macro names not listed below: Ds em Eq LP Me PP pp Or Rd Sf SH */
698 const	char *const __mdoc_reserved[] = {
699 	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
700 	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
701 	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
702 	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
703 	"Dt", "Dv", "Dx", "D1",
704 	"Ec", "Ed", "Ef", "Ek", "El", "Em",
705 	"En", "Eo", "Er", "Es", "Ev", "Ex",
706 	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
707 	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
708 	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
709 	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
710 	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
711 	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
712 	"Sc", "Sh", "Sm", "So", "Sq",
713 	"Ss", "St", "Sx", "Sy",
714 	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
715 	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
716 	"%P", "%Q", "%R", "%T", "%U", "%V",
717 	NULL /* end-of-list marker */
718 };
719 
/*
 * NULL-terminated list of man(7) macro names.  The array has external
 * linkage; its consumers are not visible in this part of the file.
 * NOTE(review): presumably used to recognise names that must not be
 * treated as ordinary user-defined macros - confirm against callers.
 */
720 /* Macro names not listed below: BT DE DS ME MT PT SY TQ YS */
721 const	char *const __man_reserved[] = {
722 	"AT", "B", "BI", "BR", "DT",
723 	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
724 	"LP", "OP", "P", "PD", "PP",
725 	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
726 	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
727 	NULL /* end-of-list marker */
728 };
729 
730 /* Array of injected predefined strings. */
731 #define	PREDEFS_MAX	 38
732 static	const struct predef predefs[PREDEFS_MAX] = {
733 #include "predefs.in"
734 };
735 
/*
 * Bucket index for the request name p: the offset of its first
 * character from ASCII_LO.  The caller must make sure that p[0] lies
 * within [ASCII_LO, ASCII_HI]; see roffhash_find().
 * The argument is parenthesised so that expressions such as
 * ROFF_HASH(s + 1) index the intended pointer.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
738 
/* NOTE(review): presumably the state of a pending `it' request - confirm. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* NUL-terminated macro line */
741 
742 
743 /* --- request table ------------------------------------------------------ */
744 
745 static void
746 roffhash_init(void)
747 {
748 	struct roffmac	 *n;
749 	int		  buc, i;
750 
751 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
752 		assert(roffs[i].name[0] >= ASCII_LO);
753 		assert(roffs[i].name[0] <= ASCII_HI);
754 
755 		buc = ROFF_HASH(roffs[i].name);
756 
757 		if (NULL != (n = hash[buc])) {
758 			for ( ; n->next; n = n->next)
759 				/* Do nothing. */ ;
760 			n->next = &roffs[i];
761 		} else
762 			hash[buc] = &roffs[i];
763 	}
764 }
765 
766 /*
767  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
768  * the nil-terminated string name could be found.
769  */
770 static enum rofft
771 roffhash_find(const char *p, size_t s)
772 {
773 	int		 buc;
774 	struct roffmac	*n;
775 
776 	/*
777 	 * libroff has an extremely simple hashtable, for the time
778 	 * being, which simply keys on the first character, which must
779 	 * be printable, then walks a chain.  It works well enough until
780 	 * optimised.
781 	 */
782 
783 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
784 		return ROFF_MAX;
785 
786 	buc = ROFF_HASH(p);
787 
788 	if (NULL == (n = hash[buc]))
789 		return ROFF_MAX;
790 	for ( ; n; n = n->next)
791 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
792 			return (enum rofft)(n - roffs);
793 
794 	return ROFF_MAX;
795 }
796 
797 /* --- stack of request blocks -------------------------------------------- */
798 
799 /*
800  * Pop the current node off of the stack of roff instructions currently
801  * pending.
802  */
803 static void
804 roffnode_pop(struct roff *r)
805 {
806 	struct roffnode	*p;
807 
808 	assert(r->last);
809 	p = r->last;
810 
811 	r->last = r->last->parent;
812 	free(p->name);
813 	free(p->end);
814 	free(p);
815 }
816 
817 /*
818  * Push a roff node onto the instruction stack.  This must later be
819  * removed with roffnode_pop().
820  */
821 static void
822 roffnode_push(struct roff *r, enum rofft tok, const char *name,
823 		int line, int col)
824 {
825 	struct roffnode	*p;
826 
827 	p = mandoc_calloc(1, sizeof(struct roffnode));
828 	p->tok = tok;
829 	if (name)
830 		p->name = mandoc_strdup(name);
831 	p->parent = r->last;
832 	p->line = line;
833 	p->col = col;
834 	p->rule = p->parent ? p->parent->rule : 0;
835 
836 	r->last = p;
837 }
838 
839 /* --- roff parser state data management ---------------------------------- */
840 
841 static void
842 roff_free1(struct roff *r)
843 {
844 	struct tbl_node	*tbl;
845 	struct eqn_node	*e;
846 	int		 i;
847 
848 	while (NULL != (tbl = r->first_tbl)) {
849 		r->first_tbl = tbl->next;
850 		tbl_free(tbl);
851 	}
852 	r->first_tbl = r->last_tbl = r->tbl = NULL;
853 
854 	while (NULL != (e = r->first_eqn)) {
855 		r->first_eqn = e->next;
856 		eqn_free(e);
857 	}
858 	r->first_eqn = r->last_eqn = r->eqn = NULL;
859 
860 	while (r->last)
861 		roffnode_pop(r);
862 
863 	free (r->rstack);
864 	r->rstack = NULL;
865 	r->rstacksz = 0;
866 	r->rstackpos = -1;
867 
868 	roff_freereg(r->regtab);
869 	r->regtab = NULL;
870 
871 	roff_freestr(r->strtab);
872 	roff_freestr(r->xmbtab);
873 	r->strtab = r->xmbtab = NULL;
874 
875 	if (r->xtab)
876 		for (i = 0; i < 128; i++)
877 			free(r->xtab[i].p);
878 	free(r->xtab);
879 	r->xtab = NULL;
880 }
881 
882 void
883 roff_reset(struct roff *r)
884 {
885 
886 	roff_free1(r);
887 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
888 	r->control = 0;
889 }
890 
/*
 * Destroy a parser: release all of its accumulated data and then the
 * struct roff itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* contents first ... */
	free(r);	/* ... then the container */
}
898 
899 struct roff *
900 roff_alloc(struct mparse *parse, int options)
901 {
902 	struct roff	*r;
903 
904 	r = mandoc_calloc(1, sizeof(struct roff));
905 	r->parse = parse;
906 	r->options = options;
907 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
908 	r->rstackpos = -1;
909 
910 	roffhash_init();
911 
912 	return r;
913 }
914 
915 /* --- syntax tree state data management ---------------------------------- */
916 
917 static void
918 roff_man_free1(struct roff_man *man)
919 {
920 
921 	if (man->first != NULL)
922 		roff_node_delete(man, man->first);
923 	free(man->meta.msec);
924 	free(man->meta.vol);
925 	free(man->meta.os);
926 	free(man->meta.arch);
927 	free(man->meta.title);
928 	free(man->meta.name);
929 	free(man->meta.date);
930 }
931 
932 static void
933 roff_man_alloc1(struct roff_man *man)
934 {
935 
936 	memset(&man->meta, 0, sizeof(man->meta));
937 	man->first = mandoc_calloc(1, sizeof(*man->first));
938 	man->first->type = ROFFT_ROOT;
939 	man->last = man->first;
940 	man->last_es = NULL;
941 	man->flags = 0;
942 	man->macroset = MACROSET_NONE;
943 	man->lastsec = man->lastnamed = SEC_NONE;
944 	man->next = ROFF_NEXT_CHILD;
945 }
946 
/*
 * Discard the current tree and metadata of man and set it up again
 * in its initial parse state, ready for another document.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old contents */
	roff_man_alloc1(man);	/* start over with an empty tree */
}
954 
/*
 * Destroy man: release its tree and metadata and then the
 * struct roff_man itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* contents first ... */
	free(man);		/* ... then the container */
}
962 
963 struct roff_man *
964 roff_man_alloc(struct roff *roff, struct mparse *parse,
965 	const char *defos, int quick)
966 {
967 	struct roff_man *man;
968 
969 	man = mandoc_calloc(1, sizeof(*man));
970 	man->parse = parse;
971 	man->roff = roff;
972 	man->defos = defos;
973 	man->quick = quick;
974 	roff_man_alloc1(man);
975 	return man;
976 }
977 
978 /* --- syntax tree handling ----------------------------------------------- */
979 
980 struct roff_node *
981 roff_node_alloc(struct roff_man *man, int line, int pos,
982 	enum roff_type type, int tok)
983 {
984 	struct roff_node	*n;
985 
986 	n = mandoc_calloc(1, sizeof(*n));
987 	n->line = line;
988 	n->pos = pos;
989 	n->tok = tok;
990 	n->type = type;
991 	n->sec = man->lastsec;
992 
993 	if (man->flags & MDOC_SYNOPSIS)
994 		n->flags |= MDOC_SYNPRETTY;
995 	else
996 		n->flags &= ~MDOC_SYNPRETTY;
997 	if (man->flags & MDOC_NEWLINE)
998 		n->flags |= MDOC_LINE;
999 	man->flags &= ~MDOC_NEWLINE;
1000 
1001 	return n;
1002 }
1003 
1004 void
1005 roff_node_append(struct roff_man *man, struct roff_node *n)
1006 {
1007 
1008 	switch (man->next) {
1009 	case ROFF_NEXT_SIBLING:
1010 		if (man->last->next != NULL) {
1011 			n->next = man->last->next;
1012 			man->last->next->prev = n;
1013 		} else
1014 			man->last->parent->last = n;
1015 		man->last->next = n;
1016 		n->prev = man->last;
1017 		n->parent = man->last->parent;
1018 		break;
1019 	case ROFF_NEXT_CHILD:
1020 		man->last->child = n;
1021 		n->parent = man->last;
1022 		n->parent->last = n;
1023 		break;
1024 	default:
1025 		abort();
1026 	}
1027 	man->last = n;
1028 
1029 	switch (n->type) {
1030 	case ROFFT_HEAD:
1031 		n->parent->head = n;
1032 		break;
1033 	case ROFFT_BODY:
1034 		if (n->end != ENDBODY_NOT)
1035 			return;
1036 		n->parent->body = n;
1037 		break;
1038 	case ROFFT_TAIL:
1039 		n->parent->tail = n;
1040 		break;
1041 	default:
1042 		return;
1043 	}
1044 
1045 	/*
1046 	 * Copy over the normalised-data pointer of our parent.  Not
1047 	 * everybody has one, but copying a null pointer is fine.
1048 	 */
1049 
1050 	n->norm = n->parent->norm;
1051 	assert(n->parent->type == ROFFT_BLOCK);
1052 }
1053 
1054 void
1055 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1056 {
1057 	struct roff_node	*n;
1058 
1059 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1060 	n->string = roff_strdup(man->roff, word);
1061 	roff_node_append(man, n);
1062 	if (man->macroset == MACROSET_MDOC)
1063 		n->flags |= MDOC_VALID | MDOC_ENDED;
1064 	else
1065 		n->flags |= MAN_VALID;
1066 	man->next = ROFF_NEXT_SIBLING;
1067 }
1068 
1069 void
1070 roff_word_append(struct roff_man *man, const char *word)
1071 {
1072 	struct roff_node	*n;
1073 	char			*addstr, *newstr;
1074 
1075 	n = man->last;
1076 	addstr = roff_strdup(man->roff, word);
1077 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1078 	free(addstr);
1079 	free(n->string);
1080 	n->string = newstr;
1081 	man->next = ROFF_NEXT_SIBLING;
1082 }
1083 
1084 void
1085 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1086 {
1087 	struct roff_node	*n;
1088 
1089 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1090 	roff_node_append(man, n);
1091 	man->next = ROFF_NEXT_CHILD;
1092 }
1093 
1094 struct roff_node *
1095 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1096 {
1097 	struct roff_node	*n;
1098 
1099 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1100 	roff_node_append(man, n);
1101 	man->next = ROFF_NEXT_CHILD;
1102 	return n;
1103 }
1104 
1105 struct roff_node *
1106 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1107 {
1108 	struct roff_node	*n;
1109 
1110 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1111 	roff_node_append(man, n);
1112 	man->next = ROFF_NEXT_CHILD;
1113 	return n;
1114 }
1115 
1116 struct roff_node *
1117 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1118 {
1119 	struct roff_node	*n;
1120 
1121 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1122 	roff_node_append(man, n);
1123 	man->next = ROFF_NEXT_CHILD;
1124 	return n;
1125 }
1126 
1127 void
1128 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1129 {
1130 	struct roff_node	*n;
1131 
1132 	n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1133 	n->eqn = eqn;
1134 	if (eqn->ln > man->last->line)
1135 		n->flags |= MDOC_LINE;
1136 	roff_node_append(man, n);
1137 	man->next = ROFF_NEXT_SIBLING;
1138 }
1139 
1140 void
1141 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1142 {
1143 	struct roff_node	*n;
1144 
1145 	if (man->macroset == MACROSET_MAN)
1146 		man_breakscope(man, TOKEN_NONE);
1147 	n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1148 	n->span = tbl;
1149 	roff_node_append(man, n);
1150 	if (man->macroset == MACROSET_MDOC)
1151 		n->flags |= MDOC_VALID | MDOC_ENDED;
1152 	else
1153 		n->flags |= MAN_VALID;
1154 	man->next = ROFF_NEXT_SIBLING;
1155 }
1156 
1157 void
1158 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1159 {
1160 
1161 	/* Adjust siblings. */
1162 
1163 	if (n->prev)
1164 		n->prev->next = n->next;
1165 	if (n->next)
1166 		n->next->prev = n->prev;
1167 
1168 	/* Adjust parent. */
1169 
1170 	if (n->parent != NULL) {
1171 		if (n->parent->child == n)
1172 			n->parent->child = n->next;
1173 		if (n->parent->last == n)
1174 			n->parent->last = n->prev;
1175 	}
1176 
1177 	/* Adjust parse point. */
1178 
1179 	if (man == NULL)
1180 		return;
1181 	if (man->last == n) {
1182 		if (n->prev == NULL) {
1183 			man->last = n->parent;
1184 			man->next = ROFF_NEXT_CHILD;
1185 		} else {
1186 			man->last = n->prev;
1187 			man->next = ROFF_NEXT_SIBLING;
1188 		}
1189 	}
1190 	if (man->first == n)
1191 		man->first = NULL;
1192 }
1193 
1194 void
1195 roff_node_free(struct roff_node *n)
1196 {
1197 
1198 	if (n->args != NULL)
1199 		mdoc_argv_free(n->args);
1200 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1201 		free(n->norm);
1202 	free(n->string);
1203 	free(n);
1204 }
1205 
1206 void
1207 roff_node_delete(struct roff_man *man, struct roff_node *n)
1208 {
1209 
1210 	while (n->child != NULL)
1211 		roff_node_delete(man, n->child);
1212 	roff_node_unlink(man, n);
1213 	roff_node_free(n);
1214 }
1215 
1216 void
1217 deroff(char **dest, const struct roff_node *n)
1218 {
1219 	char	*cp;
1220 	size_t	 sz;
1221 
1222 	if (n->type != ROFFT_TEXT) {
1223 		for (n = n->child; n != NULL; n = n->next)
1224 			deroff(dest, n);
1225 		return;
1226 	}
1227 
1228 	/* Skip leading whitespace and escape sequences. */
1229 
1230 	cp = n->string;
1231 	while (*cp != '\0') {
1232 		if ('\\' == *cp) {
1233 			cp++;
1234 			mandoc_escape((const char **)&cp, NULL, NULL);
1235 		} else if (isspace((unsigned char)*cp))
1236 			cp++;
1237 		else
1238 			break;
1239 	}
1240 
1241 	/* Skip trailing whitespace. */
1242 
1243 	for (sz = strlen(cp); sz; sz--)
1244 		if ( ! isspace((unsigned char)cp[sz-1]))
1245 			break;
1246 
1247 	/* Skip empty strings. */
1248 
1249 	if (sz == 0)
1250 		return;
1251 
1252 	if (*dest == NULL) {
1253 		*dest = mandoc_strndup(cp, sz);
1254 		return;
1255 	}
1256 
1257 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1258 	free(*dest);
1259 	*dest = cp;
1260 }
1261 
1262 /* --- main functions of the roff parser ---------------------------------- */
1263 
1264 /*
1265  * In the current line, expand escape sequences that tend to get
1266  * used in numerical expressions and conditional requests.
1267  * Also check the syntax of the remaining escape sequences.
1268  */
1269 static enum rofferr
1270 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1271 {
1272 	char		 ubuf[24]; /* buffer to print the number */
1273 	const char	*start;	/* start of the string to process */
1274 	char		*stesc;	/* start of an escape sequence ('\\') */
1275 	const char	*stnam;	/* start of the name, after "[(*" */
1276 	const char	*cp;	/* end of the name, e.g. before ']' */
1277 	const char	*res;	/* the string to be substituted */
1278 	char		*nbuf;	/* new buffer to copy buf->buf to */
1279 	size_t		 maxl;  /* expected length of the escape name */
1280 	size_t		 naml;	/* actual length of the escape name */
1281 	enum mandoc_esc	 esc;	/* type of the escape sequence */
1282 	int		 inaml;	/* length returned from mandoc_escape() */
1283 	int		 expand_count;	/* to avoid infinite loops */
1284 	int		 npos;	/* position in numeric expression */
1285 	int		 arg_complete; /* argument not interrupted by eol */
1286 	char		 term;	/* character terminating the escape */
1287 
1288 	expand_count = 0;
1289 	start = buf->buf + pos;
1290 	stesc = strchr(start, '\0') - 1;
1291 	while (stesc-- > start) {
1292 
1293 		/* Search backwards for the next backslash. */
1294 
1295 		if (*stesc != '\\')
1296 			continue;
1297 
1298 		/* If it is escaped, skip it. */
1299 
1300 		for (cp = stesc - 1; cp >= start; cp--)
1301 			if (*cp != '\\')
1302 				break;
1303 
1304 		if ((stesc - cp) % 2 == 0) {
1305 			stesc = (char *)cp;
1306 			continue;
1307 		}
1308 
1309 		/* Decide whether to expand or to check only. */
1310 
1311 		term = '\0';
1312 		cp = stesc + 1;
1313 		switch (*cp) {
1314 		case '*':
1315 			res = NULL;
1316 			break;
1317 		case 'B':
1318 		case 'w':
1319 			term = cp[1];
1320 			/* FALLTHROUGH */
1321 		case 'n':
1322 			res = ubuf;
1323 			break;
1324 		default:
1325 			esc = mandoc_escape(&cp, &stnam, &inaml);
1326 			if (esc == ESCAPE_ERROR ||
1327 			    (esc == ESCAPE_SPECIAL &&
1328 			     mchars_spec2cp(stnam, inaml) < 0))
1329 				mandoc_vmsg(MANDOCERR_ESC_BAD,
1330 				    r->parse, ln, (int)(stesc - buf->buf),
1331 				    "%.*s", (int)(cp - stesc), stesc);
1332 			continue;
1333 		}
1334 
1335 		if (EXPAND_LIMIT < ++expand_count) {
1336 			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1337 			    ln, (int)(stesc - buf->buf), NULL);
1338 			return ROFF_IGN;
1339 		}
1340 
1341 		/*
1342 		 * The third character decides the length
1343 		 * of the name of the string or register.
1344 		 * Save a pointer to the name.
1345 		 */
1346 
1347 		if (term == '\0') {
1348 			switch (*++cp) {
1349 			case '\0':
1350 				maxl = 0;
1351 				break;
1352 			case '(':
1353 				cp++;
1354 				maxl = 2;
1355 				break;
1356 			case '[':
1357 				cp++;
1358 				term = ']';
1359 				maxl = 0;
1360 				break;
1361 			default:
1362 				maxl = 1;
1363 				break;
1364 			}
1365 		} else {
1366 			cp += 2;
1367 			maxl = 0;
1368 		}
1369 		stnam = cp;
1370 
1371 		/* Advance to the end of the name. */
1372 
1373 		naml = 0;
1374 		arg_complete = 1;
1375 		while (maxl == 0 || naml < maxl) {
1376 			if (*cp == '\0') {
1377 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1378 				    ln, (int)(stesc - buf->buf), stesc);
1379 				arg_complete = 0;
1380 				break;
1381 			}
1382 			if (maxl == 0 && *cp == term) {
1383 				cp++;
1384 				break;
1385 			}
1386 			if (*cp++ != '\\' || stesc[1] != 'w') {
1387 				naml++;
1388 				continue;
1389 			}
1390 			switch (mandoc_escape(&cp, NULL, NULL)) {
1391 			case ESCAPE_SPECIAL:
1392 			case ESCAPE_UNICODE:
1393 			case ESCAPE_NUMBERED:
1394 			case ESCAPE_OVERSTRIKE:
1395 				naml++;
1396 				break;
1397 			default:
1398 				break;
1399 			}
1400 		}
1401 
1402 		/*
1403 		 * Retrieve the replacement string; if it is
1404 		 * undefined, resume searching for escapes.
1405 		 */
1406 
1407 		switch (stesc[1]) {
1408 		case '*':
1409 			if (arg_complete)
1410 				res = roff_getstrn(r, stnam, naml);
1411 			break;
1412 		case 'B':
1413 			npos = 0;
1414 			ubuf[0] = arg_complete &&
1415 			    roff_evalnum(r, ln, stnam, &npos,
1416 			      NULL, ROFFNUM_SCALE) &&
1417 			    stnam + npos + 1 == cp ? '1' : '0';
1418 			ubuf[1] = '\0';
1419 			break;
1420 		case 'n':
1421 			if (arg_complete)
1422 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1423 				    roff_getregn(r, stnam, naml));
1424 			else
1425 				ubuf[0] = '\0';
1426 			break;
1427 		case 'w':
1428 			/* use even incomplete args */
1429 			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1430 			    24 * (int)naml);
1431 			break;
1432 		}
1433 
1434 		if (res == NULL) {
1435 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1436 			    r->parse, ln, (int)(stesc - buf->buf),
1437 			    "%.*s", (int)naml, stnam);
1438 			res = "";
1439 		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1440 			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1441 			    ln, (int)(stesc - buf->buf), NULL);
1442 			return ROFF_IGN;
1443 		}
1444 
1445 		/* Replace the escape sequence by the string. */
1446 
1447 		*stesc = '\0';
1448 		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1449 		    buf->buf, res, cp) + 1;
1450 
1451 		/* Prepare for the next replacement. */
1452 
1453 		start = nbuf + pos;
1454 		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1455 		free(buf->buf);
1456 		buf->buf = nbuf;
1457 	}
1458 	return ROFF_CONT;
1459 }
1460 
1461 /*
1462  * Process text streams.
1463  */
1464 static enum rofferr
1465 roff_parsetext(struct buf *buf, int pos, int *offs)
1466 {
1467 	size_t		 sz;
1468 	const char	*start;
1469 	char		*p;
1470 	int		 isz;
1471 	enum mandoc_esc	 esc;
1472 
1473 	/* Spring the input line trap. */
1474 
1475 	if (roffit_lines == 1) {
1476 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1477 		free(buf->buf);
1478 		buf->buf = p;
1479 		buf->sz = isz + 1;
1480 		*offs = 0;
1481 		free(roffit_macro);
1482 		roffit_lines = 0;
1483 		return ROFF_REPARSE;
1484 	} else if (roffit_lines > 1)
1485 		--roffit_lines;
1486 
1487 	/* Convert all breakable hyphens into ASCII_HYPH. */
1488 
1489 	start = p = buf->buf + pos;
1490 
1491 	while (*p != '\0') {
1492 		sz = strcspn(p, "-\\");
1493 		p += sz;
1494 
1495 		if (*p == '\0')
1496 			break;
1497 
1498 		if (*p == '\\') {
1499 			/* Skip over escapes. */
1500 			p++;
1501 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1502 			if (esc == ESCAPE_ERROR)
1503 				break;
1504 			while (*p == '-')
1505 				p++;
1506 			continue;
1507 		} else if (p == start) {
1508 			p++;
1509 			continue;
1510 		}
1511 
1512 		if (isalpha((unsigned char)p[-1]) &&
1513 		    isalpha((unsigned char)p[1]))
1514 			*p = ASCII_HYPH;
1515 		p++;
1516 	}
1517 	return ROFF_CONT;
1518 }
1519 
1520 enum rofferr
1521 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1522 {
1523 	enum rofft	 t;
1524 	enum rofferr	 e;
1525 	int		 pos;	/* parse point */
1526 	int		 spos;	/* saved parse point for messages */
1527 	int		 ppos;	/* original offset in buf->buf */
1528 	int		 ctl;	/* macro line (boolean) */
1529 
1530 	ppos = pos = *offs;
1531 
1532 	/* Handle in-line equation delimiters. */
1533 
1534 	if (r->tbl == NULL &&
1535 	    r->last_eqn != NULL && r->last_eqn->delim &&
1536 	    (r->eqn == NULL || r->eqn_inline)) {
1537 		e = roff_eqndelim(r, buf, pos);
1538 		if (e == ROFF_REPARSE)
1539 			return e;
1540 		assert(e == ROFF_CONT);
1541 	}
1542 
1543 	/* Expand some escape sequences. */
1544 
1545 	e = roff_res(r, buf, ln, pos);
1546 	if (e == ROFF_IGN)
1547 		return e;
1548 	assert(e == ROFF_CONT);
1549 
1550 	ctl = roff_getcontrol(r, buf->buf, &pos);
1551 
1552 	/*
1553 	 * First, if a scope is open and we're not a macro, pass the
1554 	 * text through the macro's filter.
1555 	 * Equations process all content themselves.
1556 	 * Tables process almost all content themselves, but we want
1557 	 * to warn about macros before passing it there.
1558 	 */
1559 
1560 	if (r->last != NULL && ! ctl) {
1561 		t = r->last->tok;
1562 		assert(roffs[t].text);
1563 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1564 		assert(e == ROFF_IGN || e == ROFF_CONT);
1565 		if (e != ROFF_CONT)
1566 			return e;
1567 	}
1568 	if (r->eqn != NULL)
1569 		return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1570 	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1571 		return tbl_read(r->tbl, ln, buf->buf, ppos);
1572 	if ( ! ctl)
1573 		return roff_parsetext(buf, pos, offs);
1574 
1575 	/* Skip empty request lines. */
1576 
1577 	if (buf->buf[pos] == '"') {
1578 		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1579 		    ln, pos, NULL);
1580 		return ROFF_IGN;
1581 	} else if (buf->buf[pos] == '\0')
1582 		return ROFF_IGN;
1583 
1584 	/*
1585 	 * If a scope is open, go to the child handler for that macro,
1586 	 * as it may want to preprocess before doing anything with it.
1587 	 * Don't do so if an equation is open.
1588 	 */
1589 
1590 	if (r->last) {
1591 		t = r->last->tok;
1592 		assert(roffs[t].sub);
1593 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1594 	}
1595 
1596 	/* No scope is open.  This is a new request or macro. */
1597 
1598 	spos = pos;
1599 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1600 
1601 	/* Tables ignore most macros. */
1602 
1603 	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1604 		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1605 		    ln, pos, buf->buf + spos);
1606 		if (t == ROFF_TS)
1607 			return ROFF_IGN;
1608 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1609 			pos++;
1610 		while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1611 			pos++;
1612 		return tbl_read(r->tbl, ln, buf->buf, pos);
1613 	}
1614 
1615 	/*
1616 	 * This is neither a roff request nor a user-defined macro.
1617 	 * Let the standard macro set parsers handle it.
1618 	 */
1619 
1620 	if (t == ROFF_MAX)
1621 		return ROFF_CONT;
1622 
1623 	/* Execute a roff request or a user defined macro. */
1624 
1625 	assert(roffs[t].proc);
1626 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1627 }
1628 
1629 void
1630 roff_endparse(struct roff *r)
1631 {
1632 
1633 	if (r->last)
1634 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1635 		    r->last->line, r->last->col,
1636 		    roffs[r->last->tok].name);
1637 
1638 	if (r->eqn) {
1639 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1640 		    r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1641 		eqn_end(&r->eqn);
1642 	}
1643 
1644 	if (r->tbl) {
1645 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1646 		    r->tbl->line, r->tbl->pos, "TS");
1647 		tbl_end(&r->tbl);
1648 	}
1649 }
1650 
1651 /*
1652  * Parse a roff node's type from the input buffer.  This must be in the
1653  * form of ".foo xxx" in the usual way.
1654  */
1655 static enum rofft
1656 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1657 {
1658 	char		*cp;
1659 	const char	*mac;
1660 	size_t		 maclen;
1661 	enum rofft	 t;
1662 
1663 	cp = buf + *pos;
1664 
1665 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1666 		return ROFF_MAX;
1667 
1668 	mac = cp;
1669 	maclen = roff_getname(r, &cp, ln, ppos);
1670 
1671 	t = (r->current_string = roff_getstrn(r, mac, maclen))
1672 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
1673 
1674 	if (ROFF_MAX != t)
1675 		*pos = cp - buf;
1676 
1677 	return t;
1678 }
1679 
1680 /* --- handling of request blocks ----------------------------------------- */
1681 
1682 static enum rofferr
1683 roff_cblock(ROFF_ARGS)
1684 {
1685 
1686 	/*
1687 	 * A block-close `..' should only be invoked as a child of an
1688 	 * ignore macro, otherwise raise a warning and just ignore it.
1689 	 */
1690 
1691 	if (r->last == NULL) {
1692 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1693 		    ln, ppos, "..");
1694 		return ROFF_IGN;
1695 	}
1696 
1697 	switch (r->last->tok) {
1698 	case ROFF_am:
1699 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1700 	case ROFF_ami:
1701 	case ROFF_de:
1702 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1703 	case ROFF_dei:
1704 	case ROFF_ig:
1705 		break;
1706 	default:
1707 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1708 		    ln, ppos, "..");
1709 		return ROFF_IGN;
1710 	}
1711 
1712 	if (buf->buf[pos] != '\0')
1713 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1714 		    ".. %s", buf->buf + pos);
1715 
1716 	roffnode_pop(r);
1717 	roffnode_cleanscope(r);
1718 	return ROFF_IGN;
1719 
1720 }
1721 
1722 static void
1723 roffnode_cleanscope(struct roff *r)
1724 {
1725 
1726 	while (r->last) {
1727 		if (--r->last->endspan != 0)
1728 			break;
1729 		roffnode_pop(r);
1730 	}
1731 }
1732 
1733 static void
1734 roff_ccond(struct roff *r, int ln, int ppos)
1735 {
1736 
1737 	if (NULL == r->last) {
1738 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1739 		    ln, ppos, "\\}");
1740 		return;
1741 	}
1742 
1743 	switch (r->last->tok) {
1744 	case ROFF_el:
1745 	case ROFF_ie:
1746 	case ROFF_if:
1747 		break;
1748 	default:
1749 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1750 		    ln, ppos, "\\}");
1751 		return;
1752 	}
1753 
1754 	if (r->last->endspan > -1) {
1755 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1756 		    ln, ppos, "\\}");
1757 		return;
1758 	}
1759 
1760 	roffnode_pop(r);
1761 	roffnode_cleanscope(r);
1762 	return;
1763 }
1764 
1765 static enum rofferr
1766 roff_block(ROFF_ARGS)
1767 {
1768 	const char	*name;
1769 	char		*iname, *cp;
1770 	size_t		 namesz;
1771 
1772 	/* Ignore groff compatibility mode for now. */
1773 
1774 	if (tok == ROFF_de1)
1775 		tok = ROFF_de;
1776 	else if (tok == ROFF_dei1)
1777 		tok = ROFF_dei;
1778 	else if (tok == ROFF_am1)
1779 		tok = ROFF_am;
1780 	else if (tok == ROFF_ami1)
1781 		tok = ROFF_ami;
1782 
1783 	/* Parse the macro name argument. */
1784 
1785 	cp = buf->buf + pos;
1786 	if (tok == ROFF_ig) {
1787 		iname = NULL;
1788 		namesz = 0;
1789 	} else {
1790 		iname = cp;
1791 		namesz = roff_getname(r, &cp, ln, ppos);
1792 		iname[namesz] = '\0';
1793 	}
1794 
1795 	/* Resolve the macro name argument if it is indirect. */
1796 
1797 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1798 		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1799 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1800 			    r->parse, ln, (int)(iname - buf->buf),
1801 			    "%.*s", (int)namesz, iname);
1802 			namesz = 0;
1803 		} else
1804 			namesz = strlen(name);
1805 	} else
1806 		name = iname;
1807 
1808 	if (namesz == 0 && tok != ROFF_ig) {
1809 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1810 		    ln, ppos, roffs[tok].name);
1811 		return ROFF_IGN;
1812 	}
1813 
1814 	roffnode_push(r, tok, name, ln, ppos);
1815 
1816 	/*
1817 	 * At the beginning of a `de' macro, clear the existing string
1818 	 * with the same name, if there is one.  New content will be
1819 	 * appended from roff_block_text() in multiline mode.
1820 	 */
1821 
1822 	if (tok == ROFF_de || tok == ROFF_dei)
1823 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1824 
1825 	if (*cp == '\0')
1826 		return ROFF_IGN;
1827 
1828 	/* Get the custom end marker. */
1829 
1830 	iname = cp;
1831 	namesz = roff_getname(r, &cp, ln, ppos);
1832 
1833 	/* Resolve the end marker if it is indirect. */
1834 
1835 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1836 		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1837 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1838 			    r->parse, ln, (int)(iname - buf->buf),
1839 			    "%.*s", (int)namesz, iname);
1840 			namesz = 0;
1841 		} else
1842 			namesz = strlen(name);
1843 	} else
1844 		name = iname;
1845 
1846 	if (namesz)
1847 		r->last->end = mandoc_strndup(name, namesz);
1848 
1849 	if (*cp != '\0')
1850 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1851 		    ln, pos, ".%s ... %s", roffs[tok].name, cp);
1852 
1853 	return ROFF_IGN;
1854 }
1855 
1856 static enum rofferr
1857 roff_block_sub(ROFF_ARGS)
1858 {
1859 	enum rofft	t;
1860 	int		i, j;
1861 
1862 	/*
1863 	 * First check whether a custom macro exists at this level.  If
1864 	 * it does, then check against it.  This is some of groff's
1865 	 * stranger behaviours.  If we encountered a custom end-scope
1866 	 * tag and that tag also happens to be a "real" macro, then we
1867 	 * need to try interpreting it again as a real macro.  If it's
1868 	 * not, then return ignore.  Else continue.
1869 	 */
1870 
1871 	if (r->last->end) {
1872 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1873 			if (buf->buf[i] != r->last->end[j])
1874 				break;
1875 
1876 		if (r->last->end[j] == '\0' &&
1877 		    (buf->buf[i] == '\0' ||
1878 		     buf->buf[i] == ' ' ||
1879 		     buf->buf[i] == '\t')) {
1880 			roffnode_pop(r);
1881 			roffnode_cleanscope(r);
1882 
1883 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1884 				i++;
1885 
1886 			pos = i;
1887 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1888 			    ROFF_MAX)
1889 				return ROFF_RERUN;
1890 			return ROFF_IGN;
1891 		}
1892 	}
1893 
1894 	/*
1895 	 * If we have no custom end-query or lookup failed, then try
1896 	 * pulling it out of the hashtable.
1897 	 */
1898 
1899 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1900 
1901 	if (t != ROFF_cblock) {
1902 		if (tok != ROFF_ig)
1903 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1904 		return ROFF_IGN;
1905 	}
1906 
1907 	assert(roffs[t].proc);
1908 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1909 }
1910 
1911 static enum rofferr
1912 roff_block_text(ROFF_ARGS)
1913 {
1914 
1915 	if (tok != ROFF_ig)
1916 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1917 
1918 	return ROFF_IGN;
1919 }
1920 
1921 static enum rofferr
1922 roff_cond_sub(ROFF_ARGS)
1923 {
1924 	enum rofft	 t;
1925 	char		*ep;
1926 	int		 rr;
1927 
1928 	rr = r->last->rule;
1929 	roffnode_cleanscope(r);
1930 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1931 
1932 	/*
1933 	 * Fully handle known macros when they are structurally
1934 	 * required or when the conditional evaluated to true.
1935 	 */
1936 
1937 	if ((t != ROFF_MAX) &&
1938 	    (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1939 		assert(roffs[t].proc);
1940 		return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1941 	}
1942 
1943 	/*
1944 	 * If `\}' occurs on a macro line without a preceding macro,
1945 	 * drop the line completely.
1946 	 */
1947 
1948 	ep = buf->buf + pos;
1949 	if (ep[0] == '\\' && ep[1] == '}')
1950 		rr = 0;
1951 
1952 	/* Always check for the closing delimiter `\}'. */
1953 
1954 	while ((ep = strchr(ep, '\\')) != NULL) {
1955 		if (*(++ep) == '}') {
1956 			*ep = '&';
1957 			roff_ccond(r, ln, ep - buf->buf - 1);
1958 		}
1959 		if (*ep != '\0')
1960 			++ep;
1961 	}
1962 	return rr ? ROFF_CONT : ROFF_IGN;
1963 }
1964 
1965 static enum rofferr
1966 roff_cond_text(ROFF_ARGS)
1967 {
1968 	char		*ep;
1969 	int		 rr;
1970 
1971 	rr = r->last->rule;
1972 	roffnode_cleanscope(r);
1973 
1974 	ep = buf->buf + pos;
1975 	while ((ep = strchr(ep, '\\')) != NULL) {
1976 		if (*(++ep) == '}') {
1977 			*ep = '&';
1978 			roff_ccond(r, ln, ep - buf->buf - 1);
1979 		}
1980 		if (*ep != '\0')
1981 			++ep;
1982 	}
1983 	return rr ? ROFF_CONT : ROFF_IGN;
1984 }
1985 
1986 /* --- handling of numeric and conditional expressions -------------------- */
1987 
1988 /*
1989  * Parse a single signed integer number.  Stop at the first non-digit.
1990  * If there is at least one digit, return success and advance the
1991  * parse point, else return failure and let the parse point unchanged.
1992  * Ignore overflows, treat them just like the C language.
1993  */
1994 static int
1995 roff_getnum(const char *v, int *pos, int *res, int flags)
1996 {
1997 	int	 myres, scaled, n, p;
1998 
1999 	if (NULL == res)
2000 		res = &myres;
2001 
2002 	p = *pos;
2003 	n = v[p] == '-';
2004 	if (n || v[p] == '+')
2005 		p++;
2006 
2007 	if (flags & ROFFNUM_WHITE)
2008 		while (isspace((unsigned char)v[p]))
2009 			p++;
2010 
2011 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2012 		*res = 10 * *res + v[p] - '0';
2013 	if (p == *pos + n)
2014 		return 0;
2015 
2016 	if (n)
2017 		*res = -*res;
2018 
2019 	/* Each number may be followed by one optional scaling unit. */
2020 
2021 	switch (v[p]) {
2022 	case 'f':
2023 		scaled = *res * 65536;
2024 		break;
2025 	case 'i':
2026 		scaled = *res * 240;
2027 		break;
2028 	case 'c':
2029 		scaled = *res * 240 / 2.54;
2030 		break;
2031 	case 'v':
2032 	case 'P':
2033 		scaled = *res * 40;
2034 		break;
2035 	case 'm':
2036 	case 'n':
2037 		scaled = *res * 24;
2038 		break;
2039 	case 'p':
2040 		scaled = *res * 10 / 3;
2041 		break;
2042 	case 'u':
2043 		scaled = *res;
2044 		break;
2045 	case 'M':
2046 		scaled = *res * 6 / 25;
2047 		break;
2048 	default:
2049 		scaled = *res;
2050 		p--;
2051 		break;
2052 	}
2053 	if (flags & ROFFNUM_SCALE)
2054 		*res = scaled;
2055 
2056 	*pos = p + 1;
2057 	return 1;
2058 }
2059 
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Succeed if the string between its first and second occurrence
 * is identical to the string between its second and third
 * occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * Returns 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* walks the first string */
	rhs = strchr(lhs, *delim);	/* walks the second string */

	if (rhs != NULL) {
		/* Found the middle delimiter: compare in lockstep. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2102 
2103 /*
2104  * Evaluate an optionally negated single character, numerical,
2105  * or string condition.
2106  */
2107 static int
2108 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2109 {
2110 	char	*cp, *name;
2111 	size_t	 sz;
2112 	int	 number, savepos, wanttrue;
2113 
2114 	if ('!' == v[*pos]) {
2115 		wanttrue = 0;
2116 		(*pos)++;
2117 	} else
2118 		wanttrue = 1;
2119 
2120 	switch (v[*pos]) {
2121 	case '\0':
2122 		return 0;
2123 	case 'n':
2124 	case 'o':
2125 		(*pos)++;
2126 		return wanttrue;
2127 	case 'c':
2128 	case 'd':
2129 	case 'e':
2130 	case 't':
2131 	case 'v':
2132 		(*pos)++;
2133 		return !wanttrue;
2134 	case 'r':
2135 		cp = name = v + ++*pos;
2136 		sz = roff_getname(r, &cp, ln, *pos);
2137 		*pos = cp - v;
2138 		return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2139 	default:
2140 		break;
2141 	}
2142 
2143 	savepos = *pos;
2144 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2145 		return (number > 0) == wanttrue;
2146 	else if (*pos == savepos)
2147 		return roff_evalstrcond(v, pos) == wanttrue;
2148 	else
2149 		return 0;
2150 }
2151 
2152 static enum rofferr
2153 roff_line_ignore(ROFF_ARGS)
2154 {
2155 
2156 	return ROFF_IGN;
2157 }
2158 
2159 static enum rofferr
2160 roff_insec(ROFF_ARGS)
2161 {
2162 
2163 	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2164 	    ln, ppos, roffs[tok].name);
2165 	return ROFF_IGN;
2166 }
2167 
2168 static enum rofferr
2169 roff_unsupp(ROFF_ARGS)
2170 {
2171 
2172 	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2173 	    ln, ppos, roffs[tok].name);
2174 	return ROFF_IGN;
2175 }
2176 
2177 static enum rofferr
2178 roff_cond(ROFF_ARGS)
2179 {
2180 
2181 	roffnode_push(r, tok, NULL, ln, ppos);
2182 
2183 	/*
2184 	 * An `.el' has no conditional body: it will consume the value
2185 	 * of the current rstack entry set in prior `ie' calls or
2186 	 * defaults to DENY.
2187 	 *
2188 	 * If we're not an `el', however, then evaluate the conditional.
2189 	 */
2190 
2191 	r->last->rule = tok == ROFF_el ?
2192 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2193 	    roff_evalcond(r, ln, buf->buf, &pos);
2194 
2195 	/*
2196 	 * An if-else will put the NEGATION of the current evaluated
2197 	 * conditional into the stack of rules.
2198 	 */
2199 
2200 	if (tok == ROFF_ie) {
2201 		if (r->rstackpos + 1 == r->rstacksz) {
2202 			r->rstacksz += 16;
2203 			r->rstack = mandoc_reallocarray(r->rstack,
2204 			    r->rstacksz, sizeof(int));
2205 		}
2206 		r->rstack[++r->rstackpos] = !r->last->rule;
2207 	}
2208 
2209 	/* If the parent has false as its rule, then so do we. */
2210 
2211 	if (r->last->parent && !r->last->parent->rule)
2212 		r->last->rule = 0;
2213 
2214 	/*
2215 	 * Determine scope.
2216 	 * If there is nothing on the line after the conditional,
2217 	 * not even whitespace, use next-line scope.
2218 	 */
2219 
2220 	if (buf->buf[pos] == '\0') {
2221 		r->last->endspan = 2;
2222 		goto out;
2223 	}
2224 
2225 	while (buf->buf[pos] == ' ')
2226 		pos++;
2227 
2228 	/* An opening brace requests multiline scope. */
2229 
2230 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2231 		r->last->endspan = -1;
2232 		pos += 2;
2233 		while (buf->buf[pos] == ' ')
2234 			pos++;
2235 		goto out;
2236 	}
2237 
2238 	/*
2239 	 * Anything else following the conditional causes
2240 	 * single-line scope.  Warn if the scope contains
2241 	 * nothing but trailing whitespace.
2242 	 */
2243 
2244 	if (buf->buf[pos] == '\0')
2245 		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2246 		    ln, ppos, roffs[tok].name);
2247 
2248 	r->last->endspan = 1;
2249 
2250 out:
2251 	*offs = pos;
2252 	return ROFF_RERUN;
2253 }
2254 
2255 static enum rofferr
2256 roff_ds(ROFF_ARGS)
2257 {
2258 	char		*string;
2259 	const char	*name;
2260 	size_t		 namesz;
2261 
2262 	/* Ignore groff compatibility mode for now. */
2263 
2264 	if (tok == ROFF_ds1)
2265 		tok = ROFF_ds;
2266 	else if (tok == ROFF_as1)
2267 		tok = ROFF_as;
2268 
2269 	/*
2270 	 * The first word is the name of the string.
2271 	 * If it is empty or terminated by an escape sequence,
2272 	 * abort the `ds' request without defining anything.
2273 	 */
2274 
2275 	name = string = buf->buf + pos;
2276 	if (*name == '\0')
2277 		return ROFF_IGN;
2278 
2279 	namesz = roff_getname(r, &string, ln, pos);
2280 	if (name[namesz] == '\\')
2281 		return ROFF_IGN;
2282 
2283 	/* Read past the initial double-quote, if any. */
2284 	if (*string == '"')
2285 		string++;
2286 
2287 	/* The rest is the value. */
2288 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2289 	    ROFF_as == tok);
2290 	return ROFF_IGN;
2291 }
2292 
/*
 * Parse one operator, one or two characters long, at v[*pos].
 * On success, store the operator code into *res, advance *pos
 * past the operator, and return the (non-zero) code.
 * On failure, return 0 and leave *pos alone; *res then holds
 * the unrecognized character.
 * Two-character operators are mapped to single-letter codes:
 * "<=" to 'l', "<>" to '!', "<?" to 'i', ">=" to 'g', ">?" to 'a',
 * and "==" to '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = *cp;
	len = 1;

	if (*cp == '<') {
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (*cp == '>') {
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (*cp == '=') {
		if (cp[1] == '=')
			len = 2;
	} else if (*cp == '\0' || strchr("+-*/%&:", *cp) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2356 
2357 /*
2358  * Evaluate either a parenthesized numeric expression
2359  * or a single signed integer number.
2360  */
2361 static int
2362 roff_evalpar(struct roff *r, int ln,
2363 	const char *v, int *pos, int *res, int flags)
2364 {
2365 
2366 	if ('(' != v[*pos])
2367 		return roff_getnum(v, pos, res, flags);
2368 
2369 	(*pos)++;
2370 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2371 		return 0;
2372 
2373 	/*
2374 	 * Omission of the closing parenthesis
2375 	 * is an error in validation mode,
2376 	 * but ignored in evaluation mode.
2377 	 */
2378 
2379 	if (')' == v[*pos])
2380 		(*pos)++;
2381 	else if (NULL == res)
2382 		return 0;
2383 
2384 	return 1;
2385 }
2386 
2387 /*
2388  * Evaluate a complete numeric expression.
2389  * Proceed left to right, there is no concept of precedence.
2390  */
2391 static int
2392 roff_evalnum(struct roff *r, int ln, const char *v,
2393 	int *pos, int *res, int flags)
2394 {
2395 	int		 mypos, operand2;
2396 	char		 operator;
2397 
2398 	if (NULL == pos) {
2399 		mypos = 0;
2400 		pos = &mypos;
2401 	}
2402 
2403 	if (flags & ROFFNUM_WHITE)
2404 		while (isspace((unsigned char)v[*pos]))
2405 			(*pos)++;
2406 
2407 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2408 		return 0;
2409 
2410 	while (1) {
2411 		if (flags & ROFFNUM_WHITE)
2412 			while (isspace((unsigned char)v[*pos]))
2413 				(*pos)++;
2414 
2415 		if ( ! roff_getop(v, pos, &operator))
2416 			break;
2417 
2418 		if (flags & ROFFNUM_WHITE)
2419 			while (isspace((unsigned char)v[*pos]))
2420 				(*pos)++;
2421 
2422 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2423 			return 0;
2424 
2425 		if (flags & ROFFNUM_WHITE)
2426 			while (isspace((unsigned char)v[*pos]))
2427 				(*pos)++;
2428 
2429 		if (NULL == res)
2430 			continue;
2431 
2432 		switch (operator) {
2433 		case '+':
2434 			*res += operand2;
2435 			break;
2436 		case '-':
2437 			*res -= operand2;
2438 			break;
2439 		case '*':
2440 			*res *= operand2;
2441 			break;
2442 		case '/':
2443 			if (operand2 == 0) {
2444 				mandoc_msg(MANDOCERR_DIVZERO,
2445 					r->parse, ln, *pos, v);
2446 				*res = 0;
2447 				break;
2448 			}
2449 			*res /= operand2;
2450 			break;
2451 		case '%':
2452 			if (operand2 == 0) {
2453 				mandoc_msg(MANDOCERR_DIVZERO,
2454 					r->parse, ln, *pos, v);
2455 				*res = 0;
2456 				break;
2457 			}
2458 			*res %= operand2;
2459 			break;
2460 		case '<':
2461 			*res = *res < operand2;
2462 			break;
2463 		case '>':
2464 			*res = *res > operand2;
2465 			break;
2466 		case 'l':
2467 			*res = *res <= operand2;
2468 			break;
2469 		case 'g':
2470 			*res = *res >= operand2;
2471 			break;
2472 		case '=':
2473 			*res = *res == operand2;
2474 			break;
2475 		case '!':
2476 			*res = *res != operand2;
2477 			break;
2478 		case '&':
2479 			*res = *res && operand2;
2480 			break;
2481 		case ':':
2482 			*res = *res || operand2;
2483 			break;
2484 		case 'i':
2485 			if (operand2 < *res)
2486 				*res = operand2;
2487 			break;
2488 		case 'a':
2489 			if (operand2 > *res)
2490 				*res = operand2;
2491 			break;
2492 		default:
2493 			abort();
2494 		}
2495 	}
2496 	return 1;
2497 }
2498 
2499 /* --- register management ------------------------------------------------ */
2500 
2501 void
2502 roff_setreg(struct roff *r, const char *name, int val, char sign)
2503 {
2504 	struct roffreg	*reg;
2505 
2506 	/* Search for an existing register with the same name. */
2507 	reg = r->regtab;
2508 
2509 	while (reg && strcmp(name, reg->key.p))
2510 		reg = reg->next;
2511 
2512 	if (NULL == reg) {
2513 		/* Create a new register. */
2514 		reg = mandoc_malloc(sizeof(struct roffreg));
2515 		reg->key.p = mandoc_strdup(name);
2516 		reg->key.sz = strlen(name);
2517 		reg->val = 0;
2518 		reg->next = r->regtab;
2519 		r->regtab = reg;
2520 	}
2521 
2522 	if ('+' == sign)
2523 		reg->val += val;
2524 	else if ('-' == sign)
2525 		reg->val -= val;
2526 	else
2527 		reg->val = val;
2528 }
2529 
2530 /*
2531  * Handle some predefined read-only number registers.
2532  * For now, return -1 if the requested register is not predefined;
2533  * in case a predefined read-only register having the value -1
2534  * were to turn up, another special value would have to be chosen.
2535  */
2536 static int
2537 roff_getregro(const struct roff *r, const char *name)
2538 {
2539 
2540 	switch (*name) {
2541 	case '$':  /* Number of arguments of the last macro evaluated. */
2542 		return r->argc;
2543 	case 'A':  /* ASCII approximation mode is always off. */
2544 		return 0;
2545 	case 'g':  /* Groff compatibility mode is always on. */
2546 		return 1;
2547 	case 'H':  /* Fixed horizontal resolution. */
2548 		return 24;
2549 	case 'j':  /* Always adjust left margin only. */
2550 		return 0;
2551 	case 'T':  /* Some output device is always defined. */
2552 		return 1;
2553 	case 'V':  /* Fixed vertical resolution. */
2554 		return 40;
2555 	default:
2556 		return -1;
2557 	}
2558 }
2559 
2560 int
2561 roff_getreg(const struct roff *r, const char *name)
2562 {
2563 	struct roffreg	*reg;
2564 	int		 val;
2565 
2566 	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2567 		val = roff_getregro(r, name + 1);
2568 		if (-1 != val)
2569 			return val;
2570 	}
2571 
2572 	for (reg = r->regtab; reg; reg = reg->next)
2573 		if (0 == strcmp(name, reg->key.p))
2574 			return reg->val;
2575 
2576 	return 0;
2577 }
2578 
2579 static int
2580 roff_getregn(const struct roff *r, const char *name, size_t len)
2581 {
2582 	struct roffreg	*reg;
2583 	int		 val;
2584 
2585 	if ('.' == name[0] && 2 == len) {
2586 		val = roff_getregro(r, name + 1);
2587 		if (-1 != val)
2588 			return val;
2589 	}
2590 
2591 	for (reg = r->regtab; reg; reg = reg->next)
2592 		if (len == reg->key.sz &&
2593 		    0 == strncmp(name, reg->key.p, len))
2594 			return reg->val;
2595 
2596 	return 0;
2597 }
2598 
2599 static int
2600 roff_hasregn(const struct roff *r, const char *name, size_t len)
2601 {
2602 	struct roffreg	*reg;
2603 	int		 val;
2604 
2605 	if ('.' == name[0] && 2 == len) {
2606 		val = roff_getregro(r, name + 1);
2607 		if (-1 != val)
2608 			return 1;
2609 	}
2610 
2611 	for (reg = r->regtab; reg; reg = reg->next)
2612 		if (len == reg->key.sz &&
2613 		    0 == strncmp(name, reg->key.p, len))
2614 			return 1;
2615 
2616 	return 0;
2617 }
2618 
2619 static void
2620 roff_freereg(struct roffreg *reg)
2621 {
2622 	struct roffreg	*old_reg;
2623 
2624 	while (NULL != reg) {
2625 		free(reg->key.p);
2626 		old_reg = reg;
2627 		reg = reg->next;
2628 		free(old_reg);
2629 	}
2630 }
2631 
2632 static enum rofferr
2633 roff_nr(ROFF_ARGS)
2634 {
2635 	char		*key, *val;
2636 	size_t		 keysz;
2637 	int		 iv;
2638 	char		 sign;
2639 
2640 	key = val = buf->buf + pos;
2641 	if (*key == '\0')
2642 		return ROFF_IGN;
2643 
2644 	keysz = roff_getname(r, &val, ln, pos);
2645 	if (key[keysz] == '\\')
2646 		return ROFF_IGN;
2647 	key[keysz] = '\0';
2648 
2649 	sign = *val;
2650 	if (sign == '+' || sign == '-')
2651 		val++;
2652 
2653 	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2654 		roff_setreg(r, key, iv, sign);
2655 
2656 	return ROFF_IGN;
2657 }
2658 
2659 static enum rofferr
2660 roff_rr(ROFF_ARGS)
2661 {
2662 	struct roffreg	*reg, **prev;
2663 	char		*name, *cp;
2664 	size_t		 namesz;
2665 
2666 	name = cp = buf->buf + pos;
2667 	if (*name == '\0')
2668 		return ROFF_IGN;
2669 	namesz = roff_getname(r, &cp, ln, pos);
2670 	name[namesz] = '\0';
2671 
2672 	prev = &r->regtab;
2673 	while (1) {
2674 		reg = *prev;
2675 		if (reg == NULL || !strcmp(name, reg->key.p))
2676 			break;
2677 		prev = &reg->next;
2678 	}
2679 	if (reg != NULL) {
2680 		*prev = reg->next;
2681 		free(reg->key.p);
2682 		free(reg);
2683 	}
2684 	return ROFF_IGN;
2685 }
2686 
2687 /* --- handler functions for roff requests -------------------------------- */
2688 
2689 static enum rofferr
2690 roff_rm(ROFF_ARGS)
2691 {
2692 	const char	 *name;
2693 	char		 *cp;
2694 	size_t		  namesz;
2695 
2696 	cp = buf->buf + pos;
2697 	while (*cp != '\0') {
2698 		name = cp;
2699 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2700 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2701 		if (name[namesz] == '\\')
2702 			break;
2703 	}
2704 	return ROFF_IGN;
2705 }
2706 
2707 static enum rofferr
2708 roff_it(ROFF_ARGS)
2709 {
2710 	int		 iv;
2711 
2712 	/* Parse the number of lines. */
2713 
2714 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2715 		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2716 		    ln, ppos, buf->buf + 1);
2717 		return ROFF_IGN;
2718 	}
2719 
2720 	while (isspace((unsigned char)buf->buf[pos]))
2721 		pos++;
2722 
2723 	/*
2724 	 * Arm the input line trap.
2725 	 * Special-casing "an-trap" is an ugly workaround to cope
2726 	 * with DocBook stupidly fiddling with man(7) internals.
2727 	 */
2728 
2729 	roffit_lines = iv;
2730 	roffit_macro = mandoc_strdup(iv != 1 ||
2731 	    strcmp(buf->buf + pos, "an-trap") ?
2732 	    buf->buf + pos : "br");
2733 	return ROFF_IGN;
2734 }
2735 
2736 static enum rofferr
2737 roff_Dd(ROFF_ARGS)
2738 {
2739 	const char *const	*cp;
2740 
2741 	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2742 		for (cp = __mdoc_reserved; *cp; cp++)
2743 			roff_setstr(r, *cp, NULL, 0);
2744 
2745 	if (r->format == 0)
2746 		r->format = MPARSE_MDOC;
2747 
2748 	return ROFF_CONT;
2749 }
2750 
2751 static enum rofferr
2752 roff_TH(ROFF_ARGS)
2753 {
2754 	const char *const	*cp;
2755 
2756 	if ((r->options & MPARSE_QUICK) == 0)
2757 		for (cp = __man_reserved; *cp; cp++)
2758 			roff_setstr(r, *cp, NULL, 0);
2759 
2760 	if (r->format == 0)
2761 		r->format = MPARSE_MAN;
2762 
2763 	return ROFF_CONT;
2764 }
2765 
2766 static enum rofferr
2767 roff_TE(ROFF_ARGS)
2768 {
2769 
2770 	if (NULL == r->tbl)
2771 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2772 		    ln, ppos, "TE");
2773 	else if ( ! tbl_end(&r->tbl)) {
2774 		free(buf->buf);
2775 		buf->buf = mandoc_strdup(".sp");
2776 		buf->sz = 4;
2777 		return ROFF_REPARSE;
2778 	}
2779 	return ROFF_IGN;
2780 }
2781 
2782 static enum rofferr
2783 roff_T_(ROFF_ARGS)
2784 {
2785 
2786 	if (NULL == r->tbl)
2787 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2788 		    ln, ppos, "T&");
2789 	else
2790 		tbl_restart(ppos, ln, r->tbl);
2791 
2792 	return ROFF_IGN;
2793 }
2794 
2795 /*
2796  * Handle in-line equation delimiters.
2797  */
2798 static enum rofferr
2799 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2800 {
2801 	char		*cp1, *cp2;
2802 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2803 
2804 	/*
2805 	 * Outside equations, look for an opening delimiter.
2806 	 * If we are inside an equation, we already know it is
2807 	 * in-line, or this function wouldn't have been called;
2808 	 * so look for a closing delimiter.
2809 	 */
2810 
2811 	cp1 = buf->buf + pos;
2812 	cp2 = strchr(cp1, r->eqn == NULL ?
2813 	    r->last_eqn->odelim : r->last_eqn->cdelim);
2814 	if (cp2 == NULL)
2815 		return ROFF_CONT;
2816 
2817 	*cp2++ = '\0';
2818 	bef_pr = bef_nl = aft_nl = aft_pr = "";
2819 
2820 	/* Handle preceding text, protecting whitespace. */
2821 
2822 	if (*buf->buf != '\0') {
2823 		if (r->eqn == NULL)
2824 			bef_pr = "\\&";
2825 		bef_nl = "\n";
2826 	}
2827 
2828 	/*
2829 	 * Prepare replacing the delimiter with an equation macro
2830 	 * and drop leading white space from the equation.
2831 	 */
2832 
2833 	if (r->eqn == NULL) {
2834 		while (*cp2 == ' ')
2835 			cp2++;
2836 		mac = ".EQ";
2837 	} else
2838 		mac = ".EN";
2839 
2840 	/* Handle following text, protecting whitespace. */
2841 
2842 	if (*cp2 != '\0') {
2843 		aft_nl = "\n";
2844 		if (r->eqn != NULL)
2845 			aft_pr = "\\&";
2846 	}
2847 
2848 	/* Do the actual replacement. */
2849 
2850 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2851 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2852 	free(buf->buf);
2853 	buf->buf = cp1;
2854 
2855 	/* Toggle the in-line state of the eqn subsystem. */
2856 
2857 	r->eqn_inline = r->eqn == NULL;
2858 	return ROFF_REPARSE;
2859 }
2860 
2861 static enum rofferr
2862 roff_EQ(ROFF_ARGS)
2863 {
2864 	struct eqn_node *e;
2865 
2866 	assert(r->eqn == NULL);
2867 	e = eqn_alloc(ppos, ln, r->parse);
2868 
2869 	if (r->last_eqn) {
2870 		r->last_eqn->next = e;
2871 		e->delim = r->last_eqn->delim;
2872 		e->odelim = r->last_eqn->odelim;
2873 		e->cdelim = r->last_eqn->cdelim;
2874 	} else
2875 		r->first_eqn = r->last_eqn = e;
2876 
2877 	r->eqn = r->last_eqn = e;
2878 
2879 	if (buf->buf[pos] != '\0')
2880 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2881 		    ".EQ %s", buf->buf + pos);
2882 
2883 	return ROFF_IGN;
2884 }
2885 
2886 static enum rofferr
2887 roff_EN(ROFF_ARGS)
2888 {
2889 
2890 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2891 	return ROFF_IGN;
2892 }
2893 
2894 static enum rofferr
2895 roff_TS(ROFF_ARGS)
2896 {
2897 	struct tbl_node	*tbl;
2898 
2899 	if (r->tbl) {
2900 		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2901 		    ln, ppos, "TS breaks TS");
2902 		tbl_end(&r->tbl);
2903 	}
2904 
2905 	tbl = tbl_alloc(ppos, ln, r->parse);
2906 
2907 	if (r->last_tbl)
2908 		r->last_tbl->next = tbl;
2909 	else
2910 		r->first_tbl = r->last_tbl = tbl;
2911 
2912 	r->tbl = r->last_tbl = tbl;
2913 	return ROFF_IGN;
2914 }
2915 
2916 static enum rofferr
2917 roff_brp(ROFF_ARGS)
2918 {
2919 
2920 	buf->buf[pos - 1] = '\0';
2921 	return ROFF_CONT;
2922 }
2923 
2924 static enum rofferr
2925 roff_cc(ROFF_ARGS)
2926 {
2927 	const char	*p;
2928 
2929 	p = buf->buf + pos;
2930 
2931 	if (*p == '\0' || (r->control = *p++) == '.')
2932 		r->control = 0;
2933 
2934 	if (*p != '\0')
2935 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2936 		    ln, p - buf->buf, "cc ... %s", p);
2937 
2938 	return ROFF_IGN;
2939 }
2940 
2941 static enum rofferr
2942 roff_tr(ROFF_ARGS)
2943 {
2944 	const char	*p, *first, *second;
2945 	size_t		 fsz, ssz;
2946 	enum mandoc_esc	 esc;
2947 
2948 	p = buf->buf + pos;
2949 
2950 	if (*p == '\0') {
2951 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2952 		return ROFF_IGN;
2953 	}
2954 
2955 	while (*p != '\0') {
2956 		fsz = ssz = 1;
2957 
2958 		first = p++;
2959 		if (*first == '\\') {
2960 			esc = mandoc_escape(&p, NULL, NULL);
2961 			if (esc == ESCAPE_ERROR) {
2962 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2963 				    ln, (int)(p - buf->buf), first);
2964 				return ROFF_IGN;
2965 			}
2966 			fsz = (size_t)(p - first);
2967 		}
2968 
2969 		second = p++;
2970 		if (*second == '\\') {
2971 			esc = mandoc_escape(&p, NULL, NULL);
2972 			if (esc == ESCAPE_ERROR) {
2973 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2974 				    ln, (int)(p - buf->buf), second);
2975 				return ROFF_IGN;
2976 			}
2977 			ssz = (size_t)(p - second);
2978 		} else if (*second == '\0') {
2979 			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2980 			    ln, first - buf->buf, "tr %s", first);
2981 			second = " ";
2982 			p--;
2983 		}
2984 
2985 		if (fsz > 1) {
2986 			roff_setstrn(&r->xmbtab, first, fsz,
2987 			    second, ssz, 0);
2988 			continue;
2989 		}
2990 
2991 		if (r->xtab == NULL)
2992 			r->xtab = mandoc_calloc(128,
2993 			    sizeof(struct roffstr));
2994 
2995 		free(r->xtab[(int)*first].p);
2996 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2997 		r->xtab[(int)*first].sz = ssz;
2998 	}
2999 
3000 	return ROFF_IGN;
3001 }
3002 
3003 static enum rofferr
3004 roff_so(ROFF_ARGS)
3005 {
3006 	char *name, *cp;
3007 
3008 	name = buf->buf + pos;
3009 	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3010 
3011 	/*
3012 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3013 	 * opening anything that's not in our cwd or anything beneath
3014 	 * it.  Thus, explicitly disallow traversing up the file-system
3015 	 * or using absolute paths.
3016 	 */
3017 
3018 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3019 		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3020 		    ".so %s", name);
3021 		buf->sz = mandoc_asprintf(&cp,
3022 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3023 		free(buf->buf);
3024 		buf->buf = cp;
3025 		*offs = 0;
3026 		return ROFF_REPARSE;
3027 	}
3028 
3029 	*offs = pos;
3030 	return ROFF_SO;
3031 }
3032 
3033 /* --- user defined strings and macros ------------------------------------ */
3034 
3035 static enum rofferr
3036 roff_userdef(ROFF_ARGS)
3037 {
3038 	const char	 *arg[9], *ap;
3039 	char		 *cp, *n1, *n2;
3040 	int		  i, ib, ie;
3041 	size_t		  asz, rsz;
3042 
3043 	/*
3044 	 * Collect pointers to macro argument strings
3045 	 * and NUL-terminate them.
3046 	 */
3047 
3048 	r->argc = 0;
3049 	cp = buf->buf + pos;
3050 	for (i = 0; i < 9; i++) {
3051 		if (*cp == '\0')
3052 			arg[i] = "";
3053 		else {
3054 			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3055 			r->argc = i + 1;
3056 		}
3057 	}
3058 
3059 	/*
3060 	 * Expand macro arguments.
3061 	 */
3062 
3063 	buf->sz = strlen(r->current_string) + 1;
3064 	n1 = cp = mandoc_malloc(buf->sz);
3065 	memcpy(n1, r->current_string, buf->sz);
3066 	while (*cp != '\0') {
3067 
3068 		/* Scan ahead for the next argument invocation. */
3069 
3070 		if (*cp++ != '\\')
3071 			continue;
3072 		if (*cp++ != '$')
3073 			continue;
3074 		if (*cp == '*') {  /* \\$* inserts all arguments */
3075 			ib = 0;
3076 			ie = r->argc - 1;
3077 		} else {  /* \\$1 .. \\$9 insert one argument */
3078 			ib = ie = *cp - '1';
3079 			if (ib < 0 || ib > 8)
3080 				continue;
3081 		}
3082 		cp -= 2;
3083 
3084 		/*
3085 		 * Determine the size of the expanded argument,
3086 		 * taking escaping of quotes into account.
3087 		 */
3088 
3089 		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3090 		for (i = ib; i <= ie; i++) {
3091 			for (ap = arg[i]; *ap != '\0'; ap++) {
3092 				asz++;
3093 				if (*ap == '"')
3094 					asz += 3;
3095 			}
3096 		}
3097 		if (asz != 3) {
3098 
3099 			/*
3100 			 * Determine the size of the rest of the
3101 			 * unexpanded macro, including the NUL.
3102 			 */
3103 
3104 			rsz = buf->sz - (cp - n1) - 3;
3105 
3106 			/*
3107 			 * When shrinking, move before
3108 			 * releasing the storage.
3109 			 */
3110 
3111 			if (asz < 3)
3112 				memmove(cp + asz, cp + 3, rsz);
3113 
3114 			/*
3115 			 * Resize the storage for the macro
3116 			 * and readjust the parse pointer.
3117 			 */
3118 
3119 			buf->sz += asz - 3;
3120 			n2 = mandoc_realloc(n1, buf->sz);
3121 			cp = n2 + (cp - n1);
3122 			n1 = n2;
3123 
3124 			/*
3125 			 * When growing, make room
3126 			 * for the expanded argument.
3127 			 */
3128 
3129 			if (asz > 3)
3130 				memmove(cp + asz, cp + 3, rsz);
3131 		}
3132 
3133 		/* Copy the expanded argument, escaping quotes. */
3134 
3135 		n2 = cp;
3136 		for (i = ib; i <= ie; i++) {
3137 			for (ap = arg[i]; *ap != '\0'; ap++) {
3138 				if (*ap == '"') {
3139 					memcpy(n2, "\\(dq", 4);
3140 					n2 += 4;
3141 				} else
3142 					*n2++ = *ap;
3143 			}
3144 			if (i < ie)
3145 				*n2++ = ' ';
3146 		}
3147 	}
3148 
3149 	/*
3150 	 * Replace the macro invocation
3151 	 * by the expanded macro.
3152 	 */
3153 
3154 	free(buf->buf);
3155 	buf->buf = n1;
3156 	*offs = 0;
3157 
3158 	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3159 	   ROFF_REPARSE : ROFF_APPEND;
3160 }
3161 
3162 static size_t
3163 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3164 {
3165 	char	 *name, *cp;
3166 	size_t	  namesz;
3167 
3168 	name = *cpp;
3169 	if ('\0' == *name)
3170 		return 0;
3171 
3172 	/* Read until end of name and terminate it with NUL. */
3173 	for (cp = name; 1; cp++) {
3174 		if ('\0' == *cp || ' ' == *cp) {
3175 			namesz = cp - name;
3176 			break;
3177 		}
3178 		if ('\\' != *cp)
3179 			continue;
3180 		namesz = cp - name;
3181 		if ('{' == cp[1] || '}' == cp[1])
3182 			break;
3183 		cp++;
3184 		if ('\\' == *cp)
3185 			continue;
3186 		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3187 		    "%.*s", (int)(cp - name + 1), name);
3188 		mandoc_escape((const char **)&cp, NULL, NULL);
3189 		break;
3190 	}
3191 
3192 	/* Read past spaces. */
3193 	while (' ' == *cp)
3194 		cp++;
3195 
3196 	*cpp = cp;
3197 	return namesz;
3198 }
3199 
3200 /*
3201  * Store *string into the user-defined string called *name.
3202  * To clear an existing entry, call with (*r, *name, NULL, 0).
3203  * append == 0: replace mode
3204  * append == 1: single-line append mode
3205  * append == 2: multiline append mode, append '\n' after each call
3206  */
3207 static void
3208 roff_setstr(struct roff *r, const char *name, const char *string,
3209 	int append)
3210 {
3211 
3212 	roff_setstrn(&r->strtab, name, strlen(name), string,
3213 	    string ? strlen(string) : 0, append);
3214 }
3215 
3216 static void
3217 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3218 		const char *string, size_t stringsz, int append)
3219 {
3220 	struct roffkv	*n;
3221 	char		*c;
3222 	int		 i;
3223 	size_t		 oldch, newch;
3224 
3225 	/* Search for an existing string with the same name. */
3226 	n = *r;
3227 
3228 	while (n && (namesz != n->key.sz ||
3229 			strncmp(n->key.p, name, namesz)))
3230 		n = n->next;
3231 
3232 	if (NULL == n) {
3233 		/* Create a new string table entry. */
3234 		n = mandoc_malloc(sizeof(struct roffkv));
3235 		n->key.p = mandoc_strndup(name, namesz);
3236 		n->key.sz = namesz;
3237 		n->val.p = NULL;
3238 		n->val.sz = 0;
3239 		n->next = *r;
3240 		*r = n;
3241 	} else if (0 == append) {
3242 		free(n->val.p);
3243 		n->val.p = NULL;
3244 		n->val.sz = 0;
3245 	}
3246 
3247 	if (NULL == string)
3248 		return;
3249 
3250 	/*
3251 	 * One additional byte for the '\n' in multiline mode,
3252 	 * and one for the terminating '\0'.
3253 	 */
3254 	newch = stringsz + (1 < append ? 2u : 1u);
3255 
3256 	if (NULL == n->val.p) {
3257 		n->val.p = mandoc_malloc(newch);
3258 		*n->val.p = '\0';
3259 		oldch = 0;
3260 	} else {
3261 		oldch = n->val.sz;
3262 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3263 	}
3264 
3265 	/* Skip existing content in the destination buffer. */
3266 	c = n->val.p + (int)oldch;
3267 
3268 	/* Append new content to the destination buffer. */
3269 	i = 0;
3270 	while (i < (int)stringsz) {
3271 		/*
3272 		 * Rudimentary roff copy mode:
3273 		 * Handle escaped backslashes.
3274 		 */
3275 		if ('\\' == string[i] && '\\' == string[i + 1])
3276 			i++;
3277 		*c++ = string[i++];
3278 	}
3279 
3280 	/* Append terminating bytes. */
3281 	if (1 < append)
3282 		*c++ = '\n';
3283 
3284 	*c = '\0';
3285 	n->val.sz = (int)(c - n->val.p);
3286 }
3287 
3288 static const char *
3289 roff_getstrn(const struct roff *r, const char *name, size_t len)
3290 {
3291 	const struct roffkv *n;
3292 	int i;
3293 
3294 	for (n = r->strtab; n; n = n->next)
3295 		if (0 == strncmp(name, n->key.p, len) &&
3296 		    '\0' == n->key.p[(int)len])
3297 			return n->val.p;
3298 
3299 	for (i = 0; i < PREDEFS_MAX; i++)
3300 		if (0 == strncmp(name, predefs[i].name, len) &&
3301 				'\0' == predefs[i].name[(int)len])
3302 			return predefs[i].str;
3303 
3304 	return NULL;
3305 }
3306 
3307 static void
3308 roff_freestr(struct roffkv *r)
3309 {
3310 	struct roffkv	 *n, *nn;
3311 
3312 	for (n = r; n; n = nn) {
3313 		free(n->key.p);
3314 		free(n->val.p);
3315 		nn = n->next;
3316 		free(n);
3317 	}
3318 }
3319 
3320 /* --- accessors and utility functions ------------------------------------ */
3321 
3322 const struct tbl_span *
3323 roff_span(const struct roff *r)
3324 {
3325 
3326 	return r->tbl ? tbl_span(r->tbl) : NULL;
3327 }
3328 
3329 const struct eqn *
3330 roff_eqn(const struct roff *r)
3331 {
3332 
3333 	return r->last_eqn ? &r->last_eqn->eqn : NULL;
3334 }
3335 
3336 /*
3337  * Duplicate an input string, making the appropriate character
3338  * conversations (as stipulated by `tr') along the way.
3339  * Returns a heap-allocated string with all the replacements made.
3340  */
3341 char *
3342 roff_strdup(const struct roff *r, const char *p)
3343 {
3344 	const struct roffkv *cp;
3345 	char		*res;
3346 	const char	*pp;
3347 	size_t		 ssz, sz;
3348 	enum mandoc_esc	 esc;
3349 
3350 	if (NULL == r->xmbtab && NULL == r->xtab)
3351 		return mandoc_strdup(p);
3352 	else if ('\0' == *p)
3353 		return mandoc_strdup("");
3354 
3355 	/*
3356 	 * Step through each character looking for term matches
3357 	 * (remember that a `tr' can be invoked with an escape, which is
3358 	 * a glyph but the escape is multi-character).
3359 	 * We only do this if the character hash has been initialised
3360 	 * and the string is >0 length.
3361 	 */
3362 
3363 	res = NULL;
3364 	ssz = 0;
3365 
3366 	while ('\0' != *p) {
3367 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3368 			sz = r->xtab[(int)*p].sz;
3369 			res = mandoc_realloc(res, ssz + sz + 1);
3370 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3371 			ssz += sz;
3372 			p++;
3373 			continue;
3374 		} else if ('\\' != *p) {
3375 			res = mandoc_realloc(res, ssz + 2);
3376 			res[ssz++] = *p++;
3377 			continue;
3378 		}
3379 
3380 		/* Search for term matches. */
3381 		for (cp = r->xmbtab; cp; cp = cp->next)
3382 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3383 				break;
3384 
3385 		if (NULL != cp) {
3386 			/*
3387 			 * A match has been found.
3388 			 * Append the match to the array and move
3389 			 * forward by its keysize.
3390 			 */
3391 			res = mandoc_realloc(res,
3392 			    ssz + cp->val.sz + 1);
3393 			memcpy(res + ssz, cp->val.p, cp->val.sz);
3394 			ssz += cp->val.sz;
3395 			p += (int)cp->key.sz;
3396 			continue;
3397 		}
3398 
3399 		/*
3400 		 * Handle escapes carefully: we need to copy
3401 		 * over just the escape itself, or else we might
3402 		 * do replacements within the escape itself.
3403 		 * Make sure to pass along the bogus string.
3404 		 */
3405 		pp = p++;
3406 		esc = mandoc_escape(&p, NULL, NULL);
3407 		if (ESCAPE_ERROR == esc) {
3408 			sz = strlen(pp);
3409 			res = mandoc_realloc(res, ssz + sz + 1);
3410 			memcpy(res + ssz, pp, sz);
3411 			break;
3412 		}
3413 		/*
3414 		 * We bail out on bad escapes.
3415 		 * No need to warn: we already did so when
3416 		 * roff_res() was called.
3417 		 */
3418 		sz = (int)(p - pp);
3419 		res = mandoc_realloc(res, ssz + sz + 1);
3420 		memcpy(res + ssz, pp, sz);
3421 		ssz += sz;
3422 	}
3423 
3424 	res[(int)ssz] = '\0';
3425 	return res;
3426 }
3427 
3428 int
3429 roff_getformat(const struct roff *r)
3430 {
3431 
3432 	return r->format;
3433 }
3434 
3435 /*
3436  * Find out whether a line is a macro line or not.
3437  * If it is, adjust the current position and return one; if it isn't,
3438  * return zero and don't change the current position.
3439  * If the control character has been set with `.cc', then let that grain
3440  * precedence.
3441  * This is slighly contrary to groff, where using the non-breaking
3442  * control character when `cc' has been invoked will cause the
3443  * non-breaking macro contents to be printed verbatim.
3444  */
3445 int
3446 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3447 {
3448 	int		pos;
3449 
3450 	pos = *ppos;
3451 
3452 	if (0 != r->control && cp[pos] == r->control)
3453 		pos++;
3454 	else if (0 != r->control)
3455 		return 0;
3456 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3457 		pos += 2;
3458 	else if ('.' == cp[pos] || '\'' == cp[pos])
3459 		pos++;
3460 	else
3461 		return 0;
3462 
3463 	while (' ' == cp[pos] || '\t' == cp[pos])
3464 		pos++;
3465 
3466 	*ppos = pos;
3467 	return 1;
3468 }
3469