xref: /illumos-gate/usr/src/cmd/mandoc/roff.c (revision fca543ca45b12c44a243625bce68b645ba8ed791)
1 /*	$Id: roff.c,v 1.189 2013/12/30 18:44:06 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31 
32 /* Maximum number of nested if-else conditionals. */
33 #define	RSTACK_MAX	128
34 
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define	EXPAND_LIMIT	1000
37 
/*
 * Tokens for the requests handled in this file.
 * The enumerators below ROFF_MAX are used as indices into the roffs[]
 * table (see roffhash_find(), which turns a table position back into
 * a token), so their order has to match the order of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock, /* block closer; table name "." */
	ROFF_ccond, /* conditional closer; table name "\\}" */
	ROFF_USERDEF, /* user-defined macro, see roff_parse() */
	ROFF_MAX /* table size; also returned for "no such request" */
};
77 
/*
 * Outcome of a conditional.  The text and sub handlers of the
 * conditional requests return ROFF_IGN for lines seen under
 * ROFFRULE_DENY and let lines through under ROFFRULE_ALLOW.
 */
enum	roffrule {
	ROFFRULE_DENY,
	ROFFRULE_ALLOW
};
82 
83 /*
84  * An incredibly-simple string buffer.
85  */
86 struct	roffstr {
87 	char		*p; /* nil-terminated buffer */
88 	size_t		 sz; /* saved strlen(p) */
89 };
90 
91 /*
92  * A key-value roffstr pair as part of a singly-linked list.
93  */
94 struct	roffkv {
95 	struct roffstr	 key;
96 	struct roffstr	 val;
97 	struct roffkv	*next; /* next in list */
98 };
99 
100 /*
101  * A single number register as part of a singly-linked list.
102  */
103 struct	roffreg {
104 	struct roffstr	 key;
105 	int		 val;
106 	struct roffreg	*next;
107 };
108 
/*
 * State of one roff parser, created by roff_alloc(), cleared for
 * reuse by roff_reset() and destroyed by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 if empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
128 
/*
 * One open block or conditional on the instruction stack.
 * Nodes are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees `name' and `end'.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
139 
/*
 * The parameter list shared by all request handlers, so that they
 * can be stored in and called through a single function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
		 	 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler as stored in struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
150 
/*
 * Description of one built-in request: its name, its three
 * handlers, and the link used to chain it into a hash bucket.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* values */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next in hash chain, see roffhash_init() */
};
160 
161 struct	predef {
162 	const char	*name; /* predefined input name */
163 	const char	*str; /* replacement symbol */
164 };
165 
166 #define	PREDEF(__name, __str) \
167 	{ (__name), (__str) },
168 
169 static	enum rofft	 roffhash_find(const char *, size_t);
170 static	void		 roffhash_init(void);
171 static	void		 roffnode_cleanscope(struct roff *);
172 static	void		 roffnode_pop(struct roff *);
173 static	void		 roffnode_push(struct roff *, enum rofft,
174 				const char *, int, int);
175 static	enum rofferr	 roff_block(ROFF_ARGS);
176 static	enum rofferr	 roff_block_text(ROFF_ARGS);
177 static	enum rofferr	 roff_block_sub(ROFF_ARGS);
178 static	enum rofferr	 roff_cblock(ROFF_ARGS);
179 static	enum rofferr	 roff_cc(ROFF_ARGS);
180 static	enum rofferr	 roff_ccond(ROFF_ARGS);
181 static	enum rofferr	 roff_cond(ROFF_ARGS);
182 static	enum rofferr	 roff_cond_text(ROFF_ARGS);
183 static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
184 static	enum rofferr	 roff_ds(ROFF_ARGS);
185 static	enum roffrule	 roff_evalcond(const char *, int *);
186 static	void		 roff_free1(struct roff *);
187 static	void		 roff_freereg(struct roffreg *);
188 static	void		 roff_freestr(struct roffkv *);
189 static	char		*roff_getname(struct roff *, char **, int, int);
190 static	int		 roff_getnum(const char *, int *, int *);
191 static	int		 roff_getop(const char *, int *, char *);
192 static	int		 roff_getregn(const struct roff *,
193 				const char *, size_t);
194 static	const char	*roff_getstrn(const struct roff *,
195 				const char *, size_t);
196 static	enum rofferr	 roff_it(ROFF_ARGS);
197 static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
198 static	enum rofferr	 roff_nr(ROFF_ARGS);
199 static	void		 roff_openeqn(struct roff *, const char *,
200 				int, int, const char *);
201 static	enum rofft	 roff_parse(struct roff *, const char *, int *);
202 static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
203 static	enum rofferr	 roff_res(struct roff *,
204 				char **, size_t *, int, int);
205 static	enum rofferr	 roff_rm(ROFF_ARGS);
206 static	void		 roff_setstr(struct roff *,
207 				const char *, const char *, int);
208 static	void		 roff_setstrn(struct roffkv **, const char *,
209 				size_t, const char *, size_t, int);
210 static	enum rofferr	 roff_so(ROFF_ARGS);
211 static	enum rofferr	 roff_tr(ROFF_ARGS);
212 static	enum rofferr	 roff_Dd(ROFF_ARGS);
213 static	enum rofferr	 roff_TH(ROFF_ARGS);
214 static	enum rofferr	 roff_TE(ROFF_ARGS);
215 static	enum rofferr	 roff_TS(ROFF_ARGS);
216 static	enum rofferr	 roff_EQ(ROFF_ARGS);
217 static	enum rofferr	 roff_EN(ROFF_ARGS);
218 static	enum rofferr	 roff_T_(ROFF_ARGS);
219 static	enum rofferr	 roff_userdef(ROFF_ARGS);
220 
/*
 * See roffhash_find().
 * Request names are hashed on their first byte, which has to lie in
 * the range ASCII_LO to ASCII_HI inclusive; there is one bucket per
 * byte value in that range.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the chains run through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
228 
/*
 * The built-in requests, in the order of enum rofft: the table is
 * indexed by token, and roffhash_find() derives a token from the
 * position of an entry.  Each entry lists the request name, the
 * proc, text and sub handlers, the flags, and the hash chain link,
 * which is filled in by roffhash_init().  The last entry, the
 * ROFF_USERDEF slot, has no name and is not entered into the hash.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
267 
268 const	char *const __mdoc_reserved[] = {
269 	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
270 	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
271 	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
272 	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
273 	"Ds", "Dt", "Dv", "Dx", "D1",
274 	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
275 	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
276 	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
277 	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
278 	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
279 	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
280 	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
281 	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
282 	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
283 	"Ss", "St", "Sx", "Sy",
284 	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
285 	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
286 	"%P", "%Q", "%R", "%T", "%U", "%V",
287 	NULL
288 };
289 
290 const	char *const __man_reserved[] = {
291 	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
292 	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
293 	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
294 	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
295 	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
296 	NULL
297 };
298 
299 /* Array of injected predefined strings. */
300 #define	PREDEFS_MAX	 38
301 static	const struct predef predefs[PREDEFS_MAX] = {
302 #include "predefs.in"
303 };
304 
/*
 * See roffhash_find().
 * Bucket index of a name: its first byte, relative to ASCII_LO.
 * The argument is parenthesized so that any pointer expression,
 * not just a plain identifier, indexes correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
307 
/*
 * Input line trap state, consumed by roff_parsetext(): the trap
 * fires on the text line seen while roffit_lines is 1, and the
 * counter is decremented on each text line while it is larger.
 * roff_parsetext() frees roffit_macro when the trap fires.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
310 
311 static void
312 roffhash_init(void)
313 {
314 	struct roffmac	 *n;
315 	int		  buc, i;
316 
317 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
318 		assert(roffs[i].name[0] >= ASCII_LO);
319 		assert(roffs[i].name[0] <= ASCII_HI);
320 
321 		buc = ROFF_HASH(roffs[i].name);
322 
323 		if (NULL != (n = hash[buc])) {
324 			for ( ; n->next; n = n->next)
325 				/* Do nothing. */ ;
326 			n->next = &roffs[i];
327 		} else
328 			hash[buc] = &roffs[i];
329 	}
330 }
331 
332 /*
333  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
334  * the nil-terminated string name could be found.
335  */
336 static enum rofft
337 roffhash_find(const char *p, size_t s)
338 {
339 	int		 buc;
340 	struct roffmac	*n;
341 
342 	/*
343 	 * libroff has an extremely simple hashtable, for the time
344 	 * being, which simply keys on the first character, which must
345 	 * be printable, then walks a chain.  It works well enough until
346 	 * optimised.
347 	 */
348 
349 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
350 		return(ROFF_MAX);
351 
352 	buc = ROFF_HASH(p);
353 
354 	if (NULL == (n = hash[buc]))
355 		return(ROFF_MAX);
356 	for ( ; n; n = n->next)
357 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
358 			return((enum rofft)(n - roffs));
359 
360 	return(ROFF_MAX);
361 }
362 
363 
364 /*
365  * Pop the current node off of the stack of roff instructions currently
366  * pending.
367  */
368 static void
369 roffnode_pop(struct roff *r)
370 {
371 	struct roffnode	*p;
372 
373 	assert(r->last);
374 	p = r->last;
375 
376 	r->last = r->last->parent;
377 	free(p->name);
378 	free(p->end);
379 	free(p);
380 }
381 
382 
383 /*
384  * Push a roff node onto the instruction stack.  This must later be
385  * removed with roffnode_pop().
386  */
387 static void
388 roffnode_push(struct roff *r, enum rofft tok, const char *name,
389 		int line, int col)
390 {
391 	struct roffnode	*p;
392 
393 	p = mandoc_calloc(1, sizeof(struct roffnode));
394 	p->tok = tok;
395 	if (name)
396 		p->name = mandoc_strdup(name);
397 	p->parent = r->last;
398 	p->line = line;
399 	p->col = col;
400 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
401 
402 	r->last = p;
403 }
404 
405 
406 static void
407 roff_free1(struct roff *r)
408 {
409 	struct tbl_node	*tbl;
410 	struct eqn_node	*e;
411 	int		 i;
412 
413 	while (NULL != (tbl = r->first_tbl)) {
414 		r->first_tbl = tbl->next;
415 		tbl_free(tbl);
416 	}
417 
418 	r->first_tbl = r->last_tbl = r->tbl = NULL;
419 
420 	while (NULL != (e = r->first_eqn)) {
421 		r->first_eqn = e->next;
422 		eqn_free(e);
423 	}
424 
425 	r->first_eqn = r->last_eqn = r->eqn = NULL;
426 
427 	while (r->last)
428 		roffnode_pop(r);
429 
430 	roff_freestr(r->strtab);
431 	roff_freestr(r->xmbtab);
432 
433 	r->strtab = r->xmbtab = NULL;
434 
435 	roff_freereg(r->regtab);
436 
437 	r->regtab = NULL;
438 
439 	if (r->xtab)
440 		for (i = 0; i < 128; i++)
441 			free(r->xtab[i].p);
442 
443 	free(r->xtab);
444 	r->xtab = NULL;
445 }
446 
447 void
448 roff_reset(struct roff *r)
449 {
450 	int		 i;
451 
452 	roff_free1(r);
453 
454 	r->control = 0;
455 
456 	for (i = 0; i < PREDEFS_MAX; i++)
457 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
458 }
459 
460 
/*
 * Destroy a parser context: release all data it refers to,
 * then the context itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
468 
469 
470 struct roff *
471 roff_alloc(enum mparset type, struct mparse *parse)
472 {
473 	struct roff	*r;
474 	int		 i;
475 
476 	r = mandoc_calloc(1, sizeof(struct roff));
477 	r->parsetype = type;
478 	r->parse = parse;
479 	r->rstackpos = -1;
480 
481 	roffhash_init();
482 
483 	for (i = 0; i < PREDEFS_MAX; i++)
484 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
485 
486 	return(r);
487 }
488 
/*
 * In the current line, expand user-defined strings ("\*")
 * and references to number registers ("\n").
 * Also check the syntax of other escape sequences.
 *
 * Scanning starts at offset pos of *bufp.  For each expansion a new
 * buffer is allocated, the old one is freed, and *bufp and *szp are
 * updated; scanning then restarts at the offset of the expanded
 * escape, so the replacement text is scanned for escapes as well.
 *
 * Returns ROFF_IGN when more than EXPAND_LIMIT expansions were
 * needed for this line and ROFF_CONT in all other cases, including
 * after a malformed escape has been reported.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		/* A backslash at the very end of the line: nothing to do. */
		if ('\0' == *cp)
			return(ROFF_CONT);

		/*
		 * Use res to remember the kind of escape:
		 * NULL for a string, ubuf for a number register.
		 */
		switch (*cp) {
		case ('*'):
			res = NULL;
			break;
		case ('n'):
			res = ubuf;
			break;
		default:
			/* Some other escape: step over it if it is valid. */
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		/* maxl of 0 means: the name runs up to the closing ']'. */
		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			/* The line ends before the name is complete. */
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		/* An undefined string is reported and expands to nothing. */
		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/* Remember where to resume scanning in the new buffer. */
		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		/*
		 * Copy the text in front of the escape, then the
		 * replacement, then the text after the name; for a
		 * bracketed name, also step over the closing ']'.
		 */
		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		/* stesc, stnam and cp point into the old buffer: stale now. */
		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
622 
623 /*
624  * Process text streams:
625  * Convert all breakable hyphens into ASCII_HYPH.
626  * Decrement and spring input line trap.
627  */
628 static enum rofferr
629 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
630 {
631 	size_t		 sz;
632 	const char	*start;
633 	char		*p;
634 	int		 isz;
635 	enum mandoc_esc	 esc;
636 
637 	start = p = *bufp + pos;
638 
639 	while ('\0' != *p) {
640 		sz = strcspn(p, "-\\");
641 		p += sz;
642 
643 		if ('\0' == *p)
644 			break;
645 
646 		if ('\\' == *p) {
647 			/* Skip over escapes. */
648 			p++;
649 			esc = mandoc_escape((const char **)&p, NULL, NULL);
650 			if (ESCAPE_ERROR == esc)
651 				break;
652 			continue;
653 		} else if (p == start) {
654 			p++;
655 			continue;
656 		}
657 
658 		if (isalpha((unsigned char)p[-1]) &&
659 		    isalpha((unsigned char)p[1]))
660 			*p = ASCII_HYPH;
661 		p++;
662 	}
663 
664 	/* Spring the input line trap. */
665 	if (1 == roffit_lines) {
666 		isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
667 		if (-1 == isz) {
668 			perror(NULL);
669 			exit((int)MANDOCLEVEL_SYSERR);
670 		}
671 		free(*bufp);
672 		*bufp = p;
673 		*szp = isz + 1;
674 		*offs = 0;
675 		free(roffit_macro);
676 		roffit_lines = 0;
677 		return(ROFF_REPARSE);
678 	} else if (1 < roffit_lines)
679 		--roffit_lines;
680 	return(ROFF_CONT);
681 }
682 
683 enum rofferr
684 roff_parseln(struct roff *r, int ln, char **bufp,
685 		size_t *szp, int pos, int *offs)
686 {
687 	enum rofft	 t;
688 	enum rofferr	 e;
689 	int		 ppos, ctl;
690 
691 	/*
692 	 * Run the reserved-word filter only if we have some reserved
693 	 * words to fill in.
694 	 */
695 
696 	e = roff_res(r, bufp, szp, ln, pos);
697 	if (ROFF_IGN == e)
698 		return(e);
699 	assert(ROFF_CONT == e);
700 
701 	ppos = pos;
702 	ctl = roff_getcontrol(r, *bufp, &pos);
703 
704 	/*
705 	 * First, if a scope is open and we're not a macro, pass the
706 	 * text through the macro's filter.  If a scope isn't open and
707 	 * we're not a macro, just let it through.
708 	 * Finally, if there's an equation scope open, divert it into it
709 	 * no matter our state.
710 	 */
711 
712 	if (r->last && ! ctl) {
713 		t = r->last->tok;
714 		assert(roffs[t].text);
715 		e = (*roffs[t].text)
716 			(r, t, bufp, szp, ln, pos, pos, offs);
717 		assert(ROFF_IGN == e || ROFF_CONT == e);
718 		if (ROFF_CONT != e)
719 			return(e);
720 	}
721 	if (r->eqn)
722 		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
723 	if ( ! ctl) {
724 		if (r->tbl)
725 			return(tbl_read(r->tbl, ln, *bufp, pos));
726 		return(roff_parsetext(bufp, szp, pos, offs));
727 	}
728 
729 	/*
730 	 * If a scope is open, go to the child handler for that macro,
731 	 * as it may want to preprocess before doing anything with it.
732 	 * Don't do so if an equation is open.
733 	 */
734 
735 	if (r->last) {
736 		t = r->last->tok;
737 		assert(roffs[t].sub);
738 		return((*roffs[t].sub)
739 				(r, t, bufp, szp,
740 				 ln, ppos, pos, offs));
741 	}
742 
743 	/*
744 	 * Lastly, as we've no scope open, try to look up and execute
745 	 * the new macro.  If no macro is found, simply return and let
746 	 * the compilers handle it.
747 	 */
748 
749 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
750 		return(ROFF_CONT);
751 
752 	assert(roffs[t].proc);
753 	return((*roffs[t].proc)
754 			(r, t, bufp, szp,
755 			 ln, ppos, pos, offs));
756 }
757 
758 
759 void
760 roff_endparse(struct roff *r)
761 {
762 
763 	if (r->last)
764 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
765 				r->last->line, r->last->col, NULL);
766 
767 	if (r->eqn) {
768 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
769 				r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
770 		eqn_end(&r->eqn);
771 	}
772 
773 	if (r->tbl) {
774 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
775 				r->tbl->line, r->tbl->pos, NULL);
776 		tbl_end(&r->tbl);
777 	}
778 }
779 
780 /*
781  * Parse a roff node's type from the input buffer.  This must be in the
782  * form of ".foo xxx" in the usual way.
783  */
784 static enum rofft
785 roff_parse(struct roff *r, const char *buf, int *pos)
786 {
787 	const char	*mac;
788 	size_t		 maclen;
789 	enum rofft	 t;
790 
791 	if ('\0' == buf[*pos] || '"' == buf[*pos] ||
792 			'\t' == buf[*pos] || ' ' == buf[*pos])
793 		return(ROFF_MAX);
794 
795 	/*
796 	 * We stop the macro parse at an escape, tab, space, or nil.
797 	 * However, `\}' is also a valid macro, so make sure we don't
798 	 * clobber it by seeing the `\' as the end of token.
799 	 */
800 
801 	mac = buf + *pos;
802 	maclen = strcspn(mac + 1, " \\\t\0") + 1;
803 
804 	t = (r->current_string = roff_getstrn(r, mac, maclen))
805 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
806 
807 	*pos += (int)maclen;
808 
809 	while (buf[*pos] && ' ' == buf[*pos])
810 		(*pos)++;
811 
812 	return(t);
813 }
814 
815 /* ARGSUSED */
816 static enum rofferr
817 roff_cblock(ROFF_ARGS)
818 {
819 
820 	/*
821 	 * A block-close `..' should only be invoked as a child of an
822 	 * ignore macro, otherwise raise a warning and just ignore it.
823 	 */
824 
825 	if (NULL == r->last) {
826 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
827 		return(ROFF_IGN);
828 	}
829 
830 	switch (r->last->tok) {
831 	case (ROFF_am):
832 		/* FALLTHROUGH */
833 	case (ROFF_ami):
834 		/* FALLTHROUGH */
835 	case (ROFF_am1):
836 		/* FALLTHROUGH */
837 	case (ROFF_de):
838 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
839 		/* FALLTHROUGH */
840 	case (ROFF_dei):
841 		/* FALLTHROUGH */
842 	case (ROFF_ig):
843 		break;
844 	default:
845 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
846 		return(ROFF_IGN);
847 	}
848 
849 	if ((*bufp)[pos])
850 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
851 
852 	roffnode_pop(r);
853 	roffnode_cleanscope(r);
854 	return(ROFF_IGN);
855 
856 }
857 
858 
859 static void
860 roffnode_cleanscope(struct roff *r)
861 {
862 
863 	while (r->last) {
864 		if (--r->last->endspan != 0)
865 			break;
866 		roffnode_pop(r);
867 	}
868 }
869 
870 
871 /* ARGSUSED */
872 static enum rofferr
873 roff_ccond(ROFF_ARGS)
874 {
875 
876 	if (NULL == r->last) {
877 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
878 		return(ROFF_IGN);
879 	}
880 
881 	switch (r->last->tok) {
882 	case (ROFF_el):
883 		/* FALLTHROUGH */
884 	case (ROFF_ie):
885 		/* FALLTHROUGH */
886 	case (ROFF_if):
887 		break;
888 	default:
889 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
890 		return(ROFF_IGN);
891 	}
892 
893 	if (r->last->endspan > -1) {
894 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
895 		return(ROFF_IGN);
896 	}
897 
898 	if ((*bufp)[pos])
899 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
900 
901 	roffnode_pop(r);
902 	roffnode_cleanscope(r);
903 	return(ROFF_IGN);
904 }
905 
906 
907 /* ARGSUSED */
908 static enum rofferr
909 roff_block(ROFF_ARGS)
910 {
911 	int		sv;
912 	size_t		sz;
913 	char		*name;
914 
915 	name = NULL;
916 
917 	if (ROFF_ig != tok) {
918 		if ('\0' == (*bufp)[pos]) {
919 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
920 			return(ROFF_IGN);
921 		}
922 
923 		/*
924 		 * Re-write `de1', since we don't really care about
925 		 * groff's strange compatibility mode, into `de'.
926 		 */
927 
928 		if (ROFF_de1 == tok)
929 			tok = ROFF_de;
930 		if (ROFF_de == tok)
931 			name = *bufp + pos;
932 		else
933 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
934 			    roffs[tok].name);
935 
936 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
937 			pos++;
938 
939 		while (isspace((unsigned char)(*bufp)[pos]))
940 			(*bufp)[pos++] = '\0';
941 	}
942 
943 	roffnode_push(r, tok, name, ln, ppos);
944 
945 	/*
946 	 * At the beginning of a `de' macro, clear the existing string
947 	 * with the same name, if there is one.  New content will be
948 	 * added from roff_block_text() in multiline mode.
949 	 */
950 
951 	if (ROFF_de == tok)
952 		roff_setstr(r, name, "", 0);
953 
954 	if ('\0' == (*bufp)[pos])
955 		return(ROFF_IGN);
956 
957 	/* If present, process the custom end-of-line marker. */
958 
959 	sv = pos;
960 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
961 		pos++;
962 
963 	/*
964 	 * Note: groff does NOT like escape characters in the input.
965 	 * Instead of detecting this, we're just going to let it fly and
966 	 * to hell with it.
967 	 */
968 
969 	assert(pos > sv);
970 	sz = (size_t)(pos - sv);
971 
972 	if (1 == sz && '.' == (*bufp)[sv])
973 		return(ROFF_IGN);
974 
975 	r->last->end = mandoc_malloc(sz + 1);
976 
977 	memcpy(r->last->end, *bufp + sv, sz);
978 	r->last->end[(int)sz] = '\0';
979 
980 	if ((*bufp)[pos])
981 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
982 
983 	return(ROFF_IGN);
984 }
985 
986 
987 /* ARGSUSED */
988 static enum rofferr
989 roff_block_sub(ROFF_ARGS)
990 {
991 	enum rofft	t;
992 	int		i, j;
993 
994 	/*
995 	 * First check whether a custom macro exists at this level.  If
996 	 * it does, then check against it.  This is some of groff's
997 	 * stranger behaviours.  If we encountered a custom end-scope
998 	 * tag and that tag also happens to be a "real" macro, then we
999 	 * need to try interpreting it again as a real macro.  If it's
1000 	 * not, then return ignore.  Else continue.
1001 	 */
1002 
1003 	if (r->last->end) {
1004 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1005 			if ((*bufp)[i] != r->last->end[j])
1006 				break;
1007 
1008 		if ('\0' == r->last->end[j] &&
1009 				('\0' == (*bufp)[i] ||
1010 				 ' ' == (*bufp)[i] ||
1011 				 '\t' == (*bufp)[i])) {
1012 			roffnode_pop(r);
1013 			roffnode_cleanscope(r);
1014 
1015 			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1016 				i++;
1017 
1018 			pos = i;
1019 			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1020 				return(ROFF_RERUN);
1021 			return(ROFF_IGN);
1022 		}
1023 	}
1024 
1025 	/*
1026 	 * If we have no custom end-query or lookup failed, then try
1027 	 * pulling it out of the hashtable.
1028 	 */
1029 
1030 	t = roff_parse(r, *bufp, &pos);
1031 
1032 	/*
1033 	 * Macros other than block-end are only significant
1034 	 * in `de' blocks; elsewhere, simply throw them away.
1035 	 */
1036 	if (ROFF_cblock != t) {
1037 		if (ROFF_de == tok)
1038 			roff_setstr(r, r->last->name, *bufp + ppos, 1);
1039 		return(ROFF_IGN);
1040 	}
1041 
1042 	assert(roffs[t].proc);
1043 	return((*roffs[t].proc)(r, t, bufp, szp,
1044 				ln, ppos, pos, offs));
1045 }
1046 
1047 
1048 /* ARGSUSED */
1049 static enum rofferr
1050 roff_block_text(ROFF_ARGS)
1051 {
1052 
1053 	if (ROFF_de == tok)
1054 		roff_setstr(r, r->last->name, *bufp + pos, 1);
1055 
1056 	return(ROFF_IGN);
1057 }
1058 
1059 
1060 /* ARGSUSED */
1061 static enum rofferr
1062 roff_cond_sub(ROFF_ARGS)
1063 {
1064 	enum rofft	 t;
1065 	enum roffrule	 rr;
1066 	char		*ep;
1067 
1068 	rr = r->last->rule;
1069 	roffnode_cleanscope(r);
1070 	t = roff_parse(r, *bufp, &pos);
1071 
1072 	/*
1073 	 * Fully handle known macros when they are structurally
1074 	 * required or when the conditional evaluated to true.
1075 	 */
1076 
1077 	if ((ROFF_MAX != t) &&
1078 	    (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1079 	     ROFFMAC_STRUCT & roffs[t].flags)) {
1080 		assert(roffs[t].proc);
1081 		return((*roffs[t].proc)(r, t, bufp, szp,
1082 					ln, ppos, pos, offs));
1083 	}
1084 
1085 	/* Always check for the closing delimiter `\}'. */
1086 
1087 	ep = &(*bufp)[pos];
1088 	while (NULL != (ep = strchr(ep, '\\'))) {
1089 		if ('}' != *(++ep))
1090 			continue;
1091 
1092 		/*
1093 		 * If we're at the end of line, then just chop
1094 		 * off the \} and resize the buffer.
1095 		 * If we aren't, then convert it to spaces.
1096 		 */
1097 
1098 		if ('\0' == *(ep + 1)) {
1099 			*--ep = '\0';
1100 			*szp -= 2;
1101 		} else
1102 			*(ep - 1) = *ep = ' ';
1103 
1104 		roff_ccond(r, ROFF_ccond, bufp, szp,
1105 				ln, pos, pos + 2, offs);
1106 		break;
1107 	}
1108 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1109 }
1110 
1111 /* ARGSUSED */
1112 static enum rofferr
1113 roff_cond_text(ROFF_ARGS)
1114 {
1115 	char		*ep;
1116 	enum roffrule	 rr;
1117 
1118 	rr = r->last->rule;
1119 	roffnode_cleanscope(r);
1120 
1121 	ep = &(*bufp)[pos];
1122 	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1123 		ep++;
1124 		if ('}' != *ep)
1125 			continue;
1126 		*ep = '&';
1127 		roff_ccond(r, ROFF_ccond, bufp, szp,
1128 				ln, pos, pos + 2, offs);
1129 	}
1130 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1131 }
1132 
/*
 * Parse a decimal integer with an optional leading minus sign,
 * starting at v[*pos].
 * On success, store the value in *res, move *pos beyond the last
 * digit and return 1.  If there is no digit at the expected place,
 * return 0 and leave *pos alone; *res is 0 in that case.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/*
	 * Shift the digits seen so far by one decimal place and add
	 * the new digit.  This has to be a plain assignment: adding
	 * to the old value as well would multiply it by eleven.
	 */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* No progress beyond the optional sign: not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1154 
/*
 * Parse a comparison operator starting at v[*pos].
 * Recognised are "=", "==", "<", "<=", ">" and ">=".
 * On success, move *pos beyond the operator, store a one-character
 * code in *res and return that code: the operator character itself,
 * except that 'l' stands for "<=" and 'g' for ">=".
 * If there is no operator, return 0 and leave *pos alone; *res then
 * holds the character found at v[*pos].
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	/*
	 * Check for a trailing '=', but do not look beyond the
	 * terminating NUL when the string ends right here.
	 */
	e = '\0' != *res && '=' == v[*pos + 1];

	switch (*res) {
	case '=':
		break;
	case '>':
		if (e)
			*res = 'g';
		break;
	case '<':
		if (e)
			*res = 'l';
		break;
	default:
		return(0);
	}

	*pos += 1 + e;

	return(*res);
}
1182 
1183 static enum roffrule
1184 roff_evalcond(const char *v, int *pos)
1185 {
1186 	int	 not, lh, rh;
1187 	char	 op;
1188 
1189 	switch (v[*pos]) {
1190 	case ('n'):
1191 		(*pos)++;
1192 		return(ROFFRULE_ALLOW);
1193 	case ('e'):
1194 		/* FALLTHROUGH */
1195 	case ('o'):
1196 		/* FALLTHROUGH */
1197 	case ('t'):
1198 		(*pos)++;
1199 		return(ROFFRULE_DENY);
1200 	case ('!'):
1201 		(*pos)++;
1202 		not = 1;
1203 		break;
1204 	default:
1205 		not = 0;
1206 		break;
1207 	}
1208 
1209 	if (!roff_getnum(v, pos, &lh))
1210 		return ROFFRULE_DENY;
1211 	if (!roff_getop(v, pos, &op)) {
1212 		if (lh < 0)
1213 			lh = 0;
1214 		goto out;
1215 	}
1216 	if (!roff_getnum(v, pos, &rh))
1217 		return ROFFRULE_DENY;
1218 	switch (op) {
1219 	case 'g':
1220 		lh = lh >= rh;
1221 		break;
1222 	case 'l':
1223 		lh = lh <= rh;
1224 		break;
1225 	case '=':
1226 		lh = lh == rh;
1227 		break;
1228 	case '>':
1229 		lh = lh > rh;
1230 		break;
1231 	case '<':
1232 		lh = lh < rh;
1233 		break;
1234 	default:
1235 		return ROFFRULE_DENY;
1236 	}
1237 out:
1238 	if (not)
1239 		lh = !lh;
1240 	return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1241 }
1242 
1243 /* ARGSUSED */
1244 static enum rofferr
1245 roff_line_ignore(ROFF_ARGS)
1246 {
1247 
1248 	return(ROFF_IGN);
1249 }
1250 
1251 /* ARGSUSED */
1252 static enum rofferr
1253 roff_cond(ROFF_ARGS)
1254 {
1255 
1256 	roffnode_push(r, tok, NULL, ln, ppos);
1257 
1258 	/*
1259 	 * An `.el' has no conditional body: it will consume the value
1260 	 * of the current rstack entry set in prior `ie' calls or
1261 	 * defaults to DENY.
1262 	 *
1263 	 * If we're not an `el', however, then evaluate the conditional.
1264 	 */
1265 
1266 	r->last->rule = ROFF_el == tok ?
1267 		(r->rstackpos < 0 ?
1268 		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1269 		roff_evalcond(*bufp, &pos);
1270 
1271 	/*
1272 	 * An if-else will put the NEGATION of the current evaluated
1273 	 * conditional into the stack of rules.
1274 	 */
1275 
1276 	if (ROFF_ie == tok) {
1277 		if (r->rstackpos == RSTACK_MAX - 1) {
1278 			mandoc_msg(MANDOCERR_MEM,
1279 				r->parse, ln, ppos, NULL);
1280 			return(ROFF_ERR);
1281 		}
1282 		r->rstack[++r->rstackpos] =
1283 			ROFFRULE_DENY == r->last->rule ?
1284 			ROFFRULE_ALLOW : ROFFRULE_DENY;
1285 	}
1286 
1287 	/* If the parent has false as its rule, then so do we. */
1288 
1289 	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1290 		r->last->rule = ROFFRULE_DENY;
1291 
1292 	/*
1293 	 * Determine scope.
1294 	 * If there is nothing on the line after the conditional,
1295 	 * not even whitespace, use next-line scope.
1296 	 */
1297 
1298 	if ('\0' == (*bufp)[pos]) {
1299 		r->last->endspan = 2;
1300 		goto out;
1301 	}
1302 
1303 	while (' ' == (*bufp)[pos])
1304 		pos++;
1305 
1306 	/* An opening brace requests multiline scope. */
1307 
1308 	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1309 		r->last->endspan = -1;
1310 		pos += 2;
1311 		goto out;
1312 	}
1313 
1314 	/*
1315 	 * Anything else following the conditional causes
1316 	 * single-line scope.  Warn if the scope contains
1317 	 * nothing but trailing whitespace.
1318 	 */
1319 
1320 	if ('\0' == (*bufp)[pos])
1321 		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1322 
1323 	r->last->endspan = 1;
1324 
1325 out:
1326 	*offs = pos;
1327 	return(ROFF_RERUN);
1328 }
1329 
1330 
1331 /* ARGSUSED */
1332 static enum rofferr
1333 roff_ds(ROFF_ARGS)
1334 {
1335 	char		*name, *string;
1336 
1337 	/*
1338 	 * A symbol is named by the first word following the macro
1339 	 * invocation up to a space.  Its value is anything after the
1340 	 * name's trailing whitespace and optional double-quote.  Thus,
1341 	 *
1342 	 *  [.ds foo "bar  "     ]
1343 	 *
1344 	 * will have `bar  "     ' as its value.
1345 	 */
1346 
1347 	string = *bufp + pos;
1348 	name = roff_getname(r, &string, ln, pos);
1349 	if ('\0' == *name)
1350 		return(ROFF_IGN);
1351 
1352 	/* Read past initial double-quote. */
1353 	if ('"' == *string)
1354 		string++;
1355 
1356 	/* The rest is the value. */
1357 	roff_setstr(r, name, string, 0);
1358 	return(ROFF_IGN);
1359 }
1360 
1361 void
1362 roff_setreg(struct roff *r, const char *name, int val, char sign)
1363 {
1364 	struct roffreg	*reg;
1365 
1366 	/* Search for an existing register with the same name. */
1367 	reg = r->regtab;
1368 
1369 	while (reg && strcmp(name, reg->key.p))
1370 		reg = reg->next;
1371 
1372 	if (NULL == reg) {
1373 		/* Create a new register. */
1374 		reg = mandoc_malloc(sizeof(struct roffreg));
1375 		reg->key.p = mandoc_strdup(name);
1376 		reg->key.sz = strlen(name);
1377 		reg->val = 0;
1378 		reg->next = r->regtab;
1379 		r->regtab = reg;
1380 	}
1381 
1382 	if ('+' == sign)
1383 		reg->val += val;
1384 	else if ('-' == sign)
1385 		reg->val -= val;
1386 	else
1387 		reg->val = val;
1388 }
1389 
1390 int
1391 roff_getreg(const struct roff *r, const char *name)
1392 {
1393 	struct roffreg	*reg;
1394 
1395 	for (reg = r->regtab; reg; reg = reg->next)
1396 		if (0 == strcmp(name, reg->key.p))
1397 			return(reg->val);
1398 
1399 	return(0);
1400 }
1401 
1402 static int
1403 roff_getregn(const struct roff *r, const char *name, size_t len)
1404 {
1405 	struct roffreg	*reg;
1406 
1407 	for (reg = r->regtab; reg; reg = reg->next)
1408 		if (len == reg->key.sz &&
1409 		    0 == strncmp(name, reg->key.p, len))
1410 			return(reg->val);
1411 
1412 	return(0);
1413 }
1414 
1415 static void
1416 roff_freereg(struct roffreg *reg)
1417 {
1418 	struct roffreg	*old_reg;
1419 
1420 	while (NULL != reg) {
1421 		free(reg->key.p);
1422 		old_reg = reg;
1423 		reg = reg->next;
1424 		free(old_reg);
1425 	}
1426 }
1427 
1428 /* ARGSUSED */
1429 static enum rofferr
1430 roff_nr(ROFF_ARGS)
1431 {
1432 	const char	*key;
1433 	char		*val;
1434 	size_t		 sz;
1435 	int		 iv;
1436 	char		 sign;
1437 
1438 	val = *bufp + pos;
1439 	key = roff_getname(r, &val, ln, pos);
1440 
1441 	sign = *val;
1442 	if ('+' == sign || '-' == sign)
1443 		val++;
1444 
1445 	sz = strspn(val, "0123456789");
1446 	iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1447 
1448 	roff_setreg(r, key, iv, sign);
1449 
1450 	return(ROFF_IGN);
1451 }
1452 
1453 /* ARGSUSED */
1454 static enum rofferr
1455 roff_rm(ROFF_ARGS)
1456 {
1457 	const char	 *name;
1458 	char		 *cp;
1459 
1460 	cp = *bufp + pos;
1461 	while ('\0' != *cp) {
1462 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1463 		if ('\0' != *name)
1464 			roff_setstr(r, name, NULL, 0);
1465 	}
1466 	return(ROFF_IGN);
1467 }
1468 
1469 /* ARGSUSED */
1470 static enum rofferr
1471 roff_it(ROFF_ARGS)
1472 {
1473 	char		*cp;
1474 	size_t		 len;
1475 	int		 iv;
1476 
1477 	/* Parse the number of lines. */
1478 	cp = *bufp + pos;
1479 	len = strcspn(cp, " \t");
1480 	cp[len] = '\0';
1481 	if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1482 		mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1483 				ln, ppos, *bufp + 1);
1484 		return(ROFF_IGN);
1485 	}
1486 	cp += len + 1;
1487 
1488 	/* Arm the input line trap. */
1489 	roffit_lines = iv;
1490 	roffit_macro = mandoc_strdup(cp);
1491 	return(ROFF_IGN);
1492 }
1493 
1494 /* ARGSUSED */
1495 static enum rofferr
1496 roff_Dd(ROFF_ARGS)
1497 {
1498 	const char *const	*cp;
1499 
1500 	if (MPARSE_MDOC != r->parsetype)
1501 		for (cp = __mdoc_reserved; *cp; cp++)
1502 			roff_setstr(r, *cp, NULL, 0);
1503 
1504 	return(ROFF_CONT);
1505 }
1506 
1507 /* ARGSUSED */
1508 static enum rofferr
1509 roff_TH(ROFF_ARGS)
1510 {
1511 	const char *const	*cp;
1512 
1513 	if (MPARSE_MDOC != r->parsetype)
1514 		for (cp = __man_reserved; *cp; cp++)
1515 			roff_setstr(r, *cp, NULL, 0);
1516 
1517 	return(ROFF_CONT);
1518 }
1519 
1520 /* ARGSUSED */
1521 static enum rofferr
1522 roff_TE(ROFF_ARGS)
1523 {
1524 
1525 	if (NULL == r->tbl)
1526 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1527 	else
1528 		tbl_end(&r->tbl);
1529 
1530 	return(ROFF_IGN);
1531 }
1532 
1533 /* ARGSUSED */
1534 static enum rofferr
1535 roff_T_(ROFF_ARGS)
1536 {
1537 
1538 	if (NULL == r->tbl)
1539 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1540 	else
1541 		tbl_restart(ppos, ln, r->tbl);
1542 
1543 	return(ROFF_IGN);
1544 }
1545 
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any.  Returns 1 if an equation was open
 * and eqn_end() reported ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn));
}
#endif
1554 
1555 static void
1556 roff_openeqn(struct roff *r, const char *name, int line,
1557 		int offs, const char *buf)
1558 {
1559 	struct eqn_node *e;
1560 	int		 poff;
1561 
1562 	assert(NULL == r->eqn);
1563 	e = eqn_alloc(name, offs, line, r->parse);
1564 
1565 	if (r->last_eqn)
1566 		r->last_eqn->next = e;
1567 	else
1568 		r->first_eqn = r->last_eqn = e;
1569 
1570 	r->eqn = r->last_eqn = e;
1571 
1572 	if (buf) {
1573 		poff = 0;
1574 		eqn_read(&r->eqn, line, buf, offs, &poff);
1575 	}
1576 }
1577 
1578 /* ARGSUSED */
1579 static enum rofferr
1580 roff_EQ(ROFF_ARGS)
1581 {
1582 
1583 	roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1584 	return(ROFF_IGN);
1585 }
1586 
1587 /* ARGSUSED */
1588 static enum rofferr
1589 roff_EN(ROFF_ARGS)
1590 {
1591 
1592 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1593 	return(ROFF_IGN);
1594 }
1595 
1596 /* ARGSUSED */
1597 static enum rofferr
1598 roff_TS(ROFF_ARGS)
1599 {
1600 	struct tbl_node	*tbl;
1601 
1602 	if (r->tbl) {
1603 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1604 		tbl_end(&r->tbl);
1605 	}
1606 
1607 	tbl = tbl_alloc(ppos, ln, r->parse);
1608 
1609 	if (r->last_tbl)
1610 		r->last_tbl->next = tbl;
1611 	else
1612 		r->first_tbl = r->last_tbl = tbl;
1613 
1614 	r->tbl = r->last_tbl = tbl;
1615 	return(ROFF_IGN);
1616 }
1617 
1618 /* ARGSUSED */
1619 static enum rofferr
1620 roff_cc(ROFF_ARGS)
1621 {
1622 	const char	*p;
1623 
1624 	p = *bufp + pos;
1625 
1626 	if ('\0' == *p || '.' == (r->control = *p++))
1627 		r->control = 0;
1628 
1629 	if ('\0' != *p)
1630 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1631 
1632 	return(ROFF_IGN);
1633 }
1634 
1635 /* ARGSUSED */
1636 static enum rofferr
1637 roff_tr(ROFF_ARGS)
1638 {
1639 	const char	*p, *first, *second;
1640 	size_t		 fsz, ssz;
1641 	enum mandoc_esc	 esc;
1642 
1643 	p = *bufp + pos;
1644 
1645 	if ('\0' == *p) {
1646 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1647 		return(ROFF_IGN);
1648 	}
1649 
1650 	while ('\0' != *p) {
1651 		fsz = ssz = 1;
1652 
1653 		first = p++;
1654 		if ('\\' == *first) {
1655 			esc = mandoc_escape(&p, NULL, NULL);
1656 			if (ESCAPE_ERROR == esc) {
1657 				mandoc_msg
1658 					(MANDOCERR_BADESCAPE, r->parse,
1659 					 ln, (int)(p - *bufp), NULL);
1660 				return(ROFF_IGN);
1661 			}
1662 			fsz = (size_t)(p - first);
1663 		}
1664 
1665 		second = p++;
1666 		if ('\\' == *second) {
1667 			esc = mandoc_escape(&p, NULL, NULL);
1668 			if (ESCAPE_ERROR == esc) {
1669 				mandoc_msg
1670 					(MANDOCERR_BADESCAPE, r->parse,
1671 					 ln, (int)(p - *bufp), NULL);
1672 				return(ROFF_IGN);
1673 			}
1674 			ssz = (size_t)(p - second);
1675 		} else if ('\0' == *second) {
1676 			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1677 					ln, (int)(p - *bufp), NULL);
1678 			second = " ";
1679 			p--;
1680 		}
1681 
1682 		if (fsz > 1) {
1683 			roff_setstrn(&r->xmbtab, first,
1684 					fsz, second, ssz, 0);
1685 			continue;
1686 		}
1687 
1688 		if (NULL == r->xtab)
1689 			r->xtab = mandoc_calloc
1690 				(128, sizeof(struct roffstr));
1691 
1692 		free(r->xtab[(int)*first].p);
1693 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1694 		r->xtab[(int)*first].sz = ssz;
1695 	}
1696 
1697 	return(ROFF_IGN);
1698 }
1699 
1700 /* ARGSUSED */
1701 static enum rofferr
1702 roff_so(ROFF_ARGS)
1703 {
1704 	char *name;
1705 
1706 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1707 
1708 	/*
1709 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1710 	 * opening anything that's not in our cwd or anything beneath
1711 	 * it.  Thus, explicitly disallow traversing up the file-system
1712 	 * or using absolute paths.
1713 	 */
1714 
1715 	name = *bufp + pos;
1716 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1717 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1718 		return(ROFF_ERR);
1719 	}
1720 
1721 	*offs = pos;
1722 	return(ROFF_SO);
1723 }
1724 
1725 /* ARGSUSED */
1726 static enum rofferr
1727 roff_userdef(ROFF_ARGS)
1728 {
1729 	const char	 *arg[9];
1730 	char		 *cp, *n1, *n2;
1731 	int		  i;
1732 
1733 	/*
1734 	 * Collect pointers to macro argument strings
1735 	 * and NUL-terminate them.
1736 	 */
1737 	cp = *bufp + pos;
1738 	for (i = 0; i < 9; i++)
1739 		arg[i] = '\0' == *cp ? "" :
1740 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1741 
1742 	/*
1743 	 * Expand macro arguments.
1744 	 */
1745 	*szp = 0;
1746 	n1 = cp = mandoc_strdup(r->current_string);
1747 	while (NULL != (cp = strstr(cp, "\\$"))) {
1748 		i = cp[2] - '1';
1749 		if (0 > i || 8 < i) {
1750 			/* Not an argument invocation. */
1751 			cp += 2;
1752 			continue;
1753 		}
1754 
1755 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1756 		n2 = mandoc_malloc(*szp);
1757 
1758 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1759 		strlcat(n2, arg[i], *szp);
1760 		strlcat(n2, cp + 3, *szp);
1761 
1762 		cp = n2 + (cp - n1);
1763 		free(n1);
1764 		n1 = n2;
1765 	}
1766 
1767 	/*
1768 	 * Replace the macro invocation
1769 	 * by the expanded macro.
1770 	 */
1771 	free(*bufp);
1772 	*bufp = n1;
1773 	if (0 == *szp)
1774 		*szp = strlen(*bufp) + 1;
1775 
1776 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1777 	   ROFF_REPARSE : ROFF_APPEND);
1778 }
1779 
1780 static char *
1781 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1782 {
1783 	char	 *name, *cp;
1784 
1785 	name = *cpp;
1786 	if ('\0' == *name)
1787 		return(name);
1788 
1789 	/* Read until end of name. */
1790 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1791 		if ('\\' != *cp)
1792 			continue;
1793 		cp++;
1794 		if ('\\' == *cp)
1795 			continue;
1796 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1797 		*cp = '\0';
1798 		name = cp;
1799 	}
1800 
1801 	/* Nil-terminate name. */
1802 	if ('\0' != *cp)
1803 		*(cp++) = '\0';
1804 
1805 	/* Read past spaces. */
1806 	while (' ' == *cp)
1807 		cp++;
1808 
1809 	*cpp = cp;
1810 	return(name);
1811 }
1812 
1813 /*
1814  * Store *string into the user-defined string called *name.
1815  * In multiline mode, append to an existing entry and append '\n';
1816  * else replace the existing entry, if there is one.
1817  * To clear an existing entry, call with (*r, *name, NULL, 0).
1818  */
1819 static void
1820 roff_setstr(struct roff *r, const char *name, const char *string,
1821 	int multiline)
1822 {
1823 
1824 	roff_setstrn(&r->strtab, name, strlen(name), string,
1825 			string ? strlen(string) : 0, multiline);
1826 }
1827 
1828 static void
1829 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1830 		const char *string, size_t stringsz, int multiline)
1831 {
1832 	struct roffkv	*n;
1833 	char		*c;
1834 	int		 i;
1835 	size_t		 oldch, newch;
1836 
1837 	/* Search for an existing string with the same name. */
1838 	n = *r;
1839 
1840 	while (n && strcmp(name, n->key.p))
1841 		n = n->next;
1842 
1843 	if (NULL == n) {
1844 		/* Create a new string table entry. */
1845 		n = mandoc_malloc(sizeof(struct roffkv));
1846 		n->key.p = mandoc_strndup(name, namesz);
1847 		n->key.sz = namesz;
1848 		n->val.p = NULL;
1849 		n->val.sz = 0;
1850 		n->next = *r;
1851 		*r = n;
1852 	} else if (0 == multiline) {
1853 		/* In multiline mode, append; else replace. */
1854 		free(n->val.p);
1855 		n->val.p = NULL;
1856 		n->val.sz = 0;
1857 	}
1858 
1859 	if (NULL == string)
1860 		return;
1861 
1862 	/*
1863 	 * One additional byte for the '\n' in multiline mode,
1864 	 * and one for the terminating '\0'.
1865 	 */
1866 	newch = stringsz + (multiline ? 2u : 1u);
1867 
1868 	if (NULL == n->val.p) {
1869 		n->val.p = mandoc_malloc(newch);
1870 		*n->val.p = '\0';
1871 		oldch = 0;
1872 	} else {
1873 		oldch = n->val.sz;
1874 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1875 	}
1876 
1877 	/* Skip existing content in the destination buffer. */
1878 	c = n->val.p + (int)oldch;
1879 
1880 	/* Append new content to the destination buffer. */
1881 	i = 0;
1882 	while (i < (int)stringsz) {
1883 		/*
1884 		 * Rudimentary roff copy mode:
1885 		 * Handle escaped backslashes.
1886 		 */
1887 		if ('\\' == string[i] && '\\' == string[i + 1])
1888 			i++;
1889 		*c++ = string[i++];
1890 	}
1891 
1892 	/* Append terminating bytes. */
1893 	if (multiline)
1894 		*c++ = '\n';
1895 
1896 	*c = '\0';
1897 	n->val.sz = (int)(c - n->val.p);
1898 }
1899 
1900 static const char *
1901 roff_getstrn(const struct roff *r, const char *name, size_t len)
1902 {
1903 	const struct roffkv *n;
1904 
1905 	for (n = r->strtab; n; n = n->next)
1906 		if (0 == strncmp(name, n->key.p, len) &&
1907 				'\0' == n->key.p[(int)len])
1908 			return(n->val.p);
1909 
1910 	return(NULL);
1911 }
1912 
1913 static void
1914 roff_freestr(struct roffkv *r)
1915 {
1916 	struct roffkv	 *n, *nn;
1917 
1918 	for (n = r; n; n = nn) {
1919 		free(n->key.p);
1920 		free(n->val.p);
1921 		nn = n->next;
1922 		free(n);
1923 	}
1924 }
1925 
1926 const struct tbl_span *
1927 roff_span(const struct roff *r)
1928 {
1929 
1930 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1931 }
1932 
1933 const struct eqn *
1934 roff_eqn(const struct roff *r)
1935 {
1936 
1937 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1938 }
1939 
1940 /*
1941  * Duplicate an input string, making the appropriate character
1942  * conversations (as stipulated by `tr') along the way.
1943  * Returns a heap-allocated string with all the replacements made.
1944  */
1945 char *
1946 roff_strdup(const struct roff *r, const char *p)
1947 {
1948 	const struct roffkv *cp;
1949 	char		*res;
1950 	const char	*pp;
1951 	size_t		 ssz, sz;
1952 	enum mandoc_esc	 esc;
1953 
1954 	if (NULL == r->xmbtab && NULL == r->xtab)
1955 		return(mandoc_strdup(p));
1956 	else if ('\0' == *p)
1957 		return(mandoc_strdup(""));
1958 
1959 	/*
1960 	 * Step through each character looking for term matches
1961 	 * (remember that a `tr' can be invoked with an escape, which is
1962 	 * a glyph but the escape is multi-character).
1963 	 * We only do this if the character hash has been initialised
1964 	 * and the string is >0 length.
1965 	 */
1966 
1967 	res = NULL;
1968 	ssz = 0;
1969 
1970 	while ('\0' != *p) {
1971 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1972 			sz = r->xtab[(int)*p].sz;
1973 			res = mandoc_realloc(res, ssz + sz + 1);
1974 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1975 			ssz += sz;
1976 			p++;
1977 			continue;
1978 		} else if ('\\' != *p) {
1979 			res = mandoc_realloc(res, ssz + 2);
1980 			res[ssz++] = *p++;
1981 			continue;
1982 		}
1983 
1984 		/* Search for term matches. */
1985 		for (cp = r->xmbtab; cp; cp = cp->next)
1986 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
1987 				break;
1988 
1989 		if (NULL != cp) {
1990 			/*
1991 			 * A match has been found.
1992 			 * Append the match to the array and move
1993 			 * forward by its keysize.
1994 			 */
1995 			res = mandoc_realloc
1996 				(res, ssz + cp->val.sz + 1);
1997 			memcpy(res + ssz, cp->val.p, cp->val.sz);
1998 			ssz += cp->val.sz;
1999 			p += (int)cp->key.sz;
2000 			continue;
2001 		}
2002 
2003 		/*
2004 		 * Handle escapes carefully: we need to copy
2005 		 * over just the escape itself, or else we might
2006 		 * do replacements within the escape itself.
2007 		 * Make sure to pass along the bogus string.
2008 		 */
2009 		pp = p++;
2010 		esc = mandoc_escape(&p, NULL, NULL);
2011 		if (ESCAPE_ERROR == esc) {
2012 			sz = strlen(pp);
2013 			res = mandoc_realloc(res, ssz + sz + 1);
2014 			memcpy(res + ssz, pp, sz);
2015 			break;
2016 		}
2017 		/*
2018 		 * We bail out on bad escapes.
2019 		 * No need to warn: we already did so when
2020 		 * roff_res() was called.
2021 		 */
2022 		sz = (int)(p - pp);
2023 		res = mandoc_realloc(res, ssz + sz + 1);
2024 		memcpy(res + ssz, pp, sz);
2025 		ssz += sz;
2026 	}
2027 
2028 	res[(int)ssz] = '\0';
2029 	return(res);
2030 }
2031 
2032 /*
2033  * Find out whether a line is a macro line or not.
2034  * If it is, adjust the current position and return one; if it isn't,
2035  * return zero and don't change the current position.
2036  * If the control character has been set with `.cc', then let that grain
2037  * precedence.
2038  * This is slighly contrary to groff, where using the non-breaking
2039  * control character when `cc' has been invoked will cause the
2040  * non-breaking macro contents to be printed verbatim.
2041  */
2042 int
2043 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2044 {
2045 	int		pos;
2046 
2047 	pos = *ppos;
2048 
2049 	if (0 != r->control && cp[pos] == r->control)
2050 		pos++;
2051 	else if (0 != r->control)
2052 		return(0);
2053 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2054 		pos += 2;
2055 	else if ('.' == cp[pos] || '\'' == cp[pos])
2056 		pos++;
2057 	else
2058 		return(0);
2059 
2060 	while (' ' == cp[pos] || '\t' == cp[pos])
2061 		pos++;
2062 
2063 	*ppos = pos;
2064 	return(1);
2065 }
2066