xref: /freebsd/contrib/mandoc/roff.c (revision 80c12959679ab203459dc20eb9ece3a7328b7de5)
1 /* $Id: roff.c,v 1.405 2025/04/08 14:05:09 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2025 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42 
/*
 * Upper bound on the number of string expansions performed for a
 * single input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000

/*
 * Bit flags describing how a macro or string came to be defined.
 */
/* Defined by the user. */
#define	ROFFDEF_USER	(1 << 1)
/* Predefined. */
#define	ROFFDEF_PRE	(1 << 2)
/* Standard macro that has been renamed. */
#define	ROFFDEF_REN	(1 << 3)
/* Macro of the mdoc(7) or man(7) language. */
#define	ROFFDEF_STD	(1 << 4)
/* Union of all four kinds of definitions above. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | ROFFDEF_REN | ROFFDEF_STD)
/* Not defined at all; deliberately not part of ROFFDEF_ANY. */
#define	ROFFDEF_UNDEF	(1 << 5)
54 
55 /* --- data types --------------------------------------------------------- */
56 
/*
 * Minimal string buffer: a character pointer paired with its
 * cached length.
 */
struct	roffstr {
	/* NUL-terminated buffer */
	char		*p;
	/* saved strlen(p) */
	size_t		 sz;
};
64 
65 /*
66  * A key-value roffstr pair as part of a singly-linked list.
67  */
68 struct	roffkv {
69 	struct roffstr	 key;
70 	struct roffstr	 val;
71 	struct roffkv	*next; /* next in list */
72 };
73 
74 /*
75  * A single number register as part of a singly-linked list.
76  */
77 struct	roffreg {
78 	struct roffstr	 key;
79 	int		 val;
80 	int		 step;
81 	struct roffreg	*next;
82 };
83 
84 /*
85  * Association of request and macro names with token IDs.
86  */
87 struct	roffreq {
88 	enum roff_tok	 tok;
89 	char		 name[];
90 };
91 
/*
 * Context of one macro being processed.
 * Nested macro calls need one context each.
 */
struct	mctx {
	/* argument vector; the array itself is released by roff_free() */
	char		**argv;
	/* NOTE(review): presumably the number of arguments in use */
	int		 argc;
	/* NOTE(review): presumably the allocated capacity of argv */
	int		 argsz;
};
101 
/*
 * Complete state of one roff parser.
 */
struct	roff {
	/* Parser output and block structure. */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the block stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */

	/* Tables and equations. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */

	/* Options, stack bookkeeping, and special characters. */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
129 
130 /*
131  * A macro definition, condition, or ignored block.
132  */
133 struct	roffnode {
134 	enum roff_tok	 tok; /* type of node */
135 	struct roffnode	*parent; /* up one in stack */
136 	int		 line; /* parse line */
137 	int		 col; /* parse col */
138 	char		*name; /* node name, e.g. macro name */
139 	char		*end; /* custom end macro of the block */
140 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
141 	int		 rule; /* content is: 1=evaluated 0=skipped */
142 };
143 
/*
 * Parameter list shared by all request handlers:
 *   r     parse context
 *   tok   token of the macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum roff_tok tok, \
			 struct buf *buf, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Pointer to a function taking the ROFF_ARGS parameter list. */
typedef	int (*roffproc)(ROFF_ARGS);
153 
154 struct	roffmac {
155 	roffproc	 proc; /* process new macro */
156 	roffproc	 text; /* process as child text of macro */
157 	roffproc	 sub; /* process as child of macro */
158 	int		 flags;
159 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
160 };
161 
/*
 * A predefined string: an input name and the text replacing it.
 */
struct	predef {
	/* predefined input name */
	const char	*name;
	/* replacement symbol */
	const char	*str;
};

/*
 * Expands to one struct predef initializer including the trailing
 * comma, so that invocations can simply be listed one after another.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
169 
170 /* --- function prototypes ------------------------------------------------ */
171 
/*
 * Forward declarations of the file-local (static) functions.
 * Those declared with ROFF_ARGS have the roffproc signature
 * and can therefore be stored in struct roffmac entries.
 */
172 static	int		 roffnode_cleanscope(struct roff *);
173 static	int		 roffnode_pop(struct roff *);
174 static	void		 roffnode_push(struct roff *, enum roff_tok,
175 				const char *, int, int);
176 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
177 static	int		 roff_als(ROFF_ARGS);
178 static	int		 roff_block(ROFF_ARGS);
179 static	int		 roff_block_text(ROFF_ARGS);
180 static	int		 roff_block_sub(ROFF_ARGS);
181 static	int		 roff_break(ROFF_ARGS);
182 static	int		 roff_cblock(ROFF_ARGS);
183 static	int		 roff_cc(ROFF_ARGS);
184 static	int		 roff_ccond(struct roff *, int, int);
185 static	int		 roff_char(ROFF_ARGS);
186 static	int		 roff_cond(ROFF_ARGS);
187 static	int		 roff_cond_checkend(ROFF_ARGS);
188 static	int		 roff_cond_text(ROFF_ARGS);
189 static	int		 roff_cond_sub(ROFF_ARGS);
190 static	int		 roff_ds(ROFF_ARGS);
191 static	int		 roff_ec(ROFF_ARGS);
192 static	int		 roff_eo(ROFF_ARGS);
193 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
194 static	int		 roff_evalcond(struct roff *, int, char *, int *);
195 static	int		 roff_evalpar(int, const char *, int *, int *,
196 				char, int);
197 static	int		 roff_evalstrcond(const char *, int *);
198 static	int		 roff_expand(struct roff *, struct buf *,
199 				int, int, char);
200 static	void		 roff_expand_patch(struct buf *, int,
201 				const char *, int);
202 static	void		 roff_free1(struct roff *);
203 static	void		 roff_freereg(struct roffreg *);
204 static	void		 roff_freestr(struct roffkv *);
205 static	size_t		 roff_getname(char **, int, int);
206 static	int		 roff_getnum(const char *, int *, int *, char, int);
207 static	int		 roff_getop(const char *, int *, char *);
208 static	int		 roff_getregn(struct roff *,
209 				const char *, size_t, char);
210 static	int		 roff_getregro(const struct roff *,
211 				const char *name);
212 static	const char	*roff_getstrn(struct roff *,
213 				const char *, size_t, int *);
214 static	int		 roff_hasregn(const struct roff *,
215 				const char *, size_t);
216 static	int		 roff_insec(ROFF_ARGS);
217 static	int		 roff_it(ROFF_ARGS);
218 static	int		 roff_line_ignore(ROFF_ARGS);
219 static	void		 roff_man_alloc1(struct roff_man *);
220 static	void		 roff_man_free1(struct roff_man *);
221 static	int		 roff_manyarg(ROFF_ARGS);
222 static	int		 roff_mc(ROFF_ARGS);
223 static	int		 roff_noarg(ROFF_ARGS);
224 static	int		 roff_nop(ROFF_ARGS);
225 static	int		 roff_nr(ROFF_ARGS);
226 static	int		 roff_onearg(ROFF_ARGS);
227 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
228 				int, int);
229 static	int		 roff_parse_comment(struct roff *, struct buf *,
230 				int, int, char);
231 static	int		 roff_parsetext(struct roff *, struct buf *,
232 				int, int *);
233 static	int		 roff_renamed(ROFF_ARGS);
234 static	int		 roff_req_or_macro(ROFF_ARGS);
235 static	int		 roff_return(ROFF_ARGS);
236 static	int		 roff_rm(ROFF_ARGS);
237 static	int		 roff_rn(ROFF_ARGS);
238 static	int		 roff_rr(ROFF_ARGS);
239 static	void		 roff_setregn(struct roff *, const char *,
240 				size_t, int, char, int);
241 static	void		 roff_setstr(struct roff *,
242 				const char *, const char *, int);
243 static	void		 roff_setstrn(struct roffkv **, const char *,
244 				size_t, const char *, size_t, int);
245 static	int		 roff_shift(ROFF_ARGS);
246 static	int		 roff_so(ROFF_ARGS);
247 static	int		 roff_tr(ROFF_ARGS);
248 static	int		 roff_Dd(ROFF_ARGS);
249 static	int		 roff_TE(ROFF_ARGS);
250 static	int		 roff_TS(ROFF_ARGS);
251 static	int		 roff_EQ(ROFF_ARGS);
252 static	int		 roff_EN(ROFF_ARGS);
253 static	int		 roff_T_(ROFF_ARGS);
254 static	int		 roff_unsupp(ROFF_ARGS);
255 static	int		 roff_userdef(ROFF_ARGS);
256 
257 /* --- constant data ------------------------------------------------------ */
258 
/*
 * Names of all roff requests and of the mdoc(7) and man(7) macros,
 * indexed by token number.  NULL entries mark tokens that have no
 * name; roffhash_alloc() skips them when it builds a lookup table.
 * Other code reads the table through the const-qualified roff_name
 * pointer defined right after the array.
 */
259 const char *__roff_name[MAN_MAX + 1] = {
260 	"br",		"ce",		"fi",		"ft",
261 	"ll",		"mc",		"nf",
262 	"po",		"rj",		"sp",
263 	"ta",		"ti",		NULL,
264 	"ab",		"ad",		"af",		"aln",
265 	"als",		"am",		"am1",		"ami",
266 	"ami1",		"as",		"as1",		"asciify",
267 	"backtrace",	"bd",		"bleedat",	"blm",
268         "box",		"boxa",		"bp",		"BP",
269 	"break",	"breakchar",	"brnl",		"brp",
270 	"brpnl",	"c2",		"cc",
271 	"cf",		"cflags",	"ch",		"char",
272 	"chop",		"class",	"close",	"CL",
273 	"color",	"composite",	"continue",	"cp",
274 	"cropat",	"cs",		"cu",		"da",
275 	"dch",		"Dd",		"de",		"de1",
276 	"defcolor",	"dei",		"dei1",		"device",
277 	"devicem",	"di",		"do",		"ds",
278 	"ds1",		"dwh",		"dt",		"ec",
279 	"ecr",		"ecs",		"el",		"em",
280 	"EN",		"eo",		"EP",		"EQ",
281 	"errprint",	"ev",		"evc",		"ex",
282 	"fallback",	"fam",		"fc",		"fchar",
283 	"fcolor",	"fdeferlig",	"feature",	"fkern",
284 	"fl",		"flig",		"fp",		"fps",
285 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
286 	"fzoom",	"gcolor",	"hc",		"hcode",
287 	"hidechar",	"hla",		"hlm",		"hpf",
288 	"hpfa",		"hpfcode",	"hw",		"hy",
289 	"hylang",	"hylen",	"hym",		"hypp",
290 	"hys",		"ie",		"if",		"ig",
291 	"index",	"it",		"itc",		"IX",
292 	"kern",		"kernafter",	"kernbefore",	"kernpair",
293 	"lc",		"lc_ctype",	"lds",		"length",
294 	"letadj",	"lf",		"lg",		"lhang",
295 	"linetabs",	"lnr",		"lnrf",		"lpfx",
296 	"ls",		"lsm",		"lt",
297 	"mediasize",	"minss",	"mk",		"mso",
298 	"na",		"ne",		"nh",		"nhychar",
299 	"nm",		"nn",		"nop",		"nr",
300 	"nrf",		"nroff",	"ns",		"nx",
301 	"open",		"opena",	"os",		"output",
302 	"padj",		"papersize",	"pc",		"pev",
303 	"pi",		"PI",		"pl",		"pm",
304 	"pn",		"pnr",		"ps",
305 	"psbb",		"pshape",	"pso",		"ptr",
306 	"pvs",		"rchar",	"rd",		"recursionlimit",
307 	"return",	"rfschar",	"rhang",
308 	"rm",		"rn",		"rnn",		"rr",
309 	"rs",		"rt",		"schar",	"sentchar",
310 	"shc",		"shift",	"sizes",	"so",
311 	"spacewidth",	"special",	"spreadwarn",	"ss",
312 	"sty",		"substring",	"sv",		"sy",
313 	"T&",		"tc",		"TE",
314 	"TH",		"tkf",		"tl",
315 	"tm",		"tm1",		"tmc",		"tr",
316 	"track",	"transchar",	"trf",		"trimat",
317 	"trin",		"trnt",		"troff",	"TS",
318 	"uf",		"ul",		"unformat",	"unwatch",
319 	"unwatchn",	"vpt",		"vs",		"warn",
320 	"warnscale",	"watch",	"watchlength",	"watchn",
321 	"wh",		"while",	"write",	"writec",
322 	"writem",	"xflag",	".",		NULL,
323 	NULL,		"text",
324 	"Dd",		"Dt",		"Os",		"Sh",
325 	"Ss",		"Pp",		"D1",		"Dl",
326 	"Bd",		"Ed",		"Bl",		"El",
327 	"It",		"Ad",		"An",		"Ap",
328 	"Ar",		"Cd",		"Cm",		"Dv",
329 	"Er",		"Ev",		"Ex",		"Fa",
330 	"Fd",		"Fl",		"Fn",		"Ft",
331 	"Ic",		"In",		"Li",		"Nd",
332 	"Nm",		"Op",		"Ot",		"Pa",
333 	"Rv",		"St",		"Va",		"Vt",
334 	"Xr",		"%A",		"%B",		"%D",
335 	"%I",		"%J",		"%N",		"%O",
336 	"%P",		"%R",		"%T",		"%V",
337 	"Ac",		"Ao",		"Aq",		"At",
338 	"Bc",		"Bf",		"Bo",		"Bq",
339 	"Bsx",		"Bx",		"Db",		"Dc",
340 	"Do",		"Dq",		"Ec",		"Ef",
341 	"Em",		"Eo",		"Fx",		"Ms",
342 	"No",		"Ns",		"Nx",		"Ox",
343 	"Pc",		"Pf",		"Po",		"Pq",
344 	"Qc",		"Ql",		"Qo",		"Qq",
345 	"Re",		"Rs",		"Sc",		"So",
346 	"Sq",		"Sm",		"Sx",		"Sy",
347 	"Tn",		"Ux",		"Xc",		"Xo",
348 	"Fo",		"Fc",		"Oo",		"Oc",
349 	"Bk",		"Ek",		"Bt",		"Hf",
350 	"Fr",		"Ud",		"Lb",		"Lp",
351 	"Lk",		"Mt",		"Brq",		"Bro",
352 	"Brc",		"%C",		"Es",		"En",
353 	"Dx",		"%Q",		"%U",		"Ta",
354 	"Tg",		NULL,
355 	"TH",		"SH",		"SS",		"TP",
356 	"TQ",
357 	"LP",		"PP",		"P",		"IP",
358 	"HP",		"SM",		"SB",		"BI",
359 	"IB",		"BR",		"RB",		"R",
360 	"B",		"I",		"IR",		"RI",
361 	"RE",		"RS",		"DT",		"UC",
362 	"PD",		"AT",		"in",
363 	"SY",		"YS",		"OP",
364 	"EX",		"EE",		"UR",
365 	"UE",		"MT",		"ME",		"MR",
366 	NULL
367 };
368 const	char *const *roff_name = __roff_name;
369 
/*
 * Handler table for the roff requests.  The entries are laid out in
 * the same order as the names at the start of __roff_name[], and the
 * trailing comment on each entry names the request it belongs to.
 * Each entry holds the proc, text, and sub handlers and the flags
 * of struct roffmac, in that order.
 * NOTE(review): the two uncommented entries at the very end
 * presumably belong to ROFF_RENAMED and ROFF_USERDEF - confirm
 * against the definition of enum roff_tok.
 */
370 static	struct roffmac	 roffs[TOKEN_NONE] = {
371 	{ roff_noarg, NULL, NULL, 0 },  /* br */
372 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
373 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
374 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
375 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
376 	{ roff_mc, NULL, NULL, 0 },  /* mc */
377 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
378 	{ roff_onearg, NULL, NULL, 0 },  /* po */
379 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
380 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
381 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
382 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
383 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
384 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
385 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
386 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
387 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
388 	{ roff_als, NULL, NULL, 0 },  /* als */
389 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
390 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
391 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
392 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
393 	{ roff_ds, NULL, NULL, 0 },  /* as */
394 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
395 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
396 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
397 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
398 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
399 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
400 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
401 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
402 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
403 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
404 	{ roff_break, NULL, NULL, 0 },  /* break */
405 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
406 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
407 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
408 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
409 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
410 	{ roff_cc, NULL, NULL, 0 },  /* cc */
411 	{ roff_insec, NULL, NULL, 0 },  /* cf */
412 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
413 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
414 	{ roff_char, NULL, NULL, 0 },  /* char */
415 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
416 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
417 	{ roff_insec, NULL, NULL, 0 },  /* close */
418 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
419 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
420 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
421 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
422 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
423 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
425 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
426 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
427 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
428 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
429 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
430 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
431 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
432 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
433 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
434 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
435 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
436 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
437 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
438 	{ roff_ds, NULL, NULL, 0 },  /* ds */
439 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
440 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
441 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
442 	{ roff_ec, NULL, NULL, 0 },  /* ec */
443 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
444 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
445 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
446 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
447 	{ roff_EN, NULL, NULL, 0 },  /* EN */
448 	{ roff_eo, NULL, NULL, 0 },  /* eo */
449 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
450 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
451 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
452 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
453 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
454 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
455 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
456 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
457 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
458 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
459 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
460 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
462 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
465 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
467 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
473 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
488 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
489 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
490 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
491 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
492 	{ roff_it, NULL, NULL, 0 },  /* it */
493 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
494 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
495 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
496 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
497 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
498 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
499 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
500 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
501 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
502 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
504 	{ roff_insec, NULL, NULL, 0 },  /* lf */
505 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
506 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
507 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
509 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
510 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
512 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
513 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
514 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
515 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
516 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
517 	{ roff_insec, NULL, NULL, 0 },  /* mso */
518 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
522 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
523 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
524 	{ roff_nop, NULL, NULL, 0 },  /* nop */
525 	{ roff_nr, NULL, NULL, 0 },  /* nr */
526 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
527 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
528 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
529 	{ roff_insec, NULL, NULL, 0 },  /* nx */
530 	{ roff_insec, NULL, NULL, 0 },  /* open */
531 	{ roff_insec, NULL, NULL, 0 },  /* opena */
532 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
533 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
534 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
536 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
537 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
538 	{ roff_insec, NULL, NULL, 0 },  /* pi */
539 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
543 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
544 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
545 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
546 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
547 	{ roff_insec, NULL, NULL, 0 },  /* pso */
548 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
550 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
551 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
552 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
553 	{ roff_return, NULL, NULL, 0 },  /* return */
554 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
555 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
556 	{ roff_rm, NULL, NULL, 0 },  /* rm */
557 	{ roff_rn, NULL, NULL, 0 },  /* rn */
558 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
559 	{ roff_rr, NULL, NULL, 0 },  /* rr */
560 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
561 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
562 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
563 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
564 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
565 	{ roff_shift, NULL, NULL, 0 },  /* shift */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
567 	{ roff_so, NULL, NULL, 0 },  /* so */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
571 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
573 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
574 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
575 	{ roff_insec, NULL, NULL, 0 },  /* sy */
576 	{ roff_T_, NULL, NULL, 0 },  /* T& */
577 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
578 	{ roff_TE, NULL, NULL, 0 },  /* TE */
579 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
580 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
581 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
582 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
583 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
584 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
585 	{ roff_tr, NULL, NULL, 0 },  /* tr */
586 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
587 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
588 	{ roff_insec, NULL, NULL, 0 },  /* trf */
589 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
590 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
591 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
592 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
593 	{ roff_TS, NULL, NULL, 0 },  /* TS */
594 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
595 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
596 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
597 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
598 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
599 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
601 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
602 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
604 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
605 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
606 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
607 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
608 	{ roff_insec, NULL, NULL, 0 },  /* write */
609 	{ roff_insec, NULL, NULL, 0 },  /* writec */
610 	{ roff_insec, NULL, NULL, 0 },  /* writem */
611 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
612 	{ roff_cblock, NULL, NULL, 0 },  /* . */
613 	{ roff_renamed, NULL, NULL, 0 },
614 	{ roff_userdef, NULL, NULL, 0 }
615 };
616 
617 /* Array of injected predefined strings. */
618 #define	PREDEFS_MAX	 38
619 static	const struct predef predefs[PREDEFS_MAX] = {
620 #include "predefs.in"
621 };
622 
/* Number of input lines to center. */
static	int	 roffce_lines;
/* Active request. */
static	struct roff_node *roffce_node;
/* Number of lines to delay. */
static	int	 roffit_lines;
/* NUL-terminated macro line. */
static	char	*roffit_macro;
627 
628 
629 /* --- request table ------------------------------------------------------ */
630 
631 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)632 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
633 {
634 	struct ohash	*htab;
635 	struct roffreq	*req;
636 	enum roff_tok	 tok;
637 	size_t		 sz;
638 	unsigned int	 slot;
639 
640 	htab = mandoc_malloc(sizeof(*htab));
641 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
642 
643 	for (tok = mintok; tok < maxtok; tok++) {
644 		if (roff_name[tok] == NULL)
645 			continue;
646 		sz = strlen(roff_name[tok]);
647 		req = mandoc_malloc(sizeof(*req) + sz + 1);
648 		req->tok = tok;
649 		memcpy(req->name, roff_name[tok], sz + 1);
650 		slot = ohash_qlookup(htab, req->name);
651 		ohash_insert(htab, slot, req);
652 	}
653 	return htab;
654 }
655 
/*
 * Release a table created by roffhash_alloc(): every entry,
 * the hash table internals, and the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &idx);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &idx);
	}
	ohash_delete(htab);
	free(htab);
}
670 
671 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)672 roffhash_find(struct ohash *htab, const char *name, size_t sz)
673 {
674 	struct roffreq	*req;
675 	const char	*end;
676 
677 	if (sz) {
678 		end = name + sz;
679 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
680 	} else
681 		req = ohash_find(htab, ohash_qlookup(htab, name));
682 	return req == NULL ? TOKEN_NONE : req->tok;
683 }
684 
685 /* --- stack of request blocks -------------------------------------------- */
686 
687 /*
688  * Pop the current node off of the stack of roff instructions currently
689  * pending.  Return 1 if it is a loop or 0 otherwise.
690  */
691 static int
roffnode_pop(struct roff * r)692 roffnode_pop(struct roff *r)
693 {
694 	struct roffnode	*p;
695 	int		 inloop;
696 
697 	p = r->last;
698 	inloop = p->tok == ROFF_while;
699 	r->last = p->parent;
700 	free(p->name);
701 	free(p->end);
702 	free(p);
703 	return inloop;
704 }
705 
706 /*
707  * Push a roff node onto the instruction stack.  This must later be
708  * removed with roffnode_pop().
709  */
710 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)711 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
712 		int line, int col)
713 {
714 	struct roffnode	*p;
715 
716 	p = mandoc_calloc(1, sizeof(struct roffnode));
717 	p->tok = tok;
718 	if (name)
719 		p->name = mandoc_strdup(name);
720 	p->parent = r->last;
721 	p->line = line;
722 	p->col = col;
723 	p->rule = p->parent ? p->parent->rule : 0;
724 
725 	r->last = p;
726 }
727 
728 /* --- roff parser state data management ---------------------------------- */
729 
730 static void
roff_free1(struct roff * r)731 roff_free1(struct roff *r)
732 {
733 	int		 i;
734 
735 	tbl_free(r->first_tbl);
736 	r->first_tbl = r->last_tbl = r->tbl = NULL;
737 
738 	eqn_free(r->last_eqn);
739 	r->last_eqn = r->eqn = NULL;
740 
741 	while (r->mstackpos >= 0)
742 		roff_userret(r);
743 
744 	while (r->last)
745 		roffnode_pop(r);
746 
747 	free (r->rstack);
748 	r->rstack = NULL;
749 	r->rstacksz = 0;
750 	r->rstackpos = -1;
751 
752 	roff_freereg(r->regtab);
753 	r->regtab = NULL;
754 
755 	roff_freestr(r->strtab);
756 	roff_freestr(r->rentab);
757 	roff_freestr(r->xmbtab);
758 	r->strtab = r->rentab = r->xmbtab = NULL;
759 
760 	if (r->xtab)
761 		for (i = 0; i < 128; i++)
762 			free(r->xtab[i].p);
763 	free(r->xtab);
764 	r->xtab = NULL;
765 }
766 
767 void
roff_reset(struct roff * r)768 roff_reset(struct roff *r)
769 {
770 	roff_free1(r);
771 	r->options |= MPARSE_COMMENT;
772 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
773 	r->control = '\0';
774 	r->escape = '\\';
775 	roffce_lines = 0;
776 	roffce_node = NULL;
777 	roffit_lines = 0;
778 	roffit_macro = NULL;
779 }
780 
781 void
roff_free(struct roff * r)782 roff_free(struct roff *r)
783 {
784 	int		 i;
785 
786 	roff_free1(r);
787 	for (i = 0; i < r->mstacksz; i++)
788 		free(r->mstack[i].argv);
789 	free(r->mstack);
790 	roffhash_free(r->reqtab);
791 	free(r);
792 }
793 
794 struct roff *
roff_alloc(int options)795 roff_alloc(int options)
796 {
797 	struct roff	*r;
798 
799 	r = mandoc_calloc(1, sizeof(struct roff));
800 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
801 	r->options = options | MPARSE_COMMENT;
802 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
803 	r->mstackpos = -1;
804 	r->rstackpos = -1;
805 	r->escape = '\\';
806 	return r;
807 }
808 
809 /* --- syntax tree state data management ---------------------------------- */
810 
811 static void
roff_man_free1(struct roff_man * man)812 roff_man_free1(struct roff_man *man)
813 {
814 	if (man->meta.first != NULL)
815 		roff_node_delete(man, man->meta.first);
816 	free(man->meta.msec);
817 	free(man->meta.vol);
818 	free(man->meta.os);
819 	free(man->meta.arch);
820 	free(man->meta.title);
821 	free(man->meta.name);
822 	free(man->meta.date);
823 	free(man->meta.sodest);
824 }
825 
826 void
roff_state_reset(struct roff_man * man)827 roff_state_reset(struct roff_man *man)
828 {
829 	man->last = man->meta.first;
830 	man->last_es = NULL;
831 	man->flags = 0;
832 	man->lastsec = man->lastnamed = SEC_NONE;
833 	man->next = ROFF_NEXT_CHILD;
834 	roff_setreg(man->roff, "nS", 0, '=');
835 }
836 
837 static void
roff_man_alloc1(struct roff_man * man)838 roff_man_alloc1(struct roff_man *man)
839 {
840 	memset(&man->meta, 0, sizeof(man->meta));
841 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
842 	man->meta.first->type = ROFFT_ROOT;
843 	man->meta.macroset = MACROSET_NONE;
844 	roff_state_reset(man);
845 }
846 
/*
 * Discard the tree and meta data of man,
 * then set up a new empty tree and pristine meta data.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Free before re-allocating: roff_man_alloc1() zeroes the meta data. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
853 
854 void
roff_man_free(struct roff_man * man)855 roff_man_free(struct roff_man *man)
856 {
857 	roff_man_free1(man);
858 	free(man->os_r);
859 	free(man);
860 }
861 
862 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)863 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
864 {
865 	struct roff_man *man;
866 
867 	man = mandoc_calloc(1, sizeof(*man));
868 	man->roff = roff;
869 	man->os_s = os_s;
870 	man->quick = quick;
871 	roff_man_alloc1(man);
872 	roff->man = man;
873 	return man;
874 }
875 
876 /* --- syntax tree handling ----------------------------------------------- */
877 
878 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)879 roff_node_alloc(struct roff_man *man, int line, int pos,
880 	enum roff_type type, int tok)
881 {
882 	struct roff_node	*n;
883 
884 	n = mandoc_calloc(1, sizeof(*n));
885 	n->line = line;
886 	n->pos = pos;
887 	n->tok = tok;
888 	n->type = type;
889 	n->sec = man->lastsec;
890 
891 	if (man->flags & MDOC_SYNOPSIS)
892 		n->flags |= NODE_SYNPRETTY;
893 	else
894 		n->flags &= ~NODE_SYNPRETTY;
895 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
896 		n->flags |= NODE_NOFILL;
897 	else
898 		n->flags &= ~NODE_NOFILL;
899 	if (man->flags & MDOC_NEWLINE)
900 		n->flags |= NODE_LINE;
901 	man->flags &= ~MDOC_NEWLINE;
902 
903 	return n;
904 }
905 
906 void
roff_node_append(struct roff_man * man,struct roff_node * n)907 roff_node_append(struct roff_man *man, struct roff_node *n)
908 {
909 
910 	switch (man->next) {
911 	case ROFF_NEXT_SIBLING:
912 		if (man->last->next != NULL) {
913 			n->next = man->last->next;
914 			man->last->next->prev = n;
915 		} else
916 			man->last->parent->last = n;
917 		man->last->next = n;
918 		n->prev = man->last;
919 		n->parent = man->last->parent;
920 		break;
921 	case ROFF_NEXT_CHILD:
922 		if (man->last->child != NULL) {
923 			n->next = man->last->child;
924 			man->last->child->prev = n;
925 		} else
926 			man->last->last = n;
927 		man->last->child = n;
928 		n->parent = man->last;
929 		break;
930 	default:
931 		abort();
932 	}
933 	man->last = n;
934 
935 	switch (n->type) {
936 	case ROFFT_HEAD:
937 		n->parent->head = n;
938 		break;
939 	case ROFFT_BODY:
940 		if (n->end != ENDBODY_NOT)
941 			return;
942 		n->parent->body = n;
943 		break;
944 	case ROFFT_TAIL:
945 		n->parent->tail = n;
946 		break;
947 	default:
948 		return;
949 	}
950 
951 	/*
952 	 * Copy over the normalised-data pointer of our parent.  Not
953 	 * everybody has one, but copying a null pointer is fine.
954 	 */
955 
956 	n->norm = n->parent->norm;
957 	assert(n->parent->type == ROFFT_BLOCK);
958 }
959 
960 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)961 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
962 {
963 	struct roff_node	*n;
964 
965 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
966 	n->string = roff_strdup(man->roff, word);
967 	roff_node_append(man, n);
968 	n->flags |= NODE_VALID | NODE_ENDED;
969 	man->next = ROFF_NEXT_SIBLING;
970 }
971 
972 void
roff_word_append(struct roff_man * man,const char * word)973 roff_word_append(struct roff_man *man, const char *word)
974 {
975 	struct roff_node	*n;
976 	char			*addstr, *newstr;
977 
978 	n = man->last;
979 	addstr = roff_strdup(man->roff, word);
980 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
981 	free(addstr);
982 	free(n->string);
983 	n->string = newstr;
984 	man->next = ROFF_NEXT_SIBLING;
985 }
986 
987 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)988 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
989 {
990 	struct roff_node	*n;
991 
992 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
993 	roff_node_append(man, n);
994 	man->next = ROFF_NEXT_CHILD;
995 }
996 
997 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)998 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
999 {
1000 	struct roff_node	*n;
1001 
1002 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1003 	roff_node_append(man, n);
1004 	man->next = ROFF_NEXT_CHILD;
1005 	return n;
1006 }
1007 
1008 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1009 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1010 {
1011 	struct roff_node	*n;
1012 
1013 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1014 	roff_node_append(man, n);
1015 	man->next = ROFF_NEXT_CHILD;
1016 	return n;
1017 }
1018 
1019 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1020 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1021 {
1022 	struct roff_node	*n;
1023 
1024 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1025 	roff_node_append(man, n);
1026 	man->next = ROFF_NEXT_CHILD;
1027 	return n;
1028 }
1029 
1030 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1031 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1032 {
1033 	struct roff_node	*n;
1034 	struct tbl_span		*span;
1035 
1036 	if (man->meta.macroset == MACROSET_MAN)
1037 		man_breakscope(man, ROFF_TS);
1038 	while ((span = tbl_span(tbl)) != NULL) {
1039 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1040 		n->span = span;
1041 		roff_node_append(man, n);
1042 		n->flags |= NODE_VALID | NODE_ENDED;
1043 		man->next = ROFF_NEXT_SIBLING;
1044 	}
1045 }
1046 
1047 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1048 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1049 {
1050 
1051 	/* Adjust siblings. */
1052 
1053 	if (n->prev)
1054 		n->prev->next = n->next;
1055 	if (n->next)
1056 		n->next->prev = n->prev;
1057 
1058 	/* Adjust parent. */
1059 
1060 	if (n->parent != NULL) {
1061 		if (n->parent->child == n)
1062 			n->parent->child = n->next;
1063 		if (n->parent->last == n)
1064 			n->parent->last = n->prev;
1065 	}
1066 
1067 	/* Adjust parse point. */
1068 
1069 	if (man == NULL)
1070 		return;
1071 	if (man->last == n) {
1072 		if (n->prev == NULL) {
1073 			man->last = n->parent;
1074 			man->next = ROFF_NEXT_CHILD;
1075 		} else {
1076 			man->last = n->prev;
1077 			man->next = ROFF_NEXT_SIBLING;
1078 		}
1079 	}
1080 	if (man->meta.first == n)
1081 		man->meta.first = NULL;
1082 }
1083 
1084 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1085 roff_node_relink(struct roff_man *man, struct roff_node *n)
1086 {
1087 	roff_node_unlink(man, n);
1088 	n->prev = n->next = NULL;
1089 	roff_node_append(man, n);
1090 }
1091 
1092 void
roff_node_free(struct roff_node * n)1093 roff_node_free(struct roff_node *n)
1094 {
1095 
1096 	if (n->args != NULL)
1097 		mdoc_argv_free(n->args);
1098 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1099 		free(n->norm);
1100 	eqn_box_free(n->eqn);
1101 	free(n->string);
1102 	free(n->tag);
1103 	free(n);
1104 }
1105 
1106 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1107 roff_node_delete(struct roff_man *man, struct roff_node *n)
1108 {
1109 
1110 	while (n->child != NULL)
1111 		roff_node_delete(man, n->child);
1112 	roff_node_unlink(man, n);
1113 	roff_node_free(n);
1114 }
1115 
1116 int
roff_node_transparent(struct roff_node * n)1117 roff_node_transparent(struct roff_node *n)
1118 {
1119 	if (n == NULL)
1120 		return 0;
1121 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1122 		return 1;
1123 	return roff_tok_transparent(n->tok);
1124 }
1125 
1126 int
roff_tok_transparent(enum roff_tok tok)1127 roff_tok_transparent(enum roff_tok tok)
1128 {
1129 	switch (tok) {
1130 	case ROFF_ft:
1131 	case ROFF_ll:
1132 	case ROFF_mc:
1133 	case ROFF_po:
1134 	case ROFF_ta:
1135 	case MDOC_Db:
1136 	case MDOC_Es:
1137 	case MDOC_Sm:
1138 	case MDOC_Tg:
1139 	case MAN_DT:
1140 	case MAN_UC:
1141 	case MAN_PD:
1142 	case MAN_AT:
1143 		return 1;
1144 	default:
1145 		return 0;
1146 	}
1147 }
1148 
1149 struct roff_node *
roff_node_child(struct roff_node * n)1150 roff_node_child(struct roff_node *n)
1151 {
1152 	for (n = n->child; roff_node_transparent(n); n = n->next)
1153 		continue;
1154 	return n;
1155 }
1156 
1157 struct roff_node *
roff_node_prev(struct roff_node * n)1158 roff_node_prev(struct roff_node *n)
1159 {
1160 	do {
1161 		n = n->prev;
1162 	} while (roff_node_transparent(n));
1163 	return n;
1164 }
1165 
1166 struct roff_node *
roff_node_next(struct roff_node * n)1167 roff_node_next(struct roff_node *n)
1168 {
1169 	do {
1170 		n = n->next;
1171 	} while (roff_node_transparent(n));
1172 	return n;
1173 }
1174 
1175 void
deroff(char ** dest,const struct roff_node * n)1176 deroff(char **dest, const struct roff_node *n)
1177 {
1178 	char	*cp;
1179 	size_t	 sz;
1180 
1181 	if (n->string == NULL) {
1182 		for (n = n->child; n != NULL; n = n->next)
1183 			deroff(dest, n);
1184 		return;
1185 	}
1186 
1187 	/* Skip leading whitespace. */
1188 
1189 	for (cp = n->string; *cp != '\0'; cp++) {
1190 		if (cp[0] == '\\' && cp[1] != '\0' &&
1191 		    strchr(" %&0^|~", cp[1]) != NULL)
1192 			cp++;
1193 		else if ( ! isspace((unsigned char)*cp))
1194 			break;
1195 	}
1196 
1197 	/* Skip trailing backslash. */
1198 
1199 	sz = strlen(cp);
1200 	if (sz > 0 && cp[sz - 1] == '\\')
1201 		sz--;
1202 
1203 	/* Skip trailing whitespace. */
1204 
1205 	for (; sz; sz--)
1206 		if ( ! isspace((unsigned char)cp[sz-1]))
1207 			break;
1208 
1209 	/* Skip empty strings. */
1210 
1211 	if (sz == 0)
1212 		return;
1213 
1214 	if (*dest == NULL) {
1215 		*dest = mandoc_strndup(cp, sz);
1216 		return;
1217 	}
1218 
1219 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1220 	free(*dest);
1221 	*dest = cp;
1222 }
1223 
1224 /* --- main functions of the roff parser ---------------------------------- */
1225 
/*
 * Save comments preceding the title macro, for example in order to
 * preserve Copyright and license headers in HTML output,
 * provide diagnostics about RCS ids and trailing whitespace in comments,
 * then discard comments including preceding whitespace.
 * This function also handles input line continuation.
 *
 * Scanning starts at buf->buf + pos and modifies the buffer in place.
 * The argument ec is the escape character to look for, and ln is
 * the line number used for messages and for saved comment nodes.
 *
 * Returns ROFF_CONT if the line, with any comment cut off, is to be
 * processed further, or ROFF_IGN | ROFF_APPEND if the line ends with
 * a continuation request.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	const char	*cp;	/* for RCS id parsing */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	int		 rcsid;	/* kind of RCS id seen */

	/*
	 * Find the first escape character that starts a comment.
	 * All other ways of reaching the end of the line
	 * return from inside the loop.
	 */
	for (start = stesc = buf->buf + pos;; stesc++) {
		/*
		 * XXX Ugly hack: Remove the newline character that
		 * mparse_buf_r() appended to mark the end of input
		 * if it is not preceded by an escape character.
		 */
		if (stesc[0] == '\n') {
			assert(stesc[1] == '\0');
			stesc[0] = '\0';
		}

		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != ec)
			continue;

		/*
		 * XXX Ugly hack: Do not attempt to append another line
		 * if the function mparse_buf_r() appended a newline
		 * character to indicate the end of input.
		 */
		if (stesc[1] == '\n') {
			assert(stesc[2] == '\0');
			stesc[0] = '\0';
			return ROFF_CONT;
		}

		/*
		 * An escape character at the end of an input line
		 * requests line continuation.
		 */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == ec)
			stesc++;
	}

	/*
	 * Look for an RCS id in the comment.
	 * After a match, cp is advanced past the keyword:
	 * 8 bytes for "$OpenBSD" and 7 bytes for "$NetBSD".
	 * NOTE(review): the keywords are presumably written as two
	 * adjacent string literals to keep revision control tools
	 * from expanding them in this source file - confirm.
	 */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;
	}
	/*
	 * Only accept the keyword if it is not followed by an
	 * alphanumeric character and if another '$' follows later.
	 * Complain if an id of the same kind was already recorded.
	 */
	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/*
	 * Warn about trailing whitespace at the end of the comment.
	 * For an empty comment, ep points to the '"' or '#' character.
	 */

	ep = strchr(stesc + 2, '\0') - 1;
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/*
	 * Save comments preceding the title macro in the syntax tree.
	 * Trailing blanks and tabs are cut off before the text
	 * following the two comment introducer bytes is copied.
	 */

	if (r->options & MPARSE_COMMENT) {
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/*
	 * Discard the comment including preceding whitespace.
	 * A blank is kept if it is preceded by a backslash.
	 */

	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1347 
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Processing starts at buf->buf + pos.  The buffer may be reallocated,
 * in which case buf->buf and buf->sz are updated.  The argument ec is
 * the escape character currently in effect, and ln is the line number
 * used for messages.
 *
 * Returns ROFF_CONT on success, or ROFF_IGN if more than EXPAND_LIMIT
 * expansions were attempted on this line or the buffer would grow
 * beyond SHRT_MAX bytes.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	char		 ubuf[24];	/* buffer to print a number */
	struct mctx	*ctx;		/* current macro call context */
	const char	*res;		/* the string to be pasted */
	const char	*src;		/* source for copying */
	char		*dst;		/* destination for copying */
	enum mandoc_esc	 subtype;	/* return value from roff_escape */
	int		 iesc;		/* index of leading escape char */
	int		 inam;		/* index of the escape name */
	int		 iarg;		/* index beginning the argument */
	int		 iendarg;	/* index right after the argument */
	int		 iend;		/* index right after the sequence */
	int		 isrc, idst;	/* to reduce \\ and \. in names */
	int		 deftype;	/* type of definition to paste */
	int		 argi;		/* macro argument index */
	int		 quote_args;	/* true for \\$@, false for \\$* */
	int		 asz;		/* length of the replacement */
	int		 rsz;		/* length of the rest of the string */
	int		 npos;		/* position in numeric expression */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;
	while (buf->buf[pos] != '\0') {

		/*
		 * Skip plain ASCII characters.
		 * If we have a non-standard escape character,
		 * escape literal backslashes because all processing in
		 * subsequent functions uses the standard escaping rules.
		 */

		if (buf->buf[pos] != ec) {
			if (buf->buf[pos] == '\\') {
				roff_expand_patch(buf, pos, "\\e", pos + 1);
				pos++;
			}
			pos++;
			continue;
		}

		/*
		 * Parse escape sequences,
		 * issue diagnostic messages when appropriate,
		 * and skip sequences that do not need expansion.
		 * If we have a non-standard escape character, translate
		 * it to backslashes and translate backslashes to \e.
		 */

		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
			while (pos < iend) {
				if (buf->buf[pos] == ec) {
					buf->buf[pos] = '\\';
					if (pos + 1 < iend)
						pos++;
				} else if (buf->buf[pos] == '\\') {
					/* The patch grows the sequence by one byte. */
					roff_expand_patch(buf,
					    pos, "\\e", pos + 1);
					pos++;
					iend++;
				}
				pos++;
			}
			continue;
		}

		/*
		 * Reduce \\ and \. in names.
		 * The name is compacted in place, and iendarg is
		 * reduced by the number of bytes removed.
		 */

		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
			isrc = idst = iarg;
			while (isrc < iendarg) {
				if (isrc + 1 < iendarg &&
				    buf->buf[isrc] == '\\' &&
				    (buf->buf[isrc + 1] == '\\' ||
				     buf->buf[isrc + 1] == '.'))
					isrc++;
				buf->buf[idst++] = buf->buf[isrc++];
			}
			iendarg -= isrc - idst;
		}

		/*
		 * Handle expansion.
		 * Cases that leave the switch with "break" have the
		 * sequence replaced by res below, or deleted if res
		 * is still NULL.  Cases that modify the buffer
		 * themselves use "continue" instead.
		 */

		res = NULL;
		switch (buf->buf[inam]) {
		case '*':
			/* An empty name expands to nothing. */
			if (iendarg == iarg)
				break;
			deftype = ROFFDEF_USER | ROFFDEF_PRE;
			if ((res = roff_getstrn(r, buf->buf + iarg,
			    iendarg - iarg, &deftype)) != NULL)
				break;

			/*
			 * If not overridden,
			 * let \*(.T through to the formatters.
			 */

			if (iendarg - iarg == 2 &&
			    buf->buf[iarg] == '.' &&
			    buf->buf[iarg + 1] == 'T') {
				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
				pos = iend;
				continue;
			}

			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
			    "%.*s", iendarg - iarg, buf->buf + iarg);
			break;

		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits '1' to '9' select argv[0] to argv[8]. */
			argi = buf->buf[iarg] - '1';
			if (argi >= 0 && argi <= 8) {
				if (argi < ctx->argc)
					res = ctx->argv[argi];
				break;
			}
			if (buf->buf[iarg] == '*')
				quote_args = 0;
			else if (buf->buf[iarg] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			/* Compute the total length of all arguments. */
			asz = 0;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[argi]);
			}
			/*
			 * Resize the gap to fit the replacement.
			 * When shrinking, move the rest of the string
			 * before the reallocation; when growing, move
			 * it afterwards.
			 */
			if (asz != iend - iesc) {
				rsz = buf->sz - iend;
				if (asz < iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
				buf->sz = iesc + asz + rsz;
				buf->buf = mandoc_realloc(buf->buf, buf->sz);
				if (asz > iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
			}
			/* Write the arguments into the gap. */
			dst = buf->buf + iesc;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					*dst++ = ' ';
				if (quote_args)
					*dst++ = '"';
				src = ctx->argv[argi];
				while (*src != '\0')
					*dst++ = *src++;
				if (quote_args)
					*dst++ = '"';
			}
			/*
			 * Since pos was not advanced, the main loop
			 * looks at the same position again.
			 */
			continue;
		case 'A':
			/* Expands to 1 for a non-empty argument, else to 0. */
			ubuf[0] = iendarg > iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'B':
			/*
			 * Expands to 1 if the argument is non-empty,
			 * followed by at least one more byte of the
			 * sequence, and completely consumed by
			 * roff_evalnum(); else expands to 0.
			 */
			npos = 0;
			ubuf[0] = iendarg > iarg && iend > iendarg &&
			    roff_evalnum(ln, buf->buf + iarg, &npos,
					 NULL, 'u', 0) &&
			    npos == iendarg - iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'V':
			/*
			 * Unsupported: replace the part before the
			 * argument with "${" and the part after it
			 * with "}".  The end is patched first such
			 * that the earlier indices remain valid.
			 */
			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
			    "%.*s", iend - iesc, buf->buf + iesc);
			roff_expand_patch(buf, iendarg, "}", iend);
			roff_expand_patch(buf, iesc, "${", iarg);
			continue;
		case 'g':
			/* No replacement text: the sequence is deleted. */
			break;
		case 'n':
			/*
			 * Print the register value; the byte following
			 * the escape name is passed to roff_getregn().
			 */
			if (iendarg > iarg)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, buf->buf + iarg,
				    iendarg - iarg, buf->buf[inam + 1]));
			else
				ubuf[0] = '\0';
			res = ubuf;
			break;
		case 'w':
			/*
			 * Count the characters of the argument in rsz
			 * and expand to 24 times that count.
			 * A plain byte or counted escape sequence
			 * following an ESCAPE_SKIPCHAR sequence
			 * is not counted.
			 */
			rsz = 0;
			subtype = ESCAPE_UNDEF;
			while (iarg < iendarg) {
				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
				if (buf->buf[iarg] != '\\') {
					rsz += asz;
					iarg++;
					continue;
				}
				/* roff_escape() advances iarg past the sequence. */
				switch ((subtype = roff_escape(buf->buf, 0,
				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
				case ESCAPE_SPECIAL:
				case ESCAPE_NUMBERED:
				case ESCAPE_UNICODE:
				case ESCAPE_OVERSTRIKE:
				case ESCAPE_UNDEF:
					break;
				case ESCAPE_DEVICE:
					asz *= 8;
					break;
				case ESCAPE_EXPAND:
					abort();
				default:
					/* All other sequences are not counted. */
					continue;
				}
				rsz += asz;
			}
			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
			res = ubuf;
			break;
		default:
			break;
		}
		if (res == NULL)
			res = "";
		if (++expand_count > EXPAND_LIMIT ||
		    buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
			return ROFF_IGN;
		}
		/*
		 * Since pos was not advanced, the main loop
		 * looks at the same position again.
		 */
		roff_expand_patch(buf, iesc, res, iend);
	}
	return ROFF_CONT;
}
1595 
1596 /*
1597  * Replace the substring from the start position (inclusive)
1598  * to end position (exclusive) with the repl(acement) string.
1599  */
1600 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1601 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1602 {
1603 	char	*nbuf;
1604 
1605 	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1606 	    repl, buf->buf + end) + 1;
1607 	free(buf->buf);
1608 	buf->buf = nbuf;
1609 }
1610 
1611 /*
1612  * Parse a quoted or unquoted roff-style request or macro argument.
1613  * Return a pointer to the parsed argument, which is either the original
1614  * pointer or advanced by one byte in case the argument is quoted.
1615  * NUL-terminate the argument in place.
1616  * Collapse pairs of quotes inside quoted arguments.
1617  * Advance the argument pointer to the next argument,
1618  * or to the NUL byte terminating the argument line.
1619  */
1620 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1621 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1622 {
1623 	struct buf	 buf;
1624 	char		*cp, *start;
1625 	int		 newesc, pairs, quoted, white;
1626 
1627 	/* Quoting can only start with a new word. */
1628 	start = *cpp;
1629 	quoted = 0;
1630 	if ('"' == *start) {
1631 		quoted = 1;
1632 		start++;
1633 	}
1634 
1635 	newesc = pairs = white = 0;
1636 	for (cp = start; '\0' != *cp; cp++) {
1637 
1638 		/*
1639 		 * Move the following text left
1640 		 * after quoted quotes and after "\\" and "\t".
1641 		 */
1642 		if (pairs)
1643 			cp[-pairs] = cp[0];
1644 
1645 		if ('\\' == cp[0]) {
1646 			/*
1647 			 * In copy mode, translate double to single
1648 			 * backslashes and backslash-t to literal tabs.
1649 			 */
1650 			switch (cp[1]) {
1651 			case 'a':
1652 			case 't':
1653 				cp[-pairs] = '\t';
1654 				pairs++;
1655 				cp++;
1656 				break;
1657 			case '\\':
1658 				cp[-pairs] = '\\';
1659 				newesc = 1;
1660 				pairs++;
1661 				cp++;
1662 				break;
1663 			case ' ':
1664 				/* Skip escaped blanks. */
1665 				if (0 == quoted)
1666 					cp++;
1667 				break;
1668 			default:
1669 				break;
1670 			}
1671 		} else if (0 == quoted) {
1672 			if (' ' == cp[0]) {
1673 				/* Unescaped blanks end unquoted args. */
1674 				white = 1;
1675 				break;
1676 			}
1677 		} else if ('"' == cp[0]) {
1678 			if ('"' == cp[1]) {
1679 				/* Quoted quotes collapse. */
1680 				pairs++;
1681 				cp++;
1682 			} else {
1683 				/* Unquoted quotes end quoted args. */
1684 				quoted = 2;
1685 				break;
1686 			}
1687 		}
1688 	}
1689 
1690 	/* Quoted argument without a closing quote. */
1691 	if (1 == quoted)
1692 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1693 
1694 	/* NUL-terminate this argument and move to the next one. */
1695 	if (pairs)
1696 		cp[-pairs] = '\0';
1697 	if ('\0' != *cp) {
1698 		*cp++ = '\0';
1699 		while (' ' == *cp)
1700 			cp++;
1701 	}
1702 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1703 	*cpp = cp;
1704 
1705 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1706 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1707 
1708 	start = mandoc_strdup(start);
1709 	if (newesc == 0)
1710 		return start;
1711 
1712 	buf.buf = start;
1713 	buf.sz = strlen(start) + 1;
1714 	buf.next = NULL;
1715 	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
1716 		free(buf.buf);
1717 		buf.buf = mandoc_strdup("");
1718 	}
1719 	return buf.buf;
1720 }
1721 
1722 
1723 /*
1724  * Process text streams.
1725  */
1726 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1727 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1728 {
1729 	size_t		 sz;
1730 	const char	*start;
1731 	char		*p;
1732 	int		 isz;
1733 	enum mandoc_esc	 esc;
1734 
1735 	/* Spring the input line trap. */
1736 
1737 	if (roffit_lines == 1) {
1738 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1739 		free(buf->buf);
1740 		buf->buf = p;
1741 		buf->sz = isz + 1;
1742 		*offs = 0;
1743 		free(roffit_macro);
1744 		roffit_lines = 0;
1745 		return ROFF_REPARSE;
1746 	} else if (roffit_lines > 1)
1747 		--roffit_lines;
1748 
1749 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1750 		if (roffce_lines < 1) {
1751 			r->man->last = roffce_node;
1752 			r->man->next = ROFF_NEXT_SIBLING;
1753 			roffce_lines = 0;
1754 			roffce_node = NULL;
1755 		} else
1756 			roffce_lines--;
1757 	}
1758 
1759 	/* Convert all breakable hyphens into ASCII_HYPH. */
1760 
1761 	start = p = buf->buf + pos;
1762 
1763 	while (*p != '\0') {
1764 		sz = strcspn(p, "-\\");
1765 		p += sz;
1766 
1767 		if (*p == '\0')
1768 			break;
1769 
1770 		if (*p == '\\') {
1771 			/* Skip over escapes. */
1772 			p++;
1773 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1774 			if (esc == ESCAPE_ERROR)
1775 				break;
1776 			while (*p == '-')
1777 				p++;
1778 			continue;
1779 		} else if (p == start) {
1780 			p++;
1781 			continue;
1782 		}
1783 
1784 		if (isalpha((unsigned char)p[-1]) &&
1785 		    isalpha((unsigned char)p[1]))
1786 			*p = ASCII_HYPH;
1787 		p++;
1788 	}
1789 	return ROFF_CONT;
1790 }
1791 
1792 int
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs,size_t len)1793 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1794 {
1795 	enum roff_tok	 t;
1796 	int		 e;
1797 	int		 pos;	/* parse point */
1798 	int		 spos;	/* saved parse point for messages */
1799 	int		 ppos;	/* original offset in buf->buf */
1800 	int		 ctl;	/* macro line (boolean) */
1801 
1802 	ppos = pos = *offs;
1803 
1804 	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1805 	    (r->man->flags & ROFF_NOFILL) == 0 &&
1806 	    strchr(" .\\", buf->buf[pos]) == NULL &&
1807 	    buf->buf[pos] != r->control &&
1808 	    strcspn(buf->buf, " ") < 80)
1809 		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1810 		    "%.20s...", buf->buf + pos);
1811 
1812 	/* Handle in-line equation delimiters. */
1813 
1814 	if (r->tbl == NULL &&
1815 	    r->last_eqn != NULL && r->last_eqn->delim &&
1816 	    (r->eqn == NULL || r->eqn_inline)) {
1817 		e = roff_eqndelim(r, buf, pos);
1818 		if (e == ROFF_REPARSE)
1819 			return e;
1820 		assert(e == ROFF_CONT);
1821 	}
1822 
1823 	/* Handle comments and escape sequences. */
1824 
1825 	e = roff_parse_comment(r, buf, ln, pos, r->escape);
1826 	if ((e & ROFF_MASK) == ROFF_IGN)
1827 		return e;
1828 	assert(e == ROFF_CONT);
1829 
1830 	e = roff_expand(r, buf, ln, pos, r->escape);
1831 	if ((e & ROFF_MASK) == ROFF_IGN)
1832 		return e;
1833 	assert(e == ROFF_CONT);
1834 
1835 	ctl = roff_getcontrol(r, buf->buf, &pos);
1836 
1837 	/*
1838 	 * First, if a scope is open and we're not a macro, pass the
1839 	 * text through the macro's filter.
1840 	 * Equations process all content themselves.
1841 	 * Tables process almost all content themselves, but we want
1842 	 * to warn about macros before passing it there.
1843 	 */
1844 
1845 	if (r->last != NULL && ! ctl) {
1846 		t = r->last->tok;
1847 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1848 		if ((e & ROFF_MASK) == ROFF_IGN)
1849 			return e;
1850 		e &= ~ROFF_MASK;
1851 	} else
1852 		e = ROFF_IGN;
1853 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1854 		eqn_read(r->eqn, buf->buf + ppos);
1855 		return e;
1856 	}
1857 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1858 		tbl_read(r->tbl, ln, buf->buf, ppos);
1859 		roff_addtbl(r->man, ln, r->tbl);
1860 		return e;
1861 	}
1862 	if ( ! ctl) {
1863 		r->options &= ~MPARSE_COMMENT;
1864 		return roff_parsetext(r, buf, pos, offs) | e;
1865 	}
1866 
1867 	/* Skip empty request lines. */
1868 
1869 	if (buf->buf[pos] == '"') {
1870 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1871 		return ROFF_IGN;
1872 	} else if (buf->buf[pos] == '\0')
1873 		return ROFF_IGN;
1874 
1875 	/*
1876 	 * If a scope is open, go to the child handler for that macro,
1877 	 * as it may want to preprocess before doing anything with it.
1878 	 */
1879 
1880 	if (r->last) {
1881 		t = r->last->tok;
1882 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1883 	}
1884 
1885 	r->options &= ~MPARSE_COMMENT;
1886 	spos = pos;
1887 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1888 	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1889 }
1890 
1891 /*
1892  * Handle a new request or macro.
1893  * May be called outside any scope or from inside a conditional scope.
1894  */
1895 static int
roff_req_or_macro(ROFF_ARGS)1896 roff_req_or_macro(ROFF_ARGS) {
1897 
1898 	/* For now, tables ignore most macros and some request. */
1899 
1900 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1901 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1902 	    tok == ROFF_sp)) {
1903 		mandoc_msg(MANDOCERR_TBLMACRO,
1904 		    ln, ppos, "%s", buf->buf + ppos);
1905 		if (tok != TOKEN_NONE)
1906 			return ROFF_IGN;
1907 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1908 			pos++;
1909 		while (buf->buf[pos] == ' ')
1910 			pos++;
1911 		tbl_read(r->tbl, ln, buf->buf, pos);
1912 		roff_addtbl(r->man, ln, r->tbl);
1913 		return ROFF_IGN;
1914 	}
1915 
1916 	/* For now, let high level macros abort .ce mode. */
1917 
1918 	if (roffce_node != NULL &&
1919 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1920 	     tok == ROFF_TH || tok == ROFF_TS)) {
1921 		r->man->last = roffce_node;
1922 		r->man->next = ROFF_NEXT_SIBLING;
1923 		roffce_lines = 0;
1924 		roffce_node = NULL;
1925 	}
1926 
1927 	/*
1928 	 * This is neither a roff request nor a user-defined macro.
1929 	 * Let the standard macro set parsers handle it.
1930 	 */
1931 
1932 	if (tok == TOKEN_NONE)
1933 		return ROFF_CONT;
1934 
1935 	/* Execute a roff request or a user-defined macro. */
1936 
1937 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1938 }
1939 
1940 /*
1941  * Internal interface function to tell the roff parser that execution
1942  * of the current macro ended.  This is required because macro
1943  * definitions usually do not end with a .return request.
1944  */
1945 void
roff_userret(struct roff * r)1946 roff_userret(struct roff *r)
1947 {
1948 	struct mctx	*ctx;
1949 	int		 i;
1950 
1951 	assert(r->mstackpos >= 0);
1952 	ctx = r->mstack + r->mstackpos;
1953 	for (i = 0; i < ctx->argc; i++)
1954 		free(ctx->argv[i]);
1955 	ctx->argc = 0;
1956 	r->mstackpos--;
1957 }
1958 
1959 void
roff_endparse(struct roff * r)1960 roff_endparse(struct roff *r)
1961 {
1962 	if (r->last != NULL)
1963 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1964 		    r->last->col, "%s", roff_name[r->last->tok]);
1965 
1966 	if (r->eqn != NULL) {
1967 		mandoc_msg(MANDOCERR_BLK_NOEND,
1968 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1969 		eqn_parse(r->eqn);
1970 		r->eqn = NULL;
1971 	}
1972 
1973 	if (r->tbl != NULL) {
1974 		tbl_end(r->tbl, 1);
1975 		r->tbl = NULL;
1976 	}
1977 }
1978 
1979 /*
1980  * Parse the request or macro name at buf[*pos].
1981  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1982  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1983  * As a side effect, set r->current_string to the definition or to NULL.
1984  */
1985 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1986 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1987 {
1988 	char		*cp;
1989 	const char	*mac;
1990 	size_t		 maclen;
1991 	int		 deftype;
1992 	enum roff_tok	 t;
1993 
1994 	cp = buf + *pos;
1995 
1996 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1997 		return TOKEN_NONE;
1998 
1999 	mac = cp;
2000 	maclen = roff_getname(&cp, ln, ppos);
2001 
2002 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2003 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2004 	switch (deftype) {
2005 	case ROFFDEF_USER:
2006 		t = ROFF_USERDEF;
2007 		break;
2008 	case ROFFDEF_REN:
2009 		t = ROFF_RENAMED;
2010 		break;
2011 	default:
2012 		t = roffhash_find(r->reqtab, mac, maclen);
2013 		break;
2014 	}
2015 	if (t != TOKEN_NONE)
2016 		*pos = cp - buf;
2017 	else if (deftype == ROFFDEF_UNDEF) {
2018 		/* Using an undefined macro defines it to be empty. */
2019 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2020 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2021 	}
2022 	return t;
2023 }
2024 
2025 /* --- handling of request blocks ----------------------------------------- */
2026 
2027 /*
2028  * Close a macro definition block or an "ignore" block.
2029  */
2030 static int
roff_cblock(ROFF_ARGS)2031 roff_cblock(ROFF_ARGS)
2032 {
2033 	int	 rr;
2034 
2035 	if (r->last == NULL) {
2036 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2037 		return ROFF_IGN;
2038 	}
2039 
2040 	switch (r->last->tok) {
2041 	case ROFF_am:
2042 	case ROFF_ami:
2043 	case ROFF_de:
2044 	case ROFF_dei:
2045 	case ROFF_ig:
2046 		break;
2047 	case ROFF_am1:
2048 	case ROFF_de1:
2049 		/* Remapped in roff_block(). */
2050 		abort();
2051 	default:
2052 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2053 		return ROFF_IGN;
2054 	}
2055 
2056 	roffnode_pop(r);
2057 	roffnode_cleanscope(r);
2058 
2059 	/*
2060 	 * If a conditional block with braces is still open,
2061 	 * check for "\}" block end markers.
2062 	 */
2063 
2064 	if (r->last != NULL && r->last->endspan < 0) {
2065 		rr = 1;  /* If arguments follow "\}", warn about them. */
2066 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2067 	}
2068 
2069 	if (buf->buf[pos] != '\0')
2070 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2071 		    ".. %s", buf->buf + pos);
2072 
2073 	return ROFF_IGN;
2074 }
2075 
2076 /*
2077  * Pop all nodes ending at the end of the current input line.
2078  * Return the number of loops ended.
2079  */
2080 static int
roffnode_cleanscope(struct roff * r)2081 roffnode_cleanscope(struct roff *r)
2082 {
2083 	int inloop;
2084 
2085 	inloop = 0;
2086 	while (r->last != NULL && r->last->endspan > 0) {
2087 		if (--r->last->endspan != 0)
2088 			break;
2089 		inloop += roffnode_pop(r);
2090 	}
2091 	return inloop;
2092 }
2093 
2094 /*
2095  * Handle the closing "\}" of a conditional block.
2096  * Apart from generating warnings, this only pops nodes.
2097  * Return the number of loops ended.
2098  */
2099 static int
roff_ccond(struct roff * r,int ln,int ppos)2100 roff_ccond(struct roff *r, int ln, int ppos)
2101 {
2102 	if (NULL == r->last) {
2103 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2104 		return 0;
2105 	}
2106 
2107 	switch (r->last->tok) {
2108 	case ROFF_el:
2109 	case ROFF_ie:
2110 	case ROFF_if:
2111 	case ROFF_while:
2112 		break;
2113 	default:
2114 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2115 		return 0;
2116 	}
2117 
2118 	if (r->last->endspan > -1) {
2119 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2120 		return 0;
2121 	}
2122 
2123 	return roffnode_pop(r) + roffnode_cleanscope(r);
2124 }
2125 
2126 static int
roff_block(ROFF_ARGS)2127 roff_block(ROFF_ARGS)
2128 {
2129 	const char	*name, *value;
2130 	char		*call, *cp, *iname, *rname;
2131 	size_t		 csz, namesz, rsz;
2132 	int		 deftype;
2133 
2134 	/* Ignore groff compatibility mode for now. */
2135 
2136 	if (tok == ROFF_de1)
2137 		tok = ROFF_de;
2138 	else if (tok == ROFF_dei1)
2139 		tok = ROFF_dei;
2140 	else if (tok == ROFF_am1)
2141 		tok = ROFF_am;
2142 	else if (tok == ROFF_ami1)
2143 		tok = ROFF_ami;
2144 
2145 	/* Parse the macro name argument. */
2146 
2147 	cp = buf->buf + pos;
2148 	if (tok == ROFF_ig) {
2149 		iname = NULL;
2150 		namesz = 0;
2151 	} else {
2152 		iname = cp;
2153 		namesz = roff_getname(&cp, ln, ppos);
2154 		iname[namesz] = '\0';
2155 	}
2156 
2157 	/* Resolve the macro name argument if it is indirect. */
2158 
2159 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2160 		deftype = ROFFDEF_USER;
2161 		name = roff_getstrn(r, iname, namesz, &deftype);
2162 		if (name == NULL) {
2163 			mandoc_msg(MANDOCERR_STR_UNDEF,
2164 			    ln, (int)(iname - buf->buf),
2165 			    "%.*s", (int)namesz, iname);
2166 			namesz = 0;
2167 		} else
2168 			namesz = strlen(name);
2169 	} else
2170 		name = iname;
2171 
2172 	if (namesz == 0 && tok != ROFF_ig) {
2173 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2174 		    ln, ppos, "%s", roff_name[tok]);
2175 		return ROFF_IGN;
2176 	}
2177 
2178 	roffnode_push(r, tok, name, ln, ppos);
2179 
2180 	/*
2181 	 * At the beginning of a `de' macro, clear the existing string
2182 	 * with the same name, if there is one.  New content will be
2183 	 * appended from roff_block_text() in multiline mode.
2184 	 */
2185 
2186 	if (tok == ROFF_de || tok == ROFF_dei) {
2187 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2188 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2189 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2190 		deftype = ROFFDEF_ANY;
2191 		value = roff_getstrn(r, iname, namesz, &deftype);
2192 		switch (deftype) {  /* Before appending, ... */
2193 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2194 			roff_setstrn(&r->strtab, name, namesz,
2195 			    value, strlen(value), 0);
2196 			break;
2197 		case ROFFDEF_REN: /* call original standard macro. */
2198 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2199 			    (int)strlen(value), value);
2200 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2201 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2202 			free(call);
2203 			break;
2204 		case ROFFDEF_STD:  /* rename and call standard macro. */
2205 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2206 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2207 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2208 			    (int)rsz, rname);
2209 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2210 			free(call);
2211 			free(rname);
2212 			break;
2213 		default:
2214 			break;
2215 		}
2216 	}
2217 
2218 	if (*cp == '\0')
2219 		return ROFF_IGN;
2220 
2221 	/* Get the custom end marker. */
2222 
2223 	iname = cp;
2224 	namesz = roff_getname(&cp, ln, ppos);
2225 
2226 	/* Resolve the end marker if it is indirect. */
2227 
2228 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2229 		deftype = ROFFDEF_USER;
2230 		name = roff_getstrn(r, iname, namesz, &deftype);
2231 		if (name == NULL) {
2232 			mandoc_msg(MANDOCERR_STR_UNDEF,
2233 			    ln, (int)(iname - buf->buf),
2234 			    "%.*s", (int)namesz, iname);
2235 			namesz = 0;
2236 		} else
2237 			namesz = strlen(name);
2238 	} else
2239 		name = iname;
2240 
2241 	if (namesz)
2242 		r->last->end = mandoc_strndup(name, namesz);
2243 
2244 	if (*cp != '\0')
2245 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2246 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2247 
2248 	return ROFF_IGN;
2249 }
2250 
2251 static int
roff_block_sub(ROFF_ARGS)2252 roff_block_sub(ROFF_ARGS)
2253 {
2254 	enum roff_tok	t;
2255 	int		i, j;
2256 
2257 	/*
2258 	 * If a custom end marker is a user-defined or predefined macro
2259 	 * or a request, interpret it.
2260 	 */
2261 
2262 	if (r->last->end) {
2263 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2264 			if (buf->buf[i] != r->last->end[j])
2265 				break;
2266 
2267 		if (r->last->end[j] == '\0' &&
2268 		    (buf->buf[i] == '\0' ||
2269 		     buf->buf[i] == ' ' ||
2270 		     buf->buf[i] == '\t')) {
2271 			roffnode_pop(r);
2272 			roffnode_cleanscope(r);
2273 
2274 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2275 				i++;
2276 
2277 			pos = i;
2278 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2279 			    TOKEN_NONE)
2280 				return ROFF_RERUN;
2281 			return ROFF_IGN;
2282 		}
2283 	}
2284 
2285 	/* Handle the standard end marker. */
2286 
2287 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2288 	if (t == ROFF_cblock)
2289 		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2290 
2291 	/* Not an end marker, so append the line to the block. */
2292 
2293 	if (tok != ROFF_ig)
2294 		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2295 	return ROFF_IGN;
2296 }
2297 
2298 static int
roff_block_text(ROFF_ARGS)2299 roff_block_text(ROFF_ARGS)
2300 {
2301 
2302 	if (tok != ROFF_ig)
2303 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2304 
2305 	return ROFF_IGN;
2306 }
2307 
2308 /*
2309  * Check for a closing "\}" and handle it.
2310  * In this function, the final "int *offs" argument is used for
2311  * different purposes than elsewhere:
2312  * Input: *offs == 0: caller wants to discard arguments following \}
2313  *        *offs == 1: caller wants to preserve text following \}
2314  * Output: *offs = 0: tell caller to discard input line
2315  *         *offs = 1: tell caller to use input line
2316  */
2317 static int
roff_cond_checkend(ROFF_ARGS)2318 roff_cond_checkend(ROFF_ARGS)
2319 {
2320 	char		*ep;
2321 	int		 endloop, irc, rr;
2322 
2323 	irc = ROFF_IGN;
2324 	rr = r->last->rule;
2325 	endloop = tok != ROFF_while ? ROFF_IGN :
2326 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2327 	if (roffnode_cleanscope(r))
2328 		irc |= endloop;
2329 
2330 	/*
2331 	 * If "\}" occurs on a macro line without a preceding macro or
2332 	 * a text line contains nothing else, drop the line completely.
2333 	 */
2334 
2335 	ep = buf->buf + pos;
2336 	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2337 		rr = 0;
2338 
2339 	/*
2340 	 * The closing delimiter "\}" rewinds the conditional scope
2341 	 * but is otherwise ignored when interpreting the line.
2342 	 */
2343 
2344 	while ((ep = strchr(ep, '\\')) != NULL) {
2345 		switch (ep[1]) {
2346 		case '}':
2347 			if (ep[2] == '\0')
2348 				ep[0] = '\0';
2349 			else if (rr)
2350 				ep[1] = '&';
2351 			else
2352 				memmove(ep, ep + 2, strlen(ep + 2) + 1);
2353 			if (roff_ccond(r, ln, ep - buf->buf))
2354 				irc |= endloop;
2355 			break;
2356 		case '\0':
2357 			++ep;
2358 			break;
2359 		default:
2360 			ep += 2;
2361 			break;
2362 		}
2363 	}
2364 	*offs = rr;
2365 	return irc;
2366 }
2367 
2368 /*
2369  * Parse and process a request or macro line in conditional scope.
2370  */
2371 static int
roff_cond_sub(ROFF_ARGS)2372 roff_cond_sub(ROFF_ARGS)
2373 {
2374 	struct roffnode	*bl;
2375 	int		 irc, rr, spos;
2376 	enum roff_tok	 t;
2377 
2378 	rr = 0;  /* If arguments follow "\}", skip them. */
2379 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2380 	spos = pos;
2381 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2382 
2383 	/*
2384 	 * Handle requests and macros if the conditional evaluated
2385 	 * to true or if they are structurally required.
2386 	 * The .break request is always handled specially.
2387 	 */
2388 
2389 	if (t == ROFF_break) {
2390 		if (irc & ROFF_LOOPMASK)
2391 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2392 		else if (rr) {
2393 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2394 				bl->rule = 0;
2395 				if (bl->tok == ROFF_while)
2396 					break;
2397 			}
2398 		}
2399 	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2400 		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2401 		if (irc & ROFF_WHILE)
2402 			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2403 	}
2404 	return irc;
2405 }
2406 
2407 /*
2408  * Parse and process a text line in conditional scope.
2409  */
2410 static int
roff_cond_text(ROFF_ARGS)2411 roff_cond_text(ROFF_ARGS)
2412 {
2413 	int	 irc, rr;
2414 
2415 	rr = 1;  /* If arguments follow "\}", preserve them. */
2416 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2417 	if (rr)
2418 		irc |= ROFF_CONT;
2419 	return irc;
2420 }
2421 
2422 /* --- handling of numeric and conditional expressions -------------------- */
2423 
/*
 * Parse a single signed decimal number, optionally with a fractional
 * part and one trailing scaling unit, starting at v[*pos].
 * If "unit" is not '\0', it is the default scaling unit and gets
 * overridden by a unit found in the input; if it is '\0', a unit in
 * the input is consumed but no scaling is applied.
 * With "skipspace", whitespace after the optional sign is skipped.
 * If nothing but an optional minus sign could be consumed, return 0
 * and leave the parse point unchanged; otherwise store the scaled
 * value, truncated to int, in *res (unless res is NULL), advance
 * *pos past the number, and return 1.
 * Ignore overflows, treat them just like the C language.
 */
static int
roff_getnum(const char *v, int *pos, int *res, char unit, int skipspace)
{
	const char	*cp;
	double		 scale, value;
	int		 negative;

	cp = v + *pos;
	negative = *cp == '-';
	if (negative || *cp == '+')
		cp++;

	if (skipspace)
		while (isspace((unsigned char)*cp))
			cp++;

	/* Integer part. */
	value = 0.0;
	while (isdigit((unsigned char)*cp)) {
		value = value * 10.0 + (*cp - '0');
		cp++;
	}

	/* Fractional part. */
	if (*cp == '.') {
		scale = 0.1;
		while (isdigit((unsigned char)*++cp)) {
			value += scale * (*cp - '0');
			scale *= 0.1;
		}
	}

	if (cp == v + *pos + negative)
		return 0;

	if (negative)
		value *= -1.0;

	/* Each number may be followed by one optional scaling unit. */

	if (*cp != '\0' && strchr("ficvPmnpuM", *cp) != NULL) {
		if (unit != '\0')
			unit = *cp;
		cp++;
	}

	switch (unit) {
	case 'f':
		value *= 65536.0;
		break;
	case 'i':
		value *= 240.0;
		break;
	case 'c':
		value *= 24000.0;
		value /= 254.0;
		break;
	case 'v':
	case 'P':
		value *= 40.0;
		break;
	case 'm':
	case 'n':
		value *= 24.0;
		break;
	case 'p':
		value *= 40.0;
		value /= 12.0;
		break;
	case 'M':
		value *= 24.0;
		value /= 100.0;
		break;
	case 'u':
	default:
		break;
	}

	if (res != NULL)
		*res = value;
	*pos = (int)(cp - v);
	return 1;
}
2502 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * Returns 1 if the two strings are equal and the closing
 * delimiter is present, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 equal;

	equal = 0;
	delim = v[*pos];		/* initial delimiter */
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	if (rhs != NULL) {
		for (;;) {
			rhs++;
			if (*rhs == '\0')
				break;
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	/* A delimiter is missing: consume the rest of the line. */
	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;

	*pos = rhs - v;
	return equal;
}
2545 
2546 /*
2547  * Evaluate an optionally negated single character, numerical,
2548  * or string condition.
2549  */
2550 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2551 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2552 {
2553 	const char	*start, *end;
2554 	char		*cp, *name;
2555 	size_t		 sz;
2556 	int		 deftype, len, number, savepos, istrue, wanttrue;
2557 
2558 	if ('!' == v[*pos]) {
2559 		wanttrue = 0;
2560 		(*pos)++;
2561 	} else
2562 		wanttrue = 1;
2563 
2564 	switch (v[*pos]) {
2565 	case '\0':
2566 		return 0;
2567 	case 'n':
2568 	case 'o':
2569 		(*pos)++;
2570 		return wanttrue;
2571 	case 'e':
2572 	case 't':
2573 	case 'v':
2574 		(*pos)++;
2575 		return !wanttrue;
2576 	case 'c':
2577 		do {
2578 			(*pos)++;
2579 		} while (v[*pos] == ' ');
2580 
2581 		/*
2582 		 * Quirk for groff compatibility:
2583 		 * The horizontal tab is neither available nor unavailable.
2584 		 */
2585 
2586 		if (v[*pos] == '\t') {
2587 			(*pos)++;
2588 			return 0;
2589 		}
2590 
2591 		/* Printable ASCII characters are available. */
2592 
2593 		if (v[*pos] != '\\') {
2594 			(*pos)++;
2595 			return wanttrue;
2596 		}
2597 
2598 		end = v + ++*pos;
2599 		switch (mandoc_escape(&end, &start, &len)) {
2600 		case ESCAPE_SPECIAL:
2601 			istrue = mchars_spec2cp(start, len) != -1;
2602 			break;
2603 		case ESCAPE_UNICODE:
2604 			istrue = 1;
2605 			break;
2606 		case ESCAPE_NUMBERED:
2607 			istrue = mchars_num2char(start, len) != -1;
2608 			break;
2609 		default:
2610 			istrue = !wanttrue;
2611 			break;
2612 		}
2613 		*pos = end - v;
2614 		return istrue == wanttrue;
2615 	case 'd':
2616 	case 'r':
2617 		cp = v + *pos + 1;
2618 		while (*cp == ' ')
2619 			cp++;
2620 		name = cp;
2621 		sz = roff_getname(&cp, ln, cp - v);
2622 		if (sz == 0)
2623 			istrue = 0;
2624 		else if (v[*pos] == 'r')
2625 			istrue = roff_hasregn(r, name, sz);
2626 		else {
2627 			deftype = ROFFDEF_ANY;
2628 		        roff_getstrn(r, name, sz, &deftype);
2629 			istrue = !!deftype;
2630 		}
2631 		*pos = (name + sz) - v;
2632 		return istrue == wanttrue;
2633 	default:
2634 		break;
2635 	}
2636 
2637 	savepos = *pos;
2638 	if (roff_evalnum(ln, v, pos, &number, 'u', 0))
2639 		return (number > 0) == wanttrue;
2640 	else if (*pos == savepos)
2641 		return roff_evalstrcond(v, pos) == wanttrue;
2642 	else
2643 		return 0;
2644 }
2645 
2646 static int
roff_line_ignore(ROFF_ARGS)2647 roff_line_ignore(ROFF_ARGS)
2648 {
2649 
2650 	return ROFF_IGN;
2651 }
2652 
2653 static int
roff_insec(ROFF_ARGS)2654 roff_insec(ROFF_ARGS)
2655 {
2656 
2657 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2658 	return ROFF_IGN;
2659 }
2660 
2661 static int
roff_unsupp(ROFF_ARGS)2662 roff_unsupp(ROFF_ARGS)
2663 {
2664 
2665 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2666 	return ROFF_IGN;
2667 }
2668 
2669 static int
roff_cond(ROFF_ARGS)2670 roff_cond(ROFF_ARGS)
2671 {
2672 	int	 irc;
2673 
2674 	roffnode_push(r, tok, NULL, ln, ppos);
2675 
2676 	/*
2677 	 * An `.el' has no conditional body: it will consume the value
2678 	 * of the current rstack entry set in prior `ie' calls or
2679 	 * defaults to DENY.
2680 	 *
2681 	 * If we're not an `el', however, then evaluate the conditional.
2682 	 */
2683 
2684 	r->last->rule = tok == ROFF_el ?
2685 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2686 	    roff_evalcond(r, ln, buf->buf, &pos);
2687 
2688 	/*
2689 	 * An if-else will put the NEGATION of the current evaluated
2690 	 * conditional into the stack of rules.
2691 	 */
2692 
2693 	if (tok == ROFF_ie) {
2694 		if (r->rstackpos + 1 == r->rstacksz) {
2695 			r->rstacksz += 16;
2696 			r->rstack = mandoc_reallocarray(r->rstack,
2697 			    r->rstacksz, sizeof(int));
2698 		}
2699 		r->rstack[++r->rstackpos] = !r->last->rule;
2700 	}
2701 
2702 	/* If the parent has false as its rule, then so do we. */
2703 
2704 	if (r->last->parent && !r->last->parent->rule)
2705 		r->last->rule = 0;
2706 
2707 	/*
2708 	 * Determine scope.
2709 	 * If there is nothing on the line after the conditional,
2710 	 * not even whitespace, use next-line scope.
2711 	 * Except that .while does not support next-line scope.
2712 	 */
2713 
2714 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2715 		r->last->endspan = 2;
2716 		goto out;
2717 	}
2718 
2719 	while (buf->buf[pos] == ' ')
2720 		pos++;
2721 
2722 	/* An opening brace requests multiline scope. */
2723 
2724 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2725 		r->last->endspan = -1;
2726 		pos += 2;
2727 		while (buf->buf[pos] == ' ')
2728 			pos++;
2729 		goto out;
2730 	}
2731 
2732 	/*
2733 	 * Anything else following the conditional causes
2734 	 * single-line scope.  Warn if the scope contains
2735 	 * nothing but trailing whitespace.
2736 	 */
2737 
2738 	if (buf->buf[pos] == '\0')
2739 		mandoc_msg(MANDOCERR_COND_EMPTY,
2740 		    ln, ppos, "%s", roff_name[tok]);
2741 
2742 	r->last->endspan = 1;
2743 
2744 out:
2745 	*offs = pos;
2746 	irc = ROFF_RERUN;
2747 	if (tok == ROFF_while)
2748 		irc |= ROFF_WHILE;
2749 	return irc;
2750 }
2751 
2752 static int
roff_ds(ROFF_ARGS)2753 roff_ds(ROFF_ARGS)
2754 {
2755 	char		*string;
2756 	const char	*name;
2757 	size_t		 namesz;
2758 
2759 	/* Ignore groff compatibility mode for now. */
2760 
2761 	if (tok == ROFF_ds1)
2762 		tok = ROFF_ds;
2763 	else if (tok == ROFF_as1)
2764 		tok = ROFF_as;
2765 
2766 	/*
2767 	 * The first word is the name of the string.
2768 	 * If it is empty or terminated by an escape sequence,
2769 	 * abort the `ds' request without defining anything.
2770 	 */
2771 
2772 	name = string = buf->buf + pos;
2773 	if (*name == '\0')
2774 		return ROFF_IGN;
2775 
2776 	namesz = roff_getname(&string, ln, pos);
2777 	switch (name[namesz]) {
2778 	case '\\':
2779 		return ROFF_IGN;
2780 	case '\t':
2781 		string = buf->buf + pos + namesz;
2782 		break;
2783 	default:
2784 		break;
2785 	}
2786 
2787 	/* Read past the initial double-quote, if any. */
2788 	if (*string == '"')
2789 		string++;
2790 
2791 	/* The rest is the value. */
2792 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2793 	    ROFF_as == tok);
2794 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2795 	return ROFF_IGN;
2796 }
2797 
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its code in *res, advance
 * the parse point past it, and return the code; else return 0 and
 * leave the parse point unchanged.
 * One-character operators are their own code.  The two-character
 * operators are encoded as: "<=" 'l', "<>" '!', "<?" 'i',
 * ">=" 'g', ">?" 'a', and "==" '='.
 * Note that *res is written even on failure.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = cp[0];
	len = 1;

	if (cp[0] == '<') {
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (cp[0] == '>') {
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (cp[0] == '=') {
		if (cp[1] == '=')
			len = 2;
	} else if (cp[0] == '\0' || strchr("+-*/%&:", cp[0]) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2861 
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(int ln, const char *v, int *pos, int *res, char unit,
    int skipspace)
{
	/* No parenthesis: a plain number. */

	if (v[*pos] != '(')
		return roff_getnum(v, pos, res, unit, skipspace);

	/* Whitespace is always skipped inside parentheses. */

	(*pos)++;
	if (roff_evalnum(ln, v, pos, res, unit, 1) == 0)
		return 0;

	if (v[*pos] == ')') {
		(*pos)++;
		return 1;
	}

	/*
	 * Omission of the closing parenthesis
	 * is an error in validation mode (res == NULL),
	 * but ignored in evaluation mode.
	 */

	return res != NULL;
}
2891 
2892 /*
2893  * Evaluate a complete numeric expression.
2894  * Proceed left to right, there is no concept of precedence.
2895  */
2896 int
roff_evalnum(int ln,const char * v,int * pos,int * res,char unit,int skipspace)2897 roff_evalnum(int ln, const char *v, int *pos, int *res, char unit,
2898     int skipspace)
2899 {
2900 	int		 mypos, operand2;
2901 	char		 operator;
2902 
2903 	if (NULL == pos) {
2904 		mypos = 0;
2905 		pos = &mypos;
2906 	}
2907 
2908 	if (skipspace)
2909 		while (isspace((unsigned char)v[*pos]))
2910 			(*pos)++;
2911 
2912 	if ( ! roff_evalpar(ln, v, pos, res, unit, skipspace))
2913 		return 0;
2914 
2915 	while (1) {
2916 		if (skipspace)
2917 			while (isspace((unsigned char)v[*pos]))
2918 				(*pos)++;
2919 
2920 		if ( ! roff_getop(v, pos, &operator))
2921 			break;
2922 
2923 		if (skipspace)
2924 			while (isspace((unsigned char)v[*pos]))
2925 				(*pos)++;
2926 
2927 		if ( ! roff_evalpar(ln, v, pos, &operand2, unit, skipspace))
2928 			return 0;
2929 
2930 		if (skipspace)
2931 			while (isspace((unsigned char)v[*pos]))
2932 				(*pos)++;
2933 
2934 		if (NULL == res)
2935 			continue;
2936 
2937 		switch (operator) {
2938 		case '+':
2939 			*res += operand2;
2940 			break;
2941 		case '-':
2942 			*res -= operand2;
2943 			break;
2944 		case '*':
2945 			*res *= operand2;
2946 			break;
2947 		case '/':
2948 			if (operand2 == 0) {
2949 				mandoc_msg(MANDOCERR_DIVZERO,
2950 					ln, *pos, "%s", v);
2951 				*res = 0;
2952 				break;
2953 			}
2954 			*res /= operand2;
2955 			break;
2956 		case '%':
2957 			if (operand2 == 0) {
2958 				mandoc_msg(MANDOCERR_DIVZERO,
2959 					ln, *pos, "%s", v);
2960 				*res = 0;
2961 				break;
2962 			}
2963 			*res %= operand2;
2964 			break;
2965 		case '<':
2966 			*res = *res < operand2;
2967 			break;
2968 		case '>':
2969 			*res = *res > operand2;
2970 			break;
2971 		case 'l':
2972 			*res = *res <= operand2;
2973 			break;
2974 		case 'g':
2975 			*res = *res >= operand2;
2976 			break;
2977 		case '=':
2978 			*res = *res == operand2;
2979 			break;
2980 		case '!':
2981 			*res = *res != operand2;
2982 			break;
2983 		case '&':
2984 			*res = *res && operand2;
2985 			break;
2986 		case ':':
2987 			*res = *res || operand2;
2988 			break;
2989 		case 'i':
2990 			if (operand2 < *res)
2991 				*res = operand2;
2992 			break;
2993 		case 'a':
2994 			if (operand2 > *res)
2995 				*res = operand2;
2996 			break;
2997 		default:
2998 			abort();
2999 		}
3000 	}
3001 	return 1;
3002 }
3003 
3004 /* --- register management ------------------------------------------------ */
3005 
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name "name", depending on "sign", without
 * touching its step value.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3011 
3012 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3013 roff_setregn(struct roff *r, const char *name, size_t len,
3014     int val, char sign, int step)
3015 {
3016 	struct roffreg	*reg;
3017 
3018 	/* Search for an existing register with the same name. */
3019 	reg = r->regtab;
3020 
3021 	while (reg != NULL && (reg->key.sz != len ||
3022 	    strncmp(reg->key.p, name, len) != 0))
3023 		reg = reg->next;
3024 
3025 	if (NULL == reg) {
3026 		/* Create a new register. */
3027 		reg = mandoc_malloc(sizeof(struct roffreg));
3028 		reg->key.p = mandoc_strndup(name, len);
3029 		reg->key.sz = len;
3030 		reg->val = 0;
3031 		reg->step = 0;
3032 		reg->next = r->regtab;
3033 		r->regtab = reg;
3034 	}
3035 
3036 	if ('+' == sign)
3037 		reg->val += val;
3038 	else if ('-' == sign)
3039 		reg->val -= val;
3040 	else
3041 		reg->val = val;
3042 	if (step != INT_MIN)
3043 		reg->step = step;
3044 }
3045 
3046 /*
3047  * Handle some predefined read-only number registers.
3048  * For now, return -1 if the requested register is not predefined;
3049  * in case a predefined read-only register having the value -1
3050  * were to turn up, another special value would have to be chosen.
3051  */
3052 static int
roff_getregro(const struct roff * r,const char * name)3053 roff_getregro(const struct roff *r, const char *name)
3054 {
3055 
3056 	switch (*name) {
3057 	case '$':  /* Number of arguments of the last macro evaluated. */
3058 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3059 	case 'A':  /* ASCII approximation mode is always off. */
3060 		return 0;
3061 	case 'g':  /* Groff compatibility mode is always on. */
3062 		return 1;
3063 	case 'H':  /* Fixed horizontal resolution. */
3064 		return 24;
3065 	case 'j':  /* Always adjust left margin only. */
3066 		return 0;
3067 	case 'l':  /* Fixed line width for DocBook. */
3068 		return 78 * 24;
3069 	case 'T':  /* Some output device is always defined. */
3070 		return 1;
3071 	case 'V':  /* Fixed vertical resolution. */
3072 		return 40;
3073 	default:
3074 		return -1;
3075 	}
3076 }
3077 
/*
 * Return the value of the number register with the
 * NUL-terminated name "name", without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3083 
3084 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3085 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3086 {
3087 	struct roffreg	*reg;
3088 	int		 val;
3089 
3090 	if ('.' == name[0] && 2 == len) {
3091 		val = roff_getregro(r, name + 1);
3092 		if (-1 != val)
3093 			return val;
3094 	}
3095 
3096 	for (reg = r->regtab; reg; reg = reg->next) {
3097 		if (len == reg->key.sz &&
3098 		    0 == strncmp(name, reg->key.p, len)) {
3099 			switch (sign) {
3100 			case '+':
3101 				reg->val += reg->step;
3102 				break;
3103 			case '-':
3104 				reg->val -= reg->step;
3105 				break;
3106 			default:
3107 				break;
3108 			}
3109 			return reg->val;
3110 		}
3111 	}
3112 
3113 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3114 	return 0;
3115 }
3116 
3117 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3118 roff_hasregn(const struct roff *r, const char *name, size_t len)
3119 {
3120 	struct roffreg	*reg;
3121 	int		 val;
3122 
3123 	if ('.' == name[0] && 2 == len) {
3124 		val = roff_getregro(r, name + 1);
3125 		if (-1 != val)
3126 			return 1;
3127 	}
3128 
3129 	for (reg = r->regtab; reg; reg = reg->next)
3130 		if (len == reg->key.sz &&
3131 		    0 == strncmp(name, reg->key.p, len))
3132 			return 1;
3133 
3134 	return 0;
3135 }
3136 
3137 static void
roff_freereg(struct roffreg * reg)3138 roff_freereg(struct roffreg *reg)
3139 {
3140 	struct roffreg	*old_reg;
3141 
3142 	while (NULL != reg) {
3143 		free(reg->key.p);
3144 		old_reg = reg;
3145 		reg = reg->next;
3146 		free(old_reg);
3147 	}
3148 }
3149 
3150 static int
roff_nr(ROFF_ARGS)3151 roff_nr(ROFF_ARGS)
3152 {
3153 	char		*key, *val, *step;
3154 	size_t		 keysz;
3155 	int		 iv, is, len;
3156 	char		 sign;
3157 
3158 	key = val = buf->buf + pos;
3159 	if (*key == '\0')
3160 		return ROFF_IGN;
3161 
3162 	keysz = roff_getname(&val, ln, pos);
3163 	if (key[keysz] == '\\' || key[keysz] == '\t')
3164 		return ROFF_IGN;
3165 
3166 	sign = *val;
3167 	if (sign == '+' || sign == '-')
3168 		val++;
3169 
3170 	len = 0;
3171 	if (roff_evalnum(ln, val, &len, &iv, 'u', 0) == 0)
3172 		return ROFF_IGN;
3173 
3174 	step = val + len;
3175 	while (isspace((unsigned char)*step))
3176 		step++;
3177 	if (roff_evalnum(ln, step, NULL, &is, '\0', 0) == 0)
3178 		is = INT_MIN;
3179 
3180 	roff_setregn(r, key, keysz, iv, sign, is);
3181 	return ROFF_IGN;
3182 }
3183 
3184 static int
roff_rr(ROFF_ARGS)3185 roff_rr(ROFF_ARGS)
3186 {
3187 	struct roffreg	*reg, **prev;
3188 	char		*name, *cp;
3189 	size_t		 namesz;
3190 
3191 	name = cp = buf->buf + pos;
3192 	if (*name == '\0')
3193 		return ROFF_IGN;
3194 	namesz = roff_getname(&cp, ln, pos);
3195 	name[namesz] = '\0';
3196 
3197 	prev = &r->regtab;
3198 	while (1) {
3199 		reg = *prev;
3200 		if (reg == NULL || !strcmp(name, reg->key.p))
3201 			break;
3202 		prev = &reg->next;
3203 	}
3204 	if (reg != NULL) {
3205 		*prev = reg->next;
3206 		free(reg->key.p);
3207 		free(reg);
3208 	}
3209 	return ROFF_IGN;
3210 }
3211 
3212 /* --- handler functions for roff requests -------------------------------- */
3213 
3214 static int
roff_rm(ROFF_ARGS)3215 roff_rm(ROFF_ARGS)
3216 {
3217 	const char	 *name;
3218 	char		 *cp;
3219 	size_t		  namesz;
3220 
3221 	cp = buf->buf + pos;
3222 	while (*cp != '\0') {
3223 		name = cp;
3224 		namesz = roff_getname(&cp, ln, (int)(cp - buf->buf));
3225 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3226 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3227 		if (name[namesz] == '\\' || name[namesz] == '\t')
3228 			break;
3229 	}
3230 	return ROFF_IGN;
3231 }
3232 
3233 static int
roff_it(ROFF_ARGS)3234 roff_it(ROFF_ARGS)
3235 {
3236 	int		 iv;
3237 
3238 	/* Parse the number of lines. */
3239 
3240 	if ( ! roff_evalnum(ln, buf->buf, &pos, &iv, '\0', 0)) {
3241 		mandoc_msg(MANDOCERR_IT_NONUM,
3242 		    ln, ppos, "%s", buf->buf + 1);
3243 		return ROFF_IGN;
3244 	}
3245 
3246 	while (isspace((unsigned char)buf->buf[pos]))
3247 		pos++;
3248 
3249 	/*
3250 	 * Arm the input line trap.
3251 	 * Special-casing "an-trap" is an ugly workaround to cope
3252 	 * with DocBook stupidly fiddling with man(7) internals.
3253 	 */
3254 
3255 	roffit_lines = iv;
3256 	roffit_macro = mandoc_strdup(iv != 1 ||
3257 	    strcmp(buf->buf + pos, "an-trap") ?
3258 	    buf->buf + pos : "br");
3259 	return ROFF_IGN;
3260 }
3261 
3262 static int
roff_Dd(ROFF_ARGS)3263 roff_Dd(ROFF_ARGS)
3264 {
3265 	int		 mask;
3266 	enum roff_tok	 t, te;
3267 
3268 	switch (tok) {
3269 	case ROFF_Dd:
3270 		tok = MDOC_Dd;
3271 		te = MDOC_MAX;
3272 		if (r->format == 0)
3273 			r->format = MPARSE_MDOC;
3274 		mask = MPARSE_MDOC | MPARSE_QUICK;
3275 		break;
3276 	case ROFF_TH:
3277 		tok = MAN_TH;
3278 		te = MAN_MAX;
3279 		if (r->format == 0)
3280 			r->format = MPARSE_MAN;
3281 		mask = MPARSE_QUICK;
3282 		break;
3283 	default:
3284 		abort();
3285 	}
3286 	if ((r->options & mask) == 0)
3287 		for (t = tok; t < te; t++)
3288 			roff_setstr(r, roff_name[t], NULL, 0);
3289 	return ROFF_CONT;
3290 }
3291 
3292 static int
roff_TE(ROFF_ARGS)3293 roff_TE(ROFF_ARGS)
3294 {
3295 	r->man->flags &= ~ROFF_NONOFILL;
3296 	if (r->tbl == NULL) {
3297 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3298 		return ROFF_IGN;
3299 	}
3300 	if (tbl_end(r->tbl, 0) == 0) {
3301 		r->tbl = NULL;
3302 		free(buf->buf);
3303 		buf->buf = mandoc_strdup(".sp");
3304 		buf->sz = 4;
3305 		*offs = 0;
3306 		return ROFF_REPARSE;
3307 	}
3308 	r->tbl = NULL;
3309 	return ROFF_IGN;
3310 }
3311 
3312 static int
roff_T_(ROFF_ARGS)3313 roff_T_(ROFF_ARGS)
3314 {
3315 
3316 	if (NULL == r->tbl)
3317 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3318 	else
3319 		tbl_restart(ln, ppos, r->tbl);
3320 
3321 	return ROFF_IGN;
3322 }
3323 
3324 /*
3325  * Handle in-line equation delimiters.
3326  */
3327 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3328 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3329 {
3330 	char		*cp1, *cp2;
3331 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3332 
3333 	/*
3334 	 * Outside equations, look for an opening delimiter.
3335 	 * If we are inside an equation, we already know it is
3336 	 * in-line, or this function wouldn't have been called;
3337 	 * so look for a closing delimiter.
3338 	 */
3339 
3340 	cp1 = buf->buf + pos;
3341 	cp2 = strchr(cp1, r->eqn == NULL ?
3342 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3343 	if (cp2 == NULL)
3344 		return ROFF_CONT;
3345 
3346 	*cp2++ = '\0';
3347 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3348 
3349 	/* Handle preceding text, protecting whitespace. */
3350 
3351 	if (*buf->buf != '\0') {
3352 		if (r->eqn == NULL)
3353 			bef_pr = "\\&";
3354 		bef_nl = "\n";
3355 	}
3356 
3357 	/*
3358 	 * Prepare replacing the delimiter with an equation macro
3359 	 * and drop leading white space from the equation.
3360 	 */
3361 
3362 	if (r->eqn == NULL) {
3363 		while (*cp2 == ' ')
3364 			cp2++;
3365 		mac = ".EQ";
3366 	} else
3367 		mac = ".EN";
3368 
3369 	/* Handle following text, protecting whitespace. */
3370 
3371 	if (*cp2 != '\0') {
3372 		aft_nl = "\n";
3373 		if (r->eqn != NULL)
3374 			aft_pr = "\\&";
3375 	}
3376 
3377 	/* Do the actual replacement. */
3378 
3379 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3380 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3381 	free(buf->buf);
3382 	buf->buf = cp1;
3383 
3384 	/* Toggle the in-line state of the eqn subsystem. */
3385 
3386 	r->eqn_inline = r->eqn == NULL;
3387 	return ROFF_REPARSE;
3388 }
3389 
3390 static int
roff_EQ(ROFF_ARGS)3391 roff_EQ(ROFF_ARGS)
3392 {
3393 	struct roff_node	*n;
3394 
3395 	if (r->man->meta.macroset == MACROSET_MAN)
3396 		man_breakscope(r->man, ROFF_EQ);
3397 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3398 	if (ln > r->man->last->line)
3399 		n->flags |= NODE_LINE;
3400 	n->eqn = eqn_box_new();
3401 	roff_node_append(r->man, n);
3402 	r->man->next = ROFF_NEXT_SIBLING;
3403 
3404 	assert(r->eqn == NULL);
3405 	if (r->last_eqn == NULL)
3406 		r->last_eqn = eqn_alloc();
3407 	else
3408 		eqn_reset(r->last_eqn);
3409 	r->eqn = r->last_eqn;
3410 	r->eqn->node = n;
3411 
3412 	if (buf->buf[pos] != '\0')
3413 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3414 		    ".EQ %s", buf->buf + pos);
3415 
3416 	return ROFF_IGN;
3417 }
3418 
3419 static int
roff_EN(ROFF_ARGS)3420 roff_EN(ROFF_ARGS)
3421 {
3422 	if (r->eqn != NULL) {
3423 		eqn_parse(r->eqn);
3424 		r->eqn = NULL;
3425 	} else
3426 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3427 	if (buf->buf[pos] != '\0')
3428 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3429 		    "EN %s", buf->buf + pos);
3430 	return ROFF_IGN;
3431 }
3432 
3433 static int
roff_TS(ROFF_ARGS)3434 roff_TS(ROFF_ARGS)
3435 {
3436 	if (r->tbl != NULL) {
3437 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3438 		tbl_end(r->tbl, 0);
3439 	}
3440 	r->man->flags |= ROFF_NONOFILL;
3441 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3442 	if (r->last_tbl == NULL)
3443 		r->first_tbl = r->tbl;
3444 	r->last_tbl = r->tbl;
3445 	return ROFF_IGN;
3446 }
3447 
3448 static int
roff_noarg(ROFF_ARGS)3449 roff_noarg(ROFF_ARGS)
3450 {
3451 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3452 		man_breakscope(r->man, tok);
3453 	if (tok == ROFF_brp)
3454 		tok = ROFF_br;
3455 	roff_elem_alloc(r->man, ln, ppos, tok);
3456 	if (buf->buf[pos] != '\0')
3457 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3458 		   "%s %s", roff_name[tok], buf->buf + pos);
3459 	if (tok == ROFF_nf)
3460 		r->man->flags |= ROFF_NOFILL;
3461 	else if (tok == ROFF_fi)
3462 		r->man->flags &= ~ROFF_NOFILL;
3463 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3464 	r->man->next = ROFF_NEXT_SIBLING;
3465 	return ROFF_IGN;
3466 }
3467 
3468 static int
roff_onearg(ROFF_ARGS)3469 roff_onearg(ROFF_ARGS)
3470 {
3471 	struct roff_node	*n;
3472 	char			*cp;
3473 	int			 npos;
3474 
3475 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3476 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3477 	     tok == ROFF_ti))
3478 		man_breakscope(r->man, tok);
3479 
3480 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3481 		r->man->last = roffce_node;
3482 		r->man->next = ROFF_NEXT_SIBLING;
3483 	}
3484 
3485 	roff_elem_alloc(r->man, ln, ppos, tok);
3486 	n = r->man->last;
3487 
3488 	cp = buf->buf + pos;
3489 	if (*cp != '\0') {
3490 		while (*cp != '\0' && *cp != ' ')
3491 			cp++;
3492 		while (*cp == ' ')
3493 			*cp++ = '\0';
3494 		if (*cp != '\0')
3495 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3496 			    ln, (int)(cp - buf->buf),
3497 			    "%s ... %s", roff_name[tok], cp);
3498 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3499 	}
3500 
3501 	if (tok == ROFF_ce || tok == ROFF_rj) {
3502 		if (r->man->last->type == ROFFT_ELEM) {
3503 			roff_word_alloc(r->man, ln, pos, "1");
3504 			r->man->last->flags |= NODE_NOSRC;
3505 		}
3506 		npos = 0;
3507 		if (roff_evalnum(ln, r->man->last->string, &npos,
3508 		    &roffce_lines, '\0', 0) == 0) {
3509 			mandoc_msg(MANDOCERR_CE_NONUM,
3510 			    ln, pos, "ce %s", buf->buf + pos);
3511 			roffce_lines = 1;
3512 		}
3513 		if (roffce_lines < 1) {
3514 			r->man->last = r->man->last->parent;
3515 			roffce_node = NULL;
3516 			roffce_lines = 0;
3517 		} else
3518 			roffce_node = r->man->last->parent;
3519 	} else {
3520 		n->flags |= NODE_VALID | NODE_ENDED;
3521 		r->man->last = n;
3522 	}
3523 	n->flags |= NODE_LINE;
3524 	r->man->next = ROFF_NEXT_SIBLING;
3525 	return ROFF_IGN;
3526 }
3527 
3528 static int
roff_manyarg(ROFF_ARGS)3529 roff_manyarg(ROFF_ARGS)
3530 {
3531 	struct roff_node	*n;
3532 	char			*sp, *ep;
3533 
3534 	roff_elem_alloc(r->man, ln, ppos, tok);
3535 	n = r->man->last;
3536 
3537 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3538 		while (*ep != '\0' && *ep != ' ')
3539 			ep++;
3540 		while (*ep == ' ')
3541 			*ep++ = '\0';
3542 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3543 	}
3544 
3545 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3546 	r->man->last = n;
3547 	r->man->next = ROFF_NEXT_SIBLING;
3548 	return ROFF_IGN;
3549 }
3550 
3551 static int
roff_als(ROFF_ARGS)3552 roff_als(ROFF_ARGS)
3553 {
3554 	char		*oldn, *newn, *end, *value;
3555 	size_t		 oldsz, newsz, valsz;
3556 
3557 	newn = oldn = buf->buf + pos;
3558 	if (*newn == '\0')
3559 		return ROFF_IGN;
3560 
3561 	newsz = roff_getname(&oldn, ln, pos);
3562 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3563 		return ROFF_IGN;
3564 
3565 	end = oldn;
3566 	oldsz = roff_getname(&end, ln, oldn - buf->buf);
3567 	if (oldsz == 0)
3568 		return ROFF_IGN;
3569 
3570 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3571 	    (int)oldsz, oldn);
3572 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3573 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3574 	free(value);
3575 	return ROFF_IGN;
3576 }
3577 
3578 /*
3579  * The .break request only makes sense inside conditionals,
3580  * and that case is already handled in roff_cond_sub().
3581  */
3582 static int
roff_break(ROFF_ARGS)3583 roff_break(ROFF_ARGS)
3584 {
3585 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3586 	return ROFF_IGN;
3587 }
3588 
3589 static int
roff_cc(ROFF_ARGS)3590 roff_cc(ROFF_ARGS)
3591 {
3592 	const char	*p;
3593 
3594 	p = buf->buf + pos;
3595 
3596 	if (*p == '\0' || (r->control = *p++) == '.')
3597 		r->control = '\0';
3598 
3599 	if (*p != '\0')
3600 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3601 		    ln, p - buf->buf, "cc ... %s", p);
3602 
3603 	return ROFF_IGN;
3604 }
3605 
3606 static int
roff_char(ROFF_ARGS)3607 roff_char(ROFF_ARGS)
3608 {
3609 	const char	*p, *kp, *vp;
3610 	size_t		 ksz, vsz;
3611 	int		 font;
3612 
3613 	/* Parse the character to be replaced. */
3614 
3615 	kp = buf->buf + pos;
3616 	p = kp + 1;
3617 	if (*kp == '\0' || (*kp == '\\' &&
3618 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3619 	    (*p != ' ' && *p != '\0')) {
3620 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3621 		return ROFF_IGN;
3622 	}
3623 	ksz = p - kp;
3624 	while (*p == ' ')
3625 		p++;
3626 
3627 	/*
3628 	 * If the replacement string contains a font escape sequence,
3629 	 * we have to restore the font at the end.
3630 	 */
3631 
3632 	vp = p;
3633 	vsz = strlen(p);
3634 	font = 0;
3635 	while (*p != '\0') {
3636 		if (*p++ != '\\')
3637 			continue;
3638 		switch (mandoc_escape(&p, NULL, NULL)) {
3639 		case ESCAPE_FONT:
3640 		case ESCAPE_FONTROMAN:
3641 		case ESCAPE_FONTITALIC:
3642 		case ESCAPE_FONTBOLD:
3643 		case ESCAPE_FONTBI:
3644 		case ESCAPE_FONTCR:
3645 		case ESCAPE_FONTCB:
3646 		case ESCAPE_FONTCI:
3647 		case ESCAPE_FONTPREV:
3648 			font++;
3649 			break;
3650 		default:
3651 			break;
3652 		}
3653 	}
3654 	if (font > 1)
3655 		mandoc_msg(MANDOCERR_CHAR_FONT,
3656 		    ln, (int)(vp - buf->buf), "%s", vp);
3657 
3658 	/*
3659 	 * Approximate the effect of .char using the .tr tables.
3660 	 * XXX In groff, .char and .tr interact differently.
3661 	 */
3662 
3663 	if (ksz == 1) {
3664 		if (r->xtab == NULL)
3665 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3666 		assert((unsigned int)*kp < 128);
3667 		free(r->xtab[(int)*kp].p);
3668 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3669 		    "%s%s", vp, font ? "\fP" : "");
3670 	} else {
3671 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3672 		if (font)
3673 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3674 	}
3675 	return ROFF_IGN;
3676 }
3677 
3678 static int
roff_ec(ROFF_ARGS)3679 roff_ec(ROFF_ARGS)
3680 {
3681 	const char	*p;
3682 
3683 	p = buf->buf + pos;
3684 	if (*p == '\0')
3685 		r->escape = '\\';
3686 	else {
3687 		r->escape = *p;
3688 		if (*++p != '\0')
3689 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3690 			    (int)(p - buf->buf), "ec ... %s", p);
3691 	}
3692 	return ROFF_IGN;
3693 }
3694 
3695 static int
roff_eo(ROFF_ARGS)3696 roff_eo(ROFF_ARGS)
3697 {
3698 	r->escape = '\0';
3699 	if (buf->buf[pos] != '\0')
3700 		mandoc_msg(MANDOCERR_ARG_SKIP,
3701 		    ln, pos, "eo %s", buf->buf + pos);
3702 	return ROFF_IGN;
3703 }
3704 
3705 static int
roff_mc(ROFF_ARGS)3706 roff_mc(ROFF_ARGS)
3707 {
3708 	struct roff_node	*n;
3709 	char			*cp;
3710 
3711 	/* Parse the first argument. */
3712 
3713 	cp = buf->buf + pos;
3714 	if (*cp != '\0')
3715 		cp++;
3716 	if (buf->buf[pos] == '\\') {
3717 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3718 		case ESCAPE_SPECIAL:
3719 		case ESCAPE_UNICODE:
3720 		case ESCAPE_NUMBERED:
3721 			break;
3722 		default:
3723 			*cp = '\0';
3724 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3725 			    "mc %s", buf->buf + pos);
3726 			buf->buf[pos] = '\0';
3727 			break;
3728 		}
3729 	}
3730 
3731 	/* Ignore additional arguments. */
3732 
3733 	while (*cp == ' ')
3734 		*cp++ = '\0';
3735 	if (*cp != '\0') {
3736 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3737 		    "mc ... %s", cp);
3738 		*cp = '\0';
3739 	}
3740 
3741 	/* Create the .mc node. */
3742 
3743 	roff_elem_alloc(r->man, ln, ppos, tok);
3744 	n = r->man->last;
3745 	if (buf->buf[pos] != '\0')
3746 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3747 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3748 	r->man->last = n;
3749 	r->man->next = ROFF_NEXT_SIBLING;
3750 	return ROFF_IGN;
3751 }
3752 
3753 static int
roff_nop(ROFF_ARGS)3754 roff_nop(ROFF_ARGS)
3755 {
3756 	while (buf->buf[pos] == ' ')
3757 		pos++;
3758 	*offs = pos;
3759 	return ROFF_RERUN;
3760 }
3761 
3762 static int
roff_tr(ROFF_ARGS)3763 roff_tr(ROFF_ARGS)
3764 {
3765 	const char	*p, *first, *second;
3766 	size_t		 fsz, ssz;
3767 
3768 	p = buf->buf + pos;
3769 
3770 	if (*p == '\0') {
3771 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3772 		return ROFF_IGN;
3773 	}
3774 
3775 	while (*p != '\0') {
3776 		fsz = ssz = 1;
3777 
3778 		first = p++;
3779 		if (*first == '\\') {
3780 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3781 				return ROFF_IGN;
3782 			fsz = (size_t)(p - first);
3783 		}
3784 
3785 		second = p++;
3786 		if (*second == '\\') {
3787 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3788 				return ROFF_IGN;
3789 			ssz = (size_t)(p - second);
3790 		} else if (*second == '\0') {
3791 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3792 			    (int)(first - buf->buf), "tr %s", first);
3793 			second = " ";
3794 			p--;
3795 		}
3796 
3797 		if (fsz > 1) {
3798 			roff_setstrn(&r->xmbtab, first, fsz,
3799 			    second, ssz, 0);
3800 			continue;
3801 		}
3802 
3803 		if (r->xtab == NULL)
3804 			r->xtab = mandoc_calloc(128,
3805 			    sizeof(struct roffstr));
3806 
3807 		free(r->xtab[(int)*first].p);
3808 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3809 		r->xtab[(int)*first].sz = ssz;
3810 	}
3811 
3812 	return ROFF_IGN;
3813 }
3814 
3815 /*
3816  * Implementation of the .return request.
3817  * There is no need to call roff_userret() from here.
3818  * The read module will call that after rewinding the reader stack
3819  * to the place from where the current macro was called.
3820  */
3821 static int
roff_return(ROFF_ARGS)3822 roff_return(ROFF_ARGS)
3823 {
3824 	if (r->mstackpos >= 0)
3825 		return ROFF_IGN | ROFF_USERRET;
3826 
3827 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3828 	return ROFF_IGN;
3829 }
3830 
3831 static int
roff_rn(ROFF_ARGS)3832 roff_rn(ROFF_ARGS)
3833 {
3834 	const char	*value;
3835 	char		*oldn, *newn, *end;
3836 	size_t		 oldsz, newsz;
3837 	int		 deftype;
3838 
3839 	oldn = newn = buf->buf + pos;
3840 	if (*oldn == '\0')
3841 		return ROFF_IGN;
3842 
3843 	oldsz = roff_getname(&newn, ln, pos);
3844 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3845 		return ROFF_IGN;
3846 
3847 	end = newn;
3848 	newsz = roff_getname(&end, ln, newn - buf->buf);
3849 	if (newsz == 0)
3850 		return ROFF_IGN;
3851 
3852 	deftype = ROFFDEF_ANY;
3853 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3854 	switch (deftype) {
3855 	case ROFFDEF_USER:
3856 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3857 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3858 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3859 		break;
3860 	case ROFFDEF_PRE:
3861 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3862 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863 		break;
3864 	case ROFFDEF_REN:
3865 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3866 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3867 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3868 		break;
3869 	case ROFFDEF_STD:
3870 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3871 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3872 		break;
3873 	default:
3874 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3875 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3876 		break;
3877 	}
3878 	return ROFF_IGN;
3879 }
3880 
3881 static int
roff_shift(ROFF_ARGS)3882 roff_shift(ROFF_ARGS)
3883 {
3884 	struct mctx	*ctx;
3885 	int		 argpos, levels, i;
3886 
3887 	argpos = pos;
3888 	levels = 1;
3889 	if (buf->buf[pos] != '\0' &&
3890 	    roff_evalnum(ln, buf->buf, &pos, &levels, '\0', 0) == 0) {
3891 		mandoc_msg(MANDOCERR_CE_NONUM,
3892 		    ln, pos, "shift %s", buf->buf + pos);
3893 		levels = 1;
3894 	}
3895 	if (r->mstackpos < 0) {
3896 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3897 		return ROFF_IGN;
3898 	}
3899 	ctx = r->mstack + r->mstackpos;
3900 	if (levels > ctx->argc) {
3901 		mandoc_msg(MANDOCERR_SHIFT,
3902 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3903 		levels = ctx->argc;
3904 	}
3905 	if (levels < 0) {
3906 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3907 		levels = 0;
3908 	}
3909 	if (levels == 0)
3910 		return ROFF_IGN;
3911 	for (i = 0; i < levels; i++)
3912 		free(ctx->argv[i]);
3913 	ctx->argc -= levels;
3914 	for (i = 0; i < ctx->argc; i++)
3915 		ctx->argv[i] = ctx->argv[i + levels];
3916 	return ROFF_IGN;
3917 }
3918 
3919 static int
roff_so(ROFF_ARGS)3920 roff_so(ROFF_ARGS)
3921 {
3922 	char *name, *cp;
3923 
3924 	name = buf->buf + pos;
3925 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3926 
3927 	/*
3928 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3929 	 * opening anything that's not in our cwd or anything beneath
3930 	 * it.  Thus, explicitly disallow traversing up the file-system
3931 	 * or using absolute paths.
3932 	 */
3933 
3934 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3935 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3936 		buf->sz = mandoc_asprintf(&cp,
3937 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3938 		free(buf->buf);
3939 		buf->buf = cp;
3940 		*offs = 0;
3941 		return ROFF_REPARSE;
3942 	}
3943 
3944 	*offs = pos;
3945 	return ROFF_SO;
3946 }
3947 
3948 /* --- user defined strings and macros ------------------------------------ */
3949 
3950 static int
roff_userdef(ROFF_ARGS)3951 roff_userdef(ROFF_ARGS)
3952 {
3953 	struct mctx	 *ctx;
3954 	char		 *arg, *ap, *dst, *src;
3955 	size_t		  sz;
3956 
3957 	/* If the macro is empty, ignore it altogether. */
3958 
3959 	if (*r->current_string == '\0')
3960 		return ROFF_IGN;
3961 
3962 	/* Initialize a new macro stack context. */
3963 
3964 	if (++r->mstackpos == r->mstacksz) {
3965 		r->mstack = mandoc_recallocarray(r->mstack,
3966 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3967 		r->mstacksz += 8;
3968 	}
3969 	ctx = r->mstack + r->mstackpos;
3970 	ctx->argc = 0;
3971 
3972 	/*
3973 	 * Collect pointers to macro argument strings,
3974 	 * NUL-terminating them and escaping quotes.
3975 	 */
3976 
3977 	src = buf->buf + pos;
3978 	while (*src != '\0') {
3979 		if (ctx->argc == ctx->argsz) {
3980 			ctx->argsz += 8;
3981 			ctx->argv = mandoc_reallocarray(ctx->argv,
3982 			    ctx->argsz, sizeof(*ctx->argv));
3983 		}
3984 		arg = roff_getarg(r, &src, ln, &pos);
3985 		sz = 1;  /* For the terminating NUL. */
3986 		for (ap = arg; *ap != '\0'; ap++)
3987 			sz += *ap == '"' ? 4 : 1;
3988 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3989 		for (ap = arg; *ap != '\0'; ap++) {
3990 			if (*ap == '"') {
3991 				memcpy(dst, "\\(dq", 4);
3992 				dst += 4;
3993 			} else
3994 				*dst++ = *ap;
3995 		}
3996 		*dst = '\0';
3997 		free(arg);
3998 	}
3999 
4000 	/* Replace the macro invocation by the macro definition. */
4001 
4002 	free(buf->buf);
4003 	buf->buf = mandoc_strdup(r->current_string);
4004 	buf->sz = strlen(buf->buf) + 1;
4005 	*offs = 0;
4006 
4007 	return buf->buf[buf->sz - 2] == '\n' ?
4008 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4009 }
4010 
4011 /*
4012  * Calling a high-level macro that was renamed with .rn.
4013  * r->current_string has already been set up by roff_parse().
4014  */
4015 static int
roff_renamed(ROFF_ARGS)4016 roff_renamed(ROFF_ARGS)
4017 {
4018 	char	*nbuf;
4019 
4020 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4021 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4022 	free(buf->buf);
4023 	buf->buf = nbuf;
4024 	*offs = 0;
4025 	return ROFF_CONT;
4026 }
4027 
4028 /*
4029  * Measure the length in bytes of the roff identifier at *cpp
4030  * and advance the pointer to the next word.
4031  */
4032 static size_t
roff_getname(char ** cpp,int ln,int pos)4033 roff_getname(char **cpp, int ln, int pos)
4034 {
4035 	char	 *name, *cp;
4036 	int	  namesz, inam, iend;
4037 
4038 	name = *cpp;
4039 	if (*name == '\0')
4040 		return 0;
4041 
4042 	/* Advance cp to the byte after the end of the name. */
4043 
4044 	cp = name;
4045 	namesz = 0;
4046 	for (;;) {
4047 		if (*cp == '\0')
4048 			break;
4049 		if (*cp == ' ' || *cp == '\t') {
4050 			cp++;
4051 			break;
4052 		}
4053 		if (*cp != '\\') {
4054 			if (name + namesz < cp) {
4055 				name[namesz] = *cp;
4056 				*cp = ' ';
4057 			}
4058 			namesz++;
4059 			cp++;
4060 			continue;
4061 		}
4062 		if (cp[1] == '{' || cp[1] == '}')
4063 			break;
4064 		if (roff_escape(cp, 0, 0, NULL, &inam,
4065 		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4066 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4067 			    "%.*s%.*s", namesz, name, iend, cp);
4068 			cp += iend;
4069 			break;
4070 		}
4071 
4072 		/*
4073 		 * In an identifier, \\, \., \G and so on
4074 		 * are reduced to \, ., G and so on,
4075 		 * vaguely similar to copy mode.
4076 		 */
4077 
4078 		name[namesz++] = cp[inam];
4079 		while (iend--) {
4080 			if (cp >= name + namesz)
4081 				*cp = ' ';
4082 			cp++;
4083 		}
4084 	}
4085 
4086 	/* Read past spaces. */
4087 
4088 	while (*cp == ' ')
4089 		cp++;
4090 
4091 	*cpp = cp;
4092 	return namesz;
4093 }
4094 
4095 /*
4096  * Store *string into the user-defined string called *name.
4097  * To clear an existing entry, call with (*r, *name, NULL, 0).
4098  * append == 0: replace mode
4099  * append == 1: single-line append mode
4100  * append == 2: multiline append mode, append '\n' after each call
4101  */
4102 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4103 roff_setstr(struct roff *r, const char *name, const char *string,
4104 	int append)
4105 {
4106 	size_t	 namesz;
4107 
4108 	namesz = strlen(name);
4109 	roff_setstrn(&r->strtab, name, namesz, string,
4110 	    string ? strlen(string) : 0, append);
4111 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4112 }
4113 
4114 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4115 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4116 		const char *string, size_t stringsz, int append)
4117 {
4118 	struct roffkv	*n;
4119 	char		*c;
4120 	int		 i;
4121 	size_t		 oldch, newch;
4122 
4123 	/* Search for an existing string with the same name. */
4124 	n = *r;
4125 
4126 	while (n && (namesz != n->key.sz ||
4127 			strncmp(n->key.p, name, namesz)))
4128 		n = n->next;
4129 
4130 	if (NULL == n) {
4131 		/* Create a new string table entry. */
4132 		n = mandoc_malloc(sizeof(struct roffkv));
4133 		n->key.p = mandoc_strndup(name, namesz);
4134 		n->key.sz = namesz;
4135 		n->val.p = NULL;
4136 		n->val.sz = 0;
4137 		n->next = *r;
4138 		*r = n;
4139 	} else if (0 == append) {
4140 		free(n->val.p);
4141 		n->val.p = NULL;
4142 		n->val.sz = 0;
4143 	}
4144 
4145 	if (NULL == string)
4146 		return;
4147 
4148 	/*
4149 	 * One additional byte for the '\n' in multiline mode,
4150 	 * and one for the terminating '\0'.
4151 	 */
4152 	newch = stringsz + (1 < append ? 2u : 1u);
4153 
4154 	if (NULL == n->val.p) {
4155 		n->val.p = mandoc_malloc(newch);
4156 		*n->val.p = '\0';
4157 		oldch = 0;
4158 	} else {
4159 		oldch = n->val.sz;
4160 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4161 	}
4162 
4163 	/* Skip existing content in the destination buffer. */
4164 	c = n->val.p + (int)oldch;
4165 
4166 	/* Append new content to the destination buffer. */
4167 	i = 0;
4168 	while (i < (int)stringsz) {
4169 		/*
4170 		 * Rudimentary roff copy mode:
4171 		 * Handle escaped backslashes.
4172 		 */
4173 		if ('\\' == string[i] && '\\' == string[i + 1])
4174 			i++;
4175 		*c++ = string[i++];
4176 	}
4177 
4178 	/* Append terminating bytes. */
4179 	if (1 < append)
4180 		*c++ = '\n';
4181 
4182 	*c = '\0';
4183 	n->val.sz = (int)(c - n->val.p);
4184 }
4185 
4186 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4187 roff_getstrn(struct roff *r, const char *name, size_t len,
4188     int *deftype)
4189 {
4190 	const struct roffkv	*n;
4191 	int			 found, i;
4192 	enum roff_tok		 tok;
4193 
4194 	found = 0;
4195 	for (n = r->strtab; n != NULL; n = n->next) {
4196 		if (strncmp(name, n->key.p, len) != 0 ||
4197 		    n->key.p[len] != '\0' || n->val.p == NULL)
4198 			continue;
4199 		if (*deftype & ROFFDEF_USER) {
4200 			*deftype = ROFFDEF_USER;
4201 			return n->val.p;
4202 		} else {
4203 			found = 1;
4204 			break;
4205 		}
4206 	}
4207 	for (n = r->rentab; n != NULL; n = n->next) {
4208 		if (strncmp(name, n->key.p, len) != 0 ||
4209 		    n->key.p[len] != '\0' || n->val.p == NULL)
4210 			continue;
4211 		if (*deftype & ROFFDEF_REN) {
4212 			*deftype = ROFFDEF_REN;
4213 			return n->val.p;
4214 		} else {
4215 			found = 1;
4216 			break;
4217 		}
4218 	}
4219 	for (i = 0; i < PREDEFS_MAX; i++) {
4220 		if (strncmp(name, predefs[i].name, len) != 0 ||
4221 		    predefs[i].name[len] != '\0')
4222 			continue;
4223 		if (*deftype & ROFFDEF_PRE) {
4224 			*deftype = ROFFDEF_PRE;
4225 			return predefs[i].str;
4226 		} else {
4227 			found = 1;
4228 			break;
4229 		}
4230 	}
4231 	if (r->man->meta.macroset != MACROSET_MAN) {
4232 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4233 			if (strncmp(name, roff_name[tok], len) != 0 ||
4234 			    roff_name[tok][len] != '\0')
4235 				continue;
4236 			if (*deftype & ROFFDEF_STD) {
4237 				*deftype = ROFFDEF_STD;
4238 				return NULL;
4239 			} else {
4240 				found = 1;
4241 				break;
4242 			}
4243 		}
4244 	}
4245 	if (r->man->meta.macroset != MACROSET_MDOC) {
4246 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4247 			if (strncmp(name, roff_name[tok], len) != 0 ||
4248 			    roff_name[tok][len] != '\0')
4249 				continue;
4250 			if (*deftype & ROFFDEF_STD) {
4251 				*deftype = ROFFDEF_STD;
4252 				return NULL;
4253 			} else {
4254 				found = 1;
4255 				break;
4256 			}
4257 		}
4258 	}
4259 
4260 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4261 		if (*deftype & ROFFDEF_REN) {
4262 			/*
4263 			 * This might still be a request,
4264 			 * so do not treat it as undefined yet.
4265 			 */
4266 			*deftype = ROFFDEF_UNDEF;
4267 			return NULL;
4268 		}
4269 
4270 		/* Using an undefined string defines it to be empty. */
4271 
4272 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4273 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4274 	}
4275 
4276 	*deftype = 0;
4277 	return NULL;
4278 }
4279 
4280 static void
roff_freestr(struct roffkv * r)4281 roff_freestr(struct roffkv *r)
4282 {
4283 	struct roffkv	 *n, *nn;
4284 
4285 	for (n = r; n; n = nn) {
4286 		free(n->key.p);
4287 		free(n->val.p);
4288 		nn = n->next;
4289 		free(n);
4290 	}
4291 }
4292 
4293 /* --- accessors and utility functions ------------------------------------ */
4294 
4295 /*
4296  * Duplicate an input string, making the appropriate character
4297  * conversations (as stipulated by `tr') along the way.
4298  * Returns a heap-allocated string with all the replacements made.
4299  */
4300 char *
roff_strdup(const struct roff * r,const char * p)4301 roff_strdup(const struct roff *r, const char *p)
4302 {
4303 	const struct roffkv *cp;
4304 	char		*res;
4305 	const char	*pp;
4306 	size_t		 ssz, sz;
4307 	enum mandoc_esc	 esc;
4308 
4309 	if (NULL == r->xmbtab && NULL == r->xtab)
4310 		return mandoc_strdup(p);
4311 	else if ('\0' == *p)
4312 		return mandoc_strdup("");
4313 
4314 	/*
4315 	 * Step through each character looking for term matches
4316 	 * (remember that a `tr' can be invoked with an escape, which is
4317 	 * a glyph but the escape is multi-character).
4318 	 * We only do this if the character hash has been initialised
4319 	 * and the string is >0 length.
4320 	 */
4321 
4322 	res = NULL;
4323 	ssz = 0;
4324 
4325 	while ('\0' != *p) {
4326 		assert((unsigned int)*p < 128);
4327 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4328 			sz = r->xtab[(int)*p].sz;
4329 			res = mandoc_realloc(res, ssz + sz + 1);
4330 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4331 			ssz += sz;
4332 			p++;
4333 			continue;
4334 		} else if ('\\' != *p) {
4335 			res = mandoc_realloc(res, ssz + 2);
4336 			res[ssz++] = *p++;
4337 			continue;
4338 		}
4339 
4340 		/* Search for term matches. */
4341 		for (cp = r->xmbtab; cp; cp = cp->next)
4342 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4343 				break;
4344 
4345 		if (NULL != cp) {
4346 			/*
4347 			 * A match has been found.
4348 			 * Append the match to the array and move
4349 			 * forward by its keysize.
4350 			 */
4351 			res = mandoc_realloc(res,
4352 			    ssz + cp->val.sz + 1);
4353 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4354 			ssz += cp->val.sz;
4355 			p += (int)cp->key.sz;
4356 			continue;
4357 		}
4358 
4359 		/*
4360 		 * Handle escapes carefully: we need to copy
4361 		 * over just the escape itself, or else we might
4362 		 * do replacements within the escape itself.
4363 		 * Make sure to pass along the bogus string.
4364 		 */
4365 		pp = p++;
4366 		esc = mandoc_escape(&p, NULL, NULL);
4367 		if (ESCAPE_ERROR == esc) {
4368 			sz = strlen(pp);
4369 			res = mandoc_realloc(res, ssz + sz + 1);
4370 			memcpy(res + ssz, pp, sz);
4371 			break;
4372 		}
4373 		/*
4374 		 * We bail out on bad escapes.
4375 		 * No need to warn: we already did so when
4376 		 * roff_expand() was called.
4377 		 */
4378 		sz = (int)(p - pp);
4379 		res = mandoc_realloc(res, ssz + sz + 1);
4380 		memcpy(res + ssz, pp, sz);
4381 		ssz += sz;
4382 	}
4383 
4384 	res[(int)ssz] = '\0';
4385 	return res;
4386 }
4387 
4388 int
roff_getformat(const struct roff * r)4389 roff_getformat(const struct roff *r)
4390 {
4391 
4392 	return r->format;
4393 }
4394 
4395 /*
4396  * Find out whether a line is a macro line or not.
4397  * If it is, adjust the current position and return one; if it isn't,
4398  * return zero and don't change the current position.
4399  * If the control character has been set with `.cc', then let that grain
4400  * precedence.
4401  * This is slightly contrary to groff, where using the non-breaking
4402  * control character when `cc' has been invoked will cause the
4403  * non-breaking macro contents to be printed verbatim.
4404  */
4405 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4406 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4407 {
4408 	int		pos;
4409 
4410 	pos = *ppos;
4411 
4412 	if (r->control != '\0' && cp[pos] == r->control)
4413 		pos++;
4414 	else if (r->control != '\0')
4415 		return 0;
4416 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4417 		pos += 2;
4418 	else if ('.' == cp[pos] || '\'' == cp[pos])
4419 		pos++;
4420 	else
4421 		return 0;
4422 
4423 	while (' ' == cp[pos] || '\t' == cp[pos])
4424 		pos++;
4425 
4426 	*ppos = pos;
4427 	return 1;
4428 }
4429