xref: /titanic_50/usr/src/cmd/mandoc/roff.c (revision 260e9a87725c090ba5835b1f9f0b62fa2f96036f)
1 /*	$Id: roff.c,v 1.263 2015/02/21 14:46:58 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33 
/*
 * Maximum number of nested if-else conditionals.
 * NOTE(review): presumably the upper bound for the rstack array of
 * struct roff; the code enforcing it is outside this view - confirm.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * Once roff_res() has performed more expansions than this on one line,
 * it reports MANDOCERR_ROFFLOOP and returns ROFF_IGN for that line.
 */
#define	EXPAND_LIMIT	1000
39 
/*
 * Tokens for all roff requests known to this parser.
 *
 * The numeric value of each token is also the index of the matching
 * entry in the roffs[] table: roffhash_find() converts a table entry
 * back into its token by pointer subtraction.  Consequently, this list
 * and roffs[] must always be kept in exactly the same order.
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock, /* the "." entry of roffs[], handled by roff_cblock() */
	ROFF_USERDEF, /* nameless roffs[] entry, handled by roff_userdef() */
	ROFF_MAX /* size of roffs[]; roffhash_find() returns it for "not found" */
};
286 
/*
 * An incredibly-simple string buffer: a pointer to the text plus its
 * cached length.
 */
struct	roffstr {
	char		*p; /* NUL-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
294 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists, strtab and xmbtab; roff_free1()
 * hands both of them to roff_freestr().
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
303 
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff; roff_free1()
 * hands it to roff_freereg().
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* current value of the register */
	struct roffreg	*next; /* next in list */
};
312 
/*
 * The state of one roff parser.
 * Created by roff_alloc(), returned to its initial state by
 * roff_reset(), and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr'); */
			       /* roff_free1() frees 128 entries */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; */
				    /* roff_alloc() and roff_free1() set -1 */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character; roff_reset() sets 0 */
};
336 
/*
 * One entry of the stack of pending roff instructions.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(); the name and end strings are owned by the node
 * and freed together with it.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule; */
			       /* roffnode_push() copies it from the parent */
};
347 
/*
 * The common parameter list of all request handlers.
 * It is spelled as a macro such that the many handler prototypes
 * and definitions stay short and in sync with each other.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* The type of a request handler function as stored in struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
357 
/*
 * Description of one roff request: its name and its handlers.
 * The roffs[] table consists of these entries and initialises them
 * positionally, so the order of the members must not be changed.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket, */
			       /* set up by roffhash_init() */
};
367 
/*
 * One predefined string: a name and the text replacing it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initialiser of struct predef, including the trailing
 * comma, such that a file consisting of PREDEF() lines can be included
 * right into the initialiser of the predefs[] array.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
375 
/*
 * Forward declarations of all file-local functions.
 * The handlers declared with ROFF_ARGS are referenced from the
 * roffs[] table below.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_brp(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int,
				const char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
437 
/*
 * See roffhash_find().
 * Request names are hashed by their first character only, which has
 * to be in the range ASCII_LO to ASCII_HI (roffhash_init() asserts
 * that for every table entry); there is one bucket per character.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Heads of the hash chains, indexed by ROFF_HASH(name).  The chains
 * are linked through the next member of struct roffmac and are built
 * by roffhash_init().
 */
static	struct roffmac	*hash[HASHWIDTH];
448 
/*
 * The table of all known requests, indexed by enum rofft; the entries
 * must appear in exactly the same order as the enumerators.
 * The members of each entry are: name, proc, text, sub, flags, next.
 * The next member is always initialised to NULL here; roffhash_init()
 * fills it in when chaining the entries into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "as1", roff_ds, NULL, NULL, 0, NULL },
	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brp", roff_brp, NULL, NULL, 0, NULL },
	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cf", roff_insec, NULL, NULL, 0, NULL },
	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "close", roff_insec, NULL, NULL, 0, NULL },
	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lf", roff_insec, NULL, NULL, 0, NULL },
	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mso", roff_insec, NULL, NULL, 0, NULL },
	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nx", roff_insec, NULL, NULL, 0, NULL },
	{ "open", roff_insec, NULL, NULL, 0, NULL },
	{ "opena", roff_insec, NULL, NULL, 0, NULL },
	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pi", roff_insec, NULL, NULL, 0, NULL },
	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pso", roff_insec, NULL, NULL, 0, NULL },
	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sy", roff_insec, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trf", roff_insec, NULL, NULL, 0, NULL },
	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
	{ "write", roff_insec, NULL, NULL, 0, NULL },
	{ "writec", roff_insec, NULL, NULL, 0, NULL },
	{ "writem", roff_insec, NULL, NULL, 0, NULL },
	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
	/* ROFF_cblock */
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	/* ROFF_USERDEF: has no name, so roffhash_init() does not hash it. */
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
687 
/*
 * NULL-terminated list of mdoc(7) macro names.
 * The array has external linkage.
 * NOTE(review): its consumers are outside this view; presumably it is
 * used to recognise standard macro names - confirm.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
710 
/*
 * NULL-terminated list of man(7) macro names.
 * The array has external linkage.
 * NOTE(review): its consumers are outside this view; presumably it is
 * used to recognise standard macro names - confirm.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
720 
/*
 * Array of injected predefined strings.
 * The initialisers come from predefs.in, where each PREDEF() line
 * expands to one { name, str } element.
 * NOTE(review): PREDEFS_MAX presumably has to match the number of
 * PREDEF() lines in predefs.in - confirm when editing that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
726 
/*
 * See roffhash_find().
 * Map the name p to its hash bucket: the offset of its first character
 * from ASCII_LO.  The argument is parenthesised such that pointer
 * expressions like "name + 1" expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
729 
/*
 * NOTE(review): presumably the pending state of the `it' request
 * handled by roff_it(); the code using these is outside this view.
 * Both are file-scope statics, i.e. shared by all parsers in a process.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* NUL-terminated macro line */
732 
733 
734 static void
roffhash_init(void)735 roffhash_init(void)
736 {
737 	struct roffmac	 *n;
738 	int		  buc, i;
739 
740 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
741 		assert(roffs[i].name[0] >= ASCII_LO);
742 		assert(roffs[i].name[0] <= ASCII_HI);
743 
744 		buc = ROFF_HASH(roffs[i].name);
745 
746 		if (NULL != (n = hash[buc])) {
747 			for ( ; n->next; n = n->next)
748 				/* Do nothing. */ ;
749 			n->next = &roffs[i];
750 		} else
751 			hash[buc] = &roffs[i];
752 	}
753 }
754 
755 /*
756  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
757  * the nil-terminated string name could be found.
758  */
759 static enum rofft
roffhash_find(const char * p,size_t s)760 roffhash_find(const char *p, size_t s)
761 {
762 	int		 buc;
763 	struct roffmac	*n;
764 
765 	/*
766 	 * libroff has an extremely simple hashtable, for the time
767 	 * being, which simply keys on the first character, which must
768 	 * be printable, then walks a chain.  It works well enough until
769 	 * optimised.
770 	 */
771 
772 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
773 		return(ROFF_MAX);
774 
775 	buc = ROFF_HASH(p);
776 
777 	if (NULL == (n = hash[buc]))
778 		return(ROFF_MAX);
779 	for ( ; n; n = n->next)
780 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
781 			return((enum rofft)(n - roffs));
782 
783 	return(ROFF_MAX);
784 }
785 
786 /*
787  * Pop the current node off of the stack of roff instructions currently
788  * pending.
789  */
790 static void
roffnode_pop(struct roff * r)791 roffnode_pop(struct roff *r)
792 {
793 	struct roffnode	*p;
794 
795 	assert(r->last);
796 	p = r->last;
797 
798 	r->last = r->last->parent;
799 	free(p->name);
800 	free(p->end);
801 	free(p);
802 }
803 
804 /*
805  * Push a roff node onto the instruction stack.  This must later be
806  * removed with roffnode_pop().
807  */
808 static void
roffnode_push(struct roff * r,enum rofft tok,const char * name,int line,int col)809 roffnode_push(struct roff *r, enum rofft tok, const char *name,
810 		int line, int col)
811 {
812 	struct roffnode	*p;
813 
814 	p = mandoc_calloc(1, sizeof(struct roffnode));
815 	p->tok = tok;
816 	if (name)
817 		p->name = mandoc_strdup(name);
818 	p->parent = r->last;
819 	p->line = line;
820 	p->col = col;
821 	p->rule = p->parent ? p->parent->rule : 0;
822 
823 	r->last = p;
824 }
825 
826 static void
roff_free1(struct roff * r)827 roff_free1(struct roff *r)
828 {
829 	struct tbl_node	*tbl;
830 	struct eqn_node	*e;
831 	int		 i;
832 
833 	while (NULL != (tbl = r->first_tbl)) {
834 		r->first_tbl = tbl->next;
835 		tbl_free(tbl);
836 	}
837 	r->first_tbl = r->last_tbl = r->tbl = NULL;
838 
839 	while (NULL != (e = r->first_eqn)) {
840 		r->first_eqn = e->next;
841 		eqn_free(e);
842 	}
843 	r->first_eqn = r->last_eqn = r->eqn = NULL;
844 
845 	while (r->last)
846 		roffnode_pop(r);
847 
848 	free (r->rstack);
849 	r->rstack = NULL;
850 	r->rstacksz = 0;
851 	r->rstackpos = -1;
852 
853 	roff_freereg(r->regtab);
854 	r->regtab = NULL;
855 
856 	roff_freestr(r->strtab);
857 	roff_freestr(r->xmbtab);
858 	r->strtab = r->xmbtab = NULL;
859 
860 	if (r->xtab)
861 		for (i = 0; i < 128; i++)
862 			free(r->xtab[i].p);
863 	free(r->xtab);
864 	r->xtab = NULL;
865 }
866 
867 void
roff_reset(struct roff * r)868 roff_reset(struct roff *r)
869 {
870 
871 	roff_free1(r);
872 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
873 	r->control = 0;
874 }
875 
/*
 * Destroy the parser r: release everything it owns, then the
 * parser structure itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* the contents */
	free(r);	/* the container */
}
883 
884 struct roff *
roff_alloc(struct mparse * parse,const struct mchars * mchars,int options)885 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
886 {
887 	struct roff	*r;
888 
889 	r = mandoc_calloc(1, sizeof(struct roff));
890 	r->parse = parse;
891 	r->mchars = mchars;
892 	r->options = options;
893 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
894 	r->rstackpos = -1;
895 
896 	roffhash_init();
897 
898 	return(r);
899 }
900 
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned backwards from its end down to buf->buf + pos.
 * The escapes \* (string), \n (number register), \w (width) and
 * \B (numeric expression check) are replaced by their values;
 * all other escapes are merely passed to mandoc_escape() for
 * validation.  Each replacement allocates a new line buffer,
 * frees the old one, and updates buf->buf and buf->sz.
 *
 * Returns ROFF_CONT when the whole line was processed, or ROFF_IGN
 * after reporting MANDOCERR_ROFFLOOP when more than EXPAND_LIMIT
 * expansions were attempted or when buf->sz plus the length of a
 * replacement would exceed SHRT_MAX.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = buf->buf + pos;
	/* Begin at the last character; the loop steps back before testing. */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if (*stesc != '\\')
			continue;

		/* If it is escaped, skip it. */

		/* Find the first non-backslash character before stesc. */
		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != '\\')
				break;

		/*
		 * An even distance means an odd number of backslashes
		 * precede this one, so it is itself escaped; resume
		 * the search before the whole run of backslashes.
		 */
		if ((stesc - cp) % 2 == 0) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* The string value is looked up further below. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The character after the letter delimits the argument. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			/* The replacement is a number printed into ubuf. */
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate the syntax only. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(r->mchars, stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				/* Nothing follows; reported as incomplete below. */
				maxl = 0;
				break;
			case '(':
				/* Two-character name. */
				cp++;
				maxl = 2;
				break;
			case '[':
				/* Name of arbitrary length, closed by ']'. */
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				/* One-character name. */
				maxl = 1;
				break;
			}
		} else {
			/* Skip the letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		/* maxl == 0 means: read until the terminator is found. */
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/*
			 * Ordinary characters count one each.  Only in
			 * a \w argument are nested escapes parsed, so
			 * that each of them counts at most once.
			 */
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
				/* FALLTHROUGH */
			case ESCAPE_UNICODE:
				/* FALLTHROUGH */
			case ESCAPE_NUMBERED:
				/* FALLTHROUGH */
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				/* Other escape types do not add to the count. */
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * Yield '1' only if the complete argument parses
			 * as a numeric expression that ends right at the
			 * closing delimiter, else '0'.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* Each counted character contributes 24 units. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/* Replace the escape sequence by the string. */

		/* Cut the line at the backslash, then join the three parts. */
		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue from the end of the inserted text, such that
		 * escapes contained in the replacement are found, too.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return(ROFF_CONT);
}
1101 
1102 /*
1103  * Process text streams:
1104  * Convert all breakable hyphens into ASCII_HYPH.
1105  * Decrement and spring input line trap.
1106  */
1107 static enum rofferr
roff_parsetext(struct buf * buf,int pos,int * offs)1108 roff_parsetext(struct buf *buf, int pos, int *offs)
1109 {
1110 	size_t		 sz;
1111 	const char	*start;
1112 	char		*p;
1113 	int		 isz;
1114 	enum mandoc_esc	 esc;
1115 
1116 	start = p = buf->buf + pos;
1117 
1118 	while (*p != '\0') {
1119 		sz = strcspn(p, "-\\");
1120 		p += sz;
1121 
1122 		if (*p == '\0')
1123 			break;
1124 
1125 		if (*p == '\\') {
1126 			/* Skip over escapes. */
1127 			p++;
1128 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1129 			if (esc == ESCAPE_ERROR)
1130 				break;
1131 			continue;
1132 		} else if (p == start) {
1133 			p++;
1134 			continue;
1135 		}
1136 
1137 		if (isalpha((unsigned char)p[-1]) &&
1138 		    isalpha((unsigned char)p[1]))
1139 			*p = ASCII_HYPH;
1140 		p++;
1141 	}
1142 
1143 	/* Spring the input line trap. */
1144 	if (roffit_lines == 1) {
1145 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1146 		free(buf->buf);
1147 		buf->buf = p;
1148 		buf->sz = isz + 1;
1149 		*offs = 0;
1150 		free(roffit_macro);
1151 		roffit_lines = 0;
1152 		return(ROFF_REPARSE);
1153 	} else if (roffit_lines > 1)
1154 		--roffit_lines;
1155 	return(ROFF_CONT);
1156 }
1157 
1158 enum rofferr
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs)1159 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1160 {
1161 	enum rofft	 t;
1162 	enum rofferr	 e;
1163 	int		 pos;	/* parse point */
1164 	int		 spos;	/* saved parse point for messages */
1165 	int		 ppos;	/* original offset in buf->buf */
1166 	int		 ctl;	/* macro line (boolean) */
1167 
1168 	ppos = pos = *offs;
1169 
1170 	/* Handle in-line equation delimiters. */
1171 
1172 	if (r->tbl == NULL &&
1173 	    r->last_eqn != NULL && r->last_eqn->delim &&
1174 	    (r->eqn == NULL || r->eqn_inline)) {
1175 		e = roff_eqndelim(r, buf, pos);
1176 		if (e == ROFF_REPARSE)
1177 			return(e);
1178 		assert(e == ROFF_CONT);
1179 	}
1180 
1181 	/* Expand some escape sequences. */
1182 
1183 	e = roff_res(r, buf, ln, pos);
1184 	if (e == ROFF_IGN)
1185 		return(e);
1186 	assert(e == ROFF_CONT);
1187 
1188 	ctl = roff_getcontrol(r, buf->buf, &pos);
1189 
1190 	/*
1191 	 * First, if a scope is open and we're not a macro, pass the
1192 	 * text through the macro's filter.
1193 	 * Equations process all content themselves.
1194 	 * Tables process almost all content themselves, but we want
1195 	 * to warn about macros before passing it there.
1196 	 */
1197 
1198 	if (r->last != NULL && ! ctl) {
1199 		t = r->last->tok;
1200 		assert(roffs[t].text);
1201 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1202 		assert(e == ROFF_IGN || e == ROFF_CONT);
1203 		if (e != ROFF_CONT)
1204 			return(e);
1205 	}
1206 	if (r->eqn != NULL)
1207 		return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1208 	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1209 		return(tbl_read(r->tbl, ln, buf->buf, ppos));
1210 	if ( ! ctl)
1211 		return(roff_parsetext(buf, pos, offs));
1212 
1213 	/* Skip empty request lines. */
1214 
1215 	if (buf->buf[pos] == '"') {
1216 		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1217 		    ln, pos, NULL);
1218 		return(ROFF_IGN);
1219 	} else if (buf->buf[pos] == '\0')
1220 		return(ROFF_IGN);
1221 
1222 	/*
1223 	 * If a scope is open, go to the child handler for that macro,
1224 	 * as it may want to preprocess before doing anything with it.
1225 	 * Don't do so if an equation is open.
1226 	 */
1227 
1228 	if (r->last) {
1229 		t = r->last->tok;
1230 		assert(roffs[t].sub);
1231 		return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1232 	}
1233 
1234 	/* No scope is open.  This is a new request or macro. */
1235 
1236 	spos = pos;
1237 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1238 
1239 	/* Tables ignore most macros. */
1240 
1241 	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1242 		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1243 		    ln, pos, buf->buf + spos);
1244 		if (t == ROFF_TS)
1245 			return(ROFF_IGN);
1246 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1247 			pos++;
1248 		while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1249 			pos++;
1250 		return(tbl_read(r->tbl, ln, buf->buf, pos));
1251 	}
1252 
1253 	/*
1254 	 * This is neither a roff request nor a user-defined macro.
1255 	 * Let the standard macro set parsers handle it.
1256 	 */
1257 
1258 	if (t == ROFF_MAX)
1259 		return(ROFF_CONT);
1260 
1261 	/* Execute a roff request or a user defined macro. */
1262 
1263 	assert(roffs[t].proc);
1264 	return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1265 }
1266 
1267 void
roff_endparse(struct roff * r)1268 roff_endparse(struct roff *r)
1269 {
1270 
1271 	if (r->last)
1272 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1273 		    r->last->line, r->last->col,
1274 		    roffs[r->last->tok].name);
1275 
1276 	if (r->eqn) {
1277 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1278 		    r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1279 		eqn_end(&r->eqn);
1280 	}
1281 
1282 	if (r->tbl) {
1283 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1284 		    r->tbl->line, r->tbl->pos, "TS");
1285 		tbl_end(&r->tbl);
1286 	}
1287 }
1288 
1289 /*
1290  * Parse a roff node's type from the input buffer.  This must be in the
1291  * form of ".foo xxx" in the usual way.
1292  */
1293 static enum rofft
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1294 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1295 {
1296 	char		*cp;
1297 	const char	*mac;
1298 	size_t		 maclen;
1299 	enum rofft	 t;
1300 
1301 	cp = buf + *pos;
1302 
1303 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1304 		return(ROFF_MAX);
1305 
1306 	mac = cp;
1307 	maclen = roff_getname(r, &cp, ln, ppos);
1308 
1309 	t = (r->current_string = roff_getstrn(r, mac, maclen))
1310 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
1311 
1312 	if (ROFF_MAX != t)
1313 		*pos = cp - buf;
1314 
1315 	return(t);
1316 }
1317 
1318 static enum rofferr
roff_cblock(ROFF_ARGS)1319 roff_cblock(ROFF_ARGS)
1320 {
1321 
1322 	/*
1323 	 * A block-close `..' should only be invoked as a child of an
1324 	 * ignore macro, otherwise raise a warning and just ignore it.
1325 	 */
1326 
1327 	if (r->last == NULL) {
1328 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1329 		    ln, ppos, "..");
1330 		return(ROFF_IGN);
1331 	}
1332 
1333 	switch (r->last->tok) {
1334 	case ROFF_am:
1335 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1336 		/* FALLTHROUGH */
1337 	case ROFF_ami:
1338 		/* FALLTHROUGH */
1339 	case ROFF_de:
1340 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1341 		/* FALLTHROUGH */
1342 	case ROFF_dei:
1343 		/* FALLTHROUGH */
1344 	case ROFF_ig:
1345 		break;
1346 	default:
1347 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1348 		    ln, ppos, "..");
1349 		return(ROFF_IGN);
1350 	}
1351 
1352 	if (buf->buf[pos] != '\0')
1353 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1354 		    ".. %s", buf->buf + pos);
1355 
1356 	roffnode_pop(r);
1357 	roffnode_cleanscope(r);
1358 	return(ROFF_IGN);
1359 
1360 }
1361 
1362 static void
roffnode_cleanscope(struct roff * r)1363 roffnode_cleanscope(struct roff *r)
1364 {
1365 
1366 	while (r->last) {
1367 		if (--r->last->endspan != 0)
1368 			break;
1369 		roffnode_pop(r);
1370 	}
1371 }
1372 
1373 static void
roff_ccond(struct roff * r,int ln,int ppos)1374 roff_ccond(struct roff *r, int ln, int ppos)
1375 {
1376 
1377 	if (NULL == r->last) {
1378 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1379 		    ln, ppos, "\\}");
1380 		return;
1381 	}
1382 
1383 	switch (r->last->tok) {
1384 	case ROFF_el:
1385 		/* FALLTHROUGH */
1386 	case ROFF_ie:
1387 		/* FALLTHROUGH */
1388 	case ROFF_if:
1389 		break;
1390 	default:
1391 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1392 		    ln, ppos, "\\}");
1393 		return;
1394 	}
1395 
1396 	if (r->last->endspan > -1) {
1397 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1398 		    ln, ppos, "\\}");
1399 		return;
1400 	}
1401 
1402 	roffnode_pop(r);
1403 	roffnode_cleanscope(r);
1404 	return;
1405 }
1406 
1407 static enum rofferr
roff_block(ROFF_ARGS)1408 roff_block(ROFF_ARGS)
1409 {
1410 	const char	*name;
1411 	char		*iname, *cp;
1412 	size_t		 namesz;
1413 
1414 	/* Ignore groff compatibility mode for now. */
1415 
1416 	if (tok == ROFF_de1)
1417 		tok = ROFF_de;
1418 	else if (tok == ROFF_dei1)
1419 		tok = ROFF_dei;
1420 	else if (tok == ROFF_am1)
1421 		tok = ROFF_am;
1422 	else if (tok == ROFF_ami1)
1423 		tok = ROFF_ami;
1424 
1425 	/* Parse the macro name argument. */
1426 
1427 	cp = buf->buf + pos;
1428 	if (tok == ROFF_ig) {
1429 		iname = NULL;
1430 		namesz = 0;
1431 	} else {
1432 		iname = cp;
1433 		namesz = roff_getname(r, &cp, ln, ppos);
1434 		iname[namesz] = '\0';
1435 	}
1436 
1437 	/* Resolve the macro name argument if it is indirect. */
1438 
1439 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1440 		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1441 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1442 			    r->parse, ln, (int)(iname - buf->buf),
1443 			    "%.*s", (int)namesz, iname);
1444 			namesz = 0;
1445 		} else
1446 			namesz = strlen(name);
1447 	} else
1448 		name = iname;
1449 
1450 	if (namesz == 0 && tok != ROFF_ig) {
1451 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1452 		    ln, ppos, roffs[tok].name);
1453 		return(ROFF_IGN);
1454 	}
1455 
1456 	roffnode_push(r, tok, name, ln, ppos);
1457 
1458 	/*
1459 	 * At the beginning of a `de' macro, clear the existing string
1460 	 * with the same name, if there is one.  New content will be
1461 	 * appended from roff_block_text() in multiline mode.
1462 	 */
1463 
1464 	if (tok == ROFF_de || tok == ROFF_dei)
1465 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1466 
1467 	if (*cp == '\0')
1468 		return(ROFF_IGN);
1469 
1470 	/* Get the custom end marker. */
1471 
1472 	iname = cp;
1473 	namesz = roff_getname(r, &cp, ln, ppos);
1474 
1475 	/* Resolve the end marker if it is indirect. */
1476 
1477 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1478 		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1479 			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1480 			    r->parse, ln, (int)(iname - buf->buf),
1481 			    "%.*s", (int)namesz, iname);
1482 			namesz = 0;
1483 		} else
1484 			namesz = strlen(name);
1485 	} else
1486 		name = iname;
1487 
1488 	if (namesz)
1489 		r->last->end = mandoc_strndup(name, namesz);
1490 
1491 	if (*cp != '\0')
1492 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1493 		    ln, pos, ".%s ... %s", roffs[tok].name, cp);
1494 
1495 	return(ROFF_IGN);
1496 }
1497 
1498 static enum rofferr
roff_block_sub(ROFF_ARGS)1499 roff_block_sub(ROFF_ARGS)
1500 {
1501 	enum rofft	t;
1502 	int		i, j;
1503 
1504 	/*
1505 	 * First check whether a custom macro exists at this level.  If
1506 	 * it does, then check against it.  This is some of groff's
1507 	 * stranger behaviours.  If we encountered a custom end-scope
1508 	 * tag and that tag also happens to be a "real" macro, then we
1509 	 * need to try interpreting it again as a real macro.  If it's
1510 	 * not, then return ignore.  Else continue.
1511 	 */
1512 
1513 	if (r->last->end) {
1514 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1515 			if (buf->buf[i] != r->last->end[j])
1516 				break;
1517 
1518 		if (r->last->end[j] == '\0' &&
1519 		    (buf->buf[i] == '\0' ||
1520 		     buf->buf[i] == ' ' ||
1521 		     buf->buf[i] == '\t')) {
1522 			roffnode_pop(r);
1523 			roffnode_cleanscope(r);
1524 
1525 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1526 				i++;
1527 
1528 			pos = i;
1529 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1530 			    ROFF_MAX)
1531 				return(ROFF_RERUN);
1532 			return(ROFF_IGN);
1533 		}
1534 	}
1535 
1536 	/*
1537 	 * If we have no custom end-query or lookup failed, then try
1538 	 * pulling it out of the hashtable.
1539 	 */
1540 
1541 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1542 
1543 	if (t != ROFF_cblock) {
1544 		if (tok != ROFF_ig)
1545 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1546 		return(ROFF_IGN);
1547 	}
1548 
1549 	assert(roffs[t].proc);
1550 	return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1551 }
1552 
1553 static enum rofferr
roff_block_text(ROFF_ARGS)1554 roff_block_text(ROFF_ARGS)
1555 {
1556 
1557 	if (tok != ROFF_ig)
1558 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1559 
1560 	return(ROFF_IGN);
1561 }
1562 
1563 static enum rofferr
roff_cond_sub(ROFF_ARGS)1564 roff_cond_sub(ROFF_ARGS)
1565 {
1566 	enum rofft	 t;
1567 	char		*ep;
1568 	int		 rr;
1569 
1570 	rr = r->last->rule;
1571 	roffnode_cleanscope(r);
1572 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1573 
1574 	/*
1575 	 * Fully handle known macros when they are structurally
1576 	 * required or when the conditional evaluated to true.
1577 	 */
1578 
1579 	if ((t != ROFF_MAX) &&
1580 	    (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1581 		assert(roffs[t].proc);
1582 		return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1583 	}
1584 
1585 	/*
1586 	 * If `\}' occurs on a macro line without a preceding macro,
1587 	 * drop the line completely.
1588 	 */
1589 
1590 	ep = buf->buf + pos;
1591 	if (ep[0] == '\\' && ep[1] == '}')
1592 		rr = 0;
1593 
1594 	/* Always check for the closing delimiter `\}'. */
1595 
1596 	while ((ep = strchr(ep, '\\')) != NULL) {
1597 		if (*(++ep) == '}') {
1598 			*ep = '&';
1599 			roff_ccond(r, ln, ep - buf->buf - 1);
1600 		}
1601 		if (*ep != '\0')
1602 			++ep;
1603 	}
1604 	return(rr ? ROFF_CONT : ROFF_IGN);
1605 }
1606 
1607 static enum rofferr
roff_cond_text(ROFF_ARGS)1608 roff_cond_text(ROFF_ARGS)
1609 {
1610 	char		*ep;
1611 	int		 rr;
1612 
1613 	rr = r->last->rule;
1614 	roffnode_cleanscope(r);
1615 
1616 	ep = buf->buf + pos;
1617 	while ((ep = strchr(ep, '\\')) != NULL) {
1618 		if (*(++ep) == '}') {
1619 			*ep = '&';
1620 			roff_ccond(r, ln, ep - buf->buf - 1);
1621 		}
1622 		if (*ep != '\0')
1623 			++ep;
1624 	}
1625 	return(rr ? ROFF_CONT : ROFF_IGN);
1626 }
1627 
1628 /*
1629  * Parse a single signed integer number.  Stop at the first non-digit.
1630  * If there is at least one digit, return success and advance the
1631  * parse point, else return failure and let the parse point unchanged.
1632  * Ignore overflows, treat them just like the C language.
1633  */
1634 static int
roff_getnum(const char * v,int * pos,int * res,int flags)1635 roff_getnum(const char *v, int *pos, int *res, int flags)
1636 {
1637 	int	 myres, scaled, n, p;
1638 
1639 	if (NULL == res)
1640 		res = &myres;
1641 
1642 	p = *pos;
1643 	n = v[p] == '-';
1644 	if (n || v[p] == '+')
1645 		p++;
1646 
1647 	if (flags & ROFFNUM_WHITE)
1648 		while (isspace((unsigned char)v[p]))
1649 			p++;
1650 
1651 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
1652 		*res = 10 * *res + v[p] - '0';
1653 	if (p == *pos + n)
1654 		return 0;
1655 
1656 	if (n)
1657 		*res = -*res;
1658 
1659 	/* Each number may be followed by one optional scaling unit. */
1660 
1661 	switch (v[p]) {
1662 	case 'f':
1663 		scaled = *res * 65536;
1664 		break;
1665 	case 'i':
1666 		scaled = *res * 240;
1667 		break;
1668 	case 'c':
1669 		scaled = *res * 240 / 2.54;
1670 		break;
1671 	case 'v':
1672 		/* FALLTROUGH */
1673 	case 'P':
1674 		scaled = *res * 40;
1675 		break;
1676 	case 'm':
1677 		/* FALLTROUGH */
1678 	case 'n':
1679 		scaled = *res * 24;
1680 		break;
1681 	case 'p':
1682 		scaled = *res * 10 / 3;
1683 		break;
1684 	case 'u':
1685 		scaled = *res;
1686 		break;
1687 	case 'M':
1688 		scaled = *res * 6 / 25;
1689 		break;
1690 	default:
1691 		scaled = *res;
1692 		p--;
1693 		break;
1694 	}
1695 	if (flags & ROFFNUM_SCALE)
1696 		*res = scaled;
1697 
1698 	*pos = p + 1;
1699 	return(1);
1700 }
1701 
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise.
 * Move *pos behind the third occurrence of the delimiter
 * or, if there is none, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* cursor in the second string */

	/* Without a middle delimiter, there is nothing to compare. */
	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings end at the same time. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
1744 
1745 /*
1746  * Evaluate an optionally negated single character, numerical,
1747  * or string condition.
1748  */
1749 static int
roff_evalcond(struct roff * r,int ln,const char * v,int * pos)1750 roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
1751 {
1752 	int	 number, savepos, wanttrue;
1753 
1754 	if ('!' == v[*pos]) {
1755 		wanttrue = 0;
1756 		(*pos)++;
1757 	} else
1758 		wanttrue = 1;
1759 
1760 	switch (v[*pos]) {
1761 	case '\0':
1762 		return(0);
1763 	case 'n':
1764 		/* FALLTHROUGH */
1765 	case 'o':
1766 		(*pos)++;
1767 		return(wanttrue);
1768 	case 'c':
1769 		/* FALLTHROUGH */
1770 	case 'd':
1771 		/* FALLTHROUGH */
1772 	case 'e':
1773 		/* FALLTHROUGH */
1774 	case 'r':
1775 		/* FALLTHROUGH */
1776 	case 't':
1777 		/* FALLTHROUGH */
1778 	case 'v':
1779 		(*pos)++;
1780 		return(!wanttrue);
1781 	default:
1782 		break;
1783 	}
1784 
1785 	savepos = *pos;
1786 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
1787 		return((number > 0) == wanttrue);
1788 	else if (*pos == savepos)
1789 		return(roff_evalstrcond(v, pos) == wanttrue);
1790 	else
1791 		return (0);
1792 }
1793 
1794 static enum rofferr
roff_line_ignore(ROFF_ARGS)1795 roff_line_ignore(ROFF_ARGS)
1796 {
1797 
1798 	return(ROFF_IGN);
1799 }
1800 
1801 static enum rofferr
roff_insec(ROFF_ARGS)1802 roff_insec(ROFF_ARGS)
1803 {
1804 
1805 	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
1806 	    ln, ppos, roffs[tok].name);
1807 	return(ROFF_IGN);
1808 }
1809 
1810 static enum rofferr
roff_unsupp(ROFF_ARGS)1811 roff_unsupp(ROFF_ARGS)
1812 {
1813 
1814 	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
1815 	    ln, ppos, roffs[tok].name);
1816 	return(ROFF_IGN);
1817 }
1818 
1819 static enum rofferr
roff_cond(ROFF_ARGS)1820 roff_cond(ROFF_ARGS)
1821 {
1822 
1823 	roffnode_push(r, tok, NULL, ln, ppos);
1824 
1825 	/*
1826 	 * An `.el' has no conditional body: it will consume the value
1827 	 * of the current rstack entry set in prior `ie' calls or
1828 	 * defaults to DENY.
1829 	 *
1830 	 * If we're not an `el', however, then evaluate the conditional.
1831 	 */
1832 
1833 	r->last->rule = tok == ROFF_el ?
1834 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1835 	    roff_evalcond(r, ln, buf->buf, &pos);
1836 
1837 	/*
1838 	 * An if-else will put the NEGATION of the current evaluated
1839 	 * conditional into the stack of rules.
1840 	 */
1841 
1842 	if (tok == ROFF_ie) {
1843 		if (r->rstackpos + 1 == r->rstacksz) {
1844 			r->rstacksz += 16;
1845 			r->rstack = mandoc_reallocarray(r->rstack,
1846 			    r->rstacksz, sizeof(int));
1847 		}
1848 		r->rstack[++r->rstackpos] = !r->last->rule;
1849 	}
1850 
1851 	/* If the parent has false as its rule, then so do we. */
1852 
1853 	if (r->last->parent && !r->last->parent->rule)
1854 		r->last->rule = 0;
1855 
1856 	/*
1857 	 * Determine scope.
1858 	 * If there is nothing on the line after the conditional,
1859 	 * not even whitespace, use next-line scope.
1860 	 */
1861 
1862 	if (buf->buf[pos] == '\0') {
1863 		r->last->endspan = 2;
1864 		goto out;
1865 	}
1866 
1867 	while (buf->buf[pos] == ' ')
1868 		pos++;
1869 
1870 	/* An opening brace requests multiline scope. */
1871 
1872 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1873 		r->last->endspan = -1;
1874 		pos += 2;
1875 		goto out;
1876 	}
1877 
1878 	/*
1879 	 * Anything else following the conditional causes
1880 	 * single-line scope.  Warn if the scope contains
1881 	 * nothing but trailing whitespace.
1882 	 */
1883 
1884 	if (buf->buf[pos] == '\0')
1885 		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1886 		    ln, ppos, roffs[tok].name);
1887 
1888 	r->last->endspan = 1;
1889 
1890 out:
1891 	*offs = pos;
1892 	return(ROFF_RERUN);
1893 }
1894 
1895 static enum rofferr
roff_ds(ROFF_ARGS)1896 roff_ds(ROFF_ARGS)
1897 {
1898 	char		*string;
1899 	const char	*name;
1900 	size_t		 namesz;
1901 
1902 	/* Ignore groff compatibility mode for now. */
1903 
1904 	if (tok == ROFF_ds1)
1905 		tok = ROFF_ds;
1906 	else if (tok == ROFF_as1)
1907 		tok = ROFF_as;
1908 
1909 	/*
1910 	 * The first word is the name of the string.
1911 	 * If it is empty or terminated by an escape sequence,
1912 	 * abort the `ds' request without defining anything.
1913 	 */
1914 
1915 	name = string = buf->buf + pos;
1916 	if (*name == '\0')
1917 		return(ROFF_IGN);
1918 
1919 	namesz = roff_getname(r, &string, ln, pos);
1920 	if (name[namesz] == '\\')
1921 		return(ROFF_IGN);
1922 
1923 	/* Read past the initial double-quote, if any. */
1924 	if (*string == '"')
1925 		string++;
1926 
1927 	/* The rest is the value. */
1928 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1929 	    ROFF_as == tok);
1930 	return(ROFF_IGN);
1931 }
1932 
/*
 * Parse a single operator of one or two characters at v + *pos.
 * For a known operator, store its code to *res, advance *pos
 * past it, and return the code, which is never zero.
 * Otherwise, return 0 and leave *pos unchanged; *res then holds
 * the unrecognized character.
 * Two-character operators get these codes:
 * <= 'l', <> '!', <? 'i', >= 'g', >? 'a', == '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next;

	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		next = v[*pos + 1];
		if (next == '=')
			*res = 'l';
		else if (next == '>')
			*res = '!';
		else if (next == '?')
			*res = 'i';
		/* The code changed, so a second character was used. */
		if (*res != '<')
			(*pos)++;
		break;
	case '>':
		next = v[*pos + 1];
		if (next == '=')
			*res = 'g';
		else if (next == '?')
			*res = 'a';
		if (*res != '>')
			(*pos)++;
		break;
	case '=':
		/* `==' means the same as `='. */
		if (v[*pos + 1] == '=')
			(*pos)++;
		break;
	default:
		return 0;
	}
	(*pos)++;

	return *res;
}
2002 
2003 /*
2004  * Evaluate either a parenthesized numeric expression
2005  * or a single signed integer number.
2006  */
2007 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2008 roff_evalpar(struct roff *r, int ln,
2009 	const char *v, int *pos, int *res, int flags)
2010 {
2011 
2012 	if ('(' != v[*pos])
2013 		return(roff_getnum(v, pos, res, flags));
2014 
2015 	(*pos)++;
2016 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2017 		return(0);
2018 
2019 	/*
2020 	 * Omission of the closing parenthesis
2021 	 * is an error in validation mode,
2022 	 * but ignored in evaluation mode.
2023 	 */
2024 
2025 	if (')' == v[*pos])
2026 		(*pos)++;
2027 	else if (NULL == res)
2028 		return(0);
2029 
2030 	return(1);
2031 }
2032 
2033 /*
2034  * Evaluate a complete numeric expression.
2035  * Proceed left to right, there is no concept of precedence.
2036  */
2037 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2038 roff_evalnum(struct roff *r, int ln, const char *v,
2039 	int *pos, int *res, int flags)
2040 {
2041 	int		 mypos, operand2;
2042 	char		 operator;
2043 
2044 	if (NULL == pos) {
2045 		mypos = 0;
2046 		pos = &mypos;
2047 	}
2048 
2049 	if (flags & ROFFNUM_WHITE)
2050 		while (isspace((unsigned char)v[*pos]))
2051 			(*pos)++;
2052 
2053 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2054 		return(0);
2055 
2056 	while (1) {
2057 		if (flags & ROFFNUM_WHITE)
2058 			while (isspace((unsigned char)v[*pos]))
2059 				(*pos)++;
2060 
2061 		if ( ! roff_getop(v, pos, &operator))
2062 			break;
2063 
2064 		if (flags & ROFFNUM_WHITE)
2065 			while (isspace((unsigned char)v[*pos]))
2066 				(*pos)++;
2067 
2068 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2069 			return(0);
2070 
2071 		if (flags & ROFFNUM_WHITE)
2072 			while (isspace((unsigned char)v[*pos]))
2073 				(*pos)++;
2074 
2075 		if (NULL == res)
2076 			continue;
2077 
2078 		switch (operator) {
2079 		case '+':
2080 			*res += operand2;
2081 			break;
2082 		case '-':
2083 			*res -= operand2;
2084 			break;
2085 		case '*':
2086 			*res *= operand2;
2087 			break;
2088 		case '/':
2089 			if (operand2 == 0) {
2090 				mandoc_msg(MANDOCERR_DIVZERO,
2091 					r->parse, ln, *pos, v);
2092 				*res = 0;
2093 				break;
2094 			}
2095 			*res /= operand2;
2096 			break;
2097 		case '%':
2098 			if (operand2 == 0) {
2099 				mandoc_msg(MANDOCERR_DIVZERO,
2100 					r->parse, ln, *pos, v);
2101 				*res = 0;
2102 				break;
2103 			}
2104 			*res %= operand2;
2105 			break;
2106 		case '<':
2107 			*res = *res < operand2;
2108 			break;
2109 		case '>':
2110 			*res = *res > operand2;
2111 			break;
2112 		case 'l':
2113 			*res = *res <= operand2;
2114 			break;
2115 		case 'g':
2116 			*res = *res >= operand2;
2117 			break;
2118 		case '=':
2119 			*res = *res == operand2;
2120 			break;
2121 		case '!':
2122 			*res = *res != operand2;
2123 			break;
2124 		case '&':
2125 			*res = *res && operand2;
2126 			break;
2127 		case ':':
2128 			*res = *res || operand2;
2129 			break;
2130 		case 'i':
2131 			if (operand2 < *res)
2132 				*res = operand2;
2133 			break;
2134 		case 'a':
2135 			if (operand2 > *res)
2136 				*res = operand2;
2137 			break;
2138 		default:
2139 			abort();
2140 		}
2141 	}
2142 	return(1);
2143 }
2144 
2145 void
roff_setreg(struct roff * r,const char * name,int val,char sign)2146 roff_setreg(struct roff *r, const char *name, int val, char sign)
2147 {
2148 	struct roffreg	*reg;
2149 
2150 	/* Search for an existing register with the same name. */
2151 	reg = r->regtab;
2152 
2153 	while (reg && strcmp(name, reg->key.p))
2154 		reg = reg->next;
2155 
2156 	if (NULL == reg) {
2157 		/* Create a new register. */
2158 		reg = mandoc_malloc(sizeof(struct roffreg));
2159 		reg->key.p = mandoc_strdup(name);
2160 		reg->key.sz = strlen(name);
2161 		reg->val = 0;
2162 		reg->next = r->regtab;
2163 		r->regtab = reg;
2164 	}
2165 
2166 	if ('+' == sign)
2167 		reg->val += val;
2168 	else if ('-' == sign)
2169 		reg->val -= val;
2170 	else
2171 		reg->val = val;
2172 }
2173 
/*
 * Return the value of a predefined read-only number register.
 * Only the first character of name is inspected; callers pass
 * the register name without its leading dot.
 * The return value -1 means that the register is not predefined.
 * Should a predefined read-only register with the value -1 ever
 * be needed, a different special value has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	const char	 c = *name;

	/* ASCII approximation mode off, left margin adjustment only. */
	if (c == 'A' || c == 'j')
		return 0;

	/* Groff compatibility mode on, output device defined. */
	if (c == 'g' || c == 'T')
		return 1;

	/* Fixed horizontal resolution. */
	if (c == 'H')
		return 24;

	/* Fixed vertical resolution. */
	if (c == 'V')
		return 40;

	return -1;
}
2201 
2202 int
roff_getreg(const struct roff * r,const char * name)2203 roff_getreg(const struct roff *r, const char *name)
2204 {
2205 	struct roffreg	*reg;
2206 	int		 val;
2207 
2208 	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2209 		val = roff_getregro(name + 1);
2210 		if (-1 != val)
2211 			return (val);
2212 	}
2213 
2214 	for (reg = r->regtab; reg; reg = reg->next)
2215 		if (0 == strcmp(name, reg->key.p))
2216 			return(reg->val);
2217 
2218 	return(0);
2219 }
2220 
2221 static int
roff_getregn(const struct roff * r,const char * name,size_t len)2222 roff_getregn(const struct roff *r, const char *name, size_t len)
2223 {
2224 	struct roffreg	*reg;
2225 	int		 val;
2226 
2227 	if ('.' == name[0] && 2 == len) {
2228 		val = roff_getregro(name + 1);
2229 		if (-1 != val)
2230 			return (val);
2231 	}
2232 
2233 	for (reg = r->regtab; reg; reg = reg->next)
2234 		if (len == reg->key.sz &&
2235 		    0 == strncmp(name, reg->key.p, len))
2236 			return(reg->val);
2237 
2238 	return(0);
2239 }
2240 
2241 static void
roff_freereg(struct roffreg * reg)2242 roff_freereg(struct roffreg *reg)
2243 {
2244 	struct roffreg	*old_reg;
2245 
2246 	while (NULL != reg) {
2247 		free(reg->key.p);
2248 		old_reg = reg;
2249 		reg = reg->next;
2250 		free(old_reg);
2251 	}
2252 }
2253 
2254 static enum rofferr
roff_nr(ROFF_ARGS)2255 roff_nr(ROFF_ARGS)
2256 {
2257 	char		*key, *val;
2258 	size_t		 keysz;
2259 	int		 iv;
2260 	char		 sign;
2261 
2262 	key = val = buf->buf + pos;
2263 	if (*key == '\0')
2264 		return(ROFF_IGN);
2265 
2266 	keysz = roff_getname(r, &val, ln, pos);
2267 	if (key[keysz] == '\\')
2268 		return(ROFF_IGN);
2269 	key[keysz] = '\0';
2270 
2271 	sign = *val;
2272 	if (sign == '+' || sign == '-')
2273 		val++;
2274 
2275 	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2276 		roff_setreg(r, key, iv, sign);
2277 
2278 	return(ROFF_IGN);
2279 }
2280 
2281 static enum rofferr
roff_rr(ROFF_ARGS)2282 roff_rr(ROFF_ARGS)
2283 {
2284 	struct roffreg	*reg, **prev;
2285 	char		*name, *cp;
2286 	size_t		 namesz;
2287 
2288 	name = cp = buf->buf + pos;
2289 	if (*name == '\0')
2290 		return(ROFF_IGN);
2291 	namesz = roff_getname(r, &cp, ln, pos);
2292 	name[namesz] = '\0';
2293 
2294 	prev = &r->regtab;
2295 	while (1) {
2296 		reg = *prev;
2297 		if (reg == NULL || !strcmp(name, reg->key.p))
2298 			break;
2299 		prev = &reg->next;
2300 	}
2301 	if (reg != NULL) {
2302 		*prev = reg->next;
2303 		free(reg->key.p);
2304 		free(reg);
2305 	}
2306 	return(ROFF_IGN);
2307 }
2308 
2309 static enum rofferr
roff_rm(ROFF_ARGS)2310 roff_rm(ROFF_ARGS)
2311 {
2312 	const char	 *name;
2313 	char		 *cp;
2314 	size_t		  namesz;
2315 
2316 	cp = buf->buf + pos;
2317 	while (*cp != '\0') {
2318 		name = cp;
2319 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2320 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2321 		if (name[namesz] == '\\')
2322 			break;
2323 	}
2324 	return(ROFF_IGN);
2325 }
2326 
2327 static enum rofferr
roff_it(ROFF_ARGS)2328 roff_it(ROFF_ARGS)
2329 {
2330 	int		 iv;
2331 
2332 	/* Parse the number of lines. */
2333 
2334 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2335 		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2336 		    ln, ppos, buf->buf + 1);
2337 		return(ROFF_IGN);
2338 	}
2339 
2340 	while (isspace((unsigned char)buf->buf[pos]))
2341 		pos++;
2342 
2343 	/*
2344 	 * Arm the input line trap.
2345 	 * Special-casing "an-trap" is an ugly workaround to cope
2346 	 * with DocBook stupidly fiddling with man(7) internals.
2347 	 */
2348 
2349 	roffit_lines = iv;
2350 	roffit_macro = mandoc_strdup(iv != 1 ||
2351 	    strcmp(buf->buf + pos, "an-trap") ?
2352 	    buf->buf + pos : "br");
2353 	return(ROFF_IGN);
2354 }
2355 
2356 static enum rofferr
roff_Dd(ROFF_ARGS)2357 roff_Dd(ROFF_ARGS)
2358 {
2359 	const char *const	*cp;
2360 
2361 	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2362 		for (cp = __mdoc_reserved; *cp; cp++)
2363 			roff_setstr(r, *cp, NULL, 0);
2364 
2365 	if (r->format == 0)
2366 		r->format = MPARSE_MDOC;
2367 
2368 	return(ROFF_CONT);
2369 }
2370 
2371 static enum rofferr
roff_TH(ROFF_ARGS)2372 roff_TH(ROFF_ARGS)
2373 {
2374 	const char *const	*cp;
2375 
2376 	if ((r->options & MPARSE_QUICK) == 0)
2377 		for (cp = __man_reserved; *cp; cp++)
2378 			roff_setstr(r, *cp, NULL, 0);
2379 
2380 	if (r->format == 0)
2381 		r->format = MPARSE_MAN;
2382 
2383 	return(ROFF_CONT);
2384 }
2385 
2386 static enum rofferr
roff_TE(ROFF_ARGS)2387 roff_TE(ROFF_ARGS)
2388 {
2389 
2390 	if (NULL == r->tbl)
2391 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2392 		    ln, ppos, "TE");
2393 	else if ( ! tbl_end(&r->tbl)) {
2394 		free(buf->buf);
2395 		buf->buf = mandoc_strdup(".sp");
2396 		buf->sz = 4;
2397 		return(ROFF_REPARSE);
2398 	}
2399 	return(ROFF_IGN);
2400 }
2401 
2402 static enum rofferr
roff_T_(ROFF_ARGS)2403 roff_T_(ROFF_ARGS)
2404 {
2405 
2406 	if (NULL == r->tbl)
2407 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2408 		    ln, ppos, "T&");
2409 	else
2410 		tbl_restart(ppos, ln, r->tbl);
2411 
2412 	return(ROFF_IGN);
2413 }
2414 
2415 /*
2416  * Handle in-line equation delimiters.
2417  */
2418 static enum rofferr
roff_eqndelim(struct roff * r,struct buf * buf,int pos)2419 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2420 {
2421 	char		*cp1, *cp2;
2422 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2423 
2424 	/*
2425 	 * Outside equations, look for an opening delimiter.
2426 	 * If we are inside an equation, we already know it is
2427 	 * in-line, or this function wouldn't have been called;
2428 	 * so look for a closing delimiter.
2429 	 */
2430 
2431 	cp1 = buf->buf + pos;
2432 	cp2 = strchr(cp1, r->eqn == NULL ?
2433 	    r->last_eqn->odelim : r->last_eqn->cdelim);
2434 	if (cp2 == NULL)
2435 		return(ROFF_CONT);
2436 
2437 	*cp2++ = '\0';
2438 	bef_pr = bef_nl = aft_nl = aft_pr = "";
2439 
2440 	/* Handle preceding text, protecting whitespace. */
2441 
2442 	if (*buf->buf != '\0') {
2443 		if (r->eqn == NULL)
2444 			bef_pr = "\\&";
2445 		bef_nl = "\n";
2446 	}
2447 
2448 	/*
2449 	 * Prepare replacing the delimiter with an equation macro
2450 	 * and drop leading white space from the equation.
2451 	 */
2452 
2453 	if (r->eqn == NULL) {
2454 		while (*cp2 == ' ')
2455 			cp2++;
2456 		mac = ".EQ";
2457 	} else
2458 		mac = ".EN";
2459 
2460 	/* Handle following text, protecting whitespace. */
2461 
2462 	if (*cp2 != '\0') {
2463 		aft_nl = "\n";
2464 		if (r->eqn != NULL)
2465 			aft_pr = "\\&";
2466 	}
2467 
2468 	/* Do the actual replacement. */
2469 
2470 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2471 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2472 	free(buf->buf);
2473 	buf->buf = cp1;
2474 
2475 	/* Toggle the in-line state of the eqn subsystem. */
2476 
2477 	r->eqn_inline = r->eqn == NULL;
2478 	return(ROFF_REPARSE);
2479 }
2480 
2481 static enum rofferr
roff_EQ(ROFF_ARGS)2482 roff_EQ(ROFF_ARGS)
2483 {
2484 	struct eqn_node *e;
2485 
2486 	assert(r->eqn == NULL);
2487 	e = eqn_alloc(ppos, ln, r->parse);
2488 
2489 	if (r->last_eqn) {
2490 		r->last_eqn->next = e;
2491 		e->delim = r->last_eqn->delim;
2492 		e->odelim = r->last_eqn->odelim;
2493 		e->cdelim = r->last_eqn->cdelim;
2494 	} else
2495 		r->first_eqn = r->last_eqn = e;
2496 
2497 	r->eqn = r->last_eqn = e;
2498 
2499 	if (buf->buf[pos] != '\0')
2500 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2501 		    ".EQ %s", buf->buf + pos);
2502 
2503 	return(ROFF_IGN);
2504 }
2505 
2506 static enum rofferr
roff_EN(ROFF_ARGS)2507 roff_EN(ROFF_ARGS)
2508 {
2509 
2510 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2511 	return(ROFF_IGN);
2512 }
2513 
2514 static enum rofferr
roff_TS(ROFF_ARGS)2515 roff_TS(ROFF_ARGS)
2516 {
2517 	struct tbl_node	*tbl;
2518 
2519 	if (r->tbl) {
2520 		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2521 		    ln, ppos, "TS breaks TS");
2522 		tbl_end(&r->tbl);
2523 	}
2524 
2525 	tbl = tbl_alloc(ppos, ln, r->parse);
2526 
2527 	if (r->last_tbl)
2528 		r->last_tbl->next = tbl;
2529 	else
2530 		r->first_tbl = r->last_tbl = tbl;
2531 
2532 	r->tbl = r->last_tbl = tbl;
2533 	return(ROFF_IGN);
2534 }
2535 
2536 static enum rofferr
roff_brp(ROFF_ARGS)2537 roff_brp(ROFF_ARGS)
2538 {
2539 
2540 	buf->buf[pos - 1] = '\0';
2541 	return(ROFF_CONT);
2542 }
2543 
2544 static enum rofferr
roff_cc(ROFF_ARGS)2545 roff_cc(ROFF_ARGS)
2546 {
2547 	const char	*p;
2548 
2549 	p = buf->buf + pos;
2550 
2551 	if (*p == '\0' || (r->control = *p++) == '.')
2552 		r->control = 0;
2553 
2554 	if (*p != '\0')
2555 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2556 		    ln, p - buf->buf, "cc ... %s", p);
2557 
2558 	return(ROFF_IGN);
2559 }
2560 
2561 static enum rofferr
roff_tr(ROFF_ARGS)2562 roff_tr(ROFF_ARGS)
2563 {
2564 	const char	*p, *first, *second;
2565 	size_t		 fsz, ssz;
2566 	enum mandoc_esc	 esc;
2567 
2568 	p = buf->buf + pos;
2569 
2570 	if (*p == '\0') {
2571 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2572 		return(ROFF_IGN);
2573 	}
2574 
2575 	while (*p != '\0') {
2576 		fsz = ssz = 1;
2577 
2578 		first = p++;
2579 		if (*first == '\\') {
2580 			esc = mandoc_escape(&p, NULL, NULL);
2581 			if (esc == ESCAPE_ERROR) {
2582 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2583 				    ln, (int)(p - buf->buf), first);
2584 				return(ROFF_IGN);
2585 			}
2586 			fsz = (size_t)(p - first);
2587 		}
2588 
2589 		second = p++;
2590 		if (*second == '\\') {
2591 			esc = mandoc_escape(&p, NULL, NULL);
2592 			if (esc == ESCAPE_ERROR) {
2593 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2594 				    ln, (int)(p - buf->buf), second);
2595 				return(ROFF_IGN);
2596 			}
2597 			ssz = (size_t)(p - second);
2598 		} else if (*second == '\0') {
2599 			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2600 			    ln, first - buf->buf, "tr %s", first);
2601 			second = " ";
2602 			p--;
2603 		}
2604 
2605 		if (fsz > 1) {
2606 			roff_setstrn(&r->xmbtab, first, fsz,
2607 			    second, ssz, 0);
2608 			continue;
2609 		}
2610 
2611 		if (r->xtab == NULL)
2612 			r->xtab = mandoc_calloc(128,
2613 			    sizeof(struct roffstr));
2614 
2615 		free(r->xtab[(int)*first].p);
2616 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2617 		r->xtab[(int)*first].sz = ssz;
2618 	}
2619 
2620 	return(ROFF_IGN);
2621 }
2622 
2623 static enum rofferr
roff_so(ROFF_ARGS)2624 roff_so(ROFF_ARGS)
2625 {
2626 	char *name, *cp;
2627 
2628 	name = buf->buf + pos;
2629 	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2630 
2631 	/*
2632 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
2633 	 * opening anything that's not in our cwd or anything beneath
2634 	 * it.  Thus, explicitly disallow traversing up the file-system
2635 	 * or using absolute paths.
2636 	 */
2637 
2638 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2639 		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2640 		    ".so %s", name);
2641 		buf->sz = mandoc_asprintf(&cp,
2642 		    ".sp\nSee the file %s.\n.sp", name) + 1;
2643 		free(buf->buf);
2644 		buf->buf = cp;
2645 		*offs = 0;
2646 		return(ROFF_REPARSE);
2647 	}
2648 
2649 	*offs = pos;
2650 	return(ROFF_SO);
2651 }
2652 
2653 static enum rofferr
roff_userdef(ROFF_ARGS)2654 roff_userdef(ROFF_ARGS)
2655 {
2656 	const char	 *arg[9], *ap;
2657 	char		 *cp, *n1, *n2;
2658 	int		  i;
2659 	size_t		  asz, rsz;
2660 
2661 	/*
2662 	 * Collect pointers to macro argument strings
2663 	 * and NUL-terminate them.
2664 	 */
2665 
2666 	cp = buf->buf + pos;
2667 	for (i = 0; i < 9; i++)
2668 		arg[i] = *cp == '\0' ? "" :
2669 		    mandoc_getarg(r->parse, &cp, ln, &pos);
2670 
2671 	/*
2672 	 * Expand macro arguments.
2673 	 */
2674 
2675 	buf->sz = strlen(r->current_string) + 1;
2676 	n1 = cp = mandoc_malloc(buf->sz);
2677 	memcpy(n1, r->current_string, buf->sz);
2678 	while (*cp != '\0') {
2679 
2680 		/* Scan ahead for the next argument invocation. */
2681 
2682 		if (*cp++ != '\\')
2683 			continue;
2684 		if (*cp++ != '$')
2685 			continue;
2686 		i = *cp - '1';
2687 		if (0 > i || 8 < i)
2688 			continue;
2689 		cp -= 2;
2690 
2691 		/*
2692 		 * Determine the size of the expanded argument,
2693 		 * taking escaping of quotes into account.
2694 		 */
2695 
2696 		asz = 0;
2697 		for (ap = arg[i]; *ap != '\0'; ap++) {
2698 			asz++;
2699 			if (*ap == '"')
2700 				asz += 3;
2701 		}
2702 		if (asz != 3) {
2703 
2704 			/*
2705 			 * Determine the size of the rest of the
2706 			 * unexpanded macro, including the NUL.
2707 			 */
2708 
2709 			rsz = buf->sz - (cp - n1) - 3;
2710 
2711 			/*
2712 			 * When shrinking, move before
2713 			 * releasing the storage.
2714 			 */
2715 
2716 			if (asz < 3)
2717 				memmove(cp + asz, cp + 3, rsz);
2718 
2719 			/*
2720 			 * Resize the storage for the macro
2721 			 * and readjust the parse pointer.
2722 			 */
2723 
2724 			buf->sz += asz - 3;
2725 			n2 = mandoc_realloc(n1, buf->sz);
2726 			cp = n2 + (cp - n1);
2727 			n1 = n2;
2728 
2729 			/*
2730 			 * When growing, make room
2731 			 * for the expanded argument.
2732 			 */
2733 
2734 			if (asz > 3)
2735 				memmove(cp + asz, cp + 3, rsz);
2736 		}
2737 
2738 		/* Copy the expanded argument, escaping quotes. */
2739 
2740 		n2 = cp;
2741 		for (ap = arg[i]; *ap != '\0'; ap++) {
2742 			if (*ap == '"') {
2743 				memcpy(n2, "\\(dq", 4);
2744 				n2 += 4;
2745 			} else
2746 				*n2++ = *ap;
2747 		}
2748 	}
2749 
2750 	/*
2751 	 * Replace the macro invocation
2752 	 * by the expanded macro.
2753 	 */
2754 
2755 	free(buf->buf);
2756 	buf->buf = n1;
2757 	*offs = 0;
2758 
2759 	return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2760 	   ROFF_REPARSE : ROFF_APPEND);
2761 }
2762 
2763 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)2764 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2765 {
2766 	char	 *name, *cp;
2767 	size_t	  namesz;
2768 
2769 	name = *cpp;
2770 	if ('\0' == *name)
2771 		return(0);
2772 
2773 	/* Read until end of name and terminate it with NUL. */
2774 	for (cp = name; 1; cp++) {
2775 		if ('\0' == *cp || ' ' == *cp) {
2776 			namesz = cp - name;
2777 			break;
2778 		}
2779 		if ('\\' != *cp)
2780 			continue;
2781 		namesz = cp - name;
2782 		if ('{' == cp[1] || '}' == cp[1])
2783 			break;
2784 		cp++;
2785 		if ('\\' == *cp)
2786 			continue;
2787 		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2788 		    "%.*s", (int)(cp - name + 1), name);
2789 		mandoc_escape((const char **)&cp, NULL, NULL);
2790 		break;
2791 	}
2792 
2793 	/* Read past spaces. */
2794 	while (' ' == *cp)
2795 		cp++;
2796 
2797 	*cpp = cp;
2798 	return(namesz);
2799 }
2800 
2801 /*
2802  * Store *string into the user-defined string called *name.
2803  * To clear an existing entry, call with (*r, *name, NULL, 0).
2804  * append == 0: replace mode
2805  * append == 1: single-line append mode
2806  * append == 2: multiline append mode, append '\n' after each call
2807  */
2808 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)2809 roff_setstr(struct roff *r, const char *name, const char *string,
2810 	int append)
2811 {
2812 
2813 	roff_setstrn(&r->strtab, name, strlen(name), string,
2814 	    string ? strlen(string) : 0, append);
2815 }
2816 
2817 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)2818 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2819 		const char *string, size_t stringsz, int append)
2820 {
2821 	struct roffkv	*n;
2822 	char		*c;
2823 	int		 i;
2824 	size_t		 oldch, newch;
2825 
2826 	/* Search for an existing string with the same name. */
2827 	n = *r;
2828 
2829 	while (n && (namesz != n->key.sz ||
2830 			strncmp(n->key.p, name, namesz)))
2831 		n = n->next;
2832 
2833 	if (NULL == n) {
2834 		/* Create a new string table entry. */
2835 		n = mandoc_malloc(sizeof(struct roffkv));
2836 		n->key.p = mandoc_strndup(name, namesz);
2837 		n->key.sz = namesz;
2838 		n->val.p = NULL;
2839 		n->val.sz = 0;
2840 		n->next = *r;
2841 		*r = n;
2842 	} else if (0 == append) {
2843 		free(n->val.p);
2844 		n->val.p = NULL;
2845 		n->val.sz = 0;
2846 	}
2847 
2848 	if (NULL == string)
2849 		return;
2850 
2851 	/*
2852 	 * One additional byte for the '\n' in multiline mode,
2853 	 * and one for the terminating '\0'.
2854 	 */
2855 	newch = stringsz + (1 < append ? 2u : 1u);
2856 
2857 	if (NULL == n->val.p) {
2858 		n->val.p = mandoc_malloc(newch);
2859 		*n->val.p = '\0';
2860 		oldch = 0;
2861 	} else {
2862 		oldch = n->val.sz;
2863 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2864 	}
2865 
2866 	/* Skip existing content in the destination buffer. */
2867 	c = n->val.p + (int)oldch;
2868 
2869 	/* Append new content to the destination buffer. */
2870 	i = 0;
2871 	while (i < (int)stringsz) {
2872 		/*
2873 		 * Rudimentary roff copy mode:
2874 		 * Handle escaped backslashes.
2875 		 */
2876 		if ('\\' == string[i] && '\\' == string[i + 1])
2877 			i++;
2878 		*c++ = string[i++];
2879 	}
2880 
2881 	/* Append terminating bytes. */
2882 	if (1 < append)
2883 		*c++ = '\n';
2884 
2885 	*c = '\0';
2886 	n->val.sz = (int)(c - n->val.p);
2887 }
2888 
2889 static const char *
roff_getstrn(const struct roff * r,const char * name,size_t len)2890 roff_getstrn(const struct roff *r, const char *name, size_t len)
2891 {
2892 	const struct roffkv *n;
2893 	int i;
2894 
2895 	for (n = r->strtab; n; n = n->next)
2896 		if (0 == strncmp(name, n->key.p, len) &&
2897 		    '\0' == n->key.p[(int)len])
2898 			return(n->val.p);
2899 
2900 	for (i = 0; i < PREDEFS_MAX; i++)
2901 		if (0 == strncmp(name, predefs[i].name, len) &&
2902 				'\0' == predefs[i].name[(int)len])
2903 			return(predefs[i].str);
2904 
2905 	return(NULL);
2906 }
2907 
2908 static void
roff_freestr(struct roffkv * r)2909 roff_freestr(struct roffkv *r)
2910 {
2911 	struct roffkv	 *n, *nn;
2912 
2913 	for (n = r; n; n = nn) {
2914 		free(n->key.p);
2915 		free(n->val.p);
2916 		nn = n->next;
2917 		free(n);
2918 	}
2919 }
2920 
2921 const struct tbl_span *
roff_span(const struct roff * r)2922 roff_span(const struct roff *r)
2923 {
2924 
2925 	return(r->tbl ? tbl_span(r->tbl) : NULL);
2926 }
2927 
2928 const struct eqn *
roff_eqn(const struct roff * r)2929 roff_eqn(const struct roff *r)
2930 {
2931 
2932 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2933 }
2934 
2935 /*
2936  * Duplicate an input string, making the appropriate character
2937  * conversations (as stipulated by `tr') along the way.
2938  * Returns a heap-allocated string with all the replacements made.
2939  */
2940 char *
roff_strdup(const struct roff * r,const char * p)2941 roff_strdup(const struct roff *r, const char *p)
2942 {
2943 	const struct roffkv *cp;
2944 	char		*res;
2945 	const char	*pp;
2946 	size_t		 ssz, sz;
2947 	enum mandoc_esc	 esc;
2948 
2949 	if (NULL == r->xmbtab && NULL == r->xtab)
2950 		return(mandoc_strdup(p));
2951 	else if ('\0' == *p)
2952 		return(mandoc_strdup(""));
2953 
2954 	/*
2955 	 * Step through each character looking for term matches
2956 	 * (remember that a `tr' can be invoked with an escape, which is
2957 	 * a glyph but the escape is multi-character).
2958 	 * We only do this if the character hash has been initialised
2959 	 * and the string is >0 length.
2960 	 */
2961 
2962 	res = NULL;
2963 	ssz = 0;
2964 
2965 	while ('\0' != *p) {
2966 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2967 			sz = r->xtab[(int)*p].sz;
2968 			res = mandoc_realloc(res, ssz + sz + 1);
2969 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2970 			ssz += sz;
2971 			p++;
2972 			continue;
2973 		} else if ('\\' != *p) {
2974 			res = mandoc_realloc(res, ssz + 2);
2975 			res[ssz++] = *p++;
2976 			continue;
2977 		}
2978 
2979 		/* Search for term matches. */
2980 		for (cp = r->xmbtab; cp; cp = cp->next)
2981 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
2982 				break;
2983 
2984 		if (NULL != cp) {
2985 			/*
2986 			 * A match has been found.
2987 			 * Append the match to the array and move
2988 			 * forward by its keysize.
2989 			 */
2990 			res = mandoc_realloc(res,
2991 			    ssz + cp->val.sz + 1);
2992 			memcpy(res + ssz, cp->val.p, cp->val.sz);
2993 			ssz += cp->val.sz;
2994 			p += (int)cp->key.sz;
2995 			continue;
2996 		}
2997 
2998 		/*
2999 		 * Handle escapes carefully: we need to copy
3000 		 * over just the escape itself, or else we might
3001 		 * do replacements within the escape itself.
3002 		 * Make sure to pass along the bogus string.
3003 		 */
3004 		pp = p++;
3005 		esc = mandoc_escape(&p, NULL, NULL);
3006 		if (ESCAPE_ERROR == esc) {
3007 			sz = strlen(pp);
3008 			res = mandoc_realloc(res, ssz + sz + 1);
3009 			memcpy(res + ssz, pp, sz);
3010 			break;
3011 		}
3012 		/*
3013 		 * We bail out on bad escapes.
3014 		 * No need to warn: we already did so when
3015 		 * roff_res() was called.
3016 		 */
3017 		sz = (int)(p - pp);
3018 		res = mandoc_realloc(res, ssz + sz + 1);
3019 		memcpy(res + ssz, pp, sz);
3020 		ssz += sz;
3021 	}
3022 
3023 	res[(int)ssz] = '\0';
3024 	return(res);
3025 }
3026 
3027 int
roff_getformat(const struct roff * r)3028 roff_getformat(const struct roff *r)
3029 {
3030 
3031 	return(r->format);
3032 }
3033 
3034 /*
3035  * Find out whether a line is a macro line or not.
3036  * If it is, adjust the current position and return one; if it isn't,
3037  * return zero and don't change the current position.
3038  * If the control character has been set with `.cc', then let that grain
3039  * precedence.
3040  * This is slighly contrary to groff, where using the non-breaking
3041  * control character when `cc' has been invoked will cause the
3042  * non-breaking macro contents to be printed verbatim.
3043  */
3044 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)3045 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3046 {
3047 	int		pos;
3048 
3049 	pos = *ppos;
3050 
3051 	if (0 != r->control && cp[pos] == r->control)
3052 		pos++;
3053 	else if (0 != r->control)
3054 		return(0);
3055 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3056 		pos += 2;
3057 	else if ('.' == cp[pos] || '\'' == cp[pos])
3058 		pos++;
3059 	else
3060 		return(0);
3061 
3062 	while (' ' == cp[pos] || '\t' == cp[pos])
3063 		pos++;
3064 
3065 	*ppos = pos;
3066 	return(1);
3067 }
3068